changeset 39:8b9c4713b049 draft

Introduce ParseFileState
author Lewin Bormann <lbo@spheniscida.de>
date Thu, 23 May 2019 19:54:44 +0200
parents 74801fe3d0dc
children e9cb1d6b12d3
files pcombinators/state.py
diffstat 1 files changed, 56 insertions(+), 15 deletions(-) [+]
line wrap: on
line diff
--- a/pcombinators/state.py	Thu May 23 19:53:50 2019 +0200
+++ b/pcombinators/state.py	Thu May 23 19:54:44 2019 +0200
@@ -14,11 +14,13 @@
 class _State:
     """Generic parsing state representation."""
 
-    _holds = [] # List of indices that are still marked as needed. Ascending
-
     def next(self):
         pass
 
+    def advance(self, n):
+        for i in range(0, n):
+            self.next()
+
     def peek(self):
         raise NotImplementedError()
 
@@ -43,8 +45,23 @@
         return self.ParserHold(self.index())
 
     def release(self, hold):
-        self._holds.pop(hold.total_index)
+        """Release a hold. Generally called when a parser was successful."""
+        assert hold.total_index >= 0, 'double release'
+        assert self._holds[-1] == hold.total_index
+        self._holds.pop()
         self._maybe_collect()
+        hold.total_index = -1
+
+    def reset(self, hold):
+        """Release hold and reset index to its position."""
+        # Reset is only allowed when this hold is the most recent one still
+        # outstanding. The assertions catch a caller that already released
+        # (or reset) the hold it now wants to reset to.
+        assert hold.total_index >= 0, 'double reset'
+        assert self._holds[-1] == hold.total_index
+        self._reset_index(hold.total_index)
+        self._holds.pop()
+        hold.total_index = -2
 
     def __iter__(self):
         return self
@@ -64,18 +81,21 @@
     def error(self, msg):
         raise ParseException(msg)
 
-    def reset(self):
-        raise NotImplementedError('use holds!')
 
 class ParseFileState(_State):
     """A lazy parsing state implementation, reading from stream."""
-    _fobj = None
-    _buf = [] # List of characters.
-
     _index = 0 # Index in current _buf
     _total_offset = 0 # Index of first _buf entry in stream since start
 
+    def __repr__(self):
+        return 'PFS(ix={}, to={}, buf={})'.format(self._index, self._total_offset, self._buf)
+
     def __init__(self, f):
+        self._stream_finished = False
+        self._holds = []
+        self._buf = []
+        self._index = 0
+        self._total_offset = 0
         if type(f) is str:
             self._fobj = open(f, 'r')
         elif isinstance(f, io.IOBase):
@@ -86,17 +106,21 @@
     def __del__(self):
         if self._fobj:
             self._fobj.close()
+
     def _maybe_collect(self):
         # No holds left, forget everything up to now.
         if len(self._holds) == 0:
             self._buf = self._buf[self._index:]
-        else: # Find oldest hold and update buffer.
+        else: # Find oldest hold and update buffer to hold everything from the oldest hold onwards.
             assert sorted(self._holds) == self._holds
             to_clean = self._holds[0]-self._total_offset
-            self._buf = self._buf[:to_clean]
+            self._buf = self._buf[to_clean:]
             self._total_offset += to_clean
             self._index -= to_clean
-            self._holds.pop(0)
+
+    def _reset_index(self, i):
+        assert i >= self._total_offset and i <= self._total_offset + self._index
+        self._index = i - self._total_offset
 
     def index(self):
         return self._total_offset + self._index
@@ -105,17 +129,28 @@
 
     def fill_buffer(self, min=0):
         if len(self._buf)-self._index <= min:
-            self._buf.extend(self._fobj.read(self.PREFILL))
+            new = self._fobj.read(self.PREFILL)
+            self._buf.extend(new)
+            if len(new) == 0:
+                self._stream_finished = True
 
     def peek(self):
         self.fill_buffer()
+        if self.finished():
+            return None
         return self._buf[self._index]
 
     def next(self):
+        if self.finished():
+            return None
         self.fill_buffer()
         self._index += 1
         return self._buf[self._index-1]
 
+    def advance(self, n):
+        self.fill_buffer(self._index + n)
+        self._index += n
+
     def remaining(self):
         print('warning: remaining() on ParseFileState is only accurate to up to {} characters lookahead and expensive'.format(self.PREFIL))
         self.fill_buffer(self.PREFILL)
@@ -125,15 +160,18 @@
         print('warning: len() is inaccurate on ParseFileState, returning only past and present state')
         return self._total_offset + len(self._buf)
 
+    def finished(self):
+        return self._stream_finished and self._index == len(self._buf)
+
 class ParseState(_State):
     """Encapsulates state as the parser goes through input supplied as string."""
 
-    _input = ''
-    _index = 0
 
     def __init__(self, s):
         """Create a ParseState object from str s, representing the input to be parsed."""
+        self._holds = []
         self._input = s
+        self._index = 0
 
     def __repr__(self):
         if self._index < len(self._input):
@@ -147,6 +185,9 @@
         self._index += 1
         return current
 
+    def advance(self, n):
+        self._index += n
+
     def peek(self):
         return self._input[self._index]
 
@@ -156,7 +197,7 @@
     def len(self):
         return len(self._input)
 
-    def reset(self, ix):
+    def _reset_index(self, ix):
         self._index = ix
 
     def __iter__(self):