Mercurial > lbo > hg > pcombinators
changeset 39:8b9c4713b049 draft
Introduce ParseFileState
author | Lewin Bormann <lbo@spheniscida.de> |
---|---|
date | Thu, 23 May 2019 19:54:44 +0200 |
parents | 74801fe3d0dc |
children | e9cb1d6b12d3 |
files | pcombinators/state.py |
diffstat | 1 file changed, 56 insertions(+), 15 deletions(-) [+] |
line wrap: on
line diff
--- a/pcombinators/state.py Thu May 23 19:53:50 2019 +0200 +++ b/pcombinators/state.py Thu May 23 19:54:44 2019 +0200 @@ -14,11 +14,13 @@ class _State: """Generic parsing state representation.""" - _holds = [] # List of indices that are still marked as needed. Ascending - def next(self): pass + def advance(self, n): + for i in range(0, n): + self.next() + def peek(self): raise NotImplementedError() @@ -43,8 +45,23 @@ return self.ParserHold(self.index()) def release(self, hold): - self._holds.pop(hold.total_index) + """Release a hold. Generally called when a parser was successful.""" + assert hold.total_index >= 0, 'double release' + assert self._holds[-1] == hold.total_index + self._holds.pop() self._maybe_collect() + hold.total_index = -1 + + def reset(self, hold): + """Release hold and reset index to its position.""" + # Reset is only allowed when this hold is the latest hold or later. + # It is possible that a caller accidentally released a hold that it + # now wants to reset to. + assert hold.total_index >= 0, 'double reset' + assert self._holds[-1] == hold.total_index + self._reset_index(hold.total_index) + self._holds.pop() + hold.total_index = -2 def __iter__(self): return self @@ -64,18 +81,21 @@ def error(self, msg): raise ParseException(msg) - def reset(self): - raise NotImplementedError('use holds!') class ParseFileState(_State): """A lazy parsing state implementation, reading from stream.""" - _fobj = None - _buf = [] # List of characters. 
- _index = 0 # Index in current _buf _total_offset = 0 # Index of first _buf entry in stream since start + def __repr__(self): + return 'PFS(ix={}, to={}, buf={})'.format(self._index, self._total_offset, self._buf) + def __init__(self, f): + self._stream_finished = False + self._holds = [] + self._buf = [] + self._index = 0 + self._total_offset = 0 if type(f) is str: self._fobj = open(f, 'r') elif isinstance(f, io.IOBase): @@ -86,17 +106,21 @@ def __del__(self): if self._fobj: self._fobj.close() + def _maybe_collect(self): # No holds left, forget everything up to now. if len(self._holds) == 0: self._buf = self._buf[self._index:] - else: # Find oldest hold and update buffer. + else: # Find oldest hold and update buffer to hold everything from the oldest hold onwards. assert sorted(self._holds) == self._holds to_clean = self._holds[0]-self._total_offset - self._buf = self._buf[:to_clean] + self._buf = self._buf[to_clean:] self._total_offset += to_clean self._index -= to_clean - self._holds.pop(0) + + def _reset_index(self, i): + assert i >= self._total_offset and i <= self._total_offset + self._index + self._index = i - self._total_offset def index(self): return self._total_offset + self._index @@ -105,17 +129,28 @@ def fill_buffer(self, min=0): if len(self._buf)-self._index <= min: - self._buf.extend(self._fobj.read(self.PREFILL)) + new = self._fobj.read(self.PREFILL) + self._buf.extend(new) + if len(new) == 0: + self._stream_finished = True def peek(self): self.fill_buffer() + if self.finished(): + return None return self._buf[self._index] def next(self): + if self.finished(): + return None self.fill_buffer() self._index += 1 return self._buf[self._index-1] + def advance(self, n): + self.fill_buffer(self._index + n) + self._index += n + def remaining(self): print('warning: remaining() on ParseFileState is only accurate to up to {} characters lookahead and expensive'.format(self.PREFIL)) self.fill_buffer(self.PREFILL) @@ -125,15 +160,18 @@ print('warning: len() is 
inaccurate on ParseFileState, returning only past and present state') return self._total_offset + len(self._buf) + def finished(self): + return self._stream_finished and self._index == len(self._buf) + class ParseState(_State): """Encapsulates state as the parser goes through input supplied as string.""" - _input = '' - _index = 0 def __init__(self, s): """Create a ParseState object from str s, representing the input to be parsed.""" + self._holds = [] self._input = s + self._index = 0 def __repr__(self): if self._index < len(self._input): @@ -147,6 +185,9 @@ self._index += 1 return current + def advance(self, n): + self._index += n + def peek(self): return self._input[self._index] @@ -156,7 +197,7 @@ def len(self): return len(self._input) - def reset(self, ix): + def _reset_index(self, ix): self._index = ix def __iter__(self):