Mercurial > lbo > hg > pcombinators
changeset 16:0d0f03327145 draft
Implement native Integer parser (with up to 2x performance)
author | Lewin Bormann <lbo@spheniscida.de> |
---|---|
date | Sun, 19 May 2019 21:15:23 +0200 |
parents | f405236a2f6c |
children | 720d3dfcaa09 |
files | combinators.py |
diffstat | 1 files changed, 48 insertions(+), 37 deletions(-) [+] |
line wrap: on
line diff
--- a/combinators.py Sun May 19 21:04:04 2019 +0200
+++ b/combinators.py Sun May 19 21:15:23 2019 +0200
@@ -77,11 +77,11 @@
         return AtomicSequence(self, other)
 
     def __mul__(self, times):
-        """Repeat a parser, exactly `times`."""
+        """Repeat a parser exactly `times`."""
         return StrictRepeat(self, times)
 
     def __rmul__(self, times):
-        """Repeat a parser, exactly `times`."""
+        """Repeat a parser exactly `times`."""
         return self.__mul__(times)
 
     def __or__(self, other):
@@ -102,6 +102,11 @@
         """
         return _Transform(self, fn)
 
+    def then(self, next):
+        """Consume part of the input, discarding it, and return the result
+        parsed by the supplied next parser."""
+        return Last(AtomicSequence(self, next))
+
 # Combinators
 
 class _Transform(Parser):
@@ -147,6 +152,16 @@
             st = st2
         return results, st2
 
+
+class AtomicSequence(_Sequence):
+    """Execute a series of parsers after each other. All must succeed. Result is list of results of the parsers."""
+    _atomic = True
+
+class OptimisticSequence(_Sequence):
+    """Execute a series of parsers after each other, as far as possible
+    (until the first parser fails). Result is list of results of the parsers."""
+    _atomic = False
+
 class _Repeat(Parser):
     _parser = None
     _times = 0
@@ -185,15 +200,6 @@
 def Maybe(p):
     return Repeat(p, 1)
 
-class AtomicSequence(_Sequence):
-    """Execute a series of parsers after each other. All must succeed. Result is list of results of the parsers."""
-    _atomic = True
-
-class OptimisticSequence(_Sequence):
-    """Execute a series of parsers after each other, as far as possible
-    (until the first parser fails). Result is list of results of the parsers."""
-    _atomic = False
-
 class _Alternative(Parser):
     """Attempt a series of parsers and return the result of the first one matching."""
     _parsers = []
@@ -325,6 +331,7 @@
 # Small specific parsers.
 
 def Nothing():
+    """Matches the empty string, and always succeeds."""
     return String('')
 
 def CharSet(s):
@@ -332,29 +339,12 @@
     Result is string."""
     return ConcatenateResults(Repeat(OneOf(s), -1))
 
+# See section below for optimized versions of the following parsers.
+
 def CanonicalInteger():
     """Return a parser that parses integers and results in an integer. Result is int."""
     return Last(Whitespace() + (ConcatenateResults(Maybe(String('-')) + CharSet('0123456789')) >> int))
 
-class Integer():
-    """Parser for integers of form [-]dddd[...]. Result is int.
-
-    This parser is up to twice as fast as CanonicalInteger and thus implemented
-    manually."""
-    _digits = CharSet('0123456789')
-
-    def parse(self, st):
-        initial = st.index()
-        multiplier = 1
-        minus, st = String('-').parse(st)
-        if minus is not None:
-            multiplier = -1
-        digits, st = self._digits.parse(st)
-        if digits is not None:
-            return int(digits)*multiplier, st
-        st.reset(initial)
-        return None, st
-
 def CanonicalFloat():
     """Return a parser that parses floats and results in floats. Result is float."""
     def c(l):
@@ -367,6 +357,17 @@
             Repeat(OneOf('.'), 1) + CharSet('0123456789'))
     return (Skip(Whitespace()) + number) >> c
 
+def NonEmptyString():
+    """Return a parser that parses a string until the first whitespace,
+    skipping whitespace before. Result is string."""
+    return Last(Whitespace() + Regex('\w+'))
+
+def Whitespace():
+    """Parse whitespace (space, newline, tab). Result is string."""
+    return CharSet(' \n\r\t') | Nothing()
+
+# Optimized parsers
+
 class Float():
     """Parses a float like [-]ddd[.ddd].
 
@@ -392,11 +393,21 @@
             return float(big + '.' + small) * multiplier, st
         return float(big) * multiplier, st
 
-def NonEmptyString():
-    """Return a parser that parses a string until the first whitespace,
-    skipping whitespace before. Result is string."""
-    return Last(Whitespace() + Regex('\w+'))
+class Integer():
+    """Parser for integers of form [-]dddd[...]. Result is int.
+
+    This parser is up to twice as fast as CanonicalInteger and thus implemented
+    manually."""
+    _digits = CharSet('0123456789')
 
-def Whitespace():
-    """Parse whitespace (space, newline, tab). Result is string."""
-    return CharSet(' \n\r\t') | Nothing()
\ No newline at end of file
+    def parse(self, st):
+        initial = st.index()
+        multiplier = 1
+        minus, st = String('-').parse(st)
+        if minus is not None:
+            multiplier = -1
+        digits, st = self._digits.parse(st)
+        if digits is not None:
+            return int(digits)*multiplier, st
+        st.reset(initial)
+        return None, st
\ No newline at end of file