changeset 16:0d0f03327145 draft

Implement native Integer parser (up to 2x faster than CanonicalInteger)
author Lewin Bormann <lbo@spheniscida.de>
date Sun, 19 May 2019 21:15:23 +0200
parents f405236a2f6c
children 720d3dfcaa09
files combinators.py
diffstat 1 files changed, 48 insertions(+), 37 deletions(-) [+]
line wrap: on
line diff
--- a/combinators.py	Sun May 19 21:04:04 2019 +0200
+++ b/combinators.py	Sun May 19 21:15:23 2019 +0200
@@ -77,11 +77,11 @@
         return AtomicSequence(self, other)
 
     def __mul__(self, times):
-        """Repeat a parser, exactly `times`."""
+        """Repeat a parser exactly `times`."""
         return StrictRepeat(self, times)
 
     def __rmul__(self, times):
-        """Repeat a parser, exactly `times`."""
+        """Repeat a parser exactly `times`."""
         return self.__mul__(times)
 
     def __or__(self, other):
@@ -102,6 +102,11 @@
         """
         return _Transform(self, fn)
 
+    def then(self, next):
+        """Consume part of the input, discarding it, and return the result
+        parsed by the supplied next parser."""
+        return Last(AtomicSequence(self, next))
+
 # Combinators
 
 class _Transform(Parser):
@@ -147,6 +152,16 @@
             st = st2
         return results, st2
 
+
+class AtomicSequence(_Sequence):
+    """Execute a series of parsers after each other. All must succeed. Result is list of results of the parsers."""
+    _atomic = True
+
+class OptimisticSequence(_Sequence):
+    """Execute a series of parsers after each other, as far as possible
+    (until the first parser fails). Result is list of results of the parsers."""
+    _atomic = False
+
 class _Repeat(Parser):
     _parser = None
     _times = 0
@@ -185,15 +200,6 @@
 def Maybe(p):
     return Repeat(p, 1)
 
-class AtomicSequence(_Sequence):
-    """Execute a series of parsers after each other. All must succeed. Result is list of results of the parsers."""
-    _atomic = True
-
-class OptimisticSequence(_Sequence):
-    """Execute a series of parsers after each other, as far as possible
-    (until the first parser fails). Result is list of results of the parsers."""
-    _atomic = False
-
 class _Alternative(Parser):
     """Attempt a series of parsers and return the result of the first one matching."""
     _parsers = []
@@ -325,6 +331,7 @@
 # Small specific parsers.
 
 def Nothing():
+    """Matches the empty string, and always succeeds."""
     return String('')
 
 def CharSet(s):
@@ -332,29 +339,12 @@
     Result is string."""
     return ConcatenateResults(Repeat(OneOf(s), -1))
 
+# See section below for optimized versions of the following parsers.
+
 def CanonicalInteger():
     """Return a parser that parses integers and results in an integer. Result is int."""
     return Last(Whitespace() + (ConcatenateResults(Maybe(String('-')) + CharSet('0123456789')) >> int))
 
-class Integer():
-    """Parser for integers of form [-]dddd[...]. Result is int.
-
-    This parser is up to twice as fast as CanonicalInteger and thus implemented
-    manually."""
-    _digits = CharSet('0123456789')
-
-    def parse(self, st):
-        initial = st.index()
-        multiplier = 1
-        minus, st = String('-').parse(st)
-        if minus is not None:
-            multiplier = -1
-        digits, st = self._digits.parse(st)
-        if digits is not None:
-            return int(digits)*multiplier, st
-        st.reset(initial)
-        return None, st
-
 def CanonicalFloat():
     """Return a parser that parses floats and results in floats. Result is float."""
     def c(l):
@@ -367,6 +357,17 @@
             Repeat(OneOf('.'), 1) + CharSet('0123456789'))
     return (Skip(Whitespace()) + number) >> c
 
+def NonEmptyString():
+    """Return a parser that parses a non-empty run of word characters,
+    skipping any whitespace before it. Result is string."""
+    return Last(Whitespace() + Regex('\w+'))
+
+def Whitespace():
+    """Parse optional whitespace (space, newline, carriage return, tab). Result is string."""
+    return CharSet(' \n\r\t') | Nothing()
+
+# Optimized parsers
+
 class Float():
     """Parses a float like [-]ddd[.ddd].
 
@@ -392,11 +393,21 @@
                 return float(big + '.' + small) * multiplier, st
         return float(big) * multiplier, st
 
-def NonEmptyString():
-    """Return a parser that parses a string until the first whitespace,
-    skipping whitespace before. Result is string."""
-    return Last(Whitespace() + Regex('\w+'))
+class Integer():
+    """Parser for integers of form [-]dddd[...]. Result is int.
+
+    This parser is up to twice as fast as CanonicalInteger and thus implemented
+    manually."""
+    _digits = CharSet('0123456789')
 
-def Whitespace():
-    """Parse whitespace (space, newline, tab). Result is string."""
-    return CharSet(' \n\r\t') | Nothing()
\ No newline at end of file
+    def parse(self, st):
+        initial = st.index()
+        multiplier = 1
+        minus, st = String('-').parse(st)
+        if minus is not None:
+            multiplier = -1
+        digits, st = self._digits.parse(st)
+        if digits is not None:
+            return int(digits)*multiplier, st
+        st.reset(initial)
+        return None, st
\ No newline at end of file