changeset 15:f405236a2f6c draft

Implement native parsers for numbers with significant performance gains.
author Lewin Bormann <lbo@spheniscida.de>
date Sun, 19 May 2019 21:04:04 +0200
parents 3a28ea3325e4
children 0d0f03327145
files combinators.py
diffstat 1 files changed, 57 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/combinators.py	Sun May 19 20:30:45 2019 +0200
+++ b/combinators.py	Sun May 19 21:04:04 2019 +0200
@@ -182,6 +182,9 @@
     Result is list of results of the parsers."""
     _strict = False
 
+def Maybe(p):  # presumably "p zero or one time" -- TODO confirm Repeat's 2nd arg
+    return Repeat(p, 1)
+
 class AtomicSequence(_Sequence):
     """Execute a series of parsers after each other. All must succeed. Result is list of results of the parsers."""
     _atomic = True
@@ -267,10 +270,11 @@
     def parse(self, st):
         initial = st.index()
         s = self._s
-        while len(s) > 0 and not st.finished() and s[0] == st.peek():
+        i = 0  # number of characters of s matched so far
+        while i < len(s) and not st.finished() and s[i] == st.peek():
             st.next()
-            s = s[1:]
-        if len(s) == 0:
+            i += 1  # advance the index instead of re-slicing s per character
+        if i == len(s):  # every character of s was matched
             return (self._s, st)
         st.reset(initial)
         return (None, st)
@@ -328,11 +332,30 @@
     Result is string."""
     return ConcatenateResults(Repeat(OneOf(s), -1))
 
-def Integer():
+def CanonicalInteger():  # combinators only: Whitespace(), Maybe('-'), digits, then int
     """Return a parser that parses integers and results in an integer. Result is int."""
-    return Last(Whitespace() + (CharSet('0123456789') >> int))
+    return Last(Whitespace() + (ConcatenateResults(Maybe(String('-')) + CharSet('0123456789')) >> int))
+
+class Integer():
+    """Parser for integers of form [-]dddd[...]. Result is int (None if no match).
+
+    Hand-written: up to twice as fast as CanonicalInteger. NOTE(review): unlike
+    CanonicalInteger, no Whitespace() is parsed before the number."""
+    _digits = CharSet('0123456789')
 
-def Float():
+    def parse(self, st):
+        initial = st.index()  # position to restore when nothing matches
+        multiplier = 1
+        minus, st = String('-').parse(st)
+        if minus is not None:
+            multiplier = -1
+        digits, st = self._digits.parse(st)
+        if digits is not None:
+            return int(digits)*multiplier, st
+        st.reset(initial)  # no digits: also un-consume a '-' that was read
+        return None, st
+
+def CanonicalFloat():
     """Return a parser that parses floats and results in floats. Result is float."""
     def c(l):
         """Convert parts of a number into a float."""
@@ -344,9 +367,35 @@
             Repeat(OneOf('.'), 1) + CharSet('0123456789'))
     return (Skip(Whitespace()) + number) >> c
 
+class Float():
+    """Parser for floats of form [-]ddd[.ddd]. Result is float (None if no match).
+
+    Hand-written: up to 40% faster than CanonicalFloat. A '.' without digits
+    after it is not consumed: "1." yields 1.0 and leaves the '.' in the input."""
+    _digits = CharSet('0123456789')
+
+    def parse(self, st):
+        initial = st.index()
+        minus, st = String('-').parse(st)
+        sign = 1 if minus is None else -1
+        big, st = self._digits.parse(st)
+        if big is None:
+            st.reset(initial)  # no digits: also un-consume a '-' that was read
+            return None, st
+        # Optional fraction: only consumed when digits follow the '.'.
+        after_big = st.index()
+        dot, st = String('.').parse(st)
+        if dot is not None:
+            small, st = self._digits.parse(st)
+            if small is not None:
+                return float(big + '.' + small) * sign, st
+            st.reset(after_big)  # lone '.' is not part of the number
+        return float(big) * sign, st
+
 def NonEmptyString():
-    """Return a parser that parses a string until the first whitespace, skipping whitespace before. Result is string."""
-    return Last(Skip(Whitespace()) + Regex('\w+'))
+    """Return a parser that parses a run of word characters (as matched by
+    Regex), skipping whitespace before. Result is string."""
+    return Last(Whitespace() + Regex(r'\w+'))
 
 def Whitespace():
     """Parse whitespace (space, newline, tab). Result is string."""