changeset 17:720d3dfcaa09 draft

Split into modules for primitives and combinators
author Lewin Bormann <lbo@spheniscida.de>
date Sun, 19 May 2019 21:20:07 +0200
parents 0d0f03327145
children 55a0151ea8bd
files __init__.py combinators.py primitives.py
diffstat 3 files changed, 175 insertions(+), 149 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/__init__.py	Sun May 19 21:20:07 2019 +0200
@@ -0,0 +1,10 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Sun May 19 21:17:49 2019
+
+@author: lbo
+"""
+
+from pcombinators.combinators import *
+from pcombinators.primitives import *
\ No newline at end of file
--- a/combinators.py	Sun May 19 21:15:23 2019 +0200
+++ b/combinators.py	Sun May 19 21:20:07 2019 +0200
@@ -262,152 +262,4 @@
 
 def ConcatenateResults(p):
     """Concatenate string results into a single string. Result is string."""
-    return p >> (lambda l: ''.join(l) if l and len(l) > 0 else None)
-
-# Parsers
-
-class String(Parser):
-    """Consume a fixed string. Result is the string."""
-    _s = ''
-
-    def __init__(self, s):
-        self._s = s
-
-    def parse(self, st):
-        initial = st.index()
-        s = self._s
-        i = 0
-        while i < len(s) and not st.finished() and s[i] == st.peek():
-            st.next()
-            i += 1
-        if i == len(s):
-            return (self._s, st)
-        st.reset(initial)
-        return (None, st)
-
-class OneOf(Parser):
-    """Parse characters in the given set. Result is string or None, if none were parsed."""
-    _set = None
-
-    def __init__(self, s):
-        """
-        Example:
-            CharSet('abcd')
-            CharSet('0123456789')
-        """
-        self._set = set(s)
-
-    def parse(self, st):
-        if not st.finished() and st.peek() in self._set:
-            return st.next(), st
-        else:
-            return None, st
-
-
-class Regex(Parser):
-    """Parse a string using a regular expression. The result is either the
-    string or a tuple with all matched groups. Result is string."""
-    _rx = None
-
-    def __init__(self, rx):
-        if not isinstance(rx, re.Pattern):
-            rx = re.compile(rx)
-        self._rx = rx
-
-    def parse(self, st):
-        start = st.index()
-        match = re.match(self._rx, st.remaining())
-        if match is None:
-            return None, st
-        begin, end = match.span()
-        result = match.group(0)
-        if len(match.groups()) > 1:
-            result = list(match.groups())
-        elif len(match.groups()) > 0:
-            result = match.group(1)
-        st.reset(start+end)
-        return result, st
-
-# Small specific parsers.
-
-def Nothing():
-    """Matches the empty string, and always succeeds."""
-    return String('')
-
-def CharSet(s):
-    """Matches arbitrarily many characters from the set s (which can be a string).
-    Result is string."""
-    return ConcatenateResults(Repeat(OneOf(s), -1))
-
-# See section below for optimized versions of the following parsers.
-
-def CanonicalInteger():
-    """Return a parser that parses integers and results in an integer. Result is int."""
-    return Last(Whitespace() + (ConcatenateResults(Maybe(String('-')) + CharSet('0123456789')) >> int))
-
-def CanonicalFloat():
-    """Return a parser that parses floats and results in floats. Result is float."""
-    def c(l):
-        """Convert parts of a number into a float."""
-        if l and len(l) > 0:
-            return float(''.join(l))
-        return None
-    number = OptimisticSequence(
-            Repeat(OneOf('-'), 1) + CharSet('0123456789'),
-            Repeat(OneOf('.'), 1) + CharSet('0123456789'))
-    return (Skip(Whitespace()) + number) >> c
-
-def NonEmptyString():
-    """Return a parser that parses a string until the first whitespace,
-    skipping whitespace before. Result is string."""
-    return Last(Whitespace() + Regex('\w+'))
-
-def Whitespace():
-    """Parse whitespace (space, newline, tab). Result is string."""
-    return CharSet(' \n\r\t') | Nothing()
-
-# Optimized parsers
-
-class Float():
-    """Parses a float like [-]ddd[.ddd].
-
-    Float parses floats with more manual code, making it up to 40% faster than
-    CanonicalFloat."""
-    _digits = CharSet('0123456789')
-
-    def parse(self, st):
-        initial = st.index()
-        multiplier = 1
-        minus, st = String('-').parse(st)
-        if minus is not None:
-            multiplier = -1
-        big, st = self._digits.parse(st)
-        if big is None:
-            st.reset(initial)
-            return None, st
-        small = ''
-        dot, st = String('.').parse(st)
-        if dot is not None:
-            small, st = self._digits.parse(st)
-            if small is not None:
-                return float(big + '.' + small) * multiplier, st
-        return float(big) * multiplier, st
-
-class Integer():
-    """Parser for integers of form [-]dddd[...]. Result is int.
-
-    This parser is up to twice as fast as CanonicalInteger and thus implemented
-    manually."""
-    _digits = CharSet('0123456789')
-
-    def parse(self, st):
-        initial = st.index()
-        multiplier = 1
-        minus, st = String('-').parse(st)
-        if minus is not None:
-            multiplier = -1
-        digits, st = self._digits.parse(st)
-        if digits is not None:
-            return int(digits)*multiplier, st
-        st.reset(initial)
-        return None, st
\ No newline at end of file
+    return p >> (lambda l: ''.join(l) if l and len(l) > 0 else None)
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/primitives.py	Sun May 19 21:20:07 2019 +0200
@@ -0,0 +1,164 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Sun May 19 21:15:30 2019
+
+@author: lbo
+"""
+
+import re
+
+from pcombinators.combinators import (
+        Parser,
+        ConcatenateResults,
+        OptimisticSequence,
+        Maybe,
+        Last,
+        Repeat,
+        Skip)
+
+# Parsers
+
+class String(Parser):
+    """Consume a fixed string. Result is the string."""
+    _s = ''
+
+    def __init__(self, s):
+        self._s = s
+
+    def parse(self, st):
+        initial = st.index()
+        s = self._s
+        i = 0
+        while i < len(s) and not st.finished() and s[i] == st.peek():
+            st.next()
+            i += 1
+        if i == len(s):
+            return (self._s, st)
+        st.reset(initial)
+        return (None, st)
+
+class OneOf(Parser):
+    """Parse characters in the given set. Result is string or None, if none were parsed."""
+    _set = None
+
+    def __init__(self, s):
+        """
+        Example:
+            CharSet('abcd')
+            CharSet('0123456789')
+        """
+        self._set = set(s)
+
+    def parse(self, st):
+        if not st.finished() and st.peek() in self._set:
+            return st.next(), st
+        else:
+            return None, st
+
+
+class Regex(Parser):
+    """Parse a string using a regular expression. The result is either the
+    string or a tuple with all matched groups. Result is string."""
+    _rx = None
+
+    def __init__(self, rx):
+        if not isinstance(rx, re.Pattern):
+            rx = re.compile(rx)
+        self._rx = rx
+
+    def parse(self, st):
+        start = st.index()
+        match = re.match(self._rx, st.remaining())
+        if match is None:
+            return None, st
+        begin, end = match.span()
+        result = match.group(0)
+        if len(match.groups()) > 1:
+            result = list(match.groups())
+        elif len(match.groups()) > 0:
+            result = match.group(1)
+        st.reset(start+end)
+        return result, st
+
+def Nothing():
+    """Matches the empty string, and always succeeds."""
+    return String('')
+
+def CharSet(s):
+    """Matches arbitrarily many characters from the set s (which can be a string).
+    Result is string."""
+    return ConcatenateResults(Repeat(OneOf(s), -1))
+
+# See section below for optimized versions of the following parsers.
+
+def CanonicalInteger():
+    """Return a parser that parses integers and results in an integer. Result is int."""
+    return Last(Whitespace() + (ConcatenateResults(Maybe(String('-')) + CharSet('0123456789')) >> int))
+
+def CanonicalFloat():
+    """Return a parser that parses floats and results in floats. Result is float."""
+    def c(l):
+        """Convert parts of a number into a float."""
+        if l and len(l) > 0:
+            return float(''.join(l))
+        return None
+    number = OptimisticSequence(
+            Repeat(OneOf('-'), 1) + CharSet('0123456789'),
+            Repeat(OneOf('.'), 1) + CharSet('0123456789'))
+    return (Skip(Whitespace()) + number) >> c
+
+def NonEmptyString():
+    """Return a parser that parses a string until the first whitespace,
+    skipping whitespace before. Result is string."""
+    return Last(Whitespace() + Regex('\w+'))
+
+def Whitespace():
+    """Parse whitespace (space, newline, tab). Result is string."""
+    return CharSet(' \n\r\t') | Nothing()
+
+# Optimized parsers
+
+class Float():
+    """Parses a float like [-]ddd[.ddd].
+
+    Float parses floats with more manual code, making it up to 40% faster than
+    CanonicalFloat."""
+    _digits = CharSet('0123456789')
+
+    def parse(self, st):
+        initial = st.index()
+        multiplier = 1
+        minus, st = String('-').parse(st)
+        if minus is not None:
+            multiplier = -1
+        big, st = self._digits.parse(st)
+        if big is None:
+            st.reset(initial)
+            return None, st
+        small = ''
+        dot, st = String('.').parse(st)
+        if dot is not None:
+            small, st = self._digits.parse(st)
+            if small is not None:
+                return float(big + '.' + small) * multiplier, st
+        return float(big) * multiplier, st
+
+class Integer():
+    """Parser for integers of form [-]dddd[...]. Result is int.
+
+    This parser is up to twice as fast as CanonicalInteger and thus implemented
+    manually."""
+    _digits = CharSet('0123456789')
+
+    def parse(self, st):
+        initial = st.index()
+        multiplier = 1
+        minus, st = String('-').parse(st)
+        if minus is not None:
+            multiplier = -1
+        digits, st = self._digits.parse(st)
+        if digits is not None:
+            return int(digits)*multiplier, st
+        st.reset(initial)
+        return None, st
\ No newline at end of file