Mercurial > lbo > hg > pcombinators

--- a/README.md	Sat May 25 14:50:26 2019 +0200
+++ b/README.md	Sat May 25 22:12:55 2019 +0200
@@ -4,12 +4,15 @@
 always been fascinated by them, so I wanted to try if I can implement them :-)

 There are examples in the form of
-* a JSON parser in `pcombinators/json_test.py` and
+* a JSON parser in `pcombinators/tests/json.py` and
  * test it with `parse_json('{"ob": "ject"}')` or `Value().parse(ParseFileState('test.json'))`.
  * It does not accept whitespace except in strings. `parse_json()` takes care
  of this in a simple way, but keep this in mind when trying to parse your own
  input.
-* a parser for arithmetic expressions in `pcombinators/arith_test.py`.
+* a parser for arithmetic expressions in `pcombinators/tests/arith.py` and
+* a parser for CSV files in `pcombinators/tests/csv.py`.
+
+TODO: Formal documentation (although every parser is already documented now)

 More simple examples:
--- a/pcombinators/arith_test.py	Sat May 25 14:50:26 2019 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,75 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Let's test the combinators in a real world application!
-
-@author: lbo
-"""
-
-from pcombinators.state import ParseState
-from pcombinators.combinators import *
-from pcombinators.primitives import *
-
-def Operator(set):
-    """An operator or parenthesis."""
-    return OneOf(set)
-
-def Parens():
-    """Parentheses contain a term."""
-    return (Operator('(') + Term() + Operator(')')) >> (lambda l: l[1])
-
-def Variable():
-    """A variable consists of several letters."""
-    return Regex('[a-zA-Z]+[0-9]*')
-
-def Atom():
-    """An atom is a variable or a float or a parentheses term."""
-    return (Variable() | Parens() | Float())
-
-def Product():
-    return OptimisticSequence(Power(), Operator('*/') + Lazy(Product)) >> operator_result_to_tuple
-
-class Power(Parser):
-    ops = Operator('^')
-
-    def __init__(self):
-        self.p = OptimisticSequence(Lazy(Atom), self.ops + self) >> operator_result_to_tuple
-
-    def parse(self, st):
-        return self.p.parse(st)
-
-class Term(Parser):
-    ops = Operator('+-')
-
-    def __init__(self):
-        self.p = OptimisticSequence(Product(), self.ops + self) >> operator_result_to_tuple
-
-    def parse(self, st):
-        # Try to parse a product, then a sum operator, then another term.
-        # OptimisticSequence will just return a product if there is no sum operator.
-        return self.p.parse(st)
-
-
-def operator_result_to_tuple(l):
-    if len(l) == 1:
-        return l[0]
-    elif len(l) == 2 and len(l[1]) == 2:
-        return (l[0], l[1][0], l[1][1])
-    else:
-        # Parse failed if not either 1 or 3.
-        raise Exception("Parse failed: Missing operand")
-
-def pretty_print(tpl):
-    # tpl is a (left, op, right) tuple or a scalar.
-    if not isinstance(tpl, tuple):
-        return str(tpl)
-    assert len(tpl) == 3
-    return '({} {} {})'.format(pretty_print(tpl[0]), tpl[1], pretty_print(tpl[2]))
-
-def parse_and_print(expr):
-    """Parse an expression string and return a string of the parsing result."""
-    parsed, st = Term().parse(ParseState(expr.replace(' ', '')))
-    if parsed is None:
-        print('Parse error :(', st)
-        return
-    return pretty_print(parsed)
\ No newline at end of file
--- a/pcombinators/csv_test.py	Sat May 25 14:50:26 2019 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,17 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Fri May 24 23:46:48 2019
-
-@author: lbo
-"""
-
-from pcombinators import *
-
-separator = Whitespace() + OneOf(",") + Whitespace()
-string = Skip(String('"')) + NoneInSet('"') + Skip(String('"'))
-integer = Last(Integer() + Skip((Peek(NoneInSet('.')) | EndOfInput())))
-value = integer | Float() | Last(string)
-line = Flatten(Repeat(OptimisticSequence(value, Skip(separator)), -1)).then_skip((String('\n') | EndOfInput()))
-
-file = Repeat(line, -1)
\ No newline at end of file
--- a/pcombinators/json_test.py	Sat May 25 14:50:26 2019 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,82 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Example on how to write a JSON parser.
-
-@author: lbo
-"""
-
-from pcombinators.combinators import *
-from pcombinators.primitives import *
-
-JString = Last(Skip(String('"')) + NoneInSet('"') + Skip(String('"')))
-
-example_json = '{"id":1,"name":"Foo","price":123,"tags":["Bar","Eek"],"stock":{"warehouse":300,"retail":20}}'
-
-class Value(Parser):
-    """Bare-bones, but fully functioning, JSON parser. Doesn't like escaped quotes.
-
-    Example:
-        >>> Value().parse(ParseState(my_json_string))
-        ({'id': 1.0,
-          'name': 'Foo',
-          'price': 123.0,
-          'tags': ['Bar', 'Eek'],
-          'stock': {'warehouse': 300.0, 'retail': 20.0}},
-         ParseState({"id":1,"name":"Foo","price":123,"tags":["Bar","Eek"],"stock":{"warehouse":300,"retail":20}}<>))
-    """
-    def parse(self, st):
-        return (Dict | List | JString | Float()).parse(st)
-
-# We moved out all the piece parsers out of functions to reduce allocation overhead.
-# It improves performance by roughly 2x.
-
-# LISTS
-
-def concat_elems_elem(l):
-    if len(l) == 1:
-        return l
-    elif len(l) == 2 and type(l[0]) is list:
-        l[0].append(l[1])
-        return l[0]
-    assert False, "Unexpected list format: {}".format(l)
-
-# An entry is any value.
-entry = Last(Value() + Skip(String(',') | Nothing()))
-# A list is a [, followed by mid entries, followed by a final entry, and a
-# closing ]. The list is wrapped in a list to prevent merging in other parsers.
-# Flatten() takes care that the list from Repeat() and the single entry are made
-# into one list.
-List = Last(Skip(String('[')) +
-        Repeat(entry, -1) +
-        Skip(String(']')))
-
-# DICTS
-
-# A separator is whitespace, a colon, and more whitespace (Whitespace() also accepts empty string)
-separator = Skip(String(":"))
-# Entry is a String followed by a separator and a value. Wrap the value in a list to prevent merging.
-# The two-element list is converted to a tuple.
-entry = JString + separator + (Value()) >> (lambda l: tuple(l))
-# A mid entry is followed by a comma.
-midentry = Last(entry + Skip(String(',') | Nothing()))
-# A dict is a {, followed by entries, followed by a final entry, followed by a closing }
-dct = Flatten(
-        Skip(String("{")) + Repeat(midentry, -1) + Skip(String("}")))
-# Convert the list of tuples into a dict.
-Dict = dct >> dict
-
-def remove_unused_whitespace(s):
-    acc = []
-    lvl = 0
-    ws = set(' \n\t\r')
-    for c in s:
-        if c == '"':
-            lvl += 1 if lvl == 0 else -1
-        if lvl == 0 and c in ws:
-            continue
-        acc.append(c)
-    return ''.join(acc)
-
-def parse_json(json):
-    return Value().parse(ParseState(remove_unused_whitespace(json)))
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pcombinators/tests/arith.py	Sat May 25 22:12:55 2019 +0200
@@ -0,0 +1,75 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Let's test the combinators in a real world application!
+
+@author: lbo
+"""
+
+from pcombinators.state import ParseState
+from pcombinators.combinators import *
+from pcombinators.primitives import *
+
+def Operator(set):
+    """An operator or parenthesis."""
+    return OneOf(set)
+
+def Parens():
+    """Parentheses contain a term."""
+    return (Operator('(') + Term() + Operator(')')) >> (lambda l: l[1])
+
+def Variable():
+    """A variable consists of one or more letters, optionally followed by digits."""
+    return Regex('[a-zA-Z]+[0-9]*')
+
+def Atom():
+    """An atom is a variable or a float or a parentheses term."""
+    return (Variable() | Parens() | Float())
+
+def Product():
+    return OptimisticSequence(Power(), Operator('*/') + Lazy(Product)) >> operator_result_to_tuple
+
+class Power(Parser):
+    ops = Operator('^')
+
+    def __init__(self):
+        self.p = OptimisticSequence(Lazy(Atom), self.ops + self) >> operator_result_to_tuple
+
+    def parse(self, st):
+        return self.p.parse(st)
+
+class Term(Parser):
+    ops = Operator('+-')
+
+    def __init__(self):
+        self.p = OptimisticSequence(Product(), self.ops + self) >> operator_result_to_tuple
+
+    def parse(self, st):
+        # Try to parse a product, then a sum operator, then another term.
+        # OptimisticSequence will just return a product if there is no sum operator.
+        return self.p.parse(st)
+
+
+def operator_result_to_tuple(l):
+    if len(l) == 1:
+        return l[0]
+    elif len(l) == 2 and len(l[1]) == 2:
+        return (l[0], l[1][0], l[1][1])
+    else:
+        # Parse failed: result is neither a lone operand nor an operand plus an (operator, operand) pair.
+        raise Exception("Parse failed: Missing operand")
+
+def pretty_print(tpl):
+    # tpl is a (left, op, right) tuple or a scalar.
+    if not isinstance(tpl, tuple):
+        return str(tpl)
+    assert len(tpl) == 3
+    return '({} {} {})'.format(pretty_print(tpl[0]), tpl[1], pretty_print(tpl[2]))
+
+def parse_and_print(expr):
+    """Parse an expression string and return a string of the parsing result."""
+    parsed, st = Term().parse(ParseState(expr.replace(' ', '')))
+    if parsed is None:
+        print('Parse error :(', st)
+        return
+    return pretty_print(parsed)
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pcombinators/tests/csv.py	Sat May 25 22:12:55 2019 +0200
@@ -0,0 +1,17 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Fri May 24 23:46:48 2019
+
+@author: lbo
+"""
+
+from pcombinators import *
+
+separator = Whitespace() + OneOf(",") + Whitespace()
+string = Skip(String('"')) + NoneInSet('"') + Skip(String('"'))
+integer = Last(Integer() + Skip((Peek(NoneInSet('.')) | EndOfInput())))
+value = integer | Float() | Last(string)
+line = Flatten(Repeat(OptimisticSequence(value, Skip(separator)), -1)).then_skip((String('\n') | EndOfInput()))
+
+file = Repeat(line, -1)
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pcombinators/tests/json.py	Sat May 25 22:12:55 2019 +0200
@@ -0,0 +1,82 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Example on how to write a JSON parser.
+
+@author: lbo
+"""
+
+from pcombinators.combinators import *
+from pcombinators.primitives import *
+
+JString = Last(Skip(String('"')) + NoneInSet('"') + Skip(String('"')))
+
+example_json = '{"id":1,"name":"Foo","price":123,"tags":["Bar","Eek"],"stock":{"warehouse":300,"retail":20}}'
+
+class Value(Parser):
+    """Bare-bones, but fully functioning, JSON parser. Doesn't like escaped quotes.
+
+    Example:
+        >>> Value().parse(ParseState(my_json_string))
+        ({'id': 1.0,
+          'name': 'Foo',
+          'price': 123.0,
+          'tags': ['Bar', 'Eek'],
+          'stock': {'warehouse': 300.0, 'retail': 20.0}},
+         ParseState({"id":1,"name":"Foo","price":123,"tags":["Bar","Eek"],"stock":{"warehouse":300,"retail":20}}<>))
+    """
+    def parse(self, st):
+        return (Dict | List | JString | Float()).parse(st)
+
+# We moved all the piece parsers out of functions to reduce allocation overhead.
+# It improves performance by roughly 2x.
+
+# LISTS
+
+def concat_elems_elem(l):
+    if len(l) == 1:
+        return l
+    elif len(l) == 2 and type(l[0]) is list:
+        l[0].append(l[1])
+        return l[0]
+    assert False, "Unexpected list format: {}".format(l)
+
+# An entry is any value.
+entry = Last(Value() + Skip(String(',') | Nothing()))
+# A list is a [, followed by repeated entries (each with an optional trailing
+# comma, see `entry` above), followed by a closing ]. The brackets are wrapped
+# in Skip(); Last() is applied to the whole sequence, presumably to yield the
+# list produced by Repeat() rather than a nested result.
+List = Last(Skip(String('[')) +
+        Repeat(entry, -1) +
+        Skip(String(']')))
+
+# DICTS
+
+# A separator is a single colon, which is skipped (no whitespace is accepted here; see remove_unused_whitespace()).
+separator = Skip(String(":"))
+# Entry is a JString followed by a separator and a value. Note: this rebinds the name `entry` used for list entries above.
+# The two-element list (key, value) is converted to a tuple.
+entry = JString + separator + (Value()) >> (lambda l: tuple(l))
+# A mid entry is an entry optionally followed by a comma.
+midentry = Last(entry + Skip(String(',') | Nothing()))
+# A dict is a {, followed by repeated mid entries, followed by a closing }
+dct = Flatten(
+        Skip(String("{")) + Repeat(midentry, -1) + Skip(String("}")))
+# Convert the list of tuples into a dict.
+Dict = dct >> dict
+
+def remove_unused_whitespace(s):
+    acc = []
+    lvl = 0
+    ws = set(' \n\t\r')
+    for c in s:
+        if c == '"':
+            lvl += 1 if lvl == 0 else -1
+        if lvl == 0 and c in ws:
+            continue
+        acc.append(c)
+    return ''.join(acc)
+
+def parse_json(json):
+    return Value().parse(ParseState(remove_unused_whitespace(json)))
\ No newline at end of file