Mercurial > lbo > hg > pcombinators
changeset 70:3d93b1ac9c29 draft
Move tests into subdirectory
author | Lewin Bormann <lbo@spheniscida.de> |
---|---|
date | Sat, 25 May 2019 22:12:55 +0200 |
parents | f6c53dea71f2 |
children | ae904cd0859e |
files | README.md pcombinators/arith_test.py pcombinators/csv_test.py pcombinators/json_test.py pcombinators/tests/arith.py pcombinators/tests/csv.py pcombinators/tests/json.py |
diffstat | 7 files changed, 179 insertions(+), 176 deletions(-) [+] |
line wrap: on
line diff
--- a/README.md Sat May 25 14:50:26 2019 +0200
+++ b/README.md Sat May 25 22:12:55 2019 +0200
@@ -4,12 +4,15 @@
 always been fascinated by them, so I wanted to try if I can implement them :-)
 
 There are examples in the form of
-* a JSON parser in `pcombinators/json_test.py` and
+* a JSON parser in `pcombinators/tests/json.py` and
   * test it with `parse_json('{"ob": "ject"}')` or
     `Value().parse(ParseFileState('test.json'))`.
   * It does not accept whitespace except in strings. `parse_json()` takes care of
   this in a simple way, but keep this in mind when trying to parse your own input.
-* a parser for arithmetic expressions in `pcombinators/arith_test.py`.
+* a parser for arithmetic expressions in `pcombinators/tests/arith.py`
+* a parser for CSV files in `pcombinators/tests/csv.py`
+
+TODO: Formal documentation (although every parser is already documented now)
 
 More simple examples:
 
--- a/pcombinators/arith_test.py Sat May 25 14:50:26 2019 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,75 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Let's test the combinators in a real world application! - -@author: lbo -""" - -from pcombinators.state import ParseState -from pcombinators.combinators import * -from pcombinators.primitives import * - -def Operator(set): - """An operator or parenthesis.""" - return OneOf(set) - -def Parens(): - """Parentheses contain a term.""" - return (Operator('(') + Term() + Operator(')')) >> (lambda l: l[1]) - -def Variable(): - """A variable consists of several letters.""" - return Regex('[a-zA-Z]+[0-9]*') - -def Atom(): - """An atom is a variable or a float or a parentheses term.""" - return (Variable() | Parens() | Float()) - -def Product(): - return OptimisticSequence(Power(), Operator('*/') + Lazy(Product)) >> operator_result_to_tuple - -class Power(Parser): - ops = Operator('^') - - def __init__(self): - self.p = OptimisticSequence(Lazy(Atom), self.ops + self) >> operator_result_to_tuple - - def parse(self, st): - return self.p.parse(st) - -class Term(Parser): - ops = Operator('+-') - - def __init__(self): - self.p = OptimisticSequence(Product(), self.ops + self) >> operator_result_to_tuple - - def parse(self, st): - # Try to parse a product, then a sum operator, then another term. - # OptimisticSequence will just return a product if there is no sum operator. - return self.p.parse(st) - - -def operator_result_to_tuple(l): - if len(l) == 1: - return l[0] - elif len(l) == 2 and len(l[1]) == 2: - return (l[0], l[1][0], l[1][1]) - else: - # Parse failed if not either 1 or 3. - raise Exception("Parse failed: Missing operand") - -def pretty_print(tpl): - # tpl is a (left, op, right) tuple or a scalar. 
- if not isinstance(tpl, tuple): - return str(tpl) - assert len(tpl) == 3 - return '({} {} {})'.format(pretty_print(tpl[0]), tpl[1], pretty_print(tpl[2])) - -def parse_and_print(expr): - """Parse an expression string and return a string of the parsing result.""" - parsed, st = Term().parse(ParseState(expr.replace(' ', ''))) - if parsed is None: - print('Parse error :(', st) - return - return pretty_print(parsed) \ No newline at end of file
--- a/pcombinators/csv_test.py Sat May 25 14:50:26 2019 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,17 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Fri May 24 23:46:48 2019 - -@author: lbo -""" - -from pcombinators import * - -separator = Whitespace() + OneOf(",") + Whitespace() -string = Skip(String('"')) + NoneInSet('"') + Skip(String('"')) -integer = Last(Integer() + Skip((Peek(NoneInSet('.')) | EndOfInput()))) -value = integer | Float() | Last(string) -line = Flatten(Repeat(OptimisticSequence(value, Skip(separator)), -1)).then_skip((String('\n') | EndOfInput())) - -file = Repeat(line, -1) \ No newline at end of file
--- a/pcombinators/json_test.py Sat May 25 14:50:26 2019 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,82 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Example on how to write a JSON parser. - -@author: lbo -""" - -from pcombinators.combinators import * -from pcombinators.primitives import * - -JString = Last(Skip(String('"')) + NoneInSet('"') + Skip(String('"'))) - -example_json = '{"id":1,"name":"Foo","price":123,"tags":["Bar","Eek"],"stock":{"warehouse":300,"retail":20}}' - -class Value(Parser): - """Bare-bones, but fully functioning, JSON parser. Doesn't like escaped quotes. - - Example: - >>> Value().parse(ParseState(my_json_string)) - ({'id': 1.0, - 'name': 'Foo', - 'price': 123.0, - 'tags': ['Bar', 'Eek'], - 'stock': {'warehouse': 300.0, 'retail': 20.0}}, - ParseState({"id":1,"name":"Foo","price":123,"tags":["Bar","Eek"],"stock":{"warehouse":300,"retail":20}}<>)) - """ - def parse(self, st): - return (Dict | List | JString | Float()).parse(st) - -# We moved out all the piece parsers out of functions to reduce allocation overhead. -# It improves performance by roughly 2x. - -# LISTS - -def concat_elems_elem(l): - if len(l) == 1: - return l - elif len(l) == 2 and type(l[0]) is list: - l[0].append(l[1]) - return l[0] - assert False, "Unexpected list format: {}".format(l) - -# An entry is any value. -entry = Last(Value() + Skip(String(',') | Nothing())) -# A list is a [, followed by mid entries, followed by a final entry, and a -# closing ]. The list is wrapped in a list to prevent merging in other parsers. -# Flatten() takes care that the list from Repeat() and the single entry are made -# into one list. -List = Last(Skip(String('[')) + - Repeat(entry, -1) + - Skip(String(']'))) - -# DICTS - -# A separator is whitespace, a colon, and more whitespace (Whitespace() also accepts empty string) -separator = Skip(String(":")) -# Entry is a String followed by a separator and a value. Wrap the value in a list to prevent merging. 
-# The two-element list is converted to a tuple. -entry = JString + separator + (Value()) >> (lambda l: tuple(l)) -# A mid entry is followed by a comma. -midentry = Last(entry + Skip(String(',') | Nothing())) -# A dict is a {, followed by entries, followed by a final entry, followed by a closing } -dct = Flatten( - Skip(String("{")) + Repeat(midentry, -1) + Skip(String("}"))) -# Convert the list of tuples into a dict. -Dict = dct >> dict - -def remove_unused_whitespace(s): - acc = [] - lvl = 0 - ws = set(' \n\t\r') - for c in s: - if c == '"': - lvl += 1 if lvl == 0 else -1 - if lvl == 0 and c in ws: - continue - acc.append(c) - return ''.join(acc) - -def parse_json(json): - return Value().parse(ParseState(remove_unused_whitespace(json))) \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pcombinators/tests/arith.py Sat May 25 22:12:55 2019 +0200 @@ -0,0 +1,75 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Let's test the combinators in a real world application! + +@author: lbo +""" + +from pcombinators.state import ParseState +from pcombinators.combinators import * +from pcombinators.primitives import * + +def Operator(set): + """An operator or parenthesis.""" + return OneOf(set) + +def Parens(): + """Parentheses contain a term.""" + return (Operator('(') + Term() + Operator(')')) >> (lambda l: l[1]) + +def Variable(): + """A variable consists of several letters.""" + return Regex('[a-zA-Z]+[0-9]*') + +def Atom(): + """An atom is a variable or a float or a parentheses term.""" + return (Variable() | Parens() | Float()) + +def Product(): + return OptimisticSequence(Power(), Operator('*/') + Lazy(Product)) >> operator_result_to_tuple + +class Power(Parser): + ops = Operator('^') + + def __init__(self): + self.p = OptimisticSequence(Lazy(Atom), self.ops + self) >> operator_result_to_tuple + + def parse(self, st): + return self.p.parse(st) + +class Term(Parser): + ops = Operator('+-') + + def __init__(self): + self.p = OptimisticSequence(Product(), self.ops + self) >> operator_result_to_tuple + + def parse(self, st): + # Try to parse a product, then a sum operator, then another term. + # OptimisticSequence will just return a product if there is no sum operator. + return self.p.parse(st) + + +def operator_result_to_tuple(l): + if len(l) == 1: + return l[0] + elif len(l) == 2 and len(l[1]) == 2: + return (l[0], l[1][0], l[1][1]) + else: + # Parse failed if not either 1 or 3. + raise Exception("Parse failed: Missing operand") + +def pretty_print(tpl): + # tpl is a (left, op, right) tuple or a scalar. 
+ if not isinstance(tpl, tuple): + return str(tpl) + assert len(tpl) == 3 + return '({} {} {})'.format(pretty_print(tpl[0]), tpl[1], pretty_print(tpl[2])) + +def parse_and_print(expr): + """Parse an expression string and return a string of the parsing result.""" + parsed, st = Term().parse(ParseState(expr.replace(' ', ''))) + if parsed is None: + print('Parse error :(', st) + return + return pretty_print(parsed) \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pcombinators/tests/csv.py Sat May 25 22:12:55 2019 +0200 @@ -0,0 +1,17 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Fri May 24 23:46:48 2019 + +@author: lbo +""" + +from pcombinators import * + +separator = Whitespace() + OneOf(",") + Whitespace() +string = Skip(String('"')) + NoneInSet('"') + Skip(String('"')) +integer = Last(Integer() + Skip((Peek(NoneInSet('.')) | EndOfInput()))) +value = integer | Float() | Last(string) +line = Flatten(Repeat(OptimisticSequence(value, Skip(separator)), -1)).then_skip((String('\n') | EndOfInput())) + +file = Repeat(line, -1) \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pcombinators/tests/json.py Sat May 25 22:12:55 2019 +0200 @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Example on how to write a JSON parser. + +@author: lbo +""" + +from pcombinators.combinators import * +from pcombinators.primitives import * + +JString = Last(Skip(String('"')) + NoneInSet('"') + Skip(String('"'))) + +example_json = '{"id":1,"name":"Foo","price":123,"tags":["Bar","Eek"],"stock":{"warehouse":300,"retail":20}}' + +class Value(Parser): + """Bare-bones, but fully functioning, JSON parser. Doesn't like escaped quotes. + + Example: + >>> Value().parse(ParseState(my_json_string)) + ({'id': 1.0, + 'name': 'Foo', + 'price': 123.0, + 'tags': ['Bar', 'Eek'], + 'stock': {'warehouse': 300.0, 'retail': 20.0}}, + ParseState({"id":1,"name":"Foo","price":123,"tags":["Bar","Eek"],"stock":{"warehouse":300,"retail":20}}<>)) + """ + def parse(self, st): + return (Dict | List | JString | Float()).parse(st) + +# We moved out all the piece parsers out of functions to reduce allocation overhead. +# It improves performance by roughly 2x. + +# LISTS + +def concat_elems_elem(l): + if len(l) == 1: + return l + elif len(l) == 2 and type(l[0]) is list: + l[0].append(l[1]) + return l[0] + assert False, "Unexpected list format: {}".format(l) + +# An entry is any value. +entry = Last(Value() + Skip(String(',') | Nothing())) +# A list is a [, followed by mid entries, followed by a final entry, and a +# closing ]. The list is wrapped in a list to prevent merging in other parsers. +# Flatten() takes care that the list from Repeat() and the single entry are made +# into one list. +List = Last(Skip(String('[')) + + Repeat(entry, -1) + + Skip(String(']'))) + +# DICTS + +# A separator is whitespace, a colon, and more whitespace (Whitespace() also accepts empty string) +separator = Skip(String(":")) +# Entry is a String followed by a separator and a value. Wrap the value in a list to prevent merging. 
+# The two-element list is converted to a tuple. +entry = JString + separator + (Value()) >> (lambda l: tuple(l)) +# A mid entry is followed by a comma. +midentry = Last(entry + Skip(String(',') | Nothing())) +# A dict is a {, followed by entries, followed by a final entry, followed by a closing } +dct = Flatten( + Skip(String("{")) + Repeat(midentry, -1) + Skip(String("}"))) +# Convert the list of tuples into a dict. +Dict = dct >> dict + +def remove_unused_whitespace(s): + acc = [] + lvl = 0 + ws = set(' \n\t\r') + for c in s: + if c == '"': + lvl += 1 if lvl == 0 else -1 + if lvl == 0 and c in ws: + continue + acc.append(c) + return ''.join(acc) + +def parse_json(json): + return Value().parse(ParseState(remove_unused_whitespace(json))) \ No newline at end of file