Mercurial > lbo > hg > pcombinators
changeset 49:9f7c57e5ef26 draft
json_test: Improve performance by 2x by removing Whitespace parsers
author | Lewin Bormann <lbo@spheniscida.de> |
---|---|
date | Thu, 23 May 2019 22:37:38 +0200 |
parents | 26fd19b2573d |
children | 4a86c41e42f1 |
files | pcombinators/json_test.py pcombinators/primitives.py |
diffstat | 2 files changed, 11 insertions(+), 6 deletions(-) [+] |
line wrap: on
line diff
--- a/pcombinators/json_test.py Thu May 23 22:28:32 2019 +0200
+++ b/pcombinators/json_test.py Thu May 23 22:37:38 2019 +0200
@@ -11,8 +11,6 @@
 
 JString = Last(Skip(String('"')) + NoneInSet('"') + Skip(String('"')))
 
-wrapl = lambda l: [l]
-
 example_json = '{"id":1,"name":"Foo","price":123,"tags":["Bar","Eek"],"stock":{"warehouse":300, "retail":20}}'
 
 class Value(Parser):
@@ -28,7 +26,7 @@
     ParseState({"id":1,"name":"Foo","price":123,"tags":["Bar","Eek"],"stock":{"warehouse":300, "retail":20}}<>))
     """
     def parse(self, st):
-        return Last(Skip(Whitespace()) + (Dict | List | JString | Float()) + Skip(Whitespace())).parse(st)
+        return (Dict | List | JString | Float()).parse(st)
 
 # We moved out all the piece parsers out of functions to reduce allocation overhead.
 # It improves performance by roughly 2x.
@@ -59,14 +57,17 @@
 # DICTS
 
 # A separator is whitespace, a colon, and more whitespace (Whitespace() also accepts empty string)
-separator = Skip((Whitespace() + String(":") + Whitespace()))
+separator = Skip(String(":"))
 # Entry is a String followed by a separator and a value. Wrap the value in a list to prevent merging.
 # The two-element list is converted to a tuple.
 entry = JString + separator + (Value()) >> (lambda l: tuple(l))
 # A mid entry is followed by a comma.
-midentry = Last(entry + Skip(String(',') + Skip(Whitespace())))
+midentry = Last(entry + Skip(String(',')))
 # A dict is a {, followed by entries, followed by a final entry, followed by a closing }
 dct = Flatten(
     Skip(String("{")) + ((Repeat(midentry, -1) + entry)) + Skip(String("}")))
 # Convert the list of tuples into a dict.
 Dict = dct >> dict
+
+def parse_json(json):
+    return Dict.parse(ParseState(json.replace(' ', '')))
\ No newline at end of file
--- a/pcombinators/primitives.py Thu May 23 22:28:32 2019 +0200
+++ b/pcombinators/primitives.py Thu May 23 22:37:38 2019 +0200
@@ -122,7 +122,11 @@
     return Last(Whitespace() + Regex('\w+'))
 
 def Whitespace():
-    """Parse whitespace (space, newline, tab). Result is string."""
+    """Parse whitespace (space, newline, tab). Result is string.
+
+    WARNING: Applying this everywhere is very expensive. If possible, try to
+    remove whitespace from the input and not use whitespace parsers at all.
+    """
     return CharSet(' \n\r\t') | Nothing()
 
 # Optimized parsers