changeset 49:9f7c57e5ef26 draft

json_test: Improve performance by 2x by removing Whitespace parsers
author Lewin Bormann <lbo@spheniscida.de>
date Thu, 23 May 2019 22:37:38 +0200
parents 26fd19b2573d
children 4a86c41e42f1
files pcombinators/json_test.py pcombinators/primitives.py
diffstat 2 files changed, 11 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/pcombinators/json_test.py	Thu May 23 22:28:32 2019 +0200
+++ b/pcombinators/json_test.py	Thu May 23 22:37:38 2019 +0200
@@ -11,8 +11,6 @@
 
 JString = Last(Skip(String('"')) + NoneInSet('"') + Skip(String('"')))
 
-wrapl = lambda l: [l]
-
 example_json = '{"id":1,"name":"Foo","price":123,"tags":["Bar","Eek"],"stock":{"warehouse":300, "retail":20}}'
 
 class Value(Parser):
@@ -28,7 +26,7 @@
          ParseState({"id":1,"name":"Foo","price":123,"tags":["Bar","Eek"],"stock":{"warehouse":300, "retail":20}}<>))
     """
     def parse(self, st):
-        return Last(Skip(Whitespace()) + (Dict | List | JString | Float()) + Skip(Whitespace())).parse(st)
+        return (Dict | List | JString | Float()).parse(st)
 
 # We moved all the piece parsers out of functions to reduce allocation overhead.
 # It improves performance by roughly 2x.
@@ -59,14 +57,17 @@
 # DICTS
 
 # A separator is a single colon. Surrounding whitespace is no longer accepted here; parse_json strips spaces from the input first.
-separator = Skip((Whitespace() + String(":") + Whitespace()))
+separator = Skip(String(":"))
 # Entry is a String followed by a separator and a value. Wrap the value in a list to prevent merging.
 # The two-element list is converted to a tuple.
 entry = JString + separator + (Value()) >> (lambda l: tuple(l))
 # A mid entry is followed by a comma.
-midentry = Last(entry + Skip(String(',') + Skip(Whitespace())))
+midentry = Last(entry + Skip(String(',')))
 # A dict is a {, followed by entries, followed by a final entry, followed by a closing }
 dct = Flatten(
         Skip(String("{")) + ((Repeat(midentry, -1) + entry)) + Skip(String("}")))
 # Convert the list of tuples into a dict.
 Dict = dct >> dict
+
+def parse_json(json):
+    return Dict.parse(ParseState(json.replace(' ', '')))
\ No newline at end of file
--- a/pcombinators/primitives.py	Thu May 23 22:28:32 2019 +0200
+++ b/pcombinators/primitives.py	Thu May 23 22:37:38 2019 +0200
@@ -122,7 +122,11 @@
     return Last(Whitespace() + Regex('\w+'))
 
 def Whitespace():
-    """Parse whitespace (space, newline, tab). Result is string."""
+    """Parse whitespace (space, newline, tab). Result is string.
+
+    WARNING: Applying this everywhere is very expensive. If possible, try to
+    remove whitespace from the input and not use whitespace parsers at all.
+    """
     return CharSet(' \n\r\t') | Nothing()
 
 # Optimized parsers