changeset 30:9ef50316b2e7

Improve speed of OneOf matcher by not using HashSets always This improves the JSON value microbenchmark by more than 45%.
author Lewin Bormann <lewin@lewin-bormann.info>
date Thu, 06 Jun 2019 23:52:51 +0200
parents 09d274a46fa4
children ecfdbe1d1acb
files src/primitives.rs
diffstat 1 files changed, 36 insertions(+), 15 deletions(-) [+]
line wrap: on
line diff
--- a/src/primitives.rs	Thu Jun 06 22:28:24 2019 +0200
+++ b/src/primitives.rs	Thu Jun 06 23:52:51 2019 +0200
@@ -222,16 +222,46 @@
     }
 }
 
+/// Efficient matching depending on number of characters (a HashSet is much slower than a Vec for
+/// less than ~20 chars).
+enum MatchSpec {
+    One(char),
+    Some(Vec<char>),
+    Many(HashSet<char>),
+}
+
+/// Threshold above which a HashSet is used instead of a Vec. Empirically determined.
+const MATCHSPEC_MANY_THRESHOLD: usize = 20;
+
+impl MatchSpec {
+    fn new<S: AsRef<str>>(chars: S) -> MatchSpec {
+        if chars.as_ref().len() == 1 {
+            MatchSpec::One(chars.as_ref().chars().next().unwrap())
+        } else if chars.as_ref().len() <= MATCHSPEC_MANY_THRESHOLD {
+            MatchSpec::Some(chars.as_ref().chars().collect())
+        } else {
+            MatchSpec::Many(chars.as_ref().chars().collect())
+        }
+    }
+    fn matches(&self, c: char) -> bool {
+        match self {
+            MatchSpec::One(cc) => c == *cc,
+            MatchSpec::Some(cs) => cs.contains(&c),
+            MatchSpec::Many(cs) => cs.contains(&c),
+        }
+    }
+}
+
 /// OneOf matches any character that is (or is not) in its set.
-pub struct OneOf(HashSet<char>, bool);
+pub struct OneOf(MatchSpec, bool);
 
 impl OneOf {
     pub fn new<S: AsRef<str>>(chars: S) -> OneOf {
-        OneOf(chars.as_ref().chars().collect(), false)
+        OneOf(MatchSpec::new(chars), false)
     }
     /// Create a OneOf parser that parses all characters *not* in the given set.
     pub fn new_none_of<S: AsRef<str>>(chars: S) -> OneOf {
-        OneOf(chars.as_ref().chars().collect(), true)
+        OneOf(MatchSpec::new(chars), true)
     }
 }
 
@@ -243,20 +273,11 @@
     ) -> ParseResult<Self::Result> {
         match st.peek() {
             Some(c) => {
-                let present = self.0.contains(&c);
-                if self.1 {
-                    // Inverse mode: Match all chars that are not in set.
-                    if present {
-                        return Err(ParseError::Fail("char (inverse) not matched", st.index()));
-                    }
+                if self.0.matches(c) ^ self.1 {
                     st.next();
-                    return Ok(c);
+                    Ok(c)
                 } else {
-                    if present {
-                        st.next();
-                        return Ok(c);
-                    }
-                    return Err(ParseError::Fail("char not matched", st.index()));
+                    Err(ParseError::Fail("char not matched", st.index()))
                 }
             }
             _ => Err(ParseError::EOF),