changeset 46:cfc2c894ef2c

Finish repetition parsing and fix matching loop
author Lewin Bormann <lbo@spheniscida.de>
date Thu, 18 Jul 2019 17:31:14 +0200
parents 8ffaa9bacff3
children 34980229be32
files src/matcher.rs src/matching.rs src/parse.rs src/repr.rs
diffstat 4 files changed, 50 insertions(+), 42 deletions(-) [+]
line wrap: on
line diff
--- a/src/matcher.rs	Wed Jul 17 10:00:58 2019 +0200
+++ b/src/matcher.rs	Thu Jul 18 17:31:14 2019 +0200
@@ -43,6 +43,9 @@
     pub fn reset(&mut self, n: usize) {
         self.ix = n;
     }
+    pub fn finished(&self) -> bool {
+        self.ix == self.src.len()
+    }
 }
 
 /// A Matcher matches parts of a Matchee (where a Matchee is a string to be matched). While
@@ -58,7 +61,7 @@
 pub struct CharMatcher(pub char);
 impl Matcher for CharMatcher {
     fn matches(&self, m: &Matchee) -> (bool, usize) {
-        (m.current() == self.0, 1)
+        (!m.finished() && m.current() == self.0, 1)
     }
 }
 
@@ -71,9 +74,9 @@
 }
 impl Matcher for StringMatcher {
     fn matches(&self, m: &Matchee) -> (bool, usize) {
-        if m.ix + self.0.len() <= m.src.len() {
+        if m.pos() + self.0.len() <= m.src.len() {
             (
-                m.src[m.ix..m.ix + self.0.len()].starts_with(&self.0),
+                m.src[m.pos()..m.pos() + self.0.len()].starts_with(&self.0),
                 self.0.len(),
             )
         } else {
@@ -86,7 +89,10 @@
 pub struct CharRangeMatcher(pub char, pub char);
 impl Matcher for CharRangeMatcher {
     fn matches(&self, m: &Matchee) -> (bool, usize) {
-        (m.current() >= self.0 && m.current() <= self.1, 1)
+        (
+            !m.finished() && m.current() >= self.0 && m.current() <= self.1,
+            1,
+        )
     }
 }
 
@@ -94,7 +100,7 @@
 pub struct CharSetMatcher(pub Vec<char>);
 impl Matcher for CharSetMatcher {
     fn matches(&self, m: &Matchee) -> (bool, usize) {
-        (self.0.contains(&m.current()), 1)
+        (!m.finished() && self.0.contains(&m.current()), 1)
     }
 }
 
@@ -116,8 +122,8 @@
 impl Matcher for AnchorMatcher {
     fn matches(&self, m: &Matchee) -> (bool, usize) {
         match self {
-            &AnchorMatcher::Begin => (m.ix == 0, 0),
-            &AnchorMatcher::End => (m.ix == m.src.len(), 0),
+            &AnchorMatcher::Begin => (m.pos() == 0, 0),
+            &AnchorMatcher::End => (m.finished(), 0),
         }
     }
 }
--- a/src/matching.rs	Wed Jul 17 10:00:58 2019 +0200
+++ b/src/matching.rs	Thu Jul 18 17:31:14 2019 +0200
@@ -112,36 +112,36 @@
 
         // Iterate over all current states, see which match, and add the successors of matching
         // states to the states_next list.
-        for mut st in states.drain(..) {
-            let (next1, next2) = st.node.borrow().next_states();
+        for mut matchst in states.drain(..) {
+            let (next1, next2) = matchst.node.borrow().next_states();
 
             // Check if this node is a submatch start or end. If it is, the list of pending
             // submatches is cloned and the current position pushed (Start) or the most recent
             // submatch start popped and stored in the overall submatch list (End).
-            let sub = st.node.borrow().sub.clone();
+            let sub = matchst.node.borrow().sub.clone();
             match sub {
-                Some(Submatch::Start) => st.start_submatch(),
-                Some(Submatch::End) => st.stop_submatch(),
+                Some(Submatch::Start) => matchst.start_submatch(),
+                Some(Submatch::End) => matchst.stop_submatch(),
                 None => {}
             }
 
             // Found match (intentionally down here, after finalizing submatch processing). Only
             // update match if this match is longer than the previous one.
-            if next1.is_none() && next2.is_none() && st.matchee.pos() > longestmatch {
+            if next1.is_none() && next2.is_none() && matchst.matchee.pos() > longestmatch {
                 ismatch = true;
-                matches = st.submatches.borrow().clone();
-                longestmatch = st.matchee.pos();
+                matches = matchst.submatches.borrow().clone();
+                longestmatch = matchst.matchee.pos();
                 continue;
             }
             // longest_partial_match contains the furthest any substate has advanced into the
             // string.
-            if st.matchee.pos() > longest_partial_match {
-                longest_partial_match = st.matchee.pos();
+            if matchst.matchee.pos() > longest_partial_match {
+                longest_partial_match = matchst.matchee.pos();
             }
 
             let mut advance_by = 0;
             // Check if the current state matches.
-            if let Some((matched, howmany)) = st.node.borrow().matches(&st.matchee) {
+            if let Some((matched, howmany)) = matchst.node.borrow().matches(&matchst.matchee) {
                 // Current state didn't match, throw away.
                 if !matched {
                     continue;
@@ -154,16 +154,16 @@
             if next1.is_some() && next2.is_some() {
                 // If the current state matched, or it didn't have a matcher, push next states into
                 // list of next states.
-                states_next.push(st.fork(next1.unwrap(), advance_by));
-                st.update(next2.unwrap(), advance_by);
-                states_next.push(st);
+                states_next.push(matchst.fork(next1.unwrap(), advance_by));
+                matchst.update(next2.unwrap(), advance_by);
+                states_next.push(matchst);
             } else if let Some(n1) = next1 {
                 // Reuse current state if only one successor (common case).
-                st.update(n1, advance_by);
-                states_next.push(st)
+                matchst.update(n1, advance_by);
+                states_next.push(matchst)
             } else if let Some(n2) = next2 {
-                st.update(n2, advance_by);
-                states_next.push(st);
+                matchst.update(n2, advance_by);
+                states_next.push(matchst);
             }
         }
         // Swap state lists, leaving states_next empty.
@@ -182,21 +182,20 @@
     use state::*;
 
     fn simple_re0() -> Pattern {
-        (parse::parse("a(b+|bb|bbb|c+){1,3}$c$").unwrap())
+        parse::parse("aa+$").unwrap()
     }
 
     // /a(b|c)(xx)?$/
     fn raw_re() -> Pattern {
         Pattern::Concat(vec![
             Pattern::CharRange('a', 'a'),
-            Pattern::Submatch(Box::new(
-                Pattern::Alternate(vec![(Pattern::Char('b')), (Pattern::Char('c'))]),
-            )),
-            Pattern::Submatch(Box::new(
-                Pattern::Repeated(Box::new(Repetition::ZeroOrOnce(Pattern::Str(
-                    "xx".to_string(),
-                )))),
-            )),
+            Pattern::Submatch(Box::new(Pattern::Alternate(vec![
+                (Pattern::Char('b')),
+                (Pattern::Char('c')),
+            ]))),
+            Pattern::Submatch(Box::new(Pattern::Repeated(Box::new(
+                Repetition::ZeroOrOnce(Pattern::Str("xx".to_string())),
+            )))),
             Pattern::Anchor(AnchorLocation::End),
         ])
     }
@@ -204,8 +203,8 @@
     #[test]
     fn test_match_simple() {
         let re = simple_re0();
-        // println!("{:?}", re);
-        println!("{:?}", do_match(start_compile(&re), "abbbbxxabb$c"));
+        println!("{:?}", re);
+        println!("{:?}", do_match(start_compile(&re), "aaab"));
         let dot = dot(start_compile(&re));
         println!("digraph st {{ {} }}", dot);
     }
--- a/src/parse.rs	Wed Jul 17 10:00:58 2019 +0200
+++ b/src/parse.rs	Thu Jul 18 17:31:14 2019 +0200
@@ -300,7 +300,7 @@
         // {2,3}
         let min = errtostr(u32::from_str(&String::from_iter(parts[0].unwrap().iter())))?;
         let max = errtostr(u32::from_str(&String::from_iter(parts[1].unwrap().iter())))?;
-        return Ok(Repetition::Specific(p, min, Some(max)))
+        return Ok(Repetition::Specific(p, min, Some(max)));
     }
 
     Err(format!("invalid repetition pattern {:?}", &rep[..]))
@@ -458,7 +458,10 @@
 
     #[test]
     fn test_parse_repetition_manual() {
-        println!("digraph st {{ {} }}", dot(start_compile(&parse("[abc]{1,5}").unwrap())));
+        println!(
+            "digraph st {{ {} }}",
+            dot(start_compile(&parse("[abc]{1,5}").unwrap()))
+        );
     }
     #[test]
     fn test_parse_manual() {
--- a/src/repr.rs	Wed Jul 17 10:00:58 2019 +0200
+++ b/src/repr.rs	Thu Jul 18 17:31:14 2019 +0200
@@ -228,12 +228,12 @@
                     Pattern::Char('d'),
                 ])),
             )))),
-            Pattern::Submatch(Box::new(
-                Pattern::Repeated(Box::new(Repetition::OnceOrMore(Pattern::Alternate(vec![
+            Pattern::Submatch(Box::new(Pattern::Repeated(Box::new(
+                Repetition::OnceOrMore(Pattern::Alternate(vec![
                     (Pattern::Char('e')),
                     (Pattern::Char('f')),
-                ])))),
-            )),
+                ])),
+            )))),
             Pattern::Repeated(Box::new(Repetition::Specific(
                 Pattern::Char('x'),
                 1,