Mercurial > lbo > hg > rex
changeset 46:cfc2c894ef2c
Finish repetition parsing and fix matching loop
author | Lewin Bormann <lbo@spheniscida.de> |
---|---|
date | Thu, 18 Jul 2019 17:31:14 +0200 |
parents | 8ffaa9bacff3 |
children | 34980229be32 |
files | src/matcher.rs src/matching.rs src/parse.rs src/repr.rs |
diffstat | 4 files changed, 50 insertions(+), 42 deletions(-) [+] |
line wrap: on
line diff
--- a/src/matcher.rs Wed Jul 17 10:00:58 2019 +0200 +++ b/src/matcher.rs Thu Jul 18 17:31:14 2019 +0200 @@ -43,6 +43,9 @@ pub fn reset(&mut self, n: usize) { self.ix = n; } + pub fn finished(&self) -> bool { + self.ix == self.src.len() + } } /// A Matcher matches parts of a Matchee (where a Matchee is a string to be matched). While @@ -58,7 +61,7 @@ pub struct CharMatcher(pub char); impl Matcher for CharMatcher { fn matches(&self, m: &Matchee) -> (bool, usize) { - (m.current() == self.0, 1) + (!m.finished() && m.current() == self.0, 1) } } @@ -71,9 +74,9 @@ } impl Matcher for StringMatcher { fn matches(&self, m: &Matchee) -> (bool, usize) { - if m.ix + self.0.len() <= m.src.len() { + if m.pos() + self.0.len() <= m.src.len() { ( - m.src[m.ix..m.ix + self.0.len()].starts_with(&self.0), + m.src[m.pos()..m.pos() + self.0.len()].starts_with(&self.0), self.0.len(), ) } else { @@ -86,7 +89,10 @@ pub struct CharRangeMatcher(pub char, pub char); impl Matcher for CharRangeMatcher { fn matches(&self, m: &Matchee) -> (bool, usize) { - (m.current() >= self.0 && m.current() <= self.1, 1) + ( + !m.finished() && m.current() >= self.0 && m.current() <= self.1, + 1, + ) } } @@ -94,7 +100,7 @@ pub struct CharSetMatcher(pub Vec<char>); impl Matcher for CharSetMatcher { fn matches(&self, m: &Matchee) -> (bool, usize) { - (self.0.contains(&m.current()), 1) + (!m.finished() && self.0.contains(&m.current()), 1) } } @@ -116,8 +122,8 @@ impl Matcher for AnchorMatcher { fn matches(&self, m: &Matchee) -> (bool, usize) { match self { - &AnchorMatcher::Begin => (m.ix == 0, 0), - &AnchorMatcher::End => (m.ix == m.src.len(), 0), + &AnchorMatcher::Begin => (m.pos() == 0, 0), + &AnchorMatcher::End => (m.finished(), 0), } } }
--- a/src/matching.rs Wed Jul 17 10:00:58 2019 +0200 +++ b/src/matching.rs Thu Jul 18 17:31:14 2019 +0200 @@ -112,36 +112,36 @@ // Iterate over all current states, see which match, and add the successors of matching // states to the states_next list. - for mut st in states.drain(..) { - let (next1, next2) = st.node.borrow().next_states(); + for mut matchst in states.drain(..) { + let (next1, next2) = matchst.node.borrow().next_states(); // Check if this node is a submatch start or end. If it is, the list of pending // submatches is cloned and the current position pushed (Start) or the most recent // submatch start popped and stored in the overall submatch list (End). - let sub = st.node.borrow().sub.clone(); + let sub = matchst.node.borrow().sub.clone(); match sub { - Some(Submatch::Start) => st.start_submatch(), - Some(Submatch::End) => st.stop_submatch(), + Some(Submatch::Start) => matchst.start_submatch(), + Some(Submatch::End) => matchst.stop_submatch(), None => {} } // Found match (intentionally down here, after finalizing submatch processing). Only // update match if this match is longer than the previous one. - if next1.is_none() && next2.is_none() && st.matchee.pos() > longestmatch { + if next1.is_none() && next2.is_none() && matchst.matchee.pos() > longestmatch { ismatch = true; - matches = st.submatches.borrow().clone(); - longestmatch = st.matchee.pos(); + matches = matchst.submatches.borrow().clone(); + longestmatch = matchst.matchee.pos(); continue; } // longest_partial_match contains the furthest any substate has advanced into the // string. - if st.matchee.pos() > longest_partial_match { - longest_partial_match = st.matchee.pos(); + if matchst.matchee.pos() > longest_partial_match { + longest_partial_match = matchst.matchee.pos(); } let mut advance_by = 0; // Check if the current state matches. - if let Some((matched, howmany)) = st.node.borrow().matches(&st.matchee) { + if let Some((matched, howmany)) = matchst.node.borrow().matches(&matchst.matchee) { // Current state didn't match, throw away. if !matched { continue; @@ -154,16 +154,16 @@ if next1.is_some() && next2.is_some() { // If the current state matched, or it didn't have a matcher, push next states into // list of next states. - states_next.push(st.fork(next1.unwrap(), advance_by)); - st.update(next2.unwrap(), advance_by); - states_next.push(st); + states_next.push(matchst.fork(next1.unwrap(), advance_by)); + matchst.update(next2.unwrap(), advance_by); + states_next.push(matchst); } else if let Some(n1) = next1 { // Reuse current state if only one successor (common case). - st.update(n1, advance_by); - states_next.push(st) + matchst.update(n1, advance_by); + states_next.push(matchst) } else if let Some(n2) = next2 { - st.update(n2, advance_by); - states_next.push(st); + matchst.update(n2, advance_by); + states_next.push(matchst); } } // Swap state lists, leaving states_next empty. @@ -182,21 +182,20 @@ use state::*; fn simple_re0() -> Pattern { - (parse::parse("a(b+|bb|bbb|c+){1,3}$c$").unwrap()) + parse::parse("aa+$").unwrap() } // /a(b|c)(xx)?$/ fn raw_re() -> Pattern { Pattern::Concat(vec![ Pattern::CharRange('a', 'a'), - Pattern::Submatch(Box::new( - Pattern::Alternate(vec![(Pattern::Char('b')), (Pattern::Char('c'))]), - )), - Pattern::Submatch(Box::new( - Pattern::Repeated(Box::new(Repetition::ZeroOrOnce(Pattern::Str( - "xx".to_string(), - )))), - )), + Pattern::Submatch(Box::new(Pattern::Alternate(vec![ + (Pattern::Char('b')), + (Pattern::Char('c')), + ]))), + Pattern::Submatch(Box::new(Pattern::Repeated(Box::new( + Repetition::ZeroOrOnce(Pattern::Str("xx".to_string())), + )))), Pattern::Anchor(AnchorLocation::End), ]) } @@ -204,8 +203,8 @@ #[test] fn test_match_simple() { let re = simple_re0(); - // println!("{:?}", re); - println!("{:?}", do_match(start_compile(&re), "abbbbxxabb$c")); + println!("{:?}", re); + println!("{:?}", do_match(start_compile(&re), "aaab")); let dot = dot(start_compile(&re)); println!("digraph st {{ {} }}", dot); }
--- a/src/parse.rs Wed Jul 17 10:00:58 2019 +0200 +++ b/src/parse.rs Thu Jul 18 17:31:14 2019 +0200 @@ -300,7 +300,7 @@ // {2,3} let min = errtostr(u32::from_str(&String::from_iter(parts[0].unwrap().iter())))?; let max = errtostr(u32::from_str(&String::from_iter(parts[1].unwrap().iter())))?; - return Ok(Repetition::Specific(p, min, Some(max))) + return Ok(Repetition::Specific(p, min, Some(max))); } Err(format!("invalid repetition pattern {:?}", &rep[..])) @@ -458,7 +458,10 @@ #[test] fn test_parse_repetition_manual() { - println!("digraph st {{ {} }}", dot(start_compile(&parse("[abc]{1,5}").unwrap()))); + println!( + "digraph st {{ {} }}", + dot(start_compile(&parse("[abc]{1,5}").unwrap())) + ); } #[test] fn test_parse_manual() {
--- a/src/repr.rs Wed Jul 17 10:00:58 2019 +0200 +++ b/src/repr.rs Thu Jul 18 17:31:14 2019 +0200 @@ -228,12 +228,12 @@ Pattern::Char('d'), ])), )))), - Pattern::Submatch(Box::new( - Pattern::Repeated(Box::new(Repetition::OnceOrMore(Pattern::Alternate(vec![ + Pattern::Submatch(Box::new(Pattern::Repeated(Box::new( + Repetition::OnceOrMore(Pattern::Alternate(vec![ (Pattern::Char('e')), (Pattern::Char('f')), - ])))), - )), + ])), + )))), Pattern::Repeated(Box::new(Repetition::Specific( Pattern::Char('x'), 1,