Mercurial > lbo > hg > rex
changeset 44:11af4959e69a
rustfmt
author | Lewin Bormann <lbo@spheniscida.de> |
---|---|
date | Wed, 17 Jul 2019 09:35:29 +0200 |
parents | 0a19bc59e7d6 |
children | 8ffaa9bacff3 |
files | src/compile.rs src/lib.rs src/matcher.rs src/matching.rs src/parse.rs src/repr.rs src/state.rs |
diffstat | 7 files changed, 203 insertions(+), 137 deletions(-) [+] |
line wrap: on
line diff
--- a/src/compile.rs Fri Nov 24 18:30:31 2017 +0000 +++ b/src/compile.rs Wed Jul 17 09:35:29 2019 +0200 @@ -1,4 +1,3 @@ - use matcher::{self, wrap_matcher}; use repr::{AnchorLocation, Pattern, Repetition}; use state::{wrap_state, State, Submatch, WrappedState}; @@ -231,14 +230,16 @@ // the repeated pattern. if let Some(max) = max_ { for _ in 0..(max - min) { - repetition.push( - Pattern::Repeated( - Box::new(Repetition::ZeroOrOnce(p.clone())))); + repetition.push(Pattern::Repeated(Box::new(Repetition::ZeroOrOnce( + p.clone(), + )))); } } else { // If no upper limit is set, append a ZeroOrMore state for the repeated // pattern. - repetition.push(Pattern::Repeated(Box::new(Repetition::ZeroOrMore(p.clone())))); + repetition.push(Pattern::Repeated(Box::new(Repetition::ZeroOrMore( + p.clone(), + )))); } Pattern::Concat(repetition).to_state() }
--- a/src/lib.rs Fri Nov 24 18:30:31 2017 +0000 +++ b/src/lib.rs Wed Jul 17 09:35:29 2019 +0200 @@ -1,4 +1,3 @@ - #![allow(dead_code)] mod compile;
--- a/src/matcher.rs Fri Nov 24 18:30:31 2017 +0000 +++ b/src/matcher.rs Wed Jul 17 09:35:29 2019 +0200 @@ -2,9 +2,9 @@ //! matched against the conditions in a regular expression. #![allow(dead_code)] +use std::fmt::Debug; use std::iter::FromIterator; use std::rc::Rc; -use std::fmt::Debug; /// Matchee contains a character and position to match. It's used by the matching logic to check /// whether a certain position within a string is matched by a matcher. The driving logic is @@ -72,7 +72,10 @@ impl Matcher for StringMatcher { fn matches(&self, m: &Matchee) -> (bool, usize) { if m.ix + self.0.len() <= m.src.len() { - (m.src[m.ix..m.ix + self.0.len()].starts_with(&self.0), self.0.len()) + ( + m.src[m.ix..m.ix + self.0.len()].starts_with(&self.0), + self.0.len(), + ) } else { (false, self.0.len()) }
--- a/src/matching.rs Fri Nov 24 18:30:31 2017 +0000 +++ b/src/matching.rs Wed Jul 17 09:35:29 2019 +0200 @@ -8,8 +8,8 @@ use std::mem; use std::rc::Rc; -use state::{WrappedState, Submatch}; use matcher::Matchee; +use state::{Submatch, WrappedState}; #[derive(Clone, Debug)] pub struct MatchState { @@ -177,10 +177,10 @@ #[cfg(test)] mod tests { use super::*; + use compile::*; + use parse; use repr::*; use state::*; - use compile::*; - use parse; fn simple_re0() -> Pattern { (parse::parse("a(b+|bb|bbb|c+)$c$").unwrap()) @@ -189,18 +189,16 @@ // /a(b|c)(xx)?$/ fn raw_re() -> Pattern { Pattern::Concat(vec![ - Pattern::CharRange('a', 'a'), - Pattern::Submatch( - Box::new((Pattern::Alternate( - vec![ - ((Pattern::Char('b'))), - ((Pattern::Char('c')))] - )))), - Pattern::Submatch(Box::new(( - Pattern::Repeated(Box::new( - Repetition::ZeroOrOnce( - Pattern::Str("xx".to_string()))))))), - Pattern::Anchor(AnchorLocation::End), + Pattern::CharRange('a', 'a'), + Pattern::Submatch(Box::new( + (Pattern::Alternate(vec![(Pattern::Char('b')), (Pattern::Char('c'))])), + )), + Pattern::Submatch(Box::new( + (Pattern::Repeated(Box::new(Repetition::ZeroOrOnce(Pattern::Str( + "xx".to_string(), + ))))), + )), + Pattern::Anchor(AnchorLocation::End), ]) }
--- a/src/parse.rs Fri Nov 24 18:30:31 2017 +0000 +++ b/src/parse.rs Wed Jul 17 09:35:29 2019 +0200 @@ -23,7 +23,9 @@ impl ParseStack { fn new() -> ParseStack { - ParseStack { s: Vec::with_capacity(4) } + ParseStack { + s: Vec::with_capacity(4), + } } fn push(&mut self, p: Pattern) { self.s.push(p) @@ -192,15 +194,13 @@ } } ')' => return s.err("unopened ')'", 0), - '[' => { - match parse_char_set(s) { - Ok((pat, newst)) => { - stack.push(pat); - s = newst; - } - Err(e) => return Err(e), + '[' => match parse_char_set(s) { + Ok((pat, newst)) => { + stack.push(pat); + s = newst; } - } + Err(e) => return Err(e), + }, ']' => return s.err("unopened ']'", 0), '{' => { match split_in_parens(s.clone(), CURLY_BRACKETS) { @@ -278,13 +278,23 @@ } } - if nparts == 1 { + if nparts == 0 { + // {} + return rep.err("empty {} spec", 0); + } else if nparts == 1 { // {1} if let Ok(n) = u32::from_str(&String::from_iter(parts[0].unwrap().iter())) { return Ok(Repetition::Specific(p, n, None)); } else { - return Err(format!("invalid repetition '{}'", String::from_iter(rep[..].iter()))); + return Err(format!( + "invalid repetition '{}'", + String::from_iter(rep[..].iter()) + )); } + } else if nparts == 2 { + // {2,3} + let min = u32::from_str(&String::from_iter(parts[0].unwrap().iter())); + let max = u32::from_str(&String::from_iter(parts[1].unwrap().iter())); } Err(String::from("abc")) @@ -296,9 +306,10 @@ // split_in_parens returns two new ParseStates; the first one containing the contents of the // parenthesized clause starting at s[0], the second one containing the rest. -fn split_in_parens<'a>(s: ParseState<'a>, - parens: (char, char)) - -> Option<(ParseState<'a>, ParseState<'a>)> { +fn split_in_parens<'a>( + s: ParseState<'a>, + parens: (char, char), +) -> Option<(ParseState<'a>, ParseState<'a>)> { if let Some(end) = find_closing_paren(s.clone(), parens) { Some((s.sub(1, end), s.from(end + 1))) } else { @@ -336,31 +347,54 @@ #[test] fn test_find_closing_paren() { - for case in &[("(abc)de", Some(4)), ("()a", Some(1)), ("(abcd)", Some(5)), ("(abc", None)] { + for case in &[ + ("(abc)de", Some(4)), + ("()a", Some(1)), + ("(abcd)", Some(5)), + ("(abc", None), + ] { let src: Vec<char> = case.0.chars().collect(); - assert_eq!(find_closing_paren(ParseState::new(src.as_ref()), ROUND_PARENS), - case.1); + assert_eq!( + find_closing_paren(ParseState::new(src.as_ref()), ROUND_PARENS), + case.1 + ); } } #[test] fn test_parse_charset() { - for case in &[("[a]", Pattern::Char('a')), - ("[ab]", Pattern::CharSet(vec!['a', 'b'])), - ("[ba-]", Pattern::CharSet(vec!['b', 'a', '-'])), - ("[a-z]", Pattern::CharRange('a', 'z')), - ("[a-z-]", - Pattern::Alternate(vec![Pattern::CharRange('a', 'z'), Pattern::Char('-')])), - ("[-a-z-]", - Pattern::Alternate(vec![Pattern::CharRange('a', 'z'), - Pattern::CharSet(vec!['-', '-'])])), - ("[a-zA-Z]", - Pattern::Alternate(vec![Pattern::CharRange('a', 'z'), - Pattern::CharRange('A', 'Z')])), - ("[a-zA-Z-]", - Pattern::Alternate(vec![Pattern::CharRange('a', 'z'), - Pattern::CharRange('A', 'Z'), - Pattern::Char('-')]))] { + for case in &[ + ("[a]", Pattern::Char('a')), + ("[ab]", Pattern::CharSet(vec!['a', 'b'])), + ("[ba-]", Pattern::CharSet(vec!['b', 'a', '-'])), + ("[a-z]", Pattern::CharRange('a', 'z')), + ( + "[a-z-]", + Pattern::Alternate(vec![Pattern::CharRange('a', 'z'), Pattern::Char('-')]), + ), + ( + "[-a-z-]", + Pattern::Alternate(vec![ + Pattern::CharRange('a', 'z'), + Pattern::CharSet(vec!['-', '-']), + ]), + ), + ( + "[a-zA-Z]", + Pattern::Alternate(vec![ + Pattern::CharRange('a', 'z'), + Pattern::CharRange('A', 'Z'), + ]), + ), + ( + "[a-zA-Z-]", + Pattern::Alternate(vec![ + Pattern::CharRange('a', 'z'), + Pattern::CharRange('A', 'Z'), + Pattern::Char('-'), + ]), + ), + ] { let src: Vec<char> = case.0.chars().collect(); let st = ParseState::new(&src); assert_eq!(parse_char_set(st).unwrap().0, case.1); @@ -369,10 +403,14 @@ #[test] fn test_parse_subs() { - let case1 = ("a(b)c", - Pattern::Concat(vec![Pattern::Char('a'), - Pattern::Submatch(Box::new(Pattern::Char('b'))), - Pattern::Char('c')])); + let case1 = ( + "a(b)c", + Pattern::Concat(vec![ + Pattern::Char('a'), + Pattern::Submatch(Box::new(Pattern::Char('b'))), + Pattern::Char('c'), + ]), + ); let case2 = ("(b)", Pattern::Submatch(Box::new(Pattern::Char('b')))); for c in &[case1, case2] { @@ -382,12 +420,18 @@ #[test] fn test_parse_res() { - let case1 = ("a(Bcd)e", - Pattern::Concat(vec![Pattern::Char('a'), - Pattern::Submatch(Box::new(Pattern::Concat(vec![Pattern::Char('B'), - Pattern::Char('c'), - Pattern::Char('d')]))), - Pattern::Char('e')])); + let case1 = ( + "a(Bcd)e", + Pattern::Concat(vec![ + Pattern::Char('a'), + Pattern::Submatch(Box::new(Pattern::Concat(vec![ + Pattern::Char('B'), + Pattern::Char('c'), + Pattern::Char('d'), + ]))), + Pattern::Char('e'), + ]), + ); for c in &[case1] { assert_eq!(c.1, parse(c.0).unwrap());
--- a/src/repr.rs Fri Nov 24 18:30:31 2017 +0000 +++ b/src/repr.rs Wed Jul 17 09:35:29 2019 +0200 @@ -170,21 +170,36 @@ #[test] fn test_repr_optimize() { // case = (want, input) - let case1 = - (Pattern::Str("abc".to_string()), - Pattern::Concat(vec![Pattern::Char('a'), Pattern::Char('b'), Pattern::Char('c')])); - let case2 = (Pattern::Str("abcd".to_string()), - Pattern::Concat(vec![Pattern::Str("a".to_string()), - Pattern::Char('b'), - Pattern::Str("cd".to_string())])); - let case3 = (Pattern::Concat(vec![Pattern::Str("abc".to_string()), - Pattern::Anchor(AnchorLocation::End), - Pattern::Char('d')]), - Pattern::Concat(vec![Pattern::Char('a'), - Pattern::Char('b'), - Pattern::Char('c'), - Pattern::Anchor(AnchorLocation::End), - Pattern::Char('d')])); + let case1 = ( + Pattern::Str("abc".to_string()), + Pattern::Concat(vec![ + Pattern::Char('a'), + Pattern::Char('b'), + Pattern::Char('c'), + ]), + ); + let case2 = ( + Pattern::Str("abcd".to_string()), + Pattern::Concat(vec![ + Pattern::Str("a".to_string()), + Pattern::Char('b'), + Pattern::Str("cd".to_string()), + ]), + ); + let case3 = ( + Pattern::Concat(vec![ + Pattern::Str("abc".to_string()), + Pattern::Anchor(AnchorLocation::End), + Pattern::Char('d'), + ]), + Pattern::Concat(vec![ + Pattern::Char('a'), + Pattern::Char('b'), + Pattern::Char('c'), + Pattern::Anchor(AnchorLocation::End), + Pattern::Char('d'), + ]), + ); for c in vec![case1, case2, case3].into_iter() { assert_eq!(c.0, optimize::optimize(c.1)); @@ -193,47 +208,49 @@ // /a(b|c)/ fn simple_re0() -> Pattern { - Pattern::Concat(vec![Pattern::CharRange('a', 'a'), - Pattern::Alternate(vec![((Pattern::Char('b'))), - ((Pattern::Char('c')))])]) + Pattern::Concat(vec![ + Pattern::CharRange('a', 'a'), + Pattern::Alternate(vec![(Pattern::Char('b')), (Pattern::Char('c'))]), + ]) } // Returns compiled form of /(a[bc])?(cd)*(e|f)+x{1,3}(g|hh|i)j{2,}klm/ fn simple_re1() -> Pattern { - Pattern::Concat(vec!( - Pattern::Repeated( - Box::new( - Repetition::ZeroOrOnce( - Pattern::Submatch(Box::new(Pattern::Concat(vec!( - Pattern::Char('a'), Pattern::CharRange('b', 'c')))))))), - - Pattern::Repeated( - Box::new(Repetition::ZeroOrMore( - Pattern::Submatch(Box::new(Pattern::Concat(vec!( - Pattern::Char('c'), Pattern::Char('d')))))))), - - Pattern::Submatch( - Box::new(( - Pattern::Repeated( - Box::new(Repetition::OnceOrMore( - Pattern::Alternate(vec!( - ((Pattern::Char('e'))), - ((Pattern::Char('f'))))))))))), - - - Pattern::Repeated( - Box::new(Repetition::Specific(Pattern::Char('x'), 1, Some(3)))), - - Pattern::Alternate(vec!( - ((Pattern::Char('g'))), - ((Pattern::Repeated( - Box::new(Repetition::Specific(Pattern::Char('h'), 2, Some(2)))))), - ((Pattern::Char('i'))))), - - Pattern::Repeated( - Box::new(Repetition::Specific(Pattern::Char('j'), 2, None))), - - Pattern::Str("klm".to_string()), - )) + Pattern::Concat(vec![ + Pattern::Repeated(Box::new(Repetition::ZeroOrOnce(Pattern::Submatch( + Box::new(Pattern::Concat(vec![ + Pattern::Char('a'), + Pattern::CharRange('b', 'c'), + ])), + )))), + Pattern::Repeated(Box::new(Repetition::ZeroOrMore(Pattern::Submatch( + Box::new(Pattern::Concat(vec![ + Pattern::Char('c'), + Pattern::Char('d'), + ])), + )))), + Pattern::Submatch(Box::new( + (Pattern::Repeated(Box::new(Repetition::OnceOrMore(Pattern::Alternate(vec![ + (Pattern::Char('e')), + (Pattern::Char('f')), + ]))))), + )), + Pattern::Repeated(Box::new(Repetition::Specific( + Pattern::Char('x'), + 1, + Some(3), + ))), + Pattern::Alternate(vec![ + (Pattern::Char('g')), + (Pattern::Repeated(Box::new(Repetition::Specific( + Pattern::Char('h'), + 2, + Some(2), + )))), + (Pattern::Char('i')), + ]), + Pattern::Repeated(Box::new(Repetition::Specific(Pattern::Char('j'), 2, None))), + Pattern::Str("klm".to_string()), + ]) } use compile::start_compile;
--- a/src/state.rs Fri Nov 24 18:30:31 2017 +0000 +++ b/src/state.rs Wed Jul 17 09:35:29 2019 +0200 @@ -68,17 +68,19 @@ } fn to_string(&self) -> String { - format!("m:{} sub:{}", - if let Some(ref m) = self.matcher { - format!("{:?}", m) - } else { - "_".to_string() - }, - if let Some(ref s) = self.sub { - format!("{:?}", s) - } else { - "".to_string() - }) + format!( + "m:{} sub:{}", + if let Some(ref m) = self.matcher { + format!("{:?}", m) + } else { + "_".to_string() + }, + if let Some(ref s) = self.sub { + format!("{:?}", s) + } else { + "".to_string() + } + ) } } @@ -103,13 +105,15 @@ for next in [node.borrow().out.clone(), node.borrow().out1.clone()].into_iter() { if let &Some(ref o) = next { let nextid = format!("{:p}", o.as_ptr()); - write!(&mut result, - "\"{} {}\" -> \"{} {}\";\n", - id, - node.borrow().to_string(), - nextid, - o.borrow().to_string()) - .unwrap(); + write!( + &mut result, + "\"{} {}\" -> \"{} {}\";\n", + id, + node.borrow().to_string(), + nextid, + o.borrow().to_string() + ) + .unwrap(); if !visited.contains(&nextid) { todo.push_front(o.clone());