changeset 42:8932c5b37a89

Move compilation functionality into dedicated module.
author Lewin Bormann <lbo@spheniscida.de>
date Wed, 08 Nov 2017 19:17:19 +0100
parents 89dbd4e17f9e
children 0a19bc59e7d6
files src/compile.rs src/lib.rs src/matching.rs src/parse.rs src/repr.rs src/state.rs
diffstat 6 files changed, 255 insertions(+), 249 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/compile.rs	Wed Nov 08 19:17:19 2017 +0100
@@ -0,0 +1,247 @@
+
+use matcher::{self, wrap_matcher};
+use repr::{AnchorLocation, Pattern, Repetition};
+use state::{wrap_state, State, Submatch, WrappedState};
+
+/// Types implementing Compile can be compiled into a state graph.
+pub trait Compile {
+    /// to_state returns the start node of a subgraph, and a list of pointers that need to be
+    /// connected to the following subgraph. The list can contain the first tuple element.
+    fn to_state(&self) -> (WrappedState, Vec<WrappedState>);
+}
+
+/// start_compile takes a parsed regex as RETree and returns the first node of a directed graph
+/// representing the regex.
+pub fn start_compile(re: &Pattern) -> WrappedState {
+    let (s, sp) = re.to_state();
+
+    let mut before = State::default();
+    before.sub = Some(Submatch::Start);
+    before.out = Some(s);
+    let mut end = State::default();
+    end.sub = Some(Submatch::End);
+
+    let endw = wrap_state(end);
+    // Connect all loose ends with the final node.
+    for p in sp {
+        p.borrow_mut().patch(endw.clone());
+    }
+    wrap_state(before)
+}
+
+impl Compile for Pattern {
+    fn to_state(&self) -> (WrappedState, Vec<WrappedState>) {
+        match *self {
+            Pattern::Concat(ref ps) => {
+                if ps.is_empty() {
+                    panic!("invalid Concat len: 0")
+                } else if ps.len() == 1 {
+                    return ps[0].to_state();
+                }
+
+                let (init, mut lastp) = ps[0].to_state();
+                for i in 1..ps.len() {
+                    let (next, nextp) = ps[i].to_state();
+                    // Connect all loose ends with the new node.
+                    for p in lastp {
+                        p.borrow_mut().patch(next.clone());
+                    }
+                    // Remember the loose ends of this one.
+                    lastp = nextp;
+                }
+                (init, lastp)
+            }
+            Pattern::Any => {
+                let s = wrap_state(State {
+                    out: None,
+                    out1: None,
+                    matcher: wrap_matcher(Box::new(matcher::AnyMatcher)),
+                    sub: None,
+                });
+                (s.clone(), vec![s])
+            }
+            Pattern::Char(c) => {
+                let s = wrap_state(State {
+                    out: None,
+                    out1: None,
+                    matcher: wrap_matcher(Box::new(matcher::CharMatcher(c))),
+                    sub: None,
+                });
+                (s.clone(), vec![s])
+            }
+            Pattern::Str(ref s) => {
+                let s = wrap_state(State {
+                    out: None,
+                    out1: None,
+                    matcher: wrap_matcher(Box::new(matcher::StringMatcher::new(s))),
+                    sub: None,
+                });
+                (s.clone(), vec![s])
+            }
+            Pattern::CharRange(from, to) => {
+                let s = wrap_state(State {
+                    out: None,
+                    out1: None,
+                    matcher: wrap_matcher(Box::new(matcher::CharRangeMatcher(from, to))),
+                    sub: None,
+                });
+                (s.clone(), vec![s])
+            }
+            Pattern::CharSet(ref set) => {
+                let s = wrap_state(State {
+                    out: None,
+                    out1: None,
+                    matcher: wrap_matcher(Box::new(matcher::CharSetMatcher(set.clone()))),
+                    sub: None,
+                });
+                (s.clone(), vec![s])
+            }
+            Pattern::Alternate(ref r) => alternate(&r, &vec![]),
+            Pattern::Submatch(ref p) => {
+                let (s, sp) = p.to_state();
+                let before = wrap_state(State {
+                    out: Some(s),
+                    out1: None,
+                    matcher: None,
+                    sub: Some(Submatch::Start),
+                });
+                let after = wrap_state(State {
+                    out: None,
+                    out1: None,
+                    matcher: None,
+                    sub: Some(Submatch::End),
+                });
+                for p in sp {
+                    p.borrow_mut().patch(after.clone());
+                }
+                (before, vec![after])
+            }
+            Pattern::Repeated(ref p) => p.to_state(),
+            Pattern::Anchor(ref loc) => {
+                let mut m = matcher::AnchorMatcher::Begin;
+                match loc {
+                    &AnchorLocation::End => m = matcher::AnchorMatcher::End,
+                    _ => (),
+                };
+                let s = wrap_state(State {
+                    out: None,
+                    out1: None,
+                    matcher: wrap_matcher(Box::new(m)),
+                    sub: None,
+                });
+                (s.clone(), vec![s])
+            }
+        }
+    }
+}
+
+// alternate compiles a list of patterns into a graph that accepts any one of the patterns.
+fn alternate(ps: &[Pattern], to_patch: &[WrappedState]) -> (WrappedState, Vec<WrappedState>) {
+    if ps.len() == 1 {
+        let (s, sp) = ps[0].to_state();
+        for e in to_patch {
+            e.borrow_mut().patch(s.clone());
+        }
+        (s, sp)
+    } else {
+        let mut init = State {
+            out: None,
+            out1: None,
+            matcher: None,
+            sub: None,
+        };
+        let mid = ps.len() / 2;
+        let (left, mut leftpatch) = alternate(&ps[..mid], &vec![]);
+        let (right, mut rightpatch) = alternate(&ps[mid..], &vec![]);
+        init.patch(left);
+        init.patch(right);
+        leftpatch.append(&mut rightpatch);
+        (wrap_state(init), leftpatch)
+    }
+}
+
+impl Compile for Repetition {
+    fn to_state(&self) -> (WrappedState, Vec<WrappedState>) {
+        match *self {
+            Repetition::ZeroOrOnce(ref p) => {
+                let (s, to_patch) = p.to_state();
+                let after = wrap_state(State {
+                    out: None,
+                    out1: None,
+                    matcher: None,
+                    sub: None,
+                });
+                let before = wrap_state(State {
+                    out: Some(s.clone()),
+                    out1: Some(after.clone()),
+                    matcher: None,
+                    sub: None,
+                });
+                for p in to_patch {
+                    p.borrow_mut().patch(after.clone());
+                }
+                (before, vec![after])
+            }
+            Repetition::ZeroOrMore(ref p) => {
+                let (s, to_patch) = p.to_state();
+                let before = wrap_state(State {
+                    out: Some(s.clone()),
+                    out1: None,
+                    matcher: None,
+                    sub: None,
+                });
+                let after = wrap_state(State {
+                    out: Some(s.clone()),
+                    out1: None,
+                    matcher: None,
+                    sub: None,
+                });
+                before.borrow_mut().patch(after.clone());
+                for p in to_patch {
+                    p.borrow_mut().patch(after.clone());
+                }
+                (before, vec![after])
+            }
+            Repetition::OnceOrMore(ref p) => {
+                let (s, to_patch) = p.to_state();
+                let after = wrap_state(State {
+                    out: Some(s.clone()),
+                    out1: None,
+                    matcher: None,
+                    sub: None,
+                });
+                for p in to_patch {
+                    p.borrow_mut().patch(after.clone());
+                }
+                (s, vec![after])
+            }
+            // Specific is 'min' concatenations of a simple state and 'max - min' concatenations of
+            // a ZeroOrOnce state.
+            Repetition::Specific(ref p, min, max_) => {
+                let cap = max_.unwrap_or(min) as usize;
+                assert!(cap >= min as usize);
+                let mut repetition = Vec::with_capacity(cap);
+
+                // Append the minimum required number of occurrences.
+                for _ in 0..min {
+                    repetition.push(p.clone());
+                }
+
+                // If an upper limit is set, append max-min repetitions of ZeroOrOnce states for
+                // the repeated pattern.
+                if let Some(max) = max_ {
+                    for _ in 0..(max - min) {
+                        repetition.push(
+                            Pattern::Repeated(
+                                Box::new(Repetition::ZeroOrOnce(p.clone()))));
+                    }
+                } else {
+                    // If no upper limit is set, append a ZeroOrMore state for the repeated
+                    // pattern.
+                    repetition.push(Pattern::Repeated(Box::new(Repetition::ZeroOrMore(p.clone()))));
+                }
+                Pattern::Concat(repetition).to_state()
+            }
+        }
+    }
+}
--- a/src/lib.rs	Mon Aug 21 20:34:59 2017 +0200
+++ b/src/lib.rs	Wed Nov 08 19:17:19 2017 +0100
@@ -1,6 +1,7 @@
 
 #![allow(dead_code)]
 
+mod compile;
 mod matcher;
 mod matching;
 mod parse;
@@ -12,5 +13,5 @@
 /// entire matched string. A submatch is a tuple of (start, end), where end is the index of the
 /// first character that isn't part of the submatch anymore (i.e. [start, end)).
 fn compile_and_match(re: &repr::Pattern, s: &str) -> (bool, Vec<(usize, usize)>) {
-    matching::do_match(repr::start_compile(re), s)
+    matching::do_match(compile::start_compile(re), s)
 }
--- a/src/matching.rs	Mon Aug 21 20:34:59 2017 +0200
+++ b/src/matching.rs	Wed Nov 08 19:17:19 2017 +0100
@@ -179,6 +179,7 @@
     use super::*;
     use repr::*;
     use state::*;
+    use compile::*;
     use parse;
 
     fn simple_re0() -> Pattern {
--- a/src/parse.rs	Mon Aug 21 20:34:59 2017 +0200
+++ b/src/parse.rs	Wed Nov 08 19:17:19 2017 +0100
@@ -283,6 +283,7 @@
 #[cfg(test)]
 mod tests {
     use super::*;
+    use compile::*;
     use repr::*;
     use state::dot;
 
--- a/src/repr.rs	Mon Aug 21 20:34:59 2017 +0200
+++ b/src/repr.rs	Wed Nov 08 19:17:19 2017 +0100
@@ -1,29 +1,7 @@
-//! The repr module is concerned with the representation of parsed regular expressions and their
-//! compilation into a state graph. The state graph itself is defined in the state module.
+//! The repr module is concerned with the representation of parsed regular expressions. A Pattern
+//! is compiled by the `compile` module into a state graph defined in `state`.
 #![allow(dead_code)]
 
-use matcher::{self, wrap_matcher};
-use state::{wrap_state, Compile, State, Submatch, WrappedState};
-
-/// start_compile takes a parsed regex as RETree and returns the first node of a directed graph
-/// representing the regex.
-pub fn start_compile(re: &Pattern) -> WrappedState {
-    let (s, sp) = re.to_state();
-
-    let mut before = State::default();
-    before.sub = Some(Submatch::Start);
-    before.out = Some(s);
-    let mut end = State::default();
-    end.sub = Some(Submatch::End);
-
-    let endw = wrap_state(end);
-    // Connect all loose ends with the final node.
-    for p in sp {
-        p.borrow_mut().patch(endw.clone());
-    }
-    wrap_state(before)
-}
-
 /// A Pattern is either a repeated pattern, a stored submatch, an alternation between two patterns,
 /// two patterns following each other, or a character range or set.
 #[derive(Clone, Debug, PartialEq)]
@@ -55,137 +33,6 @@
     End,
 }
 
-impl Compile for Pattern {
-    fn to_state(&self) -> (WrappedState, Vec<WrappedState>) {
-        match *self {
-            Pattern::Concat(ref ps) => {
-                if ps.is_empty() {
-                    panic!("invalid Concat len: 0")
-                } else if ps.len() == 1 {
-                    return ps[0].to_state();
-                }
-
-                let (init, mut lastp) = ps[0].to_state();
-                for i in 1..ps.len() {
-                    let (next, nextp) = ps[i].to_state();
-                    // Connect all loose ends with the new node.
-                    for p in lastp {
-                        p.borrow_mut().patch(next.clone());
-                    }
-                    // Remember the loose ends of this one.
-                    lastp = nextp;
-                }
-                (init, lastp)
-            }
-            Pattern::Any => {
-                let s = wrap_state(State {
-                    out: None,
-                    out1: None,
-                    matcher: wrap_matcher(Box::new(matcher::AnyMatcher)),
-                    sub: None,
-                });
-                (s.clone(), vec![s])
-            }
-            Pattern::Char(c) => {
-                let s = wrap_state(State {
-                    out: None,
-                    out1: None,
-                    matcher: wrap_matcher(Box::new(matcher::CharMatcher(c))),
-                    sub: None,
-                });
-                (s.clone(), vec![s])
-            }
-            Pattern::Str(ref s) => {
-                let s = wrap_state(State {
-                    out: None,
-                    out1: None,
-                    matcher: wrap_matcher(Box::new(matcher::StringMatcher::new(s))),
-                    sub: None,
-                });
-                (s.clone(), vec![s])
-            }
-            Pattern::CharRange(from, to) => {
-                let s = wrap_state(State {
-                    out: None,
-                    out1: None,
-                    matcher: wrap_matcher(Box::new(matcher::CharRangeMatcher(from, to))),
-                    sub: None,
-                });
-                (s.clone(), vec![s])
-            }
-            Pattern::CharSet(ref set) => {
-                let s = wrap_state(State {
-                    out: None,
-                    out1: None,
-                    matcher: wrap_matcher(Box::new(matcher::CharSetMatcher(set.clone()))),
-                    sub: None,
-                });
-                (s.clone(), vec![s])
-            }
-            Pattern::Alternate(ref r) => alternate(&r, &vec![]),
-            Pattern::Submatch(ref p) => {
-                let (s, sp) = p.to_state();
-                let before = wrap_state(State {
-                    out: Some(s),
-                    out1: None,
-                    matcher: None,
-                    sub: Some(Submatch::Start),
-                });
-                let after = wrap_state(State {
-                    out: None,
-                    out1: None,
-                    matcher: None,
-                    sub: Some(Submatch::End),
-                });
-                for p in sp {
-                    p.borrow_mut().patch(after.clone());
-                }
-                (before, vec![after])
-            }
-            Pattern::Repeated(ref p) => p.to_state(),
-            Pattern::Anchor(ref loc) => {
-                let mut m = matcher::AnchorMatcher::Begin;
-                match loc {
-                    &AnchorLocation::End => m = matcher::AnchorMatcher::End,
-                    _ => (),
-                };
-                let s = wrap_state(State {
-                    out: None,
-                    out1: None,
-                    matcher: wrap_matcher(Box::new(m)),
-                    sub: None,
-                });
-                (s.clone(), vec![s])
-            }
-        }
-    }
-}
-
-// alternate compiles a list of patterns into a graph that accepts any one of the patterns.
-fn alternate(ps: &[Pattern], to_patch: &[WrappedState]) -> (WrappedState, Vec<WrappedState>) {
-    if ps.len() == 1 {
-        let (s, sp) = ps[0].to_state();
-        for e in to_patch {
-            e.borrow_mut().patch(s.clone());
-        }
-        (s, sp)
-    } else {
-        let mut init = State {
-            out: None,
-            out1: None,
-            matcher: None,
-            sub: None,
-        };
-        let mid = ps.len() / 2;
-        let (left, mut leftpatch) = alternate(&ps[..mid], &vec![]);
-        let (right, mut rightpatch) = alternate(&ps[mid..], &vec![]);
-        init.patch(left);
-        init.patch(right);
-        leftpatch.append(&mut rightpatch);
-        (wrap_state(init), leftpatch)
-    }
-}
-
 /// A pattern can be repeated in various manners, which is represented by the pattern being wrapped
 /// in a Repetition.
 /// The inner type is a pattern, because a repetition is either concerned with only one pattern
@@ -202,92 +49,6 @@
     Specific(Pattern, u32, Option<u32>),
 }
 
-impl Repetition {
-    fn to_state(&self) -> (WrappedState, Vec<WrappedState>) {
-        match *self {
-            Repetition::ZeroOrOnce(ref p) => {
-                let (s, to_patch) = p.to_state();
-                let after = wrap_state(State {
-                    out: None,
-                    out1: None,
-                    matcher: None,
-                    sub: None,
-                });
-                let before = wrap_state(State {
-                    out: Some(s.clone()),
-                    out1: Some(after.clone()),
-                    matcher: None,
-                    sub: None,
-                });
-                for p in to_patch {
-                    p.borrow_mut().patch(after.clone());
-                }
-                (before, vec![after])
-            }
-            Repetition::ZeroOrMore(ref p) => {
-                let (s, to_patch) = p.to_state();
-                let before = wrap_state(State {
-                    out: Some(s.clone()),
-                    out1: None,
-                    matcher: None,
-                    sub: None,
-                });
-                let after = wrap_state(State {
-                    out: Some(s.clone()),
-                    out1: None,
-                    matcher: None,
-                    sub: None,
-                });
-                before.borrow_mut().patch(after.clone());
-                for p in to_patch {
-                    p.borrow_mut().patch(after.clone());
-                }
-                (before, vec![after])
-            }
-            Repetition::OnceOrMore(ref p) => {
-                let (s, to_patch) = p.to_state();
-                let after = wrap_state(State {
-                    out: Some(s.clone()),
-                    out1: None,
-                    matcher: None,
-                    sub: None,
-                });
-                for p in to_patch {
-                    p.borrow_mut().patch(after.clone());
-                }
-                (s, vec![after])
-            }
-            // Specific is 'min' concatenations of a simple state and 'max - min' concatenations of
-            // a ZeroOrOnce state.
-            Repetition::Specific(ref p, min, max_) => {
-                let cap = max_.unwrap_or(min) as usize;
-                assert!(cap >= min as usize);
-                let mut repetition = Vec::with_capacity(cap);
-
-                // Append the minimum required number of occurrences.
-                for _ in 0..min {
-                    repetition.push(p.clone());
-                }
-
-                // If an upper limit is set, append max-min repetitions of ZeroOrOnce states for
-                // the repeated pattern.
-                if let Some(max) = max_ {
-                    for _ in 0..(max - min) {
-                        repetition.push(
-                            Pattern::Repeated(
-                                Box::new(Repetition::ZeroOrOnce(p.clone()))));
-                    }
-                } else {
-                    // If no upper limit is set, append a ZeroOrMore state for the repeated
-                    // pattern.
-                    repetition.push(Pattern::Repeated(Box::new(Repetition::ZeroOrMore(p.clone()))));
-                }
-                Pattern::Concat(repetition).to_state()
-            }
-        }
-    }
-}
-
 /// optimize contains functionality for optimizing the representation of regular expressions for
 /// more efficient matching.
 pub mod optimize {
@@ -475,6 +236,8 @@
         ))
     }
 
+    use compile::start_compile;
+
     #[test]
     fn test_re1() {
         // println!("{:?}", start_compile(simple_re0()));
--- a/src/state.rs	Mon Aug 21 20:34:59 2017 +0200
+++ b/src/state.rs	Wed Nov 08 19:17:19 2017 +0100
@@ -12,13 +12,6 @@
 
 use matcher::{Matchee, Matcher};
 
-/// Types implementing Compile can be compiled into a state graph.
-pub trait Compile {
-    /// to_state returns the start node of a subgraph, and a list of pointers that need to be
-    /// connected to the following subgraph. The list can contain the first tuple element.
-    fn to_state(&self) -> (WrappedState, Vec<WrappedState>);
-}
-
 /// State is a single state that the evaluation can be in. It contains several output states as
 /// well as a matcher.
 #[derive(Debug, Default, Clone)]