changeset 60:197ee4af43af

More info about notorious regex.
author Lewin Bormann <lbo@spheniscida.de>
date Fri, 30 Aug 2019 16:30:59 +0200
parents 46bbd73f7dcb
children 987e10c9e6fe
files src/lib.rs src/matching.rs src/tests.rs
diffstat 3 files changed, 10 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/src/lib.rs	Fri Aug 30 16:30:41 2019 +0200
+++ b/src/lib.rs	Fri Aug 30 16:30:59 2019 +0200
@@ -24,7 +24,7 @@
 
 /// Parse, compile, and match a regular expression. Not recommended for repeated use, as the
 /// regular expression will be compiled every time. Use `compile()` and `match_re()` to make this
-/// more efficient.
+/// more efficient (about 3x faster).
 pub fn match_re_str(re: &str, s: &str) -> Result<(bool, Vec<(usize, usize)>), String> {
     return Ok(compile_and_match(&parse::parse(re)?, s));
 }
--- a/src/matching.rs	Fri Aug 30 16:30:41 2019 +0200
+++ b/src/matching.rs	Fri Aug 30 16:30:59 2019 +0200
@@ -154,6 +154,10 @@
 
             // We only clone the current state if there's a fork in the graph. Otherwise we reuse
             // the old state.
+            //
+            // NOTE: This is what causes exponential cost of parsing "notorious" regular
+            // expressions. It is easy to implement; it would be better to at most create one new
+            // state, and construct more lazily.
             if next1.is_some() && next2.is_some() {
                 // If the current state matched, or it didn't have a matcher, push next states into
                 // list of next states.
--- a/src/tests.rs	Fri Aug 30 16:30:41 2019 +0200
+++ b/src/tests.rs	Fri Aug 30 16:30:59 2019 +0200
@@ -16,6 +16,11 @@
 }
 
 #[test]
+fn test_notorious_graph() {
+    render_graph("(x+x+)+y");
+}
+
+#[test]
 fn test_simple_repeat() {
     assert!(match_re("a+", "aaa").0);
     assert!(match_re("aaa+", "aaa").0);