changeset 49:6662ff63ce90

Add many more tests and optimize REs before matching
author Lewin Bormann <lbo@spheniscida.de>
date Fri, 19 Jul 2019 18:16:18 +0200
parents 5e245f1851d7
children f0843388fd96
files README.md src/tests.rs
diffstat 2 files changed, 81 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/README.md	Fri Jul 19 18:14:40 2019 +0200
+++ b/README.md	Fri Jul 19 18:16:18 2019 +0200
@@ -3,3 +3,8 @@
 rex is a mere playground where I'm trying to write an engine for parsing, compiling and matching regular expressions.
 
 Consider the code licensed under the MIT license.
+
+## Bugs
+
+* Submatches cannot start at the same position.
+* Regular expressions that can match the empty string (e.g. `a?`) do not match an empty input string.
--- a/src/tests.rs	Fri Jul 19 18:14:40 2019 +0200
+++ b/src/tests.rs	Fri Jul 19 18:16:18 2019 +0200
@@ -2,11 +2,12 @@
 
 //! A general test suite aiming for wide coverage of positive and negative matches.
 
-use crate::{compile, matching, parse};
+use crate::{compile, matching, parse, repr};
 
 fn match_re(re: &str, s: &str) -> (bool, Vec<(usize, usize)>) {
     let parsed = parse::parse(re).unwrap();
-    let ready = compile::start_compile(&parsed);
+    let optimized = repr::optimize::optimize(parsed);
+    let ready = compile::start_compile(&optimized);
     matching::do_match(ready, s)
 }
 
@@ -20,3 +21,76 @@
     assert!(!match_re("a+", "").0);
     assert!(!match_re("aa+$", "aaabc").0);
 }
+
+#[test]
+fn test_specific_repeat() {
+    assert!(match_re("a{1,3}", "a").0);
+    assert!(match_re("a{1,3}", "aa").0);
+    assert!(match_re("a{1,3}", "aaa").0);
+    assert!(match_re("a{1,3}", "aaaa").0);
+
+    assert!(match_re("a?", "a").0);
+    // (bug) a pattern that can match the empty string currently fails on empty input, so this is disabled:
+    //assert!(match_re("a?", "").0);
+    assert!(match_re("xa?", "x").0);
+
+    assert!(!match_re("a{1,3}$", "aaaa").0);
+    assert!(match_re("a{1,3}a$", "aaaa").0);
+    assert!(match_re("a{1,3}b$", "aaab").0);
+    assert!(!match_re("^a{1,3}$", "xaaa").0);
+    assert_eq!(vec![(1, 4)], match_re("a{1,3}$", "xaaa").1);
+
+    assert!(match_re("a{3}", "aaa").0);
+    assert!(match_re("a{0,3}", "a").0);
+    assert!(match_re("xa{,3}", "x").0);
+    // (bug)
+    //assert!(match_re("xa{,3}", "").0);
+    assert!(match_re("a{,3}", "a").0);
+    assert!(match_re("a{,3}", "aa").0);
+    assert!(match_re("a{,3}", "aaa").0);
+
+    assert!(match_re("a{3,}", "aaa").0);
+    assert!(match_re("a{3,}", "aaaa").0);
+}
+
+#[test]
+fn test_character_classes() {
+    assert!(match_re("^[a-z]{1,3}$", "abc").0);
+    assert!(!match_re("^[a-z]{1,3}$", "Abc").0);
+    assert!(match_re("^[A-z]{1,3}$", "Abc").0);
+    assert!(!match_re("^[A-Z]{1,3}$", "Abc").0);
+    assert!(match_re("^[A-z]{1,3}$", "Abc").0);
+    assert!(!match_re("^[a-Z]{1,3}$", "Abc").0);
+    assert!(match_re("^[0-9]{1,3}$", "012").0);
+    assert!(match_re("^[0-9]{1,3}$", "02").0);
+}
+
+#[test]
+fn test_anchoring() {
+    assert!(match_re("abc", "012abcdef").0);
+    assert!(!match_re("^abc", "012abcdef").0);
+    assert!(!match_re("abc$", "012abcdef").0);
+    assert!(!match_re("^abc$", "012abcdef").0);
+    assert!(match_re("^abc", "abc").0);
+    assert!(match_re("abc$", "abc").0);
+}
+
+#[test]
+fn test_alternate() {
+    assert!(match_re("a|bc|d", "a").0);
+    assert!(match_re("a|bc|d", "d").0);
+    assert!(!match_re("a|bc|d", "b").0);
+    assert!(match_re("a|bc|d", "bc").0);
+}
+
+#[test]
+fn test_submatches() {
+    assert_eq!(vec![(0, 3)], match_re("abc", "abcde").1);
+    assert_eq!(vec![(1, 4)], match_re("abc", "0abcde").1);
+    assert_eq!(vec![(1, 4), (2, 3)], match_re("a(b)c", "0abcde").1);
+    assert_eq!(vec![(1, 4), (2, 3)], match_re("a(.)c", "0abcde").1);
+    assert_eq!(
+        vec![(1, 6), (2, 5), (3, 4)],
+        match_re("a(b(.)d)e", "0abcde").1
+    );
+}