Mercurial > lbo > hg > rex
changeset 49:6662ff63ce90
Add many more tests and optimize REs before matching
author | Lewin Bormann <lbo@spheniscida.de> |
---|---|
date | Fri, 19 Jul 2019 18:16:18 +0200 |
parents | 5e245f1851d7 |
children | f0843388fd96 |
files | README.md src/tests.rs |
diffstat | 2 files changed, 81 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
--- a/README.md Fri Jul 19 18:14:40 2019 +0200 +++ b/README.md Fri Jul 19 18:16:18 2019 +0200 @@ -3,3 +3,8 @@ rex is a mere playground where I'm trying to write an engine for parsing, compiling and matching regular expressions. Consider the code licensed under the MIT license. + +## Bugs + +* Submatches can not start at the same position. +* Regular expressions matching empty strings do not match empty strings.
--- a/src/tests.rs Fri Jul 19 18:14:40 2019 +0200 +++ b/src/tests.rs Fri Jul 19 18:16:18 2019 +0200 @@ -2,11 +2,12 @@ //! A general test suite aiming for wide coverage of positive and negative matches. -use crate::{compile, matching, parse}; +use crate::{compile, matching, parse, repr}; fn match_re(re: &str, s: &str) -> (bool, Vec<(usize, usize)>) { let parsed = parse::parse(re).unwrap(); - let ready = compile::start_compile(&parsed); + let optimized = repr::optimize::optimize(parsed); + let ready = compile::start_compile(&optimized); matching::do_match(ready, s) } @@ -20,3 +21,76 @@ assert!(!match_re("a+", "").0); assert!(!match_re("aa+$", "aaabc").0); } + +#[test] +fn test_specific_repeat() { + assert!(match_re("a{1,3}", "a").0); + assert!(match_re("a{1,3}", "aa").0); + assert!(match_re("a{1,3}", "aaa").0); + assert!(match_re("a{1,3}", "aaaa").0); + + assert!(match_re("a?", "a").0); + // (bug) + //assert!(match_re("a?", "").0); + assert!(match_re("xa?", "x").0); + + assert!(!match_re("a{1,3}$", "aaaa").0); + assert!(match_re("a{1,3}a$", "aaaa").0); + assert!(match_re("a{1,3}b$", "aaab").0); + assert!(!match_re("^a{1,3}$", "xaaa").0); + assert_eq!(vec![(1, 4)], match_re("a{1,3}$", "xaaa").1); + + assert!(match_re("a{3}", "aaa").0); + assert!(match_re("a{0,3}", "a").0); + assert!(match_re("xa{,3}", "x").0); + // (bug) + //assert!(match_re("xa{,3}", "").0); + assert!(match_re("a{,3}", "a").0); + assert!(match_re("a{,3}", "aa").0); + assert!(match_re("a{,3}", "aaa").0); + + assert!(match_re("a{3,}", "aaa").0); + assert!(match_re("a{3,}", "aaaa").0); +} + +#[test] +fn test_character_classes() { + assert!(match_re("^[a-z]{1,3}$", "abc").0); + assert!(!match_re("^[a-z]{1,3}$", "Abc").0); + assert!(match_re("^[A-z]{1,3}$", "Abc").0); + assert!(!match_re("^[A-Z]{1,3}$", "Abc").0); + assert!(match_re("^[A-z]{1,3}$", "Abc").0); + assert!(!match_re("^[a-Z]{1,3}$", "Abc").0); + assert!(match_re("^[0-9]{1,3}$", "012").0); + assert!(match_re("^[0-9]{1,3}$", "02").0); +} + +#[test] +fn test_anchoring() { + assert!(match_re("abc", "012abcdef").0); + assert!(!match_re("^abc", "012abcdef").0); + assert!(!match_re("abc$", "012abcdef").0); + assert!(!match_re("^abc$", "012abcdef").0); + assert!(match_re("^abc", "abc").0); + assert!(match_re("abc$", "abc").0); +} + +#[test] +fn test_alternate() { + assert!(match_re("a|bc|d", "a").0); + assert!(match_re("a|bc|d", "d").0); + assert!(!match_re("a|bc|d", "b").0); + assert!(match_re("a|bc|d", "bc").0); +} + +#[test] +fn test_submatches() { + assert_eq!(vec![(0, 3)], match_re("abc", "abcde").1); + assert_eq!(vec![(1, 4)], match_re("abc", "0abcde").1); + assert_eq!(vec![(1, 4), (2, 3)], match_re("a(b)c", "0abcde").1); + assert_eq!(vec![(1, 4), (2, 3)], match_re("a(.)c", "0abcde").1); + assert_eq!( + vec![(1, 6), (2, 5), (3, 4)], + match_re("a(b(.)d)e", "0abcde").1 + ); +}