changeset 5:b02268b0d4b1 draft

Generalize two-hand expression parsing, and add parser for powers
author Lewin Bormann <lbo@spheniscida.de>
date Wed, 23 Nov 2016 21:13:05 +0100
parents 000d0a69ed00
children ad77c2ad4853
files TODO src/generic_term_parser_experiment.rs src/lib.rs src/old_manual_parsers.rs
diffstat 4 files changed, 209 insertions(+), 157 deletions(-) [+]
line wrap: on
line diff
--- a/TODO	Wed Nov 23 20:53:58 2016 +0100
+++ b/TODO	Wed Nov 23 21:13:05 2016 +0100
@@ -1,3 +1,4 @@
 * Add variable symbols
 * Implement evaluation
-* Implement power operator (^)
+* Implement preprocessing (lexing light)
+* Implement pretty printer
--- a/src/generic_term_parser_experiment.rs	Wed Nov 23 20:53:58 2016 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,72 +0,0 @@
-
-type TwoHandBuilder = fn(Box<Ast>, Box<Ast>) -> Ast;
-type OperatorMap = HashMap<char, TwoHandBuilder>;
-
-fn term_op_map() -> OperatorMap {
-    let mut map: OperatorMap = HashMap::new();
-    map.insert('*', Ast::Multiplication);
-    map.insert('/', Ast::Division);
-    map
-}
-
-fn expr_op_map() -> OperatorMap {
-    let mut map: OperatorMap = HashMap::new();
-    map.insert('+', Ast::Addition);
-    map.insert('-', Ast::Subtraction);
-    map
-}
-
-fn parse_twohand_expr<'a>(opmap: OperatorMap, st: State<'a>) -> ParseResult {
-    if st.len() == 0 {
-        err("Empty", st)
-    } else {
-        let ops: Vec<char> = opmap.keys().map(|c| *c).collect();
-        // Predicate: Is this an operator we're looking for
-        let _wanted_operator = |c: char| ops.iter().find(|d| c == **d).is_some();
-
-        // Test if addition or subtraction
-        let mut paren_lvl = 0;
-        let mut ix = 0;
-        let mut op_pos: i32 = -1;
-
-        // Look for operator at top level
-        for c in st.iter() {
-            if c == '(' {
-                paren_lvl += 1;
-            } else if c == ')' {
-                paren_lvl -= 1;
-            } else if paren_lvl == 0 && (_wanted_operator(c)) {
-                // yep!
-                op_pos = ix;
-                break;
-            }
-            ix += 1;
-        }
-
-        if op_pos > -1 {
-            let left = pexpr(st.restrict(0, op_pos as usize));
-            let right = pexpr(st.restrict(op_pos as usize + 1, st.len()));
-
-            if left.ok() && right.ok() {
-                match (left, right) {
-                    (ParseResult::Ok(a1), ParseResult::Ok(a2)) => {
-                        if let Some(builder) = opmap.get(&st.at(op_pos as usize)) {
-                            ParseResult::Ok(builder(Box::new(a1), Box::new(a2)))
-                        } else {
-                            err("No matching Ast builder found", st)
-                        }
-                    }
-                    _ => panic!("Expected OK results"),
-                }
-            } else if !left.ok() {
-                err("Left part of term is bad", st)
-            } else {
-                err("Right or both parts of term is|are bad", st)
-            }
-        } else {
-            // XXX This needs to be generic, too
-            sym(st)
-        }
-    }
-}
-
--- a/src/lib.rs	Wed Nov 23 20:53:58 2016 +0100
+++ b/src/lib.rs	Wed Nov 23 21:13:05 2016 +0100
@@ -20,6 +20,7 @@
     Subtraction(Box<Ast>, Box<Ast>),
     Multiplication(Box<Ast>, Box<Ast>),
     Division(Box<Ast>, Box<Ast>),
+    Pow(Box<Ast>, Box<Ast>),
     Num(i64),
 }
 
@@ -177,25 +178,71 @@
             }
         }
     } else {
-        expr(st)
+        // expr(st)
+        parse_twohand_expr(expr_op_map(),
+                           |st2| {
+                               parse_twohand_expr(term_op_map(),
+                                                  |st3| parse_twohand_expr(pow_op_map(), sym, st3),
+                                                  st2)
+                           },
+                           st)
     }
 }
 
-fn expr<'a>(st: State<'a>) -> ParseResult {
+
+type TwoHandBuilder = fn(Box<Ast>, Box<Ast>) -> Ast;
+type OperatorMap = HashMap<char, TwoHandBuilder>;
+
+fn term_op_map() -> OperatorMap {
+    let mut map: OperatorMap = HashMap::new();
+    map.insert('*', Ast::Multiplication);
+    map.insert('/', Ast::Division);
+    map
+}
+
+fn expr_op_map() -> OperatorMap {
+    let mut map: OperatorMap = HashMap::new();
+    map.insert('+', Ast::Addition);
+    map.insert('-', Ast::Subtraction);
+    map
+}
+
+fn pow_op_map() -> OperatorMap {
+    let mut map: OperatorMap = HashMap::new();
+    map.insert('^', Ast::Pow);
+    map
+}
+
+/// Parse any expression that has two sides and an infix operator (e.g. '4*5' or '6+7').
+///
+/// opmap is a map from operator character to a function building an Ast fragment.
+///
+/// then is a function that is called if the original parsing attempt fails (it's a parser for the
+/// next-more-specific construct)
+///
+/// st is the usual state input.
+fn parse_twohand_expr<'a, F>(opmap: OperatorMap, then: F, st: State<'a>) -> ParseResult
+    where F: Fn(State<'a>) -> ParseResult
+{
     if st.len() == 0 {
         err("Empty", st)
     } else {
+        let ops: Vec<char> = opmap.keys().map(|c| *c).collect();
+        // Predicate: Is this an operator we're looking for
+        let _wanted_operator = |c: char| ops.iter().find(|d| c == **d).is_some();
+
         // Test if addition or subtraction
         let mut paren_lvl = 0;
         let mut ix = 0;
         let mut op_pos: i32 = -1;
 
+        // Look for operator at top level
         for c in st.iter() {
             if c == '(' {
                 paren_lvl += 1;
             } else if c == ')' {
                 paren_lvl -= 1;
-            } else if paren_lvl == 0 && (c == '+' || c == '-') {
+            } else if paren_lvl == 0 && (_wanted_operator(c)) {
                 // yep!
                 op_pos = ix;
                 break;
@@ -210,59 +257,10 @@
             if left.ok() && right.ok() {
                 match (left, right) {
                     (ParseResult::Ok(a1), ParseResult::Ok(a2)) => {
-                        if st.at(op_pos as usize) == '-' {
-                            ParseResult::Ok(Ast::Subtraction(Box::new(a1), Box::new(a2)))
+                        if let Some(builder) = opmap.get(&st.at(op_pos as usize)) {
+                            ParseResult::Ok(builder(Box::new(a1), Box::new(a2)))
                         } else {
-                            ParseResult::Ok(Ast::Addition(Box::new(a1), Box::new(a2)))
-                        }
-                    }
-                    _ => panic!("Expected OK results"),
-                }
-            } else if !left.ok() {
-                err("Left part of expr is bad", st)
-            } else {
-                err("Right or both parts of expr is|are bad", st)
-            }
-        } else {
-            term(st)
-        }
-    }
-}
-
-fn term<'a>(st: State<'a>) -> ParseResult {
-    if st.len() == 0 {
-        err("Empty", st)
-    } else {
-        // Test if addition or subtraction
-        let mut paren_lvl = 0;
-        let mut ix = 0;
-        let mut op_pos: i32 = -1;
-
-        // Look for operator at top level
-        for c in st.iter() {
-            if c == '(' {
-                paren_lvl += 1;
-            } else if c == ')' {
-                paren_lvl -= 1;
-            } else if paren_lvl == 0 && (c == '*' || c == '/') {
-                // yep!
-                op_pos = ix;
-                break;
-            }
-            ix += 1;
-        }
-
-        if op_pos > -1 {
-            let left = pexpr(st.restrict(0, op_pos as usize));
-            let right = pexpr(st.restrict(op_pos as usize + 1, st.len()));
-
-            if left.ok() && right.ok() {
-                match (left, right) {
-                    (ParseResult::Ok(a1), ParseResult::Ok(a2)) => {
-                        if st.at(op_pos as usize) == '*' {
-                            ParseResult::Ok(Ast::Multiplication(Box::new(a1), Box::new(a2)))
-                        } else {
-                            ParseResult::Ok(Ast::Division(Box::new(a1), Box::new(a2)))
+                            err("No matching Ast builder found", st)
                         }
                     }
                     _ => panic!("Expected OK results"),
@@ -270,37 +268,11 @@
             } else if !left.ok() {
                 err("Left part of term is bad", st)
             } else {
-                err("Right or both parts of term is|are bad", st)
+                err("Right or both parts of 2-hand expr is|are bad", st)
             }
         } else {
-            sym(st)
-        }
-    }
-}
-fn sym<'a>(st: State<'a>) -> ParseResult {
-    if st.len() == 0 {
-        err("Empty", st)
-    } else if st.at(0) == '-' {
-        match num(st.restrict(1, st.len())) {
-            ParseResult::Ok(Ast::Num(n)) => ParseResult::Ok(Ast::Num(-n)),
-            x => x,
+            then(st)
         }
-    } else {
-        num(st)
-    }
-}
-
-fn num<'a>(st: State<'a>) -> ParseResult {
-    if st.len() == 0 {
-        err("Empty", st)
-    } else if st.iter().all(|c| c >= '0' && c <= '9') {
-        let s = String::from_iter(st.iter());
-        match i64::from_str(&s) {
-            Ok(n) => ParseResult::Ok(Ast::Num(n)),
-            Err(e) => err(&format!("int parse err: {}", e), st),
-        }
-    } else {
-        err("Not a number", st)
     }
 }
 
@@ -360,7 +332,7 @@
         let st = super::state(&v);
 
         assert_eq!(super::pexpr(st),
-                   ParseResult::Err("Right or both parts of expr is|are bad".to_string(),
+                   ParseResult::Err("Right or both parts of 2-hand expr is|are bad".to_string(),
                                     "at 0-2: [2+]".to_string()));
     }
 
@@ -398,4 +370,33 @@
                    ParseResult::Ok(Ast::Addition(Box::new(Ast::Num(3)),
                    Box::new(Ast::Multiplication(Box::new(Ast::Num(6)), Box::new(Ast::Num(2)))))));
     }
+
+    #[test]
+    fn test_pow() {
+        let v = super::str_to_vec("2^4");
+        let st = super::state(&v);
+
+        assert_eq!(super::pexpr(st),
+                   ParseResult::Ok(Ast::Pow(Box::new(Ast::Num(2)), Box::new(Ast::Num(4)))));
+
+        let v = super::str_to_vec("2*3^4");
+        let st = super::state(&v);
+
+        assert_eq!(super::pexpr(st),
+                   ParseResult::Ok(Ast::Multiplication(Box::new(Ast::Num(2)),
+                                                       Box::new(Ast::Pow(Box::new(Ast::Num(3)),
+                                                                         Box::new(Ast::Num(4)))))));
+    }
+
+    #[test]
+    fn test_full() {
+        let v = super::str_to_vec("3+4*5^6");
+        let st = super::state(&v);
+
+        let power = Ast::Pow(Box::new(Ast::Num(5)), Box::new(Ast::Num(6)));
+        let product = Ast::Multiplication(Box::new(Ast::Num(4)), Box::new(power));
+        let sum = Ast::Addition(Box::new(Ast::Num(3)), Box::new(product));
+
+        assert_eq!(super::pexpr(st), ParseResult::Ok(sum));
+    }
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/old_manual_parsers.rs	Wed Nov 23 21:13:05 2016 +0100
@@ -0,0 +1,122 @@
+fn expr<'a>(st: State<'a>) -> ParseResult {
+    if st.len() == 0 {
+        err("Empty", st)
+    } else {
+        // Test if addition or subtraction
+        let mut paren_lvl = 0;
+        let mut ix = 0;
+        let mut op_pos: i32 = -1;
+
+        for c in st.iter() {
+            if c == '(' {
+                paren_lvl += 1;
+            } else if c == ')' {
+                paren_lvl -= 1;
+            } else if paren_lvl == 0 && (c == '+' || c == '-') {
+                // yep!
+                op_pos = ix;
+                break;
+            }
+            ix += 1;
+        }
+
+        if op_pos > -1 {
+            let left = pexpr(st.restrict(0, op_pos as usize));
+            let right = pexpr(st.restrict(op_pos as usize + 1, st.len()));
+
+            if left.ok() && right.ok() {
+                match (left, right) {
+                    (ParseResult::Ok(a1), ParseResult::Ok(a2)) => {
+                        if st.at(op_pos as usize) == '-' {
+                            ParseResult::Ok(Ast::Subtraction(Box::new(a1), Box::new(a2)))
+                        } else {
+                            ParseResult::Ok(Ast::Addition(Box::new(a1), Box::new(a2)))
+                        }
+                    }
+                    _ => panic!("Expected OK results"),
+                }
+            } else if !left.ok() {
+                err("Left part of expr is bad", st)
+            } else {
+                err("Right or both parts of expr is|are bad", st)
+            }
+        } else {
+            term(st)
+        }
+    }
+}
+
+fn term<'a>(st: State<'a>) -> ParseResult {
+    if st.len() == 0 {
+        err("Empty", st)
+    } else {
+        // Test if addition or subtraction
+        let mut paren_lvl = 0;
+        let mut ix = 0;
+        let mut op_pos: i32 = -1;
+
+        // Look for operator at top level
+        for c in st.iter() {
+            if c == '(' {
+                paren_lvl += 1;
+            } else if c == ')' {
+                paren_lvl -= 1;
+            } else if paren_lvl == 0 && (c == '*' || c == '/') {
+                // yep!
+                op_pos = ix;
+                break;
+            }
+            ix += 1;
+        }
+
+        if op_pos > -1 {
+            let left = pexpr(st.restrict(0, op_pos as usize));
+            let right = pexpr(st.restrict(op_pos as usize + 1, st.len()));
+
+            if left.ok() && right.ok() {
+                match (left, right) {
+                    (ParseResult::Ok(a1), ParseResult::Ok(a2)) => {
+                        if st.at(op_pos as usize) == '*' {
+                            ParseResult::Ok(Ast::Multiplication(Box::new(a1), Box::new(a2)))
+                        } else {
+                            ParseResult::Ok(Ast::Division(Box::new(a1), Box::new(a2)))
+                        }
+                    }
+                    _ => panic!("Expected OK results"),
+                }
+            } else if !left.ok() {
+                err("Left part of term is bad", st)
+            } else {
+                err("Right or both parts of term is|are bad", st)
+            }
+        } else {
+            sym(st)
+        }
+    }
+}
+fn sym<'a>(st: State<'a>) -> ParseResult {
+    if st.len() == 0 {
+        err("Empty", st)
+    } else if st.at(0) == '-' {
+        match num(st.restrict(1, st.len())) {
+            ParseResult::Ok(Ast::Num(n)) => ParseResult::Ok(Ast::Num(-n)),
+            x => x,
+        }
+    } else {
+        num(st)
+    }
+}
+
+fn num<'a>(st: State<'a>) -> ParseResult {
+    if st.len() == 0 {
+        err("Empty", st)
+    } else if st.iter().all(|c| c >= '0' && c <= '9') {
+        let s = String::from_iter(st.iter());
+        match i64::from_str(&s) {
+            Ok(n) => ParseResult::Ok(Ast::Num(n)),
+            Err(e) => err(&format!("int parse err: {}", e), st),
+        }
+    } else {
+        err("Not a number", st)
+    }
+}