view src/lib.rs @ 5:b02268b0d4b1 draft

Generalize two-hand expression parsing, and add parser for powers
author Lewin Bormann <lbo@spheniscida.de>
date Wed, 23 Nov 2016 21:13:05 +0100
parents 000d0a69ed00
children ad77c2ad4853
line wrap: on
line source

/// Syntax (so far)
///
/// pexpr ::= '(' pexpr ')' | expr
/// expr  ::= pexpr (+|-) pexpr | term
/// term  ::= pexpr (*|/) pexpr | sym
/// sym   ::= -num | num
/// num   ::= 0..9*
///
/// May be expanded by using variables.
///

use std::iter::FromIterator;
use std::str::FromStr;
use std::collections::HashMap;

/// Expression tree
#[derive(Debug, PartialEq)]
enum Ast {
    Addition(Box<Ast>, Box<Ast>),
    Subtraction(Box<Ast>, Box<Ast>),
    Multiplication(Box<Ast>, Box<Ast>),
    Division(Box<Ast>, Box<Ast>),
    Pow(Box<Ast>, Box<Ast>),
    Num(i64),
}

#[derive(Debug)]
struct State<'a> {
    from: usize,
    to: usize,

    text: &'a Vec<char>,
}

impl<'a> State<'a> {
    /// Returns the ix'th character starting from the current parse offset (self.from)
    fn at(&self, ix: usize) -> char {
        if ix > (self.to - self.from) {
            panic!("")
        } else {
            self.text[self.from + ix]
        }
    }

    fn len(&self) -> usize {
        self.to - self.from
    }

    fn restrict(&self, from: usize, to: usize) -> State<'a> {
        if from > to || (to > (self.to - self.from)) {
            panic!(format!("Bad indices ({},{}) in restrict on ({},{})",
                           from,
                           to,
                           self.from,
                           self.to))
        } else {
            State {
                text: self.text,
                from: self.from + from,
                to: self.from + to,
            }
        }
    }

    fn iter(&self) -> StateIter<'a> {
        StateIter {
            ix: self.from,
            max: self.to,
            text: self.text,
        }
    }
}

struct StateIter<'a> {
    ix: usize,
    max: usize,
    text: &'a Vec<char>,
}

impl<'a> Iterator for StateIter<'a> {
    type Item = char;

    fn next(&mut self) -> Option<char> {
        if self.ix < self.max {
            self.ix += 1;
            Some(self.text[self.ix - 1])
        } else {
            None
        }
    }
}

#[derive(Debug, PartialEq)]
enum ParseResult {
    Ok(Ast),
    Err(String, String),
}

impl ParseResult {
    fn err(&self) -> String {
        match self {
            &ParseResult::Ok(_) => String::new(),
            &ParseResult::Err(ref s, ref t) => format!("{} <=> {}", s, t),
        }
    }

    fn ok(&self) -> bool {
        match self {
            &ParseResult::Ok(_) => true,
            _ => false,
        }
    }
}

fn err<'a>(s: &str, st: State<'a>) -> ParseResult {
    ParseResult::Err(s.to_string(),
                     format!("at {}-{}: [{}]",
                             st.from,
                             st.to,
                             String::from_iter(st.iter())))
}

// Parse helpers

fn str_to_vec(s: &str) -> Vec<char> {
    s.chars().collect()
}

fn state<'a>(text: &'a Vec<char>) -> State<'a> {
    State {
        text: text,
        from: 0,
        to: text.len(),
    }
}

/// takes a preprocessed input string (no whitespace)
fn enter(text: Vec<char>) -> ParseResult {
    let st = State {
        from: 0,
        to: text.len(),
        text: &text,
    };
    pexpr(st)
}

// Parsers

/// An expression in parentheses
fn pexpr<'a>(st: State<'a>) -> ParseResult {
    if st.len() == 0 {
        err("Empty", st)
    } else if st.at(0) == '(' {
        if st.at(st.len() - 1) == ')' {
            // parentheses around expression; parse subexpression
            pexpr(st.restrict(1, st.len() - 1))
        } else {
            // Make sure that paren has matching close
            let mut paren_lvl = 0;
            let mut good = false;
            for c in st.iter() {
                if c == '(' {
                    paren_lvl += 1;
                } else if c == ')' && paren_lvl > 1 {
                    paren_lvl -= 1;
                } else if c == ')' {
                    good = true;
                    break;
                } else {
                }
            }

            // Paren is closed somewhere
            if good {
                expr(st)
            } else {
                err("Missing closing paren", st)
            }
        }
    } else {
        // expr(st)
        parse_twohand_expr(expr_op_map(),
                           |st2| {
                               parse_twohand_expr(term_op_map(),
                                                  |st3| parse_twohand_expr(pow_op_map(), sym, st3),
                                                  st2)
                           },
                           st)
    }
}


type TwoHandBuilder = fn(Box<Ast>, Box<Ast>) -> Ast;
type OperatorMap = HashMap<char, TwoHandBuilder>;

fn term_op_map() -> OperatorMap {
    let mut map: OperatorMap = HashMap::new();
    map.insert('*', Ast::Multiplication);
    map.insert('/', Ast::Division);
    map
}

fn expr_op_map() -> OperatorMap {
    let mut map: OperatorMap = HashMap::new();
    map.insert('+', Ast::Addition);
    map.insert('-', Ast::Subtraction);
    map
}

fn pow_op_map() -> OperatorMap {
    let mut map: OperatorMap = HashMap::new();
    map.insert('^', Ast::Pow);
    map
}

/// Parse any expression that has two sides and an infix operator (e.g. '4*5' or '6+7').
///
/// opmap is a map from operator character to a function building an Ast fragment.
///
/// then is a function that is called if the original parsing attempt fails (it's a parser for the
/// next-more-specific construct)
///
/// st is the usual state input.
fn parse_twohand_expr<'a, F>(opmap: OperatorMap, then: F, st: State<'a>) -> ParseResult
    where F: Fn(State<'a>) -> ParseResult
{
    if st.len() == 0 {
        err("Empty", st)
    } else {
        let ops: Vec<char> = opmap.keys().map(|c| *c).collect();
        // Predicate: Is this an operator we're looking for
        let _wanted_operator = |c: char| ops.iter().find(|d| c == **d).is_some();

        // Test if addition or subtraction
        let mut paren_lvl = 0;
        let mut ix = 0;
        let mut op_pos: i32 = -1;

        // Look for operator at top level
        for c in st.iter() {
            if c == '(' {
                paren_lvl += 1;
            } else if c == ')' {
                paren_lvl -= 1;
            } else if paren_lvl == 0 && (_wanted_operator(c)) {
                // yep!
                op_pos = ix;
                break;
            }
            ix += 1;
        }

        if op_pos > -1 {
            let left = pexpr(st.restrict(0, op_pos as usize));
            let right = pexpr(st.restrict(op_pos as usize + 1, st.len()));

            if left.ok() && right.ok() {
                match (left, right) {
                    (ParseResult::Ok(a1), ParseResult::Ok(a2)) => {
                        if let Some(builder) = opmap.get(&st.at(op_pos as usize)) {
                            ParseResult::Ok(builder(Box::new(a1), Box::new(a2)))
                        } else {
                            err("No matching Ast builder found", st)
                        }
                    }
                    _ => panic!("Expected OK results"),
                }
            } else if !left.ok() {
                err("Left part of term is bad", st)
            } else {
                err("Right or both parts of 2-hand expr is|are bad", st)
            }
        } else {
            then(st)
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use super::{ParseResult, Ast};

    #[test]
    fn test_num() {
        let v = super::str_to_vec("234");
        let st = super::state(&v);

        assert_eq!(super::num(st), ParseResult::Ok(Ast::Num(234)));
    }

    #[test]
    fn test_sym() {
        let v = super::str_to_vec("234");
        let st = super::state(&v);

        assert_eq!(super::sym(st), ParseResult::Ok(Ast::Num(234)));

        let v = super::str_to_vec("-234");
        let st = super::state(&v);
        assert_eq!(super::sym(st), ParseResult::Ok(Ast::Num(-234)));

        let v = super::str_to_vec("-a");
        let st = super::state(&v);
        assert_eq!(super::sym(st).err(),
                   "Not a number <=> at 1-2: [a]".to_string());
    }

    #[test]
    fn test_expr() {
        let v = super::str_to_vec("2+4");
        let st = super::state(&v);

        assert_eq!(super::expr(st),
                   ParseResult::Ok(Ast::Addition(Box::new(Ast::Num(2)), Box::new(Ast::Num(4)))));

        let v = super::str_to_vec("2-4");
        let st = super::state(&v);

        assert_eq!(super::expr(st),
                   ParseResult::Ok(Ast::Subtraction(Box::new(Ast::Num(2)), Box::new(Ast::Num(4)))));

        let v = super::str_to_vec("2+4+5");
        let st = super::state(&v);

        assert_eq!(super::expr(st),
                   ParseResult::Ok(Ast::Addition(Box::new(Ast::Num(2)),
                                                 Box::new(Ast::Addition(Box::new(Ast::Num(4)),
                                                                        Box::new(Ast::Num(5)))))));

        let v = super::str_to_vec("2+");
        let st = super::state(&v);

        assert_eq!(super::pexpr(st),
                   ParseResult::Err("Right or both parts of 2-hand expr is|are bad".to_string(),
                                    "at 0-2: [2+]".to_string()));
    }

    #[test]
    fn test_term() {
        let v = super::str_to_vec("3*6");
        let st = super::state(&v);

        assert_eq!(super::pexpr(st),
                   ParseResult::Ok(Ast::Multiplication(Box::new(Ast::Num(3)),
                                                       Box::new(Ast::Num(6)))));
    }

    #[test]
    fn test_pexpr() {
        let v = super::str_to_vec("((3*6))");
        let st = super::state(&v);

        assert_eq!(super::pexpr(st),
                   ParseResult::Ok(Ast::Multiplication(Box::new(Ast::Num(3)),
                                                       Box::new(Ast::Num(6)))));

        let v = super::str_to_vec("((3+6)*2)");
        let st = super::state(&v);

        assert_eq!(super::pexpr(st),
                   ParseResult::Ok(
                       Ast::Multiplication(Box::new(Ast::Addition(Box::new(Ast::Num(3)),
                       Box::new(Ast::Num(6)))), Box::new(Ast::Num(2)))));

        let v = super::str_to_vec("(3+6*2)");
        let st = super::state(&v);

        assert_eq!(super::pexpr(st),
                   ParseResult::Ok(Ast::Addition(Box::new(Ast::Num(3)),
                   Box::new(Ast::Multiplication(Box::new(Ast::Num(6)), Box::new(Ast::Num(2)))))));
    }

    #[test]
    fn test_pow() {
        let v = super::str_to_vec("2^4");
        let st = super::state(&v);

        assert_eq!(super::pexpr(st),
                   ParseResult::Ok(Ast::Pow(Box::new(Ast::Num(2)), Box::new(Ast::Num(4)))));

        let v = super::str_to_vec("2*3^4");
        let st = super::state(&v);

        assert_eq!(super::pexpr(st),
                   ParseResult::Ok(Ast::Multiplication(Box::new(Ast::Num(2)),
                                                       Box::new(Ast::Pow(Box::new(Ast::Num(3)),
                                                                         Box::new(Ast::Num(4)))))));
    }

    #[test]
    fn test_full() {
        let v = super::str_to_vec("3+4*5^6");
        let st = super::state(&v);

        let power = Ast::Pow(Box::new(Ast::Num(5)), Box::new(Ast::Num(6)));
        let product = Ast::Multiplication(Box::new(Ast::Num(4)), Box::new(power));
        let sum = Ast::Addition(Box::new(Ast::Num(3)), Box::new(product));

        assert_eq!(super::pexpr(st), ParseResult::Ok(sum));
    }
}