changeset 13:5eb364ffbdcb

Improve Integer parsing and extend it to all integer types
author Lewin Bormann <lewin@lewin-bormann.info>
date Sun, 02 Jun 2019 23:11:00 +0200
parents a37d7c2aa256
children 4122b5f7562b
files src/combinators.rs src/primitives.rs
diffstat 2 files changed, 137 insertions(+), 28 deletions(-) [+]
line wrap: on
line diff
--- a/src/combinators.rs	Sun Jun 02 15:19:00 2019 +0200
+++ b/src/combinators.rs	Sun Jun 02 23:11:00 2019 +0200
@@ -1,5 +1,3 @@
-use std::ops::Shr;
-
 use crate::parser::{ParseError, ParseResult, Parser};
 use crate::state::ParseState;
 
@@ -312,7 +310,7 @@
 
     #[test]
     fn test_pair() {
-        let mut p = Sequence::new((Int, StringParser::new(" aba".to_string())));
+        let mut p = Sequence::new((Int64::new(), StringParser::new(" aba".to_string())));
         let mut ps = ParseState::new("123 aba");
         assert_eq!(Ok((123, " aba".to_string())), p.parse(&mut ps));
     }
@@ -345,7 +343,7 @@
             StringParser::new("ab"),
             StringParser::new("de"),
             StringParser::new(" "),
-            Transform::new(Int, |i| Ok(i.to_string())),
+            Transform::new(Int64::new(), |i| Ok(i.to_string())),
         ));
         let mut ps = ParseState::new("de 34");
         assert_eq!(Ok("de".to_string()), p.parse(&mut ps));
@@ -355,7 +353,7 @@
 
     #[test]
     fn test_partial_sequence() {
-        let mut p = PartialSequence::new((StringParser::new("a"), StringParser::new("c"), Int));
+        let mut p = PartialSequence::new((StringParser::new("a"), StringParser::new("c"), Int64::new()));
         let mut ps = ParseState::new("acde");
         assert_eq!(
             Ok((Some("a".to_string()), Some("c".to_string()), None)),
@@ -363,7 +361,7 @@
         );
 
         let mut p = PartialSequence::new((
-            Sequence::new((Int, StringParser::new(" "), Int)),
+            Sequence::new((Int64::new(), StringParser::new(" "), Int64::new())),
             StringParser::new("x"),
         ));
         let mut ps = ParseState::new("12 -12 nothing else");
--- a/src/primitives.rs	Sun Jun 02 15:19:00 2019 +0200
+++ b/src/primitives.rs	Sun Jun 02 23:11:00 2019 +0200
@@ -3,7 +3,9 @@
 use crate::state::ParseState;
 
 use std::collections::HashSet;
+use std::error::Error;
 use std::iter::FromIterator;
+use std::str;
 
 pub struct StringParser(String);
 
@@ -44,29 +46,75 @@
     }
 }
 
-pub struct Int;
+pub struct Int<IType: Default + str::FromStr>(IType);
 
-impl Parser for Int {
-    type Result = i64;
+pub type Int128 = Int<i128>;
+pub type Int64 = Int<i64>;
+pub type Int32 = Int<i32>;
+pub type Int16 = Int<i16>;
+pub type Int8 = Int<i8>;
+pub type Uint128 = Int<u128>;
+pub type Uint64 = Int<u64>;
+pub type Uint32 = Int<u32>;
+pub type Uint16 = Int<u16>;
+pub type Uint8 = Int<u8>;
+
+impl<IType: Default + str::FromStr> Int<IType> {
+    pub fn new() -> Int<IType> {
+        Int(IType::default())
+    }
+}
+
+impl<IType: Default + str::FromStr<Err = std::num::ParseIntError> + std::convert::TryFrom<i8>>
+    Parser for Int<IType>
+{
+    type Result = IType;
     fn parse(
         &mut self,
         st: &mut ParseState<impl Iterator<Item = char>>,
     ) -> ParseResult<Self::Result> {
-        let mut negative: i64 = 1;
-        let mut result: i64 = 0;
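+        // Optimization for most ints: characters go into a fixed-size array; a Vec is only allocated once BUFSIZE characters have been collected.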
+        const BUFSIZE: usize = 8;
+        let mut buf: [char; BUFSIZE] = [' '; BUFSIZE];
+        let mut widebuf: Option<Vec<char>> = None;
+        let mut i = 0;
 
-        match st.peek() {
-            None => return Err(ParseError::EOF),
-            Some('-') => negative = -1,
-            Some(c) if c.is_digit(10) => result = result * 10 + ((c as i64) - ('0' as i64)),
-            Some(_) => return Err(ParseError::Fail("not an int", st.index())),
+        if IType::try_from(-1 as i8).is_ok() {
+            // Peek at the first character (a '-' sign or a digit); only done if the integer type can be signed.
+            match st.peek() {
+                None => return Err(ParseError::EOF),
+                Some('-') => {
+                    buf[i] = '-';
+                    i += 1;
+                }
+                Some(c) if c.is_digit(10) => {
+                    buf[i] = c;
+                    i += 1;
+                }
+                Some(_) => return Err(ParseError::Fail("not start of integer", st.index())),
+            }
         }
+
         let hold = st.hold();
-        st.next();
+        if i > 0 {
+            st.next();
+        }
 
+        // Consume digits
         loop {
             match st.next() {
-                Some(c) if c.is_digit(10) => result = result * 10 + ((c as i64) - ('0' as i64)),
+                Some(c) if c.is_digit(10) => {
+                    if widebuf.is_none() {
+                        buf[i] = c;
+                        i += 1;
+                        if i >= BUFSIZE {
+                            widebuf = Some(buf.to_vec());
+                        }
+                    } else {
+                        widebuf.as_mut().unwrap().push(c);
+                        i += 1;
+                    }
+                }
                 Some(_) => {
                     st.undo_next();
                     break;
@@ -74,16 +122,39 @@
                 None => break,
             }
         }
-        st.release(hold);
-        return Ok(result * negative);
+        if i == 0 {
+            st.reset(hold);
+            return Err(ParseError::Fail("no appropriate integer found", st.index()));
+        }
+        let intstr: String;
+        if widebuf.is_none() {
+            intstr = buf[..i].iter().collect();
+        } else {
+            intstr = widebuf.unwrap().iter().collect();
+        }
+        match IType::from_str(&intstr) {
+            Ok(i) => {
+                st.release(hold);
+                Ok(i)
+            }
+            Err(e) => {
+                st.reset(hold);
+                Err(ParseError::ExecFail(e.description().to_string()))
+            }
+        }
     }
 }
 
-pub struct OneOf(HashSet<char>);
+/// OneOf matches a single character that is in its set (or, if created with `new_inverse`, not in its set).
+pub struct OneOf(HashSet<char>, bool);
 
 impl OneOf {
     pub fn new<S: AsRef<str>>(chars: S) -> OneOf {
-        OneOf(chars.as_ref().chars().collect())
+        OneOf(chars.as_ref().chars().collect(), false)
+    }
+    /// Create a OneOf parser that matches any single character *not* in the given set.
+    pub fn new_inverse<S: AsRef<str>>(chars: S) -> OneOf {
+        OneOf(chars.as_ref().chars().collect(), true)
     }
 }
 
@@ -94,11 +165,24 @@
         st: &mut ParseState<impl Iterator<Item = char>>,
     ) -> ParseResult<Self::Result> {
         match st.peek() {
-            Some(c) if self.0.contains(&c) => {
-                st.next();
-                Ok(c)
+            Some(c) => {
+                let present = self.0.contains(&c);
+                if self.1 {
+                    // Inverse mode
+                    if present {
+                        return Err(ParseError::Fail("char (inverse) not matched", st.index()));
+                    }
+                    st.next();
+                    return Ok(c);
+                } else {
+                    if present {
+                        st.next();
+                        return Ok(c);
+                    }
+                    return Err(ParseError::Fail("char not matched", st.index()));
+                }
             }
-            _ => Err(ParseError::Fail("char not matched", st.index())),
+            _ => Err(ParseError::EOF),
         }
     }
 }
@@ -111,6 +195,14 @@
     rp.apply(make_string)
 }
 
+/// A parser that parses a string consisting only of characters not in `chars`, repeated as specified by `rp`.
+fn string_none_of<S: AsRef<str>>(chars: S, rp: RepeatSpec) -> impl Parser<Result = String> {
+    let oo = OneOf::new_inverse(chars);
+    let rp = Repeat::new(oo, rp);
+    let make_string = |charvec: Vec<char>| Ok(String::from_iter(charvec.into_iter()));
+    rp.apply(make_string)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -125,12 +217,24 @@
 
     #[test]
     fn test_parse_int() {
-        let mut s = ParseState::new("-1252 353");
-        let mut ip = Int;
+        let mut s = ParseState::new("-1252 353 354 -1253");
+        let mut ip = Int64::new();
+        let mut up = Uint64::new();
         let mut sp = StringParser::new(" ".to_string());
         assert_eq!(Ok(-1252), ip.parse(&mut s));
         assert_eq!(Ok(" ".to_string()), sp.parse(&mut s));
         assert_eq!(Ok(353), ip.parse(&mut s));
+        assert_eq!(Ok(" ".to_string()), sp.parse(&mut s));
+        assert_eq!(Ok(354), up.parse(&mut s));
+        assert_eq!(Ok(" ".to_string()), sp.parse(&mut s));
+        assert!(up.parse(&mut s).is_err());
+    }
+
+    #[test]
+    fn test_parse_long_int() {
+        let mut s = ParseState::new("123456789");
+        let mut up = Uint128::new();
+        assert_eq!(Ok(123456789 as u128), up.parse(&mut s));
     }
 
     #[test]
@@ -139,4 +243,11 @@
         let mut p = string_of("abcd", RepeatSpec::Min(1));
         assert_eq!(Ok("aaabc".to_string()), p.parse(&mut st));
     }
+
+    #[test]
+    fn test_string_none_of() {
+        let mut st = ParseState::new("aaabcxxzy");
+        let mut p = string_none_of("xyz", RepeatSpec::Min(1));
+        assert_eq!(Ok("aaabc".to_string()), p.parse(&mut st));
+    }
 }