changeset 0:6286f0ed89a7

Initial commit
author Lewin Bormann <lewin@lewin-bormann.info>
date Wed, 29 May 2019 23:32:37 +0200
parents
children 0ad857b005ae
files .gitignore Cargo.toml src/lib.rs src/parser.rs src/primitives.rs src/state.rs
diffstat 6 files changed, 245 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.gitignore	Wed May 29 23:32:37 2019 +0200
@@ -0,0 +1,3 @@
+/target
+**/*.rs.bk
+Cargo.lock
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Cargo.toml	Wed May 29 23:32:37 2019 +0200
@@ -0,0 +1,10 @@
+[package]
+name = "rcombinators"
+version = "0.1.0"
+authors = ["Lewin Bormann <lewin@lewin-bormann.info>"]
+edition = "2018"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+utf8reader = "0.1.0"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib.rs	Wed May 29 23:32:37 2019 +0200
@@ -0,0 +1,5 @@
+#![allow(dead_code)]
+
+mod state;
+mod parser;
+mod primitives;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/parser.rs	Wed May 29 23:32:37 2019 +0200
@@ -0,0 +1,26 @@
+
+use std::fmt;
+
+use crate::state::ParseState;
+
+/// Error returned by a parser that could not produce a result.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum ParseError {
+    /// Rendered as `EOF` by `Display`.
+    /// NOTE(review): presumably signals exhausted input; no code in this
+    /// changeset constructs it -- confirm intended use.
+    EOF,
+    /// A failed match: a static description plus the position (as reported
+    /// by the parse state) at which the failure was detected.
+    Fail(&'static str, usize),
+}
+
+impl fmt::Display for ParseError {
+    /// Writes `EOF`, or `Parse fail: <description> at <position>`.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            ParseError::EOF => f.write_str("EOF"),
+            ParseError::Fail(s, pos) => write!(f, "Parse fail: {} at {}", s, pos),
+        }
+    }
+}
+
+// `Debug` + `Display` are already provided, so the default `Error` methods
+// suffice. This lets callers use `?` into `Box<dyn std::error::Error>`.
+impl std::error::Error for ParseError {}
+
+/// Outcome of running a parser: the parsed value on success, a
+/// `ParseError` otherwise.
+pub type ParseResult<R> = Result<R, ParseError>;
+
+/// Common interface for parsers operating on a `ParseState`.
+pub trait Parser {
+    /// Type of the value produced by a successful parse.
+    type Result;
+
+    /// Runs this parser against the character stream held by `st`.
+    fn parse(
+        &mut self,
+        st: &mut ParseState<impl Iterator<Item = char>>,
+    ) -> ParseResult<Self::Result>;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/primitives.rs	Wed May 29 23:32:37 2019 +0200
@@ -0,0 +1,47 @@
+
+use crate::state::ParseState;
+use crate::parser::{Parser, ParseResult, ParseError};
+
+/// Parser that matches exactly the string it wraps.
+pub struct StringParser(pub String);
+
+impl Parser for StringParser {
+    type Result = String;
+
+    /// Matches the wrapped string char by char at the current position.
+    ///
+    /// On success the matched chars are consumed and a clone of the wrapped
+    /// string is returned. On the first mismatch (or end of input) the state
+    /// is reset to where parsing started and `ParseError::Fail` is returned,
+    /// carrying the index at which the mismatch was detected.
+    fn parse(
+        &mut self,
+        st: &mut ParseState<impl Iterator<Item = char>>,
+    ) -> ParseResult<Self::Result> {
+        let hold = st.hold();
+        // Only peek while there is still something to match, so a successful
+        // parse never pulls a char beyond the match from the source.
+        for want in self.0.chars() {
+            if st.peek() != Some(want) {
+                let ix = st.index();
+                st.reset(hold);
+                return Err(ParseError::Fail("string not matched", ix));
+            }
+            // Consume the char that was just peeked.
+            st.next();
+        }
+        st.release(hold);
+        Ok(self.0.clone())
+    }
+}
+
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// A literal at the start of the input is matched and consumed.
+    #[test]
+    fn test_parse_string() {
+        let mut state = super::ParseState::new("abc def");
+        let mut parser = StringParser(String::from("abc "));
+        let parsed = parser.parse(&mut state);
+        assert_eq!(Ok(String::from("abc ")), parsed);
+        assert_eq!(4, state.index());
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/state.rs	Wed May 29 23:32:37 2019 +0200
@@ -0,0 +1,154 @@
+use std::io;
+use std::str::Chars;
+
+use utf8reader;
+
+/// Adapter exposing a `utf8reader::UTF8Reader` as an iterator over `char`.
+struct UTF8Reader<R: io::Read>(utf8reader::UTF8Reader<R>);
+
+impl<R: io::Read> Iterator for UTF8Reader<R> {
+    type Item = char;
+
+    /// Returns the next char the wrapped reader yields as `Ok`, skipping any
+    /// `Err` items; returns `None` once the wrapped reader yields `None`.
+    fn next(&mut self) -> Option<Self::Item> {
+        while let Some(item) = self.0.next() {
+            // Errors are dropped on purpose; keep pulling until an `Ok`.
+            if let Ok(c) = item {
+                return Some(c);
+            }
+        }
+        None
+    }
+}
+
+/// ParseState encapsulates a stream of chars.
+///
+/// Chars pulled from the source are kept in `buf`, so the read position can
+/// be moved back to an earlier point (see `Hold`).
+#[derive(Debug)]
+pub struct ParseState<Iter: Iterator<Item = char>> {
+    /// Every char pulled from the source so far.
+    buf: Vec<char>,
+    /// The char source; set to `None` once it has reported exhaustion.
+    next: Option<Iter>,
+
+    /// Index into `buf` of the next char to hand out.
+    current: usize,
+    // TODO: Implement garbage collection on `buf`
+}
+
+/// A remembered read position of a `ParseState`.
+#[derive(Debug)]
+pub struct Hold(usize);
+
+impl<'a> ParseState<Chars<'a>> {
+    /// Creates a state that reads the chars of `s`.
+    pub fn new(s: &'a str) -> ParseState<Chars<'a>> {
+        let source = s.chars();
+        ParseState {
+            buf: Vec::new(),
+            next: Some(source),
+            current: 0,
+        }
+    }
+
+    /// Creates a state that reads chars decoded from `r` through the
+    /// `UTF8Reader` adapter.
+    pub fn from_reader<R: io::Read>(r: R) -> ParseState<impl Iterator<Item = char>> {
+        let decoder = UTF8Reader(utf8reader::UTF8Reader::new(r));
+        ParseState {
+            buf: Vec::new(),
+            next: Some(decoder),
+            current: 0,
+        }
+    }
+}
+
+impl<Iter: Iterator<Item = char>> ParseState<Iter> {
+    const PREFILL_DEFAULT: usize = 1024;
+
+    /// Returns the current read position, counted in chars buffered so far.
+    pub fn index(&self) -> usize {
+        self.current
+    }
+
+    /// Remembers the current read position so it can be restored with `reset`.
+    pub fn hold(&self) -> Hold {
+        Hold(self.current)
+    }
+
+    /// Gives up a hold without moving the read position. Currently a no-op.
+    pub fn release(&mut self, _h: Hold) {
+        // TODO: Implement when hold tracking is needed (for garbage collection).
+    }
+
+    /// Moves the read position back to where `h` was taken.
+    pub fn reset(&mut self, h: Hold) {
+        self.current = h.0;
+    }
+
+    /// Returns `true` once the source has reported exhaustion and every
+    /// buffered char has been read.
+    pub fn finished(&self) -> bool {
+        self.next.is_none() && self.current == self.buf.len()
+    }
+
+    /// Returns the buffered char at the read position, if any, without
+    /// pulling from the source.
+    pub fn current(&self) -> Option<Iter::Item> {
+        self.buf.get(self.current).copied()
+    }
+
+    /// Pulls up to `n` chars from the source into the buffer. Does not move
+    /// the read position and does not mark the source as exhausted.
+    fn prefill(&mut self, n: usize) {
+        if let Some(next) = self.next.as_mut() {
+            self.buf.extend(next.take(n));
+        }
+    }
+
+    /// Returns the char the next call to `next()` will yield, without
+    /// consuming it; `None` at end of input.
+    pub fn peek(&mut self) -> Option<Iter::Item> {
+        // Already buffered (e.g. after a `reset`): answer from the buffer.
+        // This must be `buf[current]`, the char `next()` would return.
+        if let Some(&c) = self.buf.get(self.current) {
+            return Some(c);
+        }
+        // Nothing buffered ahead: pull one char through `next()`, which
+        // stores it in `buf`, then step back so it stays unread.
+        let c = self.next()?;
+        self.current -= 1;
+        Some(c)
+    }
+}
+
+impl<Iter: Iterator<Item = char>> Iterator for ParseState<Iter> {
+    type Item = char;
+
+    /// Yields the char at the read position and advances past it.
+    ///
+    /// Buffered chars are replayed first; otherwise one char is pulled from
+    /// the source and recorded in the buffer. When the source reports
+    /// exhaustion it is dropped and `None` is returned from then on.
+    fn next(&mut self) -> Option<Iter::Item> {
+        // Replay from the buffer while the read position is behind its end.
+        if self.current < self.buf.len() {
+            let c = self.buf[self.current];
+            self.current += 1;
+            return Some(c);
+        }
+
+        // No source left means the input is exhausted.
+        let source = self.next.as_mut()?;
+        match source.next() {
+            Some(c) => {
+                self.buf.push(c);
+                self.current += 1;
+                Some(c)
+            }
+            None => {
+                self.next = None;
+                None
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::parser::Parser;
+    use crate::primitives;
+
+    /// Plain iteration, then hold / peek / reset on a string-backed state.
+    #[test]
+    fn init() {
+        let mut state = ParseState::new("Hello");
+        assert_eq!(Some('H'), state.next());
+        let remainder: String = state.collect();
+        assert_eq!("ello", remainder);
+
+        let mut state = ParseState::new("Hello");
+        let mark = state.hold();
+        for _ in 0..3 {
+            state.next();
+        }
+        assert_eq!(Some('l'), state.peek());
+        assert_eq!(Some('l'), state.next());
+        state.reset(mark);
+        let replayed: String = state.collect();
+        assert_eq!("Hello", replayed);
+    }
+
+    /// A reader-backed state decodes multi-byte chars for a string parser.
+    #[test]
+    fn test_utf8_stream() {
+        let input = "Hüðslþ".to_owned();
+        let mut ps = ParseState::from_reader(input.as_bytes());
+        assert_eq!(Some('H'), ps.next());
+        let mut parser = primitives::StringParser("üð".to_string());
+        assert_eq!(Ok("üð".to_string()), parser.parse(&mut ps));
+    }
+}