Mercurial > lbo > hg > rcombinators
changeset 0:6286f0ed89a7
Initial commit
author | Lewin Bormann <lewin@lewin-bormann.info> |
---|---|
date | Wed, 29 May 2019 23:32:37 +0200 |
parents | |
children | 0ad857b005ae |
files | .gitignore Cargo.toml src/lib.rs src/parser.rs src/primitives.rs src/state.rs |
diffstat | 6 files changed, 245 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.gitignore Wed May 29 23:32:37 2019 +0200 @@ -0,0 +1,3 @@ +/target +**/*.rs.bk +Cargo.lock
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Cargo.toml Wed May 29 23:32:37 2019 +0200 @@ -0,0 +1,10 @@ +[package] +name = "rcombinators" +version = "0.1.0" +authors = ["Lewin Bormann <lewin@lewin-bormann.info>"] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +utf8reader = "0.1.0"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/lib.rs Wed May 29 23:32:37 2019 +0200 @@ -0,0 +1,5 @@ +#![allow(dead_code)] + +mod state; +mod parser; +mod primitives;
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/parser.rs Wed May 29 23:32:37 2019 +0200 @@ -0,0 +1,26 @@ + +use std::fmt; + +use crate::state::ParseState; + +#[derive(Debug, PartialEq)] +pub enum ParseError { + EOF, + Fail(&'static str, usize), +} + +impl fmt::Display for ParseError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + ParseError::EOF => f.write_str("EOF"), + ParseError::Fail(s, pos) => write!(f, "Parse fail: {} at {}", s, pos), + } + } +} + +pub type ParseResult<R> = Result<R, ParseError>; + +pub trait Parser { + type Result; + fn parse(&mut self, st: &mut ParseState<impl Iterator<Item=char>>) -> ParseResult<Self::Result>; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/primitives.rs Wed May 29 23:32:37 2019 +0200 @@ -0,0 +1,47 @@ + +use crate::state::ParseState; +use crate::parser::{Parser, ParseResult, ParseError}; + +pub struct StringParser(pub String); + +impl Parser for StringParser { + type Result = String; + fn parse(&mut self, st: &mut ParseState<impl Iterator<Item=char>>) -> ParseResult<Self::Result> { + let mut cs = self.0.chars(); + let expect = self.0.len(); + let mut have = 0; + let hold = st.hold(); + loop { + let (next, pk) = (cs.next(), st.peek()); + if next.is_none() || pk.is_none() { + break + } + if next != pk { + break + } + let c = st.next().unwrap(); + have += c.len_utf8(); + } + if expect == have { + st.release(hold); + return Ok(self.0.clone()); + } + let ix = st.index(); + st.reset(hold); + return Err(ParseError::Fail("string not matched", ix)) + } +} + + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_string() { + let mut s = super::ParseState::new("abc def"); + let mut p = StringParser("abc ".to_owned()); + assert_eq!(Ok("abc ".to_owned()), p.parse(&mut s)); + assert_eq!(4, s.index()); + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/state.rs Wed May 29 23:32:37 2019 +0200 @@ -0,0 +1,154 @@ +use std::io; +use std::str::Chars; + +use utf8reader; + +struct UTF8Reader<R: io::Read>(utf8reader::UTF8Reader<R>); + +impl<R: io::Read> Iterator for UTF8Reader<R> { + type Item = char; + fn next(&mut self) -> Option<Self::Item> { + loop { + match self.0.next() { + None => return None, + Some(Err(_)) => continue, + Some(Ok(c)) => return Some(c), + } + } + } +} + +/// ParseState encapsulates a stream of chars. +#[derive(Debug)] +pub struct ParseState<Iter: Iterator<Item = char>> { + buf: Vec<char>, + next: Option<Iter>, + + current: usize, + // TODO: Implement garbage collection on `buf` +} + +pub struct Hold(usize); + +impl<'a> ParseState<Chars<'a>> { + pub fn new(s: &'a str) -> ParseState<Chars<'a>> { + ParseState { + buf: vec![], + next: Some(s.chars()), + current: 0, + } + } + pub fn from_reader<R: io::Read>(r: R) -> ParseState<impl Iterator<Item = char>> { + ParseState { + buf: vec![], + next: Some(UTF8Reader(utf8reader::UTF8Reader::new(r))), + current: 0, + } + } +} + +impl<Iter: Iterator<Item = char>> ParseState<Iter> { + const PREFILL_DEFAULT: usize = 1024; + pub fn index(&mut self) -> usize { + self.current + } + pub fn hold(&mut self) -> Hold { + Hold(self.current) + } + pub fn release(&mut self, _h: Hold) { + // TODO: Implement when hold tracking is needed (for garbage collection). + } + pub fn reset(&mut self, h: Hold) { + self.current = h.0; + } + pub fn finished(&self) -> bool { + self.next.is_none() && self.current == self.buf.len() + } + + pub fn current(&self) -> Option<Iter::Item> { + if self.current < self.buf.len() { + Some(self.buf[self.current]) + } else { + None + } + } + + fn prefill(&mut self, n: usize) { + if let Some(next) = self.next.as_mut() { + let mut v: Vec<char> = next.take(n).collect(); + self.buf.append(&mut v) + } + } + pub fn peek(&mut self) -> Option<Iter::Item> { + if self.current + 1 < self.buf.len() { + Some(self.buf[self.current + 1]) + } else { + let c = self.next(); + if c == None { + return None; + } + self.current -= 1; + c + } + } +} + +impl<Iter: Iterator<Item = char>> Iterator for ParseState<Iter> { + type Item = char; + + fn next(&mut self) -> Option<Iter::Item> { + if self.current < self.buf.len() { + self.current += 1; + Some(self.buf[self.current - 1]) + } else if let Some(cs) = self.next.as_mut() { + if let Some(c) = cs.next() { + self.buf.push(c); + self.current += 1; + Some(c) + } else { + self.next = None; + None + } + } else { + None + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::parser::Parser; + + #[test] + fn init() { + let mut s = ParseState::new("Hello"); + assert_eq!(Some('H'), s.next()); + let rest: String = s.collect(); + assert_eq!("ello", rest); + + let mut s = ParseState::new("Hello"); + let hold = s.hold(); + s.next(); + s.next(); + s.next(); + assert_eq!(Some('l'), s.peek()); + assert_eq!(Some('l'), s.next()); + s.reset(hold); + let rest: String = s.collect(); + assert_eq!("Hello", rest); + } + + use crate::primitives; + + #[test] + fn test_utf8_stream() { + let s = "Hüðslþ".to_owned(); + let mut ps = ParseState::from_reader(s.as_bytes()); + assert_eq!(Some('H'), ps.next()); + assert_eq!( + Ok("üð".to_string()), + primitives::StringParser("üð".to_string()).parse(&mut ps) + ); + } +}