Mercurial > lbo > hg > rcombinators
changeset 21:d90066bf03d0
Add more documentation
author | Lewin Bormann <lewin@lewin-bormann.info> |
---|---|
date | Tue, 04 Jun 2019 23:30:18 +0200 |
parents | 728109a7df09 |
children | 435e4e609128 |
files | src/combinators.rs src/lib.rs src/parser.rs src/primitives.rs src/state.rs |
diffstat | 5 files changed, 118 insertions(+), 25 deletions(-) [+] |
line wrap: on
line diff
--- a/src/combinators.rs Tue Jun 04 18:49:36 2019 +0200 +++ b/src/combinators.rs Tue Jun 04 23:30:18 2019 +0200 @@ -353,7 +353,6 @@ } } - #[cfg(test)] mod tests { use super::*;
--- a/src/lib.rs Tue Jun 04 18:49:36 2019 +0200 +++ b/src/lib.rs Tue Jun 04 23:30:18 2019 +0200 @@ -2,17 +2,60 @@ //! rcombinators is a parser combinator library without special magic. It aims to be both easy to //! use and reasonably fast, without using too much special syntax or macros. +//! +//! You will notice two kinds of parsers that however differ only in minor aspects: +//! +//! * Ones starting with a capital letter are `struct`s (such as `Int`, `Sequence`). You can +//! create them using `ParserName::new()`, or a specialized constructor method. +//! * Ones starting with a lower case letter (and in snake case, such as `string_of`). Those are +//! functions returning `Parser` objects combined from one or more elementary parsers. +//! +//! The resulting objects implementing the `Parser` trait are identical to use. +//! +//! Note that not all primitives and combinators are exported at the crate level! Only "important" +//! ones are. +//! +//! Here's a short example of how to use it: +//! +//! ``` +//! use rcombinators::combinators; +//! use rcombinators::primitives; +//! use rcombinators::ParseState; +//! use rcombinators::Parser; +//! +//! // Goal: Parse the string between the parentheses, and then the float. +//! let mut ps = ParseState::new("(a1b3c4) -1.25e-1"); +//! +//! let mut some_string = combinators::Alternative::new( +//! (primitives::StringParser::new("xyz"), +//! primitives::string_of("abcde12345", +//! combinators::RepeatSpec::Min(1)))); +//! let mut in_parens = combinators::Sequence::new( +//! (primitives::StringParser::new("("), +//! some_string, +//! primitives::StringParser::new(")"))); +//! assert_eq!(Ok( +//! ("(".to_string(), +//! "a1b3c4".to_string(), +//! ")".to_string())), in_parens.parse(&mut ps)); +//! +//! // You can continue using a ParseState, for example when implementing your own parsers. +//! let _ = primitives::whitespace().parse(&mut ps); +//! 
// Parsers returned by functions such as float() should be cached when used more frequently. +//! // This saves time due to not needing to construct the parsers repeatedly. +//! assert_eq!(Ok(-0.125), primitives::float().parse(&mut ps)); +//! ``` #[allow(unused_imports)] #[macro_use] extern crate time_test; -mod combinators; -mod parser; -mod primitives; +pub mod combinators; +pub mod parser; +pub mod primitives; mod state; -pub use combinators::*; -pub use parser::*; -pub use primitives::*; -pub use state::*; +pub use combinators::{Alternative, PartialSequence, Repeat, Sequence, Transform}; +pub use parser::{execerr, Parser}; +pub use primitives::{float, string_none_of, string_of, whitespace, Int, StringParser}; +pub use state::ParseState;
--- a/src/parser.rs Tue Jun 04 18:49:36 2019 +0200 +++ b/src/parser.rs Tue Jun 04 23:30:18 2019 +0200 @@ -1,6 +1,6 @@ use std::fmt; -use crate::combinators; +use crate::combinators::Transform; use crate::state::ParseState; #[derive(Debug, PartialEq)] @@ -36,6 +36,25 @@ pub type ParseResult<R> = Result<R, ParseError>; +/// Parser is the central trait of `rcombinators`. Every object that can convert input into a Rust +/// value implements this trait. +/// +/// For example, the `Int32` parser parses a 32 bit signed integer, +/// the `whitespace` parser consumes whitespace, and the `Sequence` combinator runs a sequence of +/// sub-parsers, succeeding only if every parser succeeds: +/// +/// ``` +/// use rcombinators::combinators; +/// use rcombinators::primitives; +/// use rcombinators::ParseState; +/// use rcombinators::Parser; +/// +/// let mut ps = ParseState::new("123 456"); +/// let mut parser = combinators::Sequence::new((primitives::Int32::new(), +/// primitives::whitespace(), primitives::Int32::new())); +/// assert_eq!(Ok((123, (), 456)), parser.parse(&mut ps)); +/// ``` +/// pub trait Parser { type Result; @@ -49,10 +68,10 @@ fn apply<R2, F: Fn(Self::Result) -> ParseResult<R2>>( self, f: F, - ) -> combinators::Transform<Self::Result, R2, Self, F> + ) -> Transform<Self::Result, R2, Self, F> where Self: std::marker::Sized, { - combinators::Transform::new(self, f) + Transform::new(self, f) } }
--- a/src/primitives.rs Tue Jun 04 18:49:36 2019 +0200 +++ b/src/primitives.rs Tue Jun 04 23:30:18 2019 +0200 @@ -53,15 +53,25 @@ /// This is an optimized parser, not using combinators. pub struct Int<IType: Default + str::FromStr>(IType); +/// Parse a 128 bit signed integer. pub type Int128 = Int<i128>; +/// Parse a 64 bit signed integer. pub type Int64 = Int<i64>; +/// Parse a 32 bit signed integer. pub type Int32 = Int<i32>; +/// Parse a 16 bit signed integer. pub type Int16 = Int<i16>; +/// Parse an 8 bit signed integer. pub type Int8 = Int<i8>; +/// Parse a 128 bit unsigned integer. pub type Uint128 = Int<u128>; +/// Parse a 64 bit unsigned integer. pub type Uint64 = Int<u64>; +/// Parse a 32 bit unsigned integer. pub type Uint32 = Int<u32>; +/// Parse a 16 bit unsigned integer. pub type Uint16 = Int<u16>; +/// Parse an 8 bit unsigned integer. pub type Uint8 = Int<u8>; impl<IType: Default + str::FromStr> Int<IType> { @@ -175,21 +185,24 @@ } let mut multiplier: f64 = if s.is_some() { -1. } else { 1. }; if let Some((_, e)) = exp { - multiplier = (10. as f64).powi(e); + multiplier *= (10. as f64).powi(e); } return Ok(multiplier * (bigf + littlef)); } /// float parses floats in the format of `[-]dd[.[dd]][e[-]ddd]`. /// -/// TODO: Compare with "native" parser, i.e. without combinators, and keep this as example. +/// TODO: Compare speed with "native" parser, i.e. without combinators, and keep this as example. 
pub fn float() -> impl Parser<Result = f64> { let digits_set = "0123456789"; let minus = Maybe::new(Ignore::new(StringParser::new("-"))); let digits = string_of(digits_set, RepeatSpec::Min(1)); let point = Maybe::new(StringParser::new(".")); let smalldigits = Maybe::new(string_of(digits_set, RepeatSpec::Min(1))); - let exp = Maybe::new(Sequence::new((Ignore::new(StringParser::new("e")), Int32::new()))); + let exp = Maybe::new(Sequence::new(( + Ignore::new(StringParser::new("e")), + Int32::new(), + ))); let parser = Sequence::new((minus, digits, point, smalldigits, exp)) .apply(|(m, d, p, sd, exp)| assemble_float(m, d, p, sd, exp)); parser @@ -251,7 +264,7 @@ } /// A parser that parses a string consisting of characters `chars`. -fn string_of<S: AsRef<str>>(chars: S, rp: RepeatSpec) -> impl Parser<Result = String> { +pub fn string_of<S: AsRef<str>>(chars: S, rp: RepeatSpec) -> impl Parser<Result = String> { let oo = OneOf::new(chars); let rp = Repeat::new(oo, rp); let make_string = |charvec: Vec<char>| Ok(String::from_iter(charvec.into_iter())); @@ -259,13 +272,18 @@ } /// A parser that parses a string consisting of any characters not in the set. -fn string_none_of<S: AsRef<str>>(chars: S, rp: RepeatSpec) -> impl Parser<Result = String> { +pub fn string_none_of<S: AsRef<str>>(chars: S, rp: RepeatSpec) -> impl Parser<Result = String> { let oo = OneOf::new_none_of(chars); let rp = Repeat::new(oo, rp); let make_string = |charvec: Vec<char>| Ok(String::from_iter(charvec.into_iter())); rp.apply(make_string) } +/// whitespace consumes any number of tabs, spaces, newlines. +pub fn whitespace() -> impl Parser<Result = ()> { + Ignore::new(Repeat::new(OneOf::new(" \n\r\t"), RepeatSpec::Any)) +} + #[cfg(test)] mod tests { use super::*; @@ -306,9 +324,9 @@ #[test] fn test_parse_floats() { - let mut ps = ParseState::new("1 1. 1.5 -1.5 -1.75 2.5e-4"); + let mut ps = ParseState::new("1 1. 
1.5 -1.5 -1.75 2.5e-4 -2e-2"); let mut p = float(); - let want = vec![1., 1., 1.5, -1.5, -1.75, 2.5e-4]; + let want = vec![1., 1., 1.5, -1.5, -1.75, 2.5e-4, -0.02]; for &f in want.iter() { assert_eq!(Ok(f), p.parse(&mut ps)); let _ = StringParser::new(" ").parse(&mut ps);
--- a/src/state.rs Tue Jun 04 18:49:36 2019 +0200 +++ b/src/state.rs Tue Jun 04 23:30:18 2019 +0200 @@ -28,6 +28,9 @@ // TODO: Implement garbage collection on `buf` } +/// A Hold represents the parsing state at a certain point. It can be used to "un-consume" input. +/// Currently, a panic occurs if a `Hold` object is dropped without first releasing or resetting it +/// using `ParseState::release()` or `ParseState::reset()`. pub struct Hold { ix: usize, released: bool, @@ -52,6 +55,7 @@ } impl<'a> ParseState<Chars<'a>> { + /// Initialize ParseState from a string. pub fn new(s: &'a str) -> ParseState<Chars<'a>> { ParseState { buf: vec![], @@ -59,6 +63,7 @@ current: 0, } } + /// Initialize ParseState from a UTF-8 encoded source. pub fn from_reader<R: io::Read>(r: R) -> ParseState<impl Iterator<Item = char>> { ParseState { buf: vec![], @@ -70,41 +75,50 @@ impl<Iter: Iterator<Item = char>> ParseState<Iter> { const PREFILL_DEFAULT: usize = 1024; + + /// Return current index in input. pub fn index(&mut self) -> usize { self.current } + + /// Remember the current position in the input. pub fn hold(&mut self) -> Hold { Hold::new(self.current) } + + /// Notify the ParseState that a `Hold` is no longer needed (and the referenced piece of input + /// could be cleaned up, for example). pub fn release(&mut self, mut h: Hold) { // TODO: Implement when hold tracking is needed (for garbage collection). h.defuse(); } + + /// Reset state to what it was when `h` was created. pub fn reset(&mut self, mut h: Hold) { self.current = h.ix; h.defuse(); } + + /// Returns true if no input is left. pub fn finished(&self) -> bool { self.next.is_none() && self.current == self.buf.len() } + + /// Shorthand for using a hold to undo a single call to `next()`. 
pub fn undo_next(&mut self) { assert!(self.current > 0); self.current -= 1; } - pub fn current(&self) -> Option<Iter::Item> { - if self.current < self.buf.len() { - Some(self.buf[self.current]) - } else { - None - } - } + /// Fill buffer from source with at most `n` characters. fn prefill(&mut self, n: usize) { if let Some(next) = self.next.as_mut() { let mut v: Vec<char> = next.take(n).collect(); self.buf.append(&mut v) } } + + /// Return next character in input without advancing. pub fn peek(&mut self) -> Option<Iter::Item> { if self.current < self.buf.len() { return Some(self.buf[self.current]);