changeset 21:d90066bf03d0

Add more documentation
author Lewin Bormann <lewin@lewin-bormann.info>
date Tue, 04 Jun 2019 23:30:18 +0200
parents 728109a7df09
children 435e4e609128
files src/combinators.rs src/lib.rs src/parser.rs src/primitives.rs src/state.rs
diffstat 5 files changed, 118 insertions(+), 25 deletions(-) [+]
line wrap: on
line diff
--- a/src/combinators.rs	Tue Jun 04 18:49:36 2019 +0200
+++ b/src/combinators.rs	Tue Jun 04 23:30:18 2019 +0200
@@ -353,7 +353,6 @@
     }
 }
 
-
 #[cfg(test)]
 mod tests {
     use super::*;
--- a/src/lib.rs	Tue Jun 04 18:49:36 2019 +0200
+++ b/src/lib.rs	Tue Jun 04 23:30:18 2019 +0200
@@ -2,17 +2,60 @@
 
 //! rcombinators is a parser combinator library without special magic. It aims to be both easy to
 //! use and reasonably fast, without using too much special syntax or macros.
+//!
+//! You will notice two kinds of parsers, which differ only in minor aspects:
+//!
+//!   * Ones starting with a capital letter are `struct`s (such as `Int`, `Sequence`). You can
+//!     create them using `ParserName::new()`, or a specialized constructor method.
+//!   * Ones starting with a lower case letter (and in snake case, such as `string_of`). Those are
+//!     functions returning `Parser` objects combined from one or more elementary parsers.
+//!
+//! The resulting objects implementing the `Parser` trait are identical to use.
+//!
+//! Note that not all primitives and combinators are exported at the crate level! Only "important"
+//! ones are.
+//!
+//! Here's a short example of how to use it:
+//!
+//! ```
+//! use rcombinators::combinators;
+//! use rcombinators::primitives;
+//! use rcombinators::ParseState;
+//! use rcombinators::Parser;
+//!
+//! // Goal: Parse the string between the parentheses, and then the float.
+//! let mut ps = ParseState::new("(a1b3c4) -1.25e-1");
+//!
+//! let mut some_string = combinators::Alternative::new(
+//!     (primitives::StringParser::new("xyz"),
+//!      primitives::string_of("abcde12345",
+//!      combinators::RepeatSpec::Min(1))));
+//! let mut in_parens = combinators::Sequence::new(
+//!     (primitives::StringParser::new("("),
+//!      some_string,
+//!      primitives::StringParser::new(")")));
+//! assert_eq!(Ok(
+//!     ("(".to_string(),
+//!      "a1b3c4".to_string(),
+//!      ")".to_string())), in_parens.parse(&mut ps));
+//!
+//! // You can continue using a ParseState, for example when implementing your own parsers.
+//! let _ = primitives::whitespace().parse(&mut ps);
+//! // Parsers returned by functions such as float() should be cached when used more frequently.
+//! // This saves time due to not needing to construct the parsers repeatedly.
+//! assert_eq!(Ok(-0.125), primitives::float().parse(&mut ps));
+//! ```
 
 #[allow(unused_imports)]
 #[macro_use]
 extern crate time_test;
 
-mod combinators;
-mod parser;
-mod primitives;
+pub mod combinators;
+pub mod parser;
+pub mod primitives;
 mod state;
 
-pub use combinators::*;
-pub use parser::*;
-pub use primitives::*;
-pub use state::*;
+pub use combinators::{Alternative, PartialSequence, Repeat, Sequence, Transform};
+pub use parser::{execerr, Parser};
+pub use primitives::{float, string_none_of, string_of, whitespace, Int, StringParser};
+pub use state::ParseState;
--- a/src/parser.rs	Tue Jun 04 18:49:36 2019 +0200
+++ b/src/parser.rs	Tue Jun 04 23:30:18 2019 +0200
@@ -1,6 +1,6 @@
 use std::fmt;
 
-use crate::combinators;
+use crate::combinators::Transform;
 use crate::state::ParseState;
 
 #[derive(Debug, PartialEq)]
@@ -36,6 +36,25 @@
 
 pub type ParseResult<R> = Result<R, ParseError>;
 
+/// Parser is the central trait of `rcombinators`. Every object that can convert input into a Rust
+/// value implements this trait.
+///
+/// For example, the `Int32` parser parses a 32 bit signed integer,
+/// the `whitespace` parser consumes whitespace, and the `Sequence` combinator runs a sequence of
+/// sub-parsers, succeeding only if every parser succeeds:
+///
+/// ```
+/// use rcombinators::combinators;
+/// use rcombinators::primitives;
+/// use rcombinators::ParseState;
+/// use rcombinators::Parser;
+///
+/// let mut ps = ParseState::new("123 456");
+/// let mut parser = combinators::Sequence::new((primitives::Int32::new(),
+///     primitives::whitespace(), primitives::Int32::new()));
+/// assert_eq!(Ok((123, (), 456)), parser.parse(&mut ps));
+/// ```
+///
 pub trait Parser {
     type Result;
 
@@ -49,10 +68,10 @@
     fn apply<R2, F: Fn(Self::Result) -> ParseResult<R2>>(
         self,
         f: F,
-    ) -> combinators::Transform<Self::Result, R2, Self, F>
+    ) -> Transform<Self::Result, R2, Self, F>
     where
         Self: std::marker::Sized,
     {
-        combinators::Transform::new(self, f)
+        Transform::new(self, f)
     }
 }
--- a/src/primitives.rs	Tue Jun 04 18:49:36 2019 +0200
+++ b/src/primitives.rs	Tue Jun 04 23:30:18 2019 +0200
@@ -53,15 +53,25 @@
 /// This is an optimized parser, not using combinators.
 pub struct Int<IType: Default + str::FromStr>(IType);
 
+/// Parse a 128 bit signed integer.
 pub type Int128 = Int<i128>;
+/// Parse a 64 bit signed integer.
 pub type Int64 = Int<i64>;
+/// Parse a 32 bit signed integer.
 pub type Int32 = Int<i32>;
+/// Parse a 16 bit signed integer.
 pub type Int16 = Int<i16>;
+/// Parse an 8 bit signed integer.
 pub type Int8 = Int<i8>;
+/// Parse a 128 bit unsigned integer.
 pub type Uint128 = Int<u128>;
+/// Parse a 64 bit unsigned integer.
 pub type Uint64 = Int<u64>;
+/// Parse a 32 bit unsigned integer.
 pub type Uint32 = Int<u32>;
+/// Parse a 16 bit unsigned integer.
 pub type Uint16 = Int<u16>;
+/// Parse an 8 bit unsigned integer.
 pub type Uint8 = Int<u8>;
 
 impl<IType: Default + str::FromStr> Int<IType> {
@@ -175,21 +185,24 @@
     }
     let mut multiplier: f64 = if s.is_some() { -1. } else { 1. };
     if let Some((_, e)) = exp {
-        multiplier = (10. as f64).powi(e);
+        multiplier *= (10. as f64).powi(e);
     }
     return Ok(multiplier * (bigf + littlef));
 }
 
 /// float parses floats in the format of `[-]dd[.[dd]][e[-]ddd]`.
 ///
-/// TODO: Compare with "native" parser, i.e. without combinators, and keep this as example.
+/// TODO: Compare speed with "native" parser, i.e. without combinators, and keep this as example.
 pub fn float() -> impl Parser<Result = f64> {
     let digits_set = "0123456789";
     let minus = Maybe::new(Ignore::new(StringParser::new("-")));
     let digits = string_of(digits_set, RepeatSpec::Min(1));
     let point = Maybe::new(StringParser::new("."));
     let smalldigits = Maybe::new(string_of(digits_set, RepeatSpec::Min(1)));
-    let exp = Maybe::new(Sequence::new((Ignore::new(StringParser::new("e")), Int32::new())));
+    let exp = Maybe::new(Sequence::new((
+        Ignore::new(StringParser::new("e")),
+        Int32::new(),
+    )));
     let parser = Sequence::new((minus, digits, point, smalldigits, exp))
         .apply(|(m, d, p, sd, exp)| assemble_float(m, d, p, sd, exp));
     parser
@@ -251,7 +264,7 @@
 }
 
 /// A parser that parses a string consisting of characters `chars`.
-fn string_of<S: AsRef<str>>(chars: S, rp: RepeatSpec) -> impl Parser<Result = String> {
+pub fn string_of<S: AsRef<str>>(chars: S, rp: RepeatSpec) -> impl Parser<Result = String> {
     let oo = OneOf::new(chars);
     let rp = Repeat::new(oo, rp);
     let make_string = |charvec: Vec<char>| Ok(String::from_iter(charvec.into_iter()));
@@ -259,13 +272,18 @@
 }
 
 /// A parser that parses a string consisting of any characters not in the set.
-fn string_none_of<S: AsRef<str>>(chars: S, rp: RepeatSpec) -> impl Parser<Result = String> {
+pub fn string_none_of<S: AsRef<str>>(chars: S, rp: RepeatSpec) -> impl Parser<Result = String> {
     let oo = OneOf::new_none_of(chars);
     let rp = Repeat::new(oo, rp);
     let make_string = |charvec: Vec<char>| Ok(String::from_iter(charvec.into_iter()));
     rp.apply(make_string)
 }
 
+/// whitespace consumes any number of spaces, tabs, newlines, and carriage returns.
+pub fn whitespace() -> impl Parser<Result = ()> {
+    Ignore::new(Repeat::new(OneOf::new(" \n\r\t"), RepeatSpec::Any))
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -306,9 +324,9 @@
 
     #[test]
     fn test_parse_floats() {
-        let mut ps = ParseState::new("1 1. 1.5 -1.5 -1.75 2.5e-4");
+        let mut ps = ParseState::new("1 1. 1.5 -1.5 -1.75 2.5e-4 -2e-2");
         let mut p = float();
-        let want = vec![1., 1., 1.5, -1.5, -1.75, 2.5e-4];
+        let want = vec![1., 1., 1.5, -1.5, -1.75, 2.5e-4, -0.02];
         for &f in want.iter() {
             assert_eq!(Ok(f), p.parse(&mut ps));
             let _ = StringParser::new(" ").parse(&mut ps);
--- a/src/state.rs	Tue Jun 04 18:49:36 2019 +0200
+++ b/src/state.rs	Tue Jun 04 23:30:18 2019 +0200
@@ -28,6 +28,9 @@
     // TODO: Implement garbage collection on `buf`
 }
 
+/// A Hold represents the parsing state at a certain point. It can be used to "un-consume" input.
+/// Currently, a panic occurs if a `Hold` object is dropped without first releasing or resetting it
+/// using `ParseState::release()` or `ParseState::reset()`.
 pub struct Hold {
     ix: usize,
     released: bool,
@@ -52,6 +55,7 @@
 }
 
 impl<'a> ParseState<Chars<'a>> {
+    /// Initialize ParseState from a string.
     pub fn new(s: &'a str) -> ParseState<Chars<'a>> {
         ParseState {
             buf: vec![],
@@ -59,6 +63,7 @@
             current: 0,
         }
     }
+    /// Initialize ParseState from a UTF-8 encoded source.
     pub fn from_reader<R: io::Read>(r: R) -> ParseState<impl Iterator<Item = char>> {
         ParseState {
             buf: vec![],
@@ -70,41 +75,50 @@
 
 impl<Iter: Iterator<Item = char>> ParseState<Iter> {
     const PREFILL_DEFAULT: usize = 1024;
+
+    /// Return current index in input.
     pub fn index(&mut self) -> usize {
         self.current
     }
+
+    /// Remember the current position in the input.
     pub fn hold(&mut self) -> Hold {
         Hold::new(self.current)
     }
+
+    /// Notify the ParseState that a `Hold` is no longer needed (and the referenced piece of input
+    /// could be cleaned up, for example).
     pub fn release(&mut self, mut h: Hold) {
         // TODO: Implement when hold tracking is needed (for garbage collection).
         h.defuse();
     }
+
+    /// Reset state to what it was when `h` was created.
     pub fn reset(&mut self, mut h: Hold) {
         self.current = h.ix;
         h.defuse();
     }
+
+    /// Returns true if no input is left.
     pub fn finished(&self) -> bool {
         self.next.is_none() && self.current == self.buf.len()
     }
+
+    /// Shorthand for using a hold to undo a single call to `next()`.
     pub fn undo_next(&mut self) {
         assert!(self.current > 0);
         self.current -= 1;
     }
-    pub fn current(&self) -> Option<Iter::Item> {
-        if self.current < self.buf.len() {
-            Some(self.buf[self.current])
-        } else {
-            None
-        }
-    }
 
+    /// Fill buffer from source with at most `n` characters.
     fn prefill(&mut self, n: usize) {
         if let Some(next) = self.next.as_mut() {
             let mut v: Vec<char> = next.take(n).collect();
             self.buf.append(&mut v)
         }
     }
+
+    /// Return next character in input without advancing.
     pub fn peek(&mut self) -> Option<Iter::Item> {
         if self.current < self.buf.len() {
             return Some(self.buf[self.current]);