From 21bb9536eeaa0325cba789e0c8b74f6c24aeaa74 Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Thu, 16 Dec 2021 19:43:47 +0000 Subject: [PATCH] Updated changelog, bumped version --- CHANGELOG.md | 43 +++++++++++++++++++++++++++++++++ Cargo.toml | 2 +- examples/brainfuck.rs | 20 +++++++++------- examples/json.rs | 2 +- examples/nano_rust.rs | 2 +- src/chain.rs | 1 - src/combinator.rs | 38 ++++++++++++++++-------------- src/error.rs | 2 +- src/lib.rs | 22 ++++++++++------- src/primitive.rs | 55 +++++++++++++++++++++++++++---------------- src/recovery.rs | 6 ++++- src/recursive.rs | 4 +--- 12 files changed, 133 insertions(+), 64 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 90dc2429..6065e5aa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,49 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +# [0.7.0] - 2021-12-16 + +### Added + +- A new [tutorial](tutorial.md) to help new users + +- `select` macro, a wrapper over `filter_map` that makes extracting data from specific tokens easy +- `choice` parser, a better alternative to long `or` chains (which sometimes have poor compilation performance) +- `todo` parser, that panics when used (but not when created) (akin to Rust's `todo!` macro, but for parsers) +- `keyword` parser, that parses *exact* identifiers + +- `from_str` combinator to allow converting a pattern to a value inline, using `std::str::FromStr` +- `unwrapped` combinator, to automatically unwrap an output value inline +- `rewind` combinator, that allows reverting the input stream on success. 
It's most useful when requiring that a + pattern is followed by some terminating pattern without the first parser greedily consuming it +- `map_err_with_span` combinator, to allow fetching the span of the input that was parsed by a parser before an error + was encountered + +- `or_else` combinator, to allow processing and potentially recovering from a parser error +- `SeparatedBy::at_most` to require that a separated pattern appear at most a specific number of times +- `SeparatedBy::exactly` to require that a separated pattern be repeated exactly a specific number of times +- `Repeated::exactly` to require that a pattern be repeated exactly a specific number of times + +- More trait implementations for various things, making the crate more useful + +### Changed + +- Made `just`, `one_of`, and `none_of` significantly more useful. They can now accept strings, arrays, slices, vectors, + sets, or just single tokens as before +- Added the return type of each parser to its documentation +- More explicit documentation of parser behaviour +- More doc examples +- Deprecated `seq` (`just` has been generalised and can now be used to parse specific input sequences) +- Sealed the `Character` trait so that future changes are not breaking +- Sealed the `Chain` trait and made it more powerful +- Moved trait constraints on `Parser` to where clauses for improved readability + +### Fixed + +- Fixed a subtle bug that allowed `separated_by` to parse an extra trailing separator when it shouldn't +- Filled a 'hole' in the `Error` trait's API that conflated a lack of expected tokens with expectation of end of input +- Made recursive parsers use weak reference-counting to avoid memory leaks + # [0.6.0] - 2021-11-22 ### Added diff --git a/Cargo.toml b/Cargo.toml index d9762eef..450e9d37 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "chumsky" -version = "0.6.0" +version = "0.7.0" description = "A parser library for humans with powerful error recovery" authors = 
["Joshua Barretto "] repository = "https://github.com/zesterer/chumsky" diff --git a/examples/brainfuck.rs b/examples/brainfuck.rs index af92e88e..3be81fc0 100644 --- a/examples/brainfuck.rs +++ b/examples/brainfuck.rs @@ -22,18 +22,20 @@ enum Instr { fn parser() -> impl Parser, Error = Simple> { use Instr::*; - recursive(|bf| choice(( - just('<').to(Left), - just('>').to(Right), - just('+').to(Incr), - just('-').to(Decr), - just(',').to(Read), - just('.').to(Write), - )) + recursive(|bf| { + choice(( + just('<').to(Left), + just('>').to(Right), + just('+').to(Incr), + just('-').to(Decr), + just(',').to(Read), + just('.').to(Write), + )) .or(bf.delimited_by('[', ']').map(Loop)) .recover_with(nested_delimiters('[', ']', [], |_| Invalid)) .recover_with(skip_then_retry_until([']'])) - .repeated()) + .repeated() + }) .then_ignore(end()) } diff --git a/examples/json.rs b/examples/json.rs index 6fc7c5aa..3a30e8ee 100644 --- a/examples/json.rs +++ b/examples/json.rs @@ -116,7 +116,7 @@ fn main() { e.expected() .map(|expected| match expected { Some(expected) => expected.to_string(), - None => "end of input".to_string() + None => "end of input".to_string(), }) .collect::>() .join(", ") diff --git a/examples/nano_rust.rs b/examples/nano_rust.rs index bb901277..f38b5f52 100644 --- a/examples/nano_rust.rs +++ b/examples/nano_rust.rs @@ -614,7 +614,7 @@ fn main() { e.expected() .map(|expected| match expected { Some(expected) => expected.to_string(), - None => "end of input".to_string() + None => "end of input".to_string(), }) .collect::>() .join(", ") diff --git a/src/chain.rs b/src/chain.rs index 93b4cd51..35d57730 100644 --- a/src/chain.rs +++ b/src/chain.rs @@ -1,4 +1,3 @@ - mod private { pub trait Sealed {} diff --git a/src/combinator.rs b/src/combinator.rs index a23a6d6e..3a971375 100644 --- a/src/combinator.rs +++ b/src/combinator.rs @@ -31,6 +31,8 @@ impl, B: Parser, E: Err let a_state = stream.save(); // If the first parser succeeded and produced no secondary errors, 
don't bother trying the second parser + // TODO: Perhaps we should *always* take this route, even if recoverable errors did occur? Seems like an + // inconsistent application of PEG rules... if a_res.0.is_empty() { if let (a_errors, Ok(a_out)) = a_res { return (a_errors, Ok(a_out)); @@ -428,7 +430,7 @@ impl SeparatedBy { /// /// ``` /// # use chumsky::prelude::*; - /// let r#enum = seq::<_, _, Simple>("enum".chars()) + /// let r#enum = text::keyword::<_, _, Simple>("enum") /// .padded() /// .ignore_then(text::ident() /// .padded() @@ -987,8 +989,9 @@ impl, F: Fn(E) -> E, E: Error> Parser let (errors, res) = debugger.invoke(&self.0, stream); let mapper = |e: Located| e.map(&self.1); ( - errors,//errors.into_iter().map(mapper).collect(), - res/*.map(|(out, alt)| (out, alt.map(mapper)))*/.map_err(mapper), + errors, //errors.into_iter().map(mapper).collect(), + res /*.map(|(out, alt)| (out, alt.map(mapper)))*/ + .map_err(mapper), ) } @@ -1025,14 +1028,14 @@ impl, F: Fn(E, E::Span) -> E, E: Error| { let at = e.at; e.map(|e| { - let span = stream.attempt(|stream| { stream.revert(at); (false, stream.span_since(start)) }); + let span = stream.attempt(|stream| { + stream.revert(at); + (false, stream.span_since(start)) + }); (self.1)(e, span) }) }; - ( - errors, - res.map_err(mapper), - ) + (errors, res.map_err(mapper)) } #[inline] @@ -1103,13 +1106,8 @@ impl< #[derive(Copy, Clone)] pub struct OrElse(pub(crate) A, pub(crate) F); -impl< - I: Clone, - O, - A: Parser, - F: Fn(E) -> Result, - E: Error, - > Parser for OrElse +impl, F: Fn(E) -> Result, E: Error> Parser + for OrElse { type Error = E; @@ -1125,7 +1123,11 @@ impl< let res = match res { Ok(out) => Ok(out), Err(err) => match (&self.1)(err.error) { - Err(e) => Err(Located { at: err.at, error: e, phantom: PhantomData }), + Err(e) => Err(Located { + at: err.at, + error: e, + phantom: PhantomData, + }), Ok(out) => Ok((out, None)), }, }; @@ -1167,8 +1169,8 @@ impl, L: Into + Clone, E: Erro /* TODO: Not this? 
*/ /*if e.at > pre_state {*/ - // Only add the label if we committed to this pattern somewhat - e.map(|e| e.with_label(self.1.clone().into())) + // Only add the label if we committed to this pattern somewhat + e.map(|e| e.with_label(self.1.clone().into())) /*} else { e }*/ diff --git a/src/error.rs b/src/error.rs index 2bc982bb..47d90bf9 100644 --- a/src/error.rs +++ b/src/error.rs @@ -296,7 +296,7 @@ impl fmt::Display for Simple { } match self.expected.len() { - 0 => {},//write!(f, " but end of input was expected")?, + 0 => {} //write!(f, " but end of input was expected")?, 1 => write!( f, " but {} was expected", diff --git a/src/lib.rs b/src/lib.rs index aa38a391..85063231 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -60,7 +60,8 @@ pub mod prelude { pub use super::{ error::{Error as _, Simple}, primitive::{ - any, empty, end, filter, filter_map, just, none_of, one_of, seq, take_until, todo, choice, + any, choice, empty, end, filter, filter_map, just, none_of, one_of, seq, take_until, + todo, }, recovery::{nested_delimiters, skip_then_retry_until, skip_until}, recursive::{recursive, Recursive}, @@ -346,6 +347,10 @@ pub trait Parser { /// Map the primary error of this parser to a result. If the result is [`Ok`], the parser succeeds with that value. /// + /// Note that even if the function returns an [`Ok`], the input stream will still be 'stuck' at the input following + /// the input that triggered the error. You'll need to follow uses of this combinator with a parser that resets + /// the input stream to a known-good state (for example, [`take_until`]). + /// /// The output type of this parser is `U`, the [`Ok`] type of the result. fn or_else(self, f: F) -> OrElse where @@ -879,13 +884,14 @@ pub trait Parser { /// Apply a fallback recovery strategy to this parser should it fail. /// /// There is no silver bullet for error recovery, so this function allows you to specify one of several different - /// strategies at the location of your choice. 
- /// - /// Note that for implementation reasons, adding an error recovery strategy can cause a parser to 'over-commit', - /// missing potentially valid alternative parse routes (*TODO: document this and explain why and when it happens*). - /// Rest assured that this case is generally quite rare and only happens for very loose, almost-ambiguous syntax. - /// If you run into cases that you believe should parse but do not, try removing or moving recovery strategies to - /// fix the problem. + /// strategies at the location of your choice. Prefer an error recovery strategy that more precisely mirrors valid + /// syntax where possible to make error recovery more reliable. + /// + /// Because chumsky is a [PEG](https://en.m.wikipedia.org/wiki/Parsing_expression_grammar) parser, which always + /// takes the first successful parsing route through a grammar, recovering from an error may cause the parser to + /// erroneously miss alternative valid routes through the grammar that do not generate recoverable errors. If you + /// run into cases where valid syntax fails to parse without errors, this might be happening: consider removing + /// error recovery or switching to a more specific error recovery strategy. /// /// The output type of this parser is `O`, the same as the original parser. 
/// diff --git a/src/primitive.rs b/src/primitive.rs index 0d33a07a..eaa6cba8 100644 --- a/src/primitive.rs +++ b/src/primitive.rs @@ -120,10 +120,27 @@ pub fn end() -> End { End(PhantomData) } +mod private { + pub trait Sealed {} + + impl Sealed for T {} + impl Sealed for String {} + impl<'a> Sealed for &'a str {} + impl<'a, T> Sealed for &'a [T] {} + impl Sealed for [T; N] {} + impl<'a, T, const N: usize> Sealed for &'a [T; N] {} + impl Sealed for Vec {} + impl Sealed for std::collections::LinkedList {} + impl Sealed for std::collections::VecDeque {} + impl Sealed for std::collections::HashSet {} + impl Sealed for std::collections::BTreeSet {} + impl Sealed for std::collections::BinaryHeap {} +} + /// A utility trait to abstract over linear container-like things. /// /// This trait is likely to change in future versions of the crate, so avoid implementing it yourself. -pub trait Container { +pub trait Container: private::Sealed { /// An iterator over the items within this container, by value. type Iter: Iterator; /// Iterate over the elements of the container (using internal iteration because GATs are unstable). 
@@ -375,15 +392,13 @@ impl, E: Error> Parser for OneOf< (_, _, Some(tok)) if self.0.get_iter().any(|not| not == tok) => { (Vec::new(), Ok((tok, None))) } - (at, span, found) => { - ( - Vec::new(), - Err(Located::at( - at, - E::expected_input_found(span, self.0.get_iter().map(Some), found), - )), - ) - } + (at, span, found) => ( + Vec::new(), + Err(Located::at( + at, + E::expected_input_found(span, self.0.get_iter().map(Some), found), + )), + ), } } @@ -475,15 +490,13 @@ impl, E: Error> Parser for NoneOf (_, _, Some(tok)) if self.0.get_iter().all(|not| not != tok) => { (Vec::new(), Ok((tok, None))) } - (at, span, found) => { - ( - Vec::new(), - Err(Located::at( - at, - E::expected_input_found(span, Vec::new(), found), - )), - ) - } + (at, span, found) => ( + Vec::new(), + Err(Located::at( + at, + E::expected_input_found(span, Vec::new(), found), + )), + ), } } @@ -836,7 +849,9 @@ pub struct Choice(pub(crate) T, pub(crate) PhantomData); impl Copy for Choice {} impl Clone for Choice { - fn clone(&self) -> Self { Self(self.0.clone(), PhantomData) } + fn clone(&self) -> Self { + Self(self.0.clone(), PhantomData) + } } macro_rules! impl_for_tuple { diff --git a/src/recovery.rs b/src/recovery.rs index 63037d13..22772b0e 100644 --- a/src/recovery.rs +++ b/src/recovery.rs @@ -193,7 +193,11 @@ impl O, E: Error, const N: usize> ), None => Located::at( at, - P::Error::expected_input_found(span, Some(Some(self.1.clone())), None), + P::Error::expected_input_found( + span, + Some(Some(self.1.clone())), + None, + ), ), }); } diff --git a/src/recursive.rs b/src/recursive.rs index 126577ee..66e6e449 100644 --- a/src/recursive.rs +++ b/src/recursive.rs @@ -30,9 +30,7 @@ type OnceParser<'a, I, O, E> = OnceCell + 'a>>; /// [definition](Recursive::define). /// /// Prefer to use [`recursive()`], which exists as a convenient wrapper around both operations, if possible. 
-pub struct Recursive<'a, I, O, E: Error>( - RecursiveInner>, -); +pub struct Recursive<'a, I, O, E: Error>(RecursiveInner>); impl<'a, I: Clone, O, E: Error> Recursive<'a, I, O, E> { fn cell(&self) -> Rc> {