syn/
discouraged.rs

1//! Extensions to the parsing API with niche applicability.
2
3use super::*;
4
/// Extensions to the `ParseStream` API to support speculative parsing.
pub trait Speculative {
    /// Advance this parse stream to the position of a forked parse stream.
    ///
    /// This is the opposite operation to [`ParseStream::fork`]. You can fork a
    /// parse stream, perform some speculative parsing, then join the original
    /// stream to the fork to "commit" the parsing from the fork to the main
    /// stream.
    ///
    /// If you can avoid doing this, you should, as it limits the ability to
    /// generate useful errors. That said, it is often the only way to parse
    /// syntax of the form `A* B*` for arbitrary syntax `A` and `B`. The problem
    /// is that when the fork fails to parse an `A`, it's impossible to tell
    /// whether that was because of a syntax error and the user meant to provide
    /// an `A`, or that the `A`s are finished and it's time to start parsing
    /// `B`s. Use with care.
    ///
    /// Also note that if `A` is a subset of `B`, `A* B*` can be parsed by
    /// parsing `B*` and removing the leading members of `A` from the
    /// repetition, which avoids the downsides of speculative parsing
    /// altogether.
    ///
    /// [`ParseStream::fork`]: ParseBuffer::fork
    ///
    /// # Example
    ///
    /// There has been chatter about the possibility of making the colons in the
    /// turbofish syntax like `path::to::<T>` no longer required by accepting
    /// `path::to<T>` in expression position. Specifically, according to [RFC
    /// 2544], [`PathSegment`] parsing should always try to consume a following
    /// `<` token as the start of generic arguments, and reset to the `<` if
    /// that fails (e.g. the token is acting as a less-than operator).
    ///
    /// This is the exact kind of parsing behavior which requires the "fork,
    /// try, commit" behavior that [`ParseStream::fork`] discourages. With
    /// `advance_to`, we can avoid having to parse the speculatively parsed
    /// content a second time.
    ///
    /// This change in behavior can be implemented in syn by replacing just the
    /// `Parse` implementation for `PathSegment`:
    ///
    /// ```
    /// # use syn::ext::IdentExt;
    /// use syn::parse::discouraged::Speculative;
    /// # use syn::parse::{Parse, ParseStream};
    /// # use syn::{Ident, PathArguments, Result, Token};
    ///
    /// pub struct PathSegment {
    ///     pub ident: Ident,
    ///     pub arguments: PathArguments,
    /// }
    /// #
    /// # impl<T> From<T> for PathSegment
    /// # where
    /// #     T: Into<Ident>,
    /// # {
    /// #     fn from(ident: T) -> Self {
    /// #         PathSegment {
    /// #             ident: ident.into(),
    /// #             arguments: PathArguments::None,
    /// #         }
    /// #     }
    /// # }
    ///
    /// impl Parse for PathSegment {
    ///     fn parse(input: ParseStream) -> Result<Self> {
    ///         if input.peek(Token![super])
    ///             || input.peek(Token![self])
    ///             || input.peek(Token![Self])
    ///             || input.peek(Token![crate])
    ///         {
    ///             let ident = input.call(Ident::parse_any)?;
    ///             return Ok(PathSegment::from(ident));
    ///         }
    ///
    ///         let ident = input.parse()?;
    ///         if input.peek(Token![::]) && input.peek3(Token![<]) {
    ///             return Ok(PathSegment {
    ///                 ident,
    ///                 arguments: PathArguments::AngleBracketed(input.parse()?),
    ///             });
    ///         }
    ///         if input.peek(Token![<]) && !input.peek(Token![<=]) {
    ///             // Attempt the generic arguments on a fork, so that a
    ///             // failed attempt leaves `input` sitting at the `<`.
    ///             let fork = input.fork();
    ///             if let Ok(arguments) = fork.parse() {
    ///                 // The speculation succeeded: commit it by moving
    ///                 // `input` up to where the fork stopped.
    ///                 input.advance_to(&fork);
    ///                 return Ok(PathSegment {
    ///                     ident,
    ///                     arguments: PathArguments::AngleBracketed(arguments),
    ///                 });
    ///             }
    ///         }
    ///         Ok(PathSegment::from(ident))
    ///     }
    /// }
    ///
    /// # syn::parse_str::<PathSegment>("a<b,c>").unwrap();
    /// ```
    ///
    /// # Drawbacks
    ///
    /// The main drawback of this style of speculative parsing is in error
    /// presentation. Even if the lookahead is the "correct" parse, the error
    /// that is shown is that of the "fallback" parse. To use the same example
    /// as the turbofish above, take the following unfinished "turbofish":
    ///
    /// ```text
    /// let _ = f<&'a fn(), for<'a> serde::>();
    /// ```
    ///
    /// If this is parsed as generic arguments, we can provide the error message
    ///
    /// ```text
    /// error: expected identifier
    ///  --> src.rs:L:C
    ///   |
    /// L | let _ = f<&'a fn(), for<'a> serde::>();
    ///   |                                    ^
    /// ```
    ///
    /// but if parsed using the above speculative parsing, it falls back to
    /// assuming that the `<` is a less-than when it fails to parse the generic
    /// arguments, and tries to interpret the `&'a` as the start of a labelled
    /// loop, resulting in the much less helpful error
    ///
    /// ```text
    /// error: expected `:`
    ///  --> src.rs:L:C
    ///   |
    /// L | let _ = f<&'a fn(), for<'a> serde::>();
    ///   |               ^^
    /// ```
    ///
    /// This can be mitigated with various heuristics (two examples: show both
    /// forks' parse errors, or show the one that consumed more tokens). But
    /// when you control the grammar, displaying reasonable errors is much
    /// simpler if you stick to something that can be parsed LL(3), without
    /// the LL(*) speculative parsing that this method makes possible.
    ///
    /// [RFC 2544]: https://github.com/rust-lang/rfcs/pull/2544
    /// [`PathSegment`]: crate::PathSegment
    ///
    /// # Performance
    ///
    /// This method performs a cheap fixed amount of work that does not depend
    /// on how far apart the two streams are positioned.
    ///
    /// # Panics
    ///
    /// The forked stream in the argument of `advance_to` must have been
    /// obtained by forking `self`. Attempting to advance to any other stream
    /// will cause a panic.
    fn advance_to(&self, fork: &Self);
}
159
160impl<'a> Speculative for ParseBuffer<'a> {
161    fn advance_to(&self, fork: &Self) {
162        if !crate::buffer::same_scope(self.cursor(), fork.cursor()) {
163            panic!("Fork was not derived from the advancing parse stream");
164        }
165
166        let (self_unexp, self_sp) = inner_unexpected(self);
167        let (fork_unexp, fork_sp) = inner_unexpected(fork);
168        if !Rc::ptr_eq(&self_unexp, &fork_unexp) {
169            match (fork_sp, self_sp) {
170                // Unexpected set on the fork, but not on `self`, copy it over.
171                (Some(span), None) => {
172                    self_unexp.set(Unexpected::Some(span));
173                }
174                // Unexpected unset. Use chain to propagate errors from fork.
175                (None, None) => {
176                    fork_unexp.set(Unexpected::Chain(self_unexp));
177
178                    // Ensure toplevel 'unexpected' tokens from the fork don't
179                    // bubble up the chain by replacing the root `unexpected`
180                    // pointer, only 'unexpected' tokens from existing group
181                    // parsers should bubble.
182                    fork.unexpected
183                        .set(Some(Rc::new(Cell::new(Unexpected::None))));
184                }
185                // Unexpected has been set on `self`. No changes needed.
186                (_, Some(_)) => {}
187            }
188        }
189
190        // See comment on `cell` in the struct definition.
191        self.cell
192            .set(unsafe { mem::transmute::<Cursor, Cursor<'static>>(fork.cursor()) });
193    }
194}