ndslice/
parse.rs

1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
9use std::any::type_name;
10use std::num::ParseIntError;
11use std::str::FromStr;
12
13/// Errors that can occur while parsing a string.
14#[derive(Debug, thiserror::Error)]
15pub enum ParserError {
16    #[error("parse error: expected '{expected}' but got end of input")]
17    UnexpectedEndOfInput { expected: &'static str },
18
19    #[error("parse error: expected '{expected}' but got '{actual}'")]
20    WrongToken {
21        expected: &'static str,
22        actual: String,
23    },
24
25    #[error("parse error: token '{actual}' is not a '{expected_type}'")]
26    WrongTokenType {
27        expected_type: &'static str,
28        actual: String,
29    },
30
31    #[error("parse error: {error}: expected integer but got '{token}'")]
32    NotAnInteger {
33        token: String,
34        #[source]
35        error: ParseIntError,
36    },
37}
38
/// A small lexeme-oriented parser whose API is meant to be pleasant to use
/// and to steer callers towards descriptive errors.
///
/// In its most basic form a parser is just an iterator over lexemes; on top of
/// that it offers richer methods for consuming lexemes, some of which only
/// consume conditionally.
pub struct Parser<'a> {
    /// The portion of the input that has not been consumed yet.
    str: &'a str,
    /// The delimiters that mark lexical boundaries; each one is also produced
    /// as a lexeme of its own.
    delims: &'a [&'a str],
}
48
49impl<'a> Parser<'a> {
50    /// Create a new parser that uses the provided delimiters to to define
51    /// lexical boundaries. Each delimiter is also a lexeme.
52    pub fn new(str: &'a str, delims: &'a [&'a str]) -> Self {
53        Self { str, delims }
54    }
55
56    /// Peek the next available lexeme, returning `None` if the the parser has
57    /// reached the end of its input
58    pub fn peek(&self) -> Option<&'a str> {
59        self.split().map(|(token, _)| token)
60    }
61
62    /// Peek the next raw char (no trimming). Useful to detect a
63    /// starting quote.
64    pub fn peek_char(&self) -> Option<char> {
65        self.str.chars().next()
66    }
67
68    /// Like `peek`, but return a parsing error if the parser has reached the
69    /// end of its input.
70    pub fn peek_or_err(&self, expected: &'static str) -> Result<&'a str, ParserError> {
71        self.split()
72            .map(|(token, _)| token)
73            .ok_or(ParserError::UnexpectedEndOfInput { expected })
74    }
75
76    /// Returns an error if the next token is not `expected`. The token is consumed
77    /// if it is `expected`.
78    pub fn expect(&mut self, expected: &'static str) -> Result<(), ParserError> {
79        let token = self.peek_or_err(expected)?;
80        if token != expected {
81            Err(ParserError::WrongToken {
82                expected,
83                actual: token.to_string(),
84            })
85        } else {
86            let _ = self.next();
87            Ok(())
88        }
89    }
90
91    /// Returns the next token, or an error if the parser has reached the end of
92    /// its input.
93    pub fn next_or_err(&mut self, expected: &'static str) -> Result<&'a str, ParserError> {
94        self.next()
95            .ok_or(ParserError::UnexpectedEndOfInput { expected })
96    }
97
98    /// Try to parse the next token as a `T`. The token is consumed if on success.
99    pub fn try_parse<T: FromStr>(&mut self) -> Result<T, ParserError> {
100        let token = self.peek_or_err("a token")?;
101        let result = token.parse().map_err(|_e| ParserError::WrongTokenType {
102            expected_type: type_name::<T>(),
103            actual: token.to_string(),
104        });
105        if result.is_ok() {
106            let _ = self.next();
107        }
108        result
109    }
110
111    /// Returns true if the parser has reached the end of its input.
112    pub fn is_empty(&self) -> bool {
113        self.str.trim().is_empty()
114    }
115
116    fn split(&self) -> Option<(&'a str, &'a str)> {
117        if self.str.is_empty() {
118            return None;
119        }
120
121        match self
122            .delims
123            .iter()
124            .enumerate()
125            .flat_map(|(index, d)| self.str.find(d).map(|pos| (index, pos)))
126            .min_by_key(|&(_, v)| v)
127        {
128            Some((index, 0)) => Some((self.delims[index], &self.str[self.delims[index].len()..])),
129            Some((_, pos)) => Some((self.str[..pos].trim(), &self.str[pos..])),
130            None => Some((self.str.trim(), "")),
131        }
132    }
133
134    /// Parse a double-quoted string literal, returning the unescaped
135    /// contents. Supports \" \\ \n \r \t escapes. Leaves the rest of
136    /// the input intact.
137    pub fn parse_string_literal(&mut self) -> Result<String, ParserError> {
138        let mut s = self.str;
139
140        if !s.starts_with('"') {
141            let tok = self.peek_or_err("\"")?;
142            return Err(ParserError::WrongToken {
143                expected: "\"",
144                actual: tok.to_string(),
145            });
146        }
147
148        // Skip the opening quote
149        s = &s[1..];
150        let mut out = String::new();
151        let mut consumed = 1; // We already consumed the opening quote.
152
153        let mut chars = s.chars();
154        while let Some(c) = chars.next() {
155            consumed += c.len_utf8();
156            match c {
157                '\\' => {
158                    // Escape sequence.
159                    if let Some(e) = chars.next() {
160                        consumed += e.len_utf8();
161                        match e {
162                            '\\' => out.push('\\'),
163                            '"' => out.push('"'),
164                            'n' => out.push('\n'),
165                            'r' => out.push('\r'),
166                            't' => out.push('\t'),
167                            other => {
168                                // Pass through unknown escapes
169                                // verbatim.
170                                out.push('\\');
171                                out.push(other);
172                            }
173                        }
174                    } else {
175                        return Err(ParserError::UnexpectedEndOfInput {
176                            expected: "escape sequence",
177                        });
178                    }
179                }
180                '"' => {
181                    // closing quote
182                    self.str = &self.str[consumed..];
183                    return Ok(out);
184                }
185                _ => out.push(c),
186            }
187        }
188
189        Err(ParserError::UnexpectedEndOfInput {
190            expected: "closing quote",
191        })
192    }
193}
194
195impl<'a> Iterator for Parser<'a> {
196    type Item = &'a str;
197
198    fn next(&mut self) -> Option<Self::Item> {
199        self.split().map(|(token, rest)| {
200            self.str = rest;
201            token
202        })
203    }
204}
205
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexemes and delimiters come out in order, and `peek` does not consume.
    #[test]
    fn test_basic() {
        let mut p = Parser::new("foo,bar", &[","]);
        assert_eq!(p.next(), Some("foo"));
        assert_eq!(p.next(), Some(","));
        assert_eq!(p.peek(), Some("bar"));
        assert_eq!(p.next(), Some("bar"));
        assert_eq!(p.next(), None);
        assert_eq!(p.peek(), None);
    }

    /// `expect` consumes only on a match and fails at end of input.
    #[test]
    fn test_expect() {
        let mut p = Parser::new("a=b", &["="]);
        assert!(p.expect("b").is_err());
        // The mismatching token must still be there.
        assert_eq!(p.peek(), Some("a"));
        assert!(p.expect("a").is_ok());
        assert!(p.expect("=").is_ok());
        assert!(p.expect("b").is_ok());
        assert!(p.expect("c").is_err());
        assert!(p.is_empty());
    }

    /// `try_parse` consumes the token on success and leaves it on failure.
    #[test]
    fn test_try_parse() {
        let mut p = Parser::new("42,x", &[","]);
        assert_eq!(p.try_parse::<usize>().unwrap(), 42);
        assert_eq!(p.next(), Some(","));
        assert!(p.try_parse::<usize>().is_err());
        assert_eq!(p.next(), Some("x"));
        assert!(p.is_empty());
        assert!(p.try_parse::<usize>().is_err());
    }

    /// String literals are unescaped, and only the literal is consumed.
    #[test]
    fn test_parse_string_literal() {
        let mut p = Parser::new(r#""a\"b\n",rest"#, &[","]);
        assert_eq!(p.parse_string_literal().unwrap(), "a\"b\n");
        assert_eq!(p.peek_char(), Some(','));
        assert_eq!(p.next(), Some(","));
        assert_eq!(p.next(), Some("rest"));
        assert_eq!(p.next(), None);
    }

    /// Malformed literals are rejected without consuming any input.
    #[test]
    fn test_parse_string_literal_errors() {
        let mut unterminated = Parser::new("\"abc", &[]);
        assert!(unterminated.parse_string_literal().is_err());
        assert_eq!(unterminated.peek(), Some("\"abc"));

        let mut dangling_escape = Parser::new("\"abc\\", &[]);
        assert!(dangling_escape.parse_string_literal().is_err());
        assert_eq!(dangling_escape.peek(), Some("\"abc\\"));

        let mut unquoted = Parser::new("abc", &[]);
        assert!(unquoted.parse_string_literal().is_err());
        assert_eq!(unquoted.peek(), Some("abc"));
    }
}