1use std::collections::VecDeque;
13
14#[derive(thiserror::Error, Debug, PartialEq, Eq)]
16pub enum ParseError {
17 #[error("expected '{0}', got '{1}'")]
18 Expected(String, String),
19
20 #[error("invalid token '{0}'")]
21 InvalidToken(String),
22}
23
24#[derive(Debug, PartialEq, Eq)]
26pub enum Token<'a> {
27 LeftBracket,
29 RightBracket,
31 LessThan,
33 GreaterThan,
35 Uint(usize),
37 Elem(&'a str),
39 At,
41 Dot,
43 Comma,
45
46 InvalidElem(&'a str),
49}
50
51pub struct Lexer<'a> {
54 tokens: VecDeque<&'a str>,
55}
56
57impl<'a> Lexer<'a> {
58 pub fn new(input: &'a str) -> Self {
60 Self {
61 tokens: chop(input, &["[", "]", "<", ">", ".", "@", ","]).collect(),
64 }
65 }
66}
67
68impl<'a> Iterator for Lexer<'a> {
69 type Item = Token<'a>;
70
71 fn next(&mut self) -> Option<Token<'a>> {
72 match self.tokens.pop_front() {
73 None => None,
74 Some("[") => Some(Token::LeftBracket),
75 Some("]") => Some(Token::RightBracket),
76 Some("<") => Some(Token::LessThan),
77 Some(">") => Some(Token::GreaterThan),
78 Some("@") => Some(Token::At),
79 Some(".") => Some(Token::Dot),
80 Some(",") => Some(Token::Comma),
81 Some(elem) => Some({
82 if let Ok(uint) = elem.parse::<usize>() {
83 Token::Uint(uint)
84 } else if is_valid_elem(elem) {
85 Token::Elem(elem)
86 } else {
87 Token::InvalidElem(elem)
88 }
89 }),
90 }
91 }
92}
93
94macro_rules! parse {
109 (
110 $token_stream:expr;
111 $(
112 $($pattern:pat_param)* => $constructor:expr
113 ),* $(,)?
114 ) => {
115 {
116 let tokens = $token_stream.collect::<Vec<_>>();
117 let result = match tokens[..] {
118 $([$( $pattern ),+] => Some($constructor),)*
119 _ => None,
120 };
121 result.ok_or_else(|| {
122 let all_patterns = vec![$(stringify!($($pattern)*),)*];
123 ParseError::Expected(
124 all_patterns.join(" or "),
125 tokens.iter().map(|tok| format!("{:?}", tok)).collect::<Vec<_>>().join(" "))
126 })
127 }
128 };
129}
130pub(crate) use parse;
131
132fn chop<'a>(mut s: &'a str, delims: &'a [&'a str]) -> impl Iterator<Item = &'a str> + 'a {
134 std::iter::from_fn(move || {
135 if s.is_empty() {
136 return None;
137 }
138
139 match delims
140 .iter()
141 .enumerate()
142 .flat_map(|(index, d)| s.find(d).map(|pos| (index, pos)))
143 .min_by_key(|&(_, v)| v)
144 {
145 Some((index, 0)) => {
146 let delim = delims[index];
147 s = &s[delim.len()..];
148 Some(delim)
149 }
150 Some((_, pos)) => {
151 let token = &s[..pos];
152 s = &s[pos..];
153 Some(token.trim())
154 }
155 None => {
156 let token = s;
157 s = "";
158 Some(token.trim())
159 }
160 }
161 })
162}
163
164pub fn is_valid(token: &str, is_continue: fn(char) -> bool) -> bool {
165 if token.starts_with("r#") || token.is_empty() {
167 return false;
168 }
169 let mut chars = token.chars();
170 let mut first = true;
171 while let Some(ch) = chars.next() {
172 let valid = if ch == ':' {
173 chars.next() == Some(':')
174 } else if first {
175 ch == '_' || unicode_ident::is_xid_start(ch)
176 } else {
177 is_continue(ch)
178 };
179 if !valid {
180 return false;
181 }
182 first = false;
183 }
184 true
185}
186
187pub fn is_valid_ident(token: &str) -> bool {
194 is_valid(token, unicode_ident::is_xid_continue)
195}
196
197fn is_valid_elem(token: &str) -> bool {
201 fn is_continue(ch: char) -> bool {
202 ch == '-' || unicode_ident::is_xid_continue(ch)
203 }
204 is_valid(token, is_continue)
205}
206
207#[cfg(test)]
208mod tests {
209 use std::assert_matches::assert_matches;
210
211 use super::*;
212
213 #[test]
214 fn test_lexer() {
215 let tokens = Lexer::new("foo.bar[123],baz");
216 assert_eq!(
217 tokens.collect::<Vec<Token>>(),
218 vec![
219 Token::Elem("foo"),
220 Token::Dot,
221 Token::Elem("bar"),
222 Token::LeftBracket,
223 Token::Uint(123),
224 Token::RightBracket,
225 Token::Comma,
226 Token::Elem("baz"),
227 ]
228 )
229 }
230
231 #[test]
232 fn test_valid_idents() {
233 let idents = vec![
234 "foo", "foo_bar", "東京", "_foo", "foo-bar", "::foo", "foo::bar",
235 ];
236
237 for ident in idents {
238 let tokens = Lexer::new(ident);
239
240 assert_matches!(tokens.collect::<Vec<Token>>()[..], [Token::Elem(ident_)] if ident_ == ident,);
241 }
242 }
243
244 #[test]
245 fn test_invalid_idents() {
246 let idents = vec!["-bar", "foo/bar", "r#true"];
247
248 for ident in idents {
249 let tokens = Lexer::new(ident);
250 assert_matches!(
251 &tokens.collect::<Vec<Token>>()[..],
252 [Token::InvalidElem(ident_)] if *ident_ == ident,
253 );
254 }
255 }
256
257 #[test]
258 fn test_parse() {
259 let tokens = Lexer::new("foo.bar[123]");
260 let parsed = parse!(
261 tokens;
262 Token::Elem(first) Token::Dot Token::Elem(second)
263 Token::LeftBracket Token::Uint(num) Token::RightBracket => (first, second, num)
264 );
265 assert_eq!(parsed.unwrap(), ("foo", "bar", 123usize));
266 }
267
268 #[test]
269 fn test_parse_failure() {
270 let tokens = Lexer::new("foo.bar[123]");
271 let parsed = parse!(
272 tokens;
273 Token::Elem(first) Token::Elem(second)
274 Token::LeftBracket Token::Uint(num) Token::RightBracket => (first, second, num)
275 );
276 assert!(parsed.is_err())
277 }
278}