1use std::iter::Peekable;
12use std::str::Chars;
13use std::sync::LazyLock;
14
15use crate::reference::name::FLICKR_BASE_58;
16use crate::reference::name::Ident;
17use crate::reference::name::Uid;
18
19static FLICKR_BASE_58_ORD: LazyLock<[Option<usize>; 256]> = LazyLock::new(|| {
21 let mut table = [None; 256];
22 for (i, c) in FLICKR_BASE_58.chars().enumerate() {
23 table[c as usize] = Some(i);
24 }
25 table
26});
27
28#[derive(Debug, thiserror::Error, PartialEq, Eq)]
30pub enum ParseError {
31 #[error("expected '{0}', got '{1}'")]
33 Expected(Token, Token),
34}
35
36#[derive(Debug, PartialEq, Eq)]
38pub enum Token {
39 LeftBracket,
41 RightBracket,
43 Uint(usize),
45 Dot,
47 Uid(Uid),
49 Colon,
51 DoubleSlash,
53 BracketTypename(String),
56 Ident(Ident),
58
59 Error(String),
62
63 Eof,
65}
66
67impl Token {
68 pub fn into_left_bracket(self) -> Result<(), ParseError> {
70 match self {
71 Token::LeftBracket => Ok(()),
72 other => Err(ParseError::Expected(Token::LeftBracket, other)),
73 }
74 }
75
76 pub fn into_right_bracket(self) -> Result<(), ParseError> {
78 match self {
79 Token::RightBracket => Ok(()),
80 other => Err(ParseError::Expected(Token::RightBracket, other)),
81 }
82 }
83
84 pub fn into_uint(self) -> Result<usize, ParseError> {
86 match self {
87 Token::Uint(value) => Ok(value),
88 other => Err(ParseError::Expected(Token::Uint(0), other)),
89 }
90 }
91
92 pub fn into_dot(self) -> Result<(), ParseError> {
94 match self {
95 Token::Dot => Ok(()),
96 other => Err(ParseError::Expected(Token::Dot, other)),
97 }
98 }
99
100 pub fn into_uid(self) -> Result<Uid, ParseError> {
102 match self {
103 Token::Uid(uid) => Ok(uid),
104 other => Err(ParseError::Expected(Token::Uid(Uid::zero()), other)),
105 }
106 }
107
108 pub fn into_colon(self) -> Result<(), ParseError> {
110 match self {
111 Token::Colon => Ok(()),
112 other => Err(ParseError::Expected(Token::Colon, other)),
113 }
114 }
115
116 pub fn into_double_slash(self) -> Result<(), ParseError> {
118 match self {
119 Token::DoubleSlash => Ok(()),
120 other => Err(ParseError::Expected(Token::DoubleSlash, other)),
121 }
122 }
123
124 pub fn into_bracket_typename(self) -> Result<String, ParseError> {
126 match self {
127 Token::BracketTypename(value) => Ok(value),
128 other => Err(ParseError::Expected(
129 Token::BracketTypename(String::new()),
130 other,
131 )),
132 }
133 }
134
135 pub fn into_ident(self) -> Result<Ident, ParseError> {
137 match self {
138 Token::Ident(value) => Ok(value),
139 other => Err(ParseError::Expected(
140 Token::Ident("ident".parse().unwrap()),
141 other,
142 )),
143 }
144 }
145
146 pub fn into_error(self) -> Result<String, ParseError> {
148 match self {
149 Token::Error(value) => Ok(value),
150 other => Err(ParseError::Expected(Token::Error(String::new()), other)),
151 }
152 }
153}
154
155impl std::fmt::Display for Token {
156 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
157 match self {
158 Token::LeftBracket => write!(f, "["),
159 Token::RightBracket => write!(f, "]"),
160 Token::Uint(value) => write!(f, "{}", value),
161 Token::Dot => write!(f, "."),
162 Token::Uid(uid) => write!(f, "-{}", uid),
163 Token::Colon => write!(f, ":"),
164 Token::DoubleSlash => write!(f, "//"),
165 Token::BracketTypename(value) => write!(f, "{}", value),
166 Token::Ident(value) => write!(f, "{}", value),
167 Token::Error(value) => write!(f, "<error {}>", value),
168 Token::Eof => write!(f, "<eof>"),
169 }
170 }
171}
172
173#[derive(Default, Debug)]
175pub(crate) enum Lexer<'a> {
176 Next(Peekable<Chars<'a>>),
177 Inbracket(Peekable<Chars<'a>>),
178 #[default]
179 Invalid,
180}
181
182impl<'a> Lexer<'a> {
183 pub(crate) fn new(input: &'a str) -> Self {
185 Lexer::Next(input.chars().peekable())
186 }
187
188 pub(crate) fn next_or_eof(&mut self) -> Token {
191 self.next().unwrap_or(Token::Eof)
192 }
193
194 pub(crate) fn expect(&mut self, token: Token) -> Result<(), ParseError> {
196 let next = self.next_or_eof();
197 if next != token {
198 return Err(ParseError::Expected(token, next));
199 }
200 Ok(())
201 }
202
203 fn parse_uint(iter: &mut Peekable<Chars<'a>>) -> usize {
204 let mut value = iter.next().unwrap().to_digit(10).unwrap() as usize;
205 while let Some(&ch) = iter.peek() {
206 if let Some(d) = ch.to_digit(10) {
207 value = value * 10 + d as usize;
208 iter.next();
209 } else {
210 break;
211 }
212 }
213 value
214 }
215
216 fn parse_ident(iter: &mut Peekable<Chars<'a>>) -> Ident {
217 let mut ident = String::new();
218
219 let ch = iter.next().unwrap();
220 assert!(unicode_ident::is_xid_start(ch) || ch == '_');
221 ident.push(ch);
222
223 while let Some(&ch) = iter.peek()
224 && unicode_ident::is_xid_continue(ch)
225 {
226 ident.push(iter.next().unwrap());
227 }
228 Ident::new(ident).unwrap()
229 }
230
231 fn parse_bracket_typename(iter: &mut Peekable<Chars<'a>>) -> String {
232 let mut ident = String::new();
233
234 let ch = iter.next().unwrap();
235 assert!(unicode_ident::is_xid_start(ch) || ch == '_');
236 ident.push(ch);
237
238 while let Some(&ch) = iter.peek()
239 && (unicode_ident::is_xid_continue(ch) || ch == ':')
240 {
241 ident.push(iter.next().unwrap());
242 }
243 ident
244 }
245
246 fn parse_uid(iter: &mut Peekable<Chars<'a>>) -> Option<Uid> {
247 let base = FLICKR_BASE_58.len() as u64;
248 let mut num = 0u64;
249
250 for _i in 0..12 {
251 let &ch = iter.peek()?;
252 let pos = FLICKR_BASE_58_ORD[ch as usize]?;
253 let _ = iter.next();
254 num *= base;
255 num += pos as u64;
256 }
257
258 Some(num.into())
259 }
260}
261
262impl<'a> Iterator for Lexer<'a> {
263 type Item = Token;
264
265 fn next(&mut self) -> Option<Self::Item> {
266 let (new_state, token) = match std::mem::take(self) {
267 Lexer::Next(mut iter) => match iter.peek()? {
268 '[' => {
269 let _ = iter.next();
270 (Lexer::Inbracket(iter), Some(Token::LeftBracket))
271 }
272 '.' => {
273 let _ = iter.next();
274 (Lexer::Next(iter), Some(Token::Dot))
275 }
276 '/' => {
277 let _ = iter.next();
278 match iter.next() {
279 Some('/') => (Lexer::Next(iter), Some(Token::DoubleSlash)),
280 _ => (Lexer::Invalid, Some(Token::Error(iter.collect()))),
281 }
282 }
283 ':' => {
284 let _ = iter.next();
285 (Lexer::Next(iter), Some(Token::Colon))
286 }
287 '-' => {
288 let _ = iter.next();
289 match Lexer::parse_uid(&mut iter) {
290 Some(uid) => (Lexer::Next(iter), Some(Token::Uid(uid))),
291 None => (Lexer::Invalid, Some(Token::Error(iter.collect()))),
292 }
293 }
294 '0'..='9' => {
296 let uint = Lexer::parse_uint(&mut iter);
297 (Lexer::Next(iter), Some(Token::Uint(uint)))
298 }
299 ch if unicode_ident::is_xid_start(*ch) || *ch == '_' => {
300 let ident = Lexer::parse_ident(&mut iter);
301 (Lexer::Next(iter), Some(Token::Ident(ident)))
302 }
303 _ => (Lexer::Invalid, Some(Token::Error(iter.collect()))),
304 },
305 Lexer::Inbracket(mut iter) => match iter.peek()? {
306 '0'..='9' => {
307 let uint = Lexer::parse_uint(&mut iter);
308 (Lexer::Inbracket(iter), Some(Token::Uint(uint)))
309 }
310 ch if unicode_ident::is_xid_start(*ch) => {
311 let typename = Lexer::parse_bracket_typename(&mut iter);
312 (
313 Lexer::Inbracket(iter),
314 Some(Token::BracketTypename(typename)),
315 )
316 }
317 ':' => {
318 let _ = iter.next();
319 (Lexer::Inbracket(iter), Some(Token::Colon))
320 }
321 ']' => {
322 let _ = iter.next();
323 (Lexer::Next(iter), Some(Token::RightBracket))
324 }
325 _ => (Lexer::Invalid, Some(Token::Error(iter.collect()))),
326 },
327 Lexer::Invalid => (Lexer::Invalid, None),
328 };
329
330 *self = new_state;
331 token
332 }
333}
334
335#[cfg(test)]
336mod tests {
337 use Token::*;
338
339 use super::*;
340
341 #[test]
342 fn test_basic() {
343 assert_eq!(
344 Lexer::new("foo.bar[123:foo]//blah").collect::<Vec<_>>(),
345 vec![
346 Ident("foo".parse().unwrap()),
347 Dot,
348 Ident("bar".parse().unwrap()),
349 LeftBracket,
350 Uint(123),
351 Colon,
352 BracketTypename("foo".to_string()),
353 RightBracket,
354 DoubleSlash,
355 Ident("blah".parse().unwrap())
356 ]
357 );
358 assert_eq!(
359 Lexer::new("foo.ba)r[123:foo]//blah").collect::<Vec<_>>(),
360 vec![
361 Ident("foo".parse().unwrap()),
362 Dot,
363 Ident("ba".parse().unwrap()),
364 Error(")r[123:foo]//blah".to_string())
365 ]
366 );
367 }
368}