monarch_conda/
replace.rs

1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
9use std::collections::HashMap;
10use std::collections::hash_map::Entry;
11use std::path::Path;
12use std::path::PathBuf;
13
14use aho_corasick::AhoCorasick;
15use aho_corasick::AhoCorasickBuilder;
16use aho_corasick::MatchKind;
17use anyhow::Context;
18use anyhow::Result;
19use anyhow::bail;
20use anyhow::ensure;
21use itertools::Itertools;
22
23pub struct Replacer<'a> {
24    /// Paths and their replacements, in order of longest to shortest (the order in which
25    /// we should perform replacements so that longest prefixes are matched first).
26    sorted_paths: Vec<(&'a Path, &'a Path)>,
27    /// Above paths as bytestrings to be replaced, ordered in a vec for use with
28    /// `AhoCorasick` matcher.
29    needles: Vec<&'a [u8]>,
30    replacements: Vec<&'a [u8]>,
31    /// `AhoCorasick` matcher.
32    matcher: AhoCorasick,
33}
34
35pub struct ReplacerBuilder<'a> {
36    map: HashMap<&'a Path, &'a Path>,
37}
38
39impl<'a> ReplacerBuilder<'a> {
40    pub fn new() -> Self {
41        Self {
42            map: HashMap::new(),
43        }
44    }
45
46    pub fn build_if_non_empty(self) -> Result<Option<Replacer<'a>>> {
47        Ok(if self.map.is_empty() {
48            None
49        } else {
50            Some(Replacer::from(self.map)?)
51        })
52    }
53
54    pub fn build(self) -> Result<Replacer<'a>> {
55        Replacer::from(self.map)
56    }
57
58    pub fn add(&mut self, path: &'a Path, repl: &'a Path) -> Result<()> {
59        match self.map.entry(path) {
60            Entry::Occupied(o) => {
61                if *o.get() != repl {
62                    bail!(
63                        "conflicting replacements for {}: {} != {}",
64                        path.display(),
65                        o.get().display(),
66                        repl.display()
67                    )
68                }
69            }
70            Entry::Vacant(v) => {
71                v.insert(repl);
72            }
73        }
74        Ok(())
75    }
76}
77
78fn replace_bytestring(vec: &mut Vec<u8>, from: &[u8], to: &[u8]) {
79    let mut i = 0;
80    while from.len() + i <= vec.len() {
81        if &vec[i..i + from.len()] == from {
82            vec.splice(i..i + from.len(), to.iter().cloned());
83            i += to.len(); // Skip past the inserted section
84        } else {
85            i += 1;
86        }
87    }
88}
89
90impl<'a> Replacer<'a> {
91    pub fn from(paths: HashMap<&'a Path, &'a Path>) -> Result<Self> {
92        let sorted_paths = paths
93            .iter()
94            .sorted_by_key(|(s, _)| 0 - (s.as_os_str().as_encoded_bytes().len() as isize))
95            .map(|(s, d)| (*s, *d))
96            .collect::<Vec<_>>();
97        let (needles, replacements) = paths
98            .iter()
99            .map(|(s, d)| {
100                (
101                    s.as_os_str().as_encoded_bytes(),
102                    d.as_os_str().as_encoded_bytes(),
103                )
104            })
105            .sorted_by_key(|(s, _)| 0 - (s.len() as isize))
106            .collect::<(Vec<_>, Vec<_>)>();
107
108        // Build AC automaton over all source prefixes.  Use leftmost-longest to
109        // avoid a shorter key stealing a longer one that shares a prefix.
110        //let needles: Vec<&[u8]> = bytes.keys().copied().collect();
111        let matcher = AhoCorasickBuilder::new()
112            .match_kind(MatchKind::LeftmostLongest)
113            .build(&needles)?;
114
115        Ok(Replacer {
116            sorted_paths,
117            needles,
118            replacements,
119            matcher,
120        })
121    }
122
123    /// Perform in-place replacements, where the replacement is padded with nul
124    /// characters to match the length of the replacee.  Fails if the replacement
125    /// is longer than the replacee.
126    pub fn replace_inplace_padded(&self, buf: &mut [u8]) -> Result<()> {
127        if self.needles.is_empty() {
128            return Ok(());
129        }
130
131        let mut offset = 0;
132        while let Some(m) = self.matcher.find(&buf[offset..]) {
133            let buf = &mut buf[offset..];
134
135            let start = m.start();
136            let end = m.end(); // end is exclusive
137            let pat_idx = m.pattern();
138
139            // Check trailing byte condition: `/`, `\0`, or EOF
140            let trailing_ok = match buf.get(end) {
141                None => true, // EOF
142                Some(b) => *b == b'/' || *b == 0,
143            };
144            if !trailing_ok {
145                offset = end + 1;
146                continue;
147            }
148
149            // Copy in the replacement over the original path, making sure that it's not too big.
150            let pat = self.needles[pat_idx];
151            let repl = self.replacements[pat_idx];
152            ensure!(
153                repl.len() <= pat.len(),
154                "Input is longer than target length"
155            );
156            buf[start..start + repl.len()].copy_from_slice(repl);
157            // Find where the nul byte is in the original path, after any path suffixing the prefix
158            // we're replacing.
159            let nul_idx = end + buf[end..].iter().position(|b| *b == 0u8).context("nul")?;
160            // Shift the path suffix over to meet the replacment (in the case where the replacement
161            // is shorter than the original path).
162            buf.copy_within(end..nul_idx, start + repl.len());
163            // Pad the remaining space with nul bytes (in the case where the replacement is shorter
164            // than the original path).
165            buf[(nul_idx - (pat.len() - repl.len()))..nul_idx].fill(0);
166
167            // Safety: lengths are equal by construction
168            offset = nul_idx + 1;
169        }
170
171        Ok(())
172    }
173
174    /// Perform in-place replacements, which may modify the size of the
175    /// bytestring.
176    pub fn replace_inplace(&self, buf: &mut Vec<u8>) {
177        for (src, dst) in self.needles.iter().zip(self.replacements.iter()) {
178            replace_bytestring(buf, src, dst);
179        }
180    }
181
182    /// Replace any matching prefix of the given path.
183    pub fn replace_path(&self, path: PathBuf) -> PathBuf {
184        for (pattern, repl) in self.sorted_paths.iter() {
185            if let Ok(suffix) = path.strip_prefix(pattern) {
186                return repl.join(suffix);
187            }
188        }
189        path
190    }
191}
192
193#[cfg(test)]
194mod tests {
195    use std::path::Path;
196
197    use super::*;
198
199    #[test]
200    fn test_replace_inplace_padded() -> Result<()> {
201        // Create a replacer that replaces "/old/path" with "/new"
202        let mut builder = ReplacerBuilder::new();
203        builder.add(Path::new("/old/path"), Path::new("/new"))?;
204        let replacer = builder.build()?;
205
206        // Test 1: Basic replacement with trailing null byte
207        let mut buf = b"/old/path\0some other data".to_vec();
208        replacer.replace_inplace_padded(&mut buf)?;
209        let expected: &[u8] = b"/new\0\0\0\0\0\0some other data";
210        assert_eq!(buf, expected);
211
212        // Test 2: Replacement with trailing slash
213        let mut buf = b"/old/path/subdir\0".to_vec();
214        replacer.replace_inplace_padded(&mut buf)?;
215        let expected: &[u8] = b"/new/subdir\0\0\0\0\0\0";
216        assert_eq!(buf, expected);
217
218        // Test 3: Replacement at end of buffer (EOF condition)
219        let mut buf = b"/old/path\0".to_vec();
220        replacer.replace_inplace_padded(&mut buf)?;
221        let expected: &[u8] = b"/new\0\0\0\0\0\0";
222        assert_eq!(buf, expected);
223
224        // Test 4: No replacement when trailing byte condition is not met
225        let mut buf = b"/old/pathX".to_vec();
226        replacer.replace_inplace_padded(&mut buf)?;
227        let expected: &[u8] = b"/old/pathX";
228        assert_eq!(buf, expected);
229
230        // Test 5: Multiple replacements in same buffer
231        let mut buf = b"/old/path\0/old/path/subdir\0".to_vec();
232        replacer.replace_inplace_padded(&mut buf)?;
233        let expected: &[u8] = b"/new\0\0\0\0\0\0/new/subdir\0\0\0\0\0\0";
234        assert_eq!(buf, expected);
235
236        // Test 6: Empty buffer
237        let mut buf: Vec<u8> = vec![];
238        replacer.replace_inplace_padded(&mut buf)?;
239        let expected: Vec<u8> = vec![];
240        assert_eq!(buf, expected);
241
242        // Test 7: Buffer without any matches
243        let mut buf = b"no matches here".to_vec();
244        replacer.replace_inplace_padded(&mut buf)?;
245        let expected: &[u8] = b"no matches here";
246        assert_eq!(buf, expected);
247
248        Ok(())
249    }
250
251    #[test]
252    fn test_replace_inplace_padded_empty_replacer() -> Result<()> {
253        // Test with empty replacer (no paths to replace)
254        let builder = ReplacerBuilder::new();
255        let replacer = builder.build()?;
256
257        let mut buf = b"/some/path".to_vec();
258        replacer.replace_inplace_padded(&mut buf)?;
259        let expected: &[u8] = b"/some/path";
260        assert_eq!(buf, expected);
261
262        Ok(())
263    }
264
265    #[test]
266    fn test_replace_inplace_padded_replacement_too_long() -> Result<()> {
267        // Test that replacement fails when replacement is longer than original
268        let mut builder = ReplacerBuilder::new();
269        builder.add(Path::new("/a"), Path::new("/very/long/path"))?;
270
271        // This should fail during padding since "/very/long/path" is longer than "/a"
272        let replacer = builder.build()?;
273        let mut buf = b"/a\0".to_vec();
274        let result = replacer.replace_inplace_padded(&mut buf);
275        assert!(result.is_err());
276
277        Ok(())
278    }
279
280    #[test]
281    fn test_replace_inplace_padded_multiple_patterns() -> Result<()> {
282        // Test with multiple replacement patterns
283        let mut builder = ReplacerBuilder::new();
284        builder.add(Path::new("/old"), Path::new("/new"))?;
285        builder.add(Path::new("/temp"), Path::new("/tmp"))?;
286        let replacer = builder.build()?;
287
288        let mut buf = b"/old/file\0/temp/data\0".to_vec();
289        replacer.replace_inplace_padded(&mut buf)?;
290        let expected: &[u8] = b"/new/file\0/tmp/data\0\0";
291        assert_eq!(buf, expected);
292
293        Ok(())
294    }
295
296    #[test]
297    fn test_replace_inplace() -> Result<()> {
298        // Test replace_inplace which resizes the buffer
299        let mut builder = ReplacerBuilder::new();
300        builder.add(Path::new("/old/path"), Path::new("/new"))?;
301        builder.add(Path::new("/usr/local"), Path::new("/usr"))?;
302        let replacer = builder.build()?;
303
304        // Test 1: Replacement makes buffer smaller
305        let mut buf = b"/old/path/file.txt and /usr/local/bin/prog".to_vec();
306        replacer.replace_inplace(&mut buf);
307        let expected: &[u8] = b"/new/file.txt and /usr/bin/prog";
308        assert_eq!(buf, expected);
309
310        // Test 2: Replacement makes buffer larger
311        let mut builder = ReplacerBuilder::new();
312        builder.add(Path::new("/a"), Path::new("/very/long/path"))?;
313        let replacer = builder.build()?;
314
315        let mut buf = b"/a/file".to_vec();
316        replacer.replace_inplace(&mut buf);
317        let expected: &[u8] = b"/very/long/path/file";
318        assert_eq!(buf, expected);
319
320        // Test 3: Multiple replacements of different sizes
321        let mut builder = ReplacerBuilder::new();
322        builder.add(Path::new("/short"), Path::new("/very/long/replacement"))?;
323        builder.add(Path::new("/long/path/here"), Path::new("/x"))?;
324        let replacer = builder.build()?;
325
326        let mut buf = b"/short and /long/path/here".to_vec();
327        replacer.replace_inplace(&mut buf);
328        let expected: &[u8] = b"/very/long/replacement and /x";
329        assert_eq!(buf, expected);
330
331        // Test 4: Empty buffer
332        let mut buf: Vec<u8> = vec![];
333        replacer.replace_inplace(&mut buf);
334        assert!(buf.is_empty());
335
336        // Test 5: No matches
337        let mut buf = b"no matches in this text".to_vec();
338        replacer.replace_inplace(&mut buf);
339        let expected: &[u8] = b"no matches in this text";
340        assert_eq!(buf, expected);
341
342        Ok(())
343    }
344
345    #[test]
346    fn test_prefix_priority_longer_before_shorter() -> Result<()> {
347        // Test that longer prefixes are matched and replaced before shorter ones
348        let mut builder = ReplacerBuilder::new();
349        builder.add(Path::new("/foo"), Path::new("/short"))?;
350        builder.add(Path::new("/foo/bar"), Path::new("/long"))?;
351        let replacer = builder.build()?;
352
353        // Test 1: Longer prefix should be matched first with replace_inplace
354        let mut buf = b"/foo/bar/file.txt".to_vec();
355        replacer.replace_inplace(&mut buf);
356        let expected: &[u8] = b"/long/file.txt";
357        assert_eq!(buf, expected);
358
359        // Test 2: Shorter prefix should match when longer doesn't
360        let mut buf = b"/foo/baz/file.txt".to_vec();
361        replacer.replace_inplace(&mut buf);
362        let expected: &[u8] = b"/short/baz/file.txt";
363        assert_eq!(buf, expected);
364
365        // Test 3: With replace_inplace_padded
366        let mut buf = b"/foo/bar\0".to_vec();
367        replacer.replace_inplace_padded(&mut buf)?;
368        let expected: &[u8] = b"/long\0\0\0\0";
369        assert_eq!(buf, expected);
370
371        Ok(())
372    }
373
374    #[test]
375    fn test_prefix_priority_complex() -> Result<()> {
376        // Test with multiple overlapping prefixes of different lengths
377        let mut builder = ReplacerBuilder::new();
378        builder.add(Path::new("/a"), Path::new("/1"))?;
379        builder.add(Path::new("/a/b"), Path::new("/2"))?;
380        builder.add(Path::new("/a/b/c"), Path::new("/3"))?;
381        builder.add(Path::new("/a/b/c/d"), Path::new("/4"))?;
382        let replacer = builder.build()?;
383
384        // Test that the longest matching prefix wins
385        let mut buf = b"/a/b/c/d/e/file.txt".to_vec();
386        replacer.replace_inplace(&mut buf);
387        let expected: &[u8] = b"/4/e/file.txt";
388        assert_eq!(buf, expected);
389
390        let mut buf = b"/a/b/c/x/file.txt".to_vec();
391        replacer.replace_inplace(&mut buf);
392        let expected: &[u8] = b"/3/x/file.txt";
393        assert_eq!(buf, expected);
394
395        let mut buf = b"/a/b/x/file.txt".to_vec();
396        replacer.replace_inplace(&mut buf);
397        let expected: &[u8] = b"/2/x/file.txt";
398        assert_eq!(buf, expected);
399
400        let mut buf = b"/a/x/file.txt".to_vec();
401        replacer.replace_inplace(&mut buf);
402        let expected: &[u8] = b"/1/x/file.txt";
403        assert_eq!(buf, expected);
404
405        Ok(())
406    }
407
408    #[test]
409    fn test_prefix_priority_with_path_method() -> Result<()> {
410        // Test that the replace_path method also respects prefix priority
411        let mut builder = ReplacerBuilder::new();
412        builder.add(Path::new("/usr"), Path::new("/system"))?;
413        builder.add(Path::new("/usr/local"), Path::new("/local"))?;
414        builder.add(Path::new("/usr/local/bin"), Path::new("/bin"))?;
415        let replacer = builder.build()?;
416
417        // Longest matching prefix should win
418        let path = PathBuf::from("/usr/local/bin/python");
419        let result = replacer.replace_path(path);
420        assert_eq!(result, PathBuf::from("/bin/python"));
421
422        let path = PathBuf::from("/usr/local/share/data");
423        let result = replacer.replace_path(path);
424        assert_eq!(result, PathBuf::from("/local/share/data"));
425
426        let path = PathBuf::from("/usr/share/data");
427        let result = replacer.replace_path(path);
428        assert_eq!(result, PathBuf::from("/system/share/data"));
429
430        let path = PathBuf::from("/opt/data");
431        let result = replacer.replace_path(path);
432        assert_eq!(result, PathBuf::from("/opt/data")); // No replacement
433
434        Ok(())
435    }
436}