hyperactor_mesh/value_mesh/
rle.rs

1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
9use std::mem::replace;
10use std::ops::Range;
11
12/// Run-length encodes a dense sequence into a table of unique values
13/// and a run list, using a caller-provided equivalence predicate.
14///
15/// Consumes the dense `values` vector and produces:
16/// - `table`: the deduplicated values in **first-occurrence order**
17/// - `runs`: a list of `(start..end, id)` where `id` indexes into
18///   `table`
19///
20/// Two adjacent elements `a`, `b` belong to the same run iff `same(a,
21/// b)` returns `true`. In practice `same` should behave like an
22/// equivalence relation over adjacent elements (reflexive and
23/// symmetric; transitive is not required globally because only
24/// **adjacency** is consulted).
25///
26/// # Parameters
27/// - `values`: dense sequence; exactly one value per rank (consumed)
28/// - `same`: predicate deciding adjacency-equality (`same(&a, &b)` =>
29///   merge)
30///
31/// # Returns
32/// `(table, runs)` where:
33/// - `table.len() >= 0`
34/// - `runs` is empty iff `values` is empty
35///
36/// # Invariants on the output
37/// - `runs` is sorted by `start` and **non-overlapping**
38/// - `runs` **exactly covers** `0..values.len()` with contiguous,
39///   non-empty half-open intervals
40/// - Adjacent runs always refer to **different** `id`s under `same`
41/// - For every `(r, id)` in `runs`, `id < table.len()`
42/// - For every index `i` in `r`, `values[i]` is `same`-equal to
43///   `table[id]`
44/// - `table` preserves first-appearance order of distinct (under
45///   `same`) values encountered while scanning left-to-right
46///
47/// # Examples
48/// ```
49/// // values: [A, A, B, B, B, A]
50/// // table:  [A, B, A]
51/// // runs:   [(0..2, 0), (2..5, 1), (5..6, 2)]
52/// ```
53///
54/// Note: `table` preserves **first-appearance order of each distinct
55/// contiguous run**, not global uniqueness - the same logical value
56/// may appear multiple times in `table` if it occurs in disjoint
57/// runs.
58pub fn rle_from_dense<T, F>(values: Vec<T>, mut same: F) -> (Vec<T>, Vec<(Range<usize>, u32)>)
59where
60    F: FnMut(&T, &T) -> bool,
61{
62    if values.is_empty() {
63        return (Vec::new(), Vec::new());
64    }
65
66    // Consume `values` so we can move (not clone) elements into the table.
67    let mut iter = values.into_iter();
68    let first = iter.next().unwrap();
69
70    let mut table: Vec<T> = Vec::new();
71    let mut runs: Vec<(Range<usize>, u32)> = Vec::new();
72
73    let mut start = 0usize;
74    table.push(first); // move, not clone
75    let mut cur_id: u32 = 0;
76    let mut len = 1usize;
77
78    // Walk through all subsequent elements, closing and opening runs
79    // whenever the value changes.
80    for (i, v) in iter.enumerate() {
81        let idx = i + 1;
82        if !same(&v, &table[cur_id as usize]) {
83            runs.push((start..idx, cur_id));
84
85            // Start a new run
86            start = idx;
87            table.push(v); // move, not clone
88            cur_id = (table.len() - 1) as u32;
89        }
90        len = idx + 1;
91    }
92
93    // Close the final run
94    runs.push((start..len, cur_id));
95
96    (table, runs)
97}
98
99/// Converts **normalized** value-bearing runs into a deduplicated
100/// table and a run list, coalescing adjacent runs that carry the same
101/// value.
102///
103/// # Input
104///
105/// - `runs`: a vector of `(start..end, value)` pairs that is already
106///   **normalized**:
107///   - sorted by `(start, end)` in ascending order,
108///   - non-empty (`start < end`),
109///   - **non-overlapping** (touching is allowed).
110///
111/// # Behavior
112///
113/// - Builds `table` by **global** deduplication of values in
114///   first-appearance order. The same logical value appearing in
115///   disjoint places will share a single entry in `table`.
116/// - Produces `out` as a list of `(start..end, id)`, where `id`
117///   indexes into `table`.
118/// - If two consecutive input runs are **touching** (`prev.end ==
119///   next.start`) and their values are `==`, they are **coalesced**
120///   into a single output run referencing the same `id`.
121///
122/// # Output invariants
123/// - `out` is sorted by `start` and **non-overlapping**; touching
124///   runs have different `id`s.
125/// - Each `(r, id)` in `out` satisfies `id < table.len()`.
126/// - `table` preserves **first-appearance order** of distinct values
127///   across the entire input.
128/// - The union of ranges in `out` equals the union of ranges in the
129///   input `runs`.
130///
131/// # Preconditions
132/// This function assumes the input `runs` is normalized as described
133/// above; it does **not** revalidate or resort the input. Supplying
134/// unsorted or overlapping ranges results in unspecified behavior.
135///
136/// # Example
137/// ```
138/// // Input runs (already sorted & non-overlapping):
139/// // [(0..2, A), (2..5, A), (5..6, B), (8..10, A)]
140/// // After coalescing equal-adjacent:
141/// // [(0..5, A), (5..6, B), (8..10, A)]
142/// // table  = [A, B]
143/// // out    = [(0..5, 0), (5..6, 1), (8..10, 0)]
144/// ```
145pub fn rle_from_value_runs<T: Clone + PartialEq>(
146    mut runs: Vec<(Range<usize>, T)>,
147) -> (Vec<T>, Vec<(Range<usize>, u32)>) {
148    if runs.is_empty() {
149        return (Vec::new(), Vec::new());
150    }
151    // Runs are already normalized by caller (sorted,
152    // non-overlapping).
153    let mut table: Vec<T> = Vec::new();
154    let mut out: Vec<(Range<usize>, u32)> = Vec::new();
155
156    let mut push_run = |range: Range<usize>, v: &T| {
157        // De-dup table.
158        let id = if let Some(idx) = table.iter().position(|x| x == v) {
159            idx as u32
160        } else {
161            table.push(v.clone());
162            (table.len() - 1) as u32
163        };
164        // Coalesce equal-adjacent ids.
165        if let Some((last_r, last_id)) = out.last_mut() {
166            if last_r.end == range.start && *last_id == id {
167                last_r.end = range.end;
168                return;
169            }
170        }
171        out.push((range, id));
172    };
173
174    for (r, v) in runs.drain(..) {
175        push_run(r, &v);
176    }
177    (table, out)
178}
179
180/// True iff the two half-open ranges overlap.
181#[inline]
182pub(crate) fn ranges_overlap(a: &Range<usize>, b: &Range<usize>) -> bool {
183    a.start < b.end && b.start < a.end
184}
185
186/// Merge two normalized `(Range, T)` run lists with "right wins"
187/// overwrite semantics.
188///
189/// - Inputs must each be sorted, non-overlapping, coalesced
190///   (equal-adjacent merged).
191/// - Output is sorted, non-overlapping, coalesced.
192/// - On overlaps, `right_in` overwrites `left_in` (last-writer-wins).
193pub fn merge_value_runs<T: Eq + Clone>(
194    left_in: Vec<(Range<usize>, T)>,
195    right_in: Vec<(Range<usize>, T)>,
196) -> Vec<(Range<usize>, T)> {
197    // `out` will hold the merged, coalesced result.
198    let mut out: Vec<(Range<usize>, T)> = Vec::new();
199
200    // Local helper that appends a run butg coalesces equal-adjacent
201    // (like RankedValues::append)
202    let mut append = |range: Range<usize>, value: T| {
203        if let Some((last_r, last_v)) = out.last_mut() {
204            if last_r.end == range.start && *last_v == value {
205                last_r.end = range.end;
206                return;
207            }
208        }
209        out.push((range, value));
210    };
211
212    // Turn each input into forward iterators.
213    let mut left_iter = left_in.into_iter();
214    let mut right_iter = right_in.into_iter();
215
216    // `left` and `right` are the current cursor items
217    // (`Option<(Range, T)>`).
218    let mut left = left_iter.next();
219    let mut right = right_iter.next();
220
221    // Main merge loop: runs as long as both sides have a current
222    // item.
223    //
224    // VO-1 postcondition: all runs emitted so far are sorted, non-overlapping,
225    // and coalesced; `left` and `right` point to the next unprocessed
226    // run on each side.
227    while left.is_some() && right.is_some() {
228        // Mutable refs to the current (range, val) pairs so we can
229        // adjust their start as we carve off emitted pieces.
230        let (left_ranks, left_value) = left.as_mut().unwrap();
231        let (right_ranks, right_value) = right.as_mut().unwrap();
232
233        if ranges_overlap(left_ranks, right_ranks) {
234            // Overlap case (half-open ranges): a.start < b.end &&
235            // b.start < a.end.
236            if *left_value == *right_value {
237                // Equal-value overlap: merge into one unified run.
238                //
239                // We extend the emitted range up to right.end — not
240                // max(left.end, right.end). This choice ensures:
241                //   - The entire right run is consumed and the right
242                //     iterator advances (guaranteeing progress).
243                //   - If the left run extends further, we just shrink
244                //     its start and handle its tail next iteration.
245                // This avoids overlap or lookahead while keeping
246                // output normalized.
247                let ranks = (left_ranks.start.min(right_ranks.start))..right_ranks.end;
248                // `replace` consumes the whole right run and advances
249                // to the next.
250
251                // Consume the right run entirely and move right
252                // forward.
253                let (_, value) = replace(&mut right, right_iter.next()).unwrap();
254                // Advance `left_ranks.start` to the end of what we
255                // just emitted; if left run is now empty, advance the
256                // left iterator.
257                left_ranks.start = ranks.end;
258                if left_ranks.is_empty() {
259                    left = left_iter.next();
260                }
261                // Append (coalescing if it touches the previous
262                // output with the same value).
263                append(ranks, value);
264            } else if left_ranks.start < right_ranks.start {
265                // Different values; left starts first. Emit the
266                // prefix of left up to right.start — this portion
267                // cannot be overwritten and can be finalized.
268                let ranks = left_ranks.start..right_ranks.start;
269                left_ranks.start = ranks.end;
270                append(ranks, left_value.clone());
271            } else {
272                // Different values; right starts earlier or equal.
273                // "Right wins" — emit the right chunk as-is, consuming it
274                // fully and moving left forward as needed. Then make sure
275                // no leftover left segments still overlap that right range.
276                let (ranks, value) = replace(&mut right, right_iter.next()).unwrap();
277
278                // Clamp the current left start so it never extends into
279                // the just-emitted right region.
280                left_ranks.start = left_ranks.start.max(ranks.end);
281                if left_ranks.is_empty() {
282                    left = left_iter.next();
283                }
284
285                // Trim or skip any following left runs that still overlap
286                // this right run's extent.
287                while let Some((next_r, _)) = left.as_mut() {
288                    if next_r.start < ranks.end {
289                        next_r.start = next_r.start.max(ranks.end);
290                        if next_r.is_empty() {
291                            left = left_iter.next();
292                            continue;
293                        }
294                    }
295                    break;
296                }
297                append(ranks, value);
298            }
299        } else if left_ranks.start < right_ranks.start {
300            // No overlap, left starts earlier.
301            let (ranks, value) = replace(&mut left, left_iter.next()).unwrap();
302            append(ranks, value);
303        } else {
304            // Nov overlap, right starts earlier.
305            let (ranks, value) = replace(&mut right, right_iter.next()).unwrap();
306            append(ranks, value);
307        }
308    }
309
310    // Drain whichever side still has runs. They are guaranteed to
311    // follow all previously emitted ranges.
312    while let Some((r, v)) = left {
313        append(r, v);
314        left = left_iter.next();
315    }
316    while let Some((r, v)) = right {
317        append(r, v);
318        right = right_iter.next();
319    }
320
321    // Postcondition: `out` is sorted, non-overlapping, coalesced. All
322    // overlaps resolved by "right-wins" semantics.
323    out
324}
325
326#[cfg(test)]
327mod tests {
328    use super::*;
329
330    #[test]
331    fn merge_disjoint_right_after_left() {
332        let left = vec![(0..5, 1)];
333        let right = vec![(7..9, 2)];
334        let out = merge_value_runs(left, right);
335        assert_eq!(out, vec![(0..5, 1), (7..9, 2)]);
336    }
337
338    #[test]
339    fn merge_disjoint_right_before_left() {
340        let left = vec![(7..9, 1)];
341        let right = vec![(0..5, 2)];
342        let out = merge_value_runs(left, right);
343        assert_eq!(out, vec![(0..5, 2), (7..9, 1)]);
344    }
345
346    #[test]
347    fn overlap_right_wins_simple() {
348        let left = vec![(0..10, 1)];
349        let right = vec![(3..6, 2)];
350        let out = merge_value_runs(left, right);
351        // left prefix, right overwrite, left suffix
352        assert_eq!(out, vec![(0..3, 1), (3..6, 2), (6..10, 1)]);
353    }
354
355    #[test]
356    fn overlap_equal_values_union_to_right_end() {
357        let left = vec![(0..4, 5)];
358        let right = vec![(2..6, 5)];
359        let out = merge_value_runs(left, right);
360        // same value: union emits [0..6) as two pieces depending on
361        // algorithm's "extend to right.end" rule, but coalescing
362        // should produce a single run:
363        assert_eq!(out, vec![(0..6, 5)]);
364    }
365
366    #[test]
367    fn overlap_equal_values_with_left_longer() {
368        let left = vec![(0..8, 5)];
369        let right = vec![(2..6, 5)];
370        let out = merge_value_runs(left, right);
371        // equal case extends to right.end first, then left tail
372        // remains and should coalesce to one; because they’re
373        // touching & equal:
374        assert_eq!(out, vec![(0..8, 5)]);
375    }
376
377    #[test]
378    fn overlap_right_starts_earlier_right_wins() {
379        let left = vec![(4..10, 1)];
380        let right = vec![(2..6, 2)];
381        let out = merge_value_runs(left, right);
382        // Right chunk first, then left remainder:
383        assert_eq!(out, vec![(2..6, 2), (6..10, 1)]);
384    }
385
386    #[test]
387    fn touching_coalesces_equal() {
388        let left = vec![(0..3, 1), (3..6, 1)];
389        let right = vec![];
390        let out = merge_value_runs(left, right);
391        assert_eq!(out, vec![(0..6, 1)]);
392    }
393
394    #[test]
395    fn multi_overlap_mixed_values() {
396        let left = vec![(0..5, 1), (5..10, 1), (10..15, 3)];
397        let right = vec![(3..7, 2), (12..20, 4)];
398        let out = merge_value_runs(left, right);
399        assert_eq!(
400            out,
401            vec![(0..3, 1), (3..7, 2), (7..10, 1), (10..12, 3), (12..20, 4)]
402        );
403    }
404
405    #[test]
406    fn overlap_mixed_values_right_inside_left() {
407        // Right run sits strictly within a left run of a different
408        // value.
409        let left = vec![(0..10, 1)];
410        let right = vec![(3..7, 2)];
411        let out = merge_value_runs(left, right);
412        assert_eq!(
413            out,
414            vec![(0..3, 1), (3..7, 2), (7..10, 1)],
415            "right overwrites interior portion of left run"
416        );
417    }
418
419    #[test]
420    fn overlap_mixed_values_right_spans_multiple_left_runs() {
421        // Right spans two left runs, overwriting parts of both.
422        let left = vec![(0..5, 1), (5..10, 2)];
423        let right = vec![(3..7, 9)];
424        let out = merge_value_runs(left, right);
425        assert_eq!(
426            out,
427            vec![(0..3, 1), (3..7, 9), (7..10, 2)],
428            "right spans two left runs; left prefix/tail preserved"
429        );
430    }
431
432    #[test]
433    fn overlap_mixed_values_right_starts_before_left() {
434        // Right begins before left and overlaps into it.
435        let left = vec![(5..10, 1)];
436        let right = vec![(0..7, 2)];
437        let out = merge_value_runs(left, right);
438        assert_eq!(
439            out,
440            vec![(0..7, 2), (7..10, 1)],
441            "right overwrites head and extends beyond left start"
442        );
443    }
444
445    #[test]
446    fn overlap_mixed_values_right_ends_after_left() {
447        // Right starts inside left but extends beyond its end.
448        let left = vec![(0..5, 1)];
449        let right = vec![(3..8, 2)];
450        let out = merge_value_runs(left, right);
451        assert_eq!(
452            out,
453            vec![(0..3, 1), (3..8, 2)],
454            "right overwrites tail and extends past left"
455        );
456    }
457
458    #[test]
459    fn overlap_mixed_values_multiple_cascading_overlaps() {
460        // Stress: multiple right runs each cutting through several
461        // lefts.
462        let left = vec![(0..4, 1), (4..8, 1), (8..12, 2), (12..16, 3)];
463        let right = vec![(2..6, 9), (10..14, 9)];
464        let out = merge_value_runs(left, right);
465        assert_eq!(
466            out,
467            vec![
468                (0..2, 1),
469                (2..6, 9),
470                (6..8, 1),
471                (8..10, 2),
472                (10..14, 9),
473                (14..16, 3)
474            ],
475            "multiple right runs spanning multiple left runs, non-overlapping output"
476        );
477    }
478
479    #[test]
480    fn empty_inputs() {
481        let out = merge_value_runs::<i32>(vec![], vec![]);
482        assert!(out.is_empty());
483
484        let out = merge_value_runs(vec![(0..2, 9)], vec![]);
485        assert_eq!(out, vec![(0..2, 9)]);
486
487        let out = merge_value_runs(vec![], vec![(5..7, 4)]);
488        assert_eq!(out, vec![(5..7, 4)]);
489    }
490}