Skip to main content

hyperactor_mesh/value_mesh/
rle.rs

1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
9use std::mem::replace;
10use std::ops::Range;
11
12/// Run-length encodes a dense sequence into a table of unique values
13/// and a run list, using a caller-provided equivalence predicate.
14///
15/// Consumes the dense `values` vector and produces:
16/// - `table`: the deduplicated values in **first-occurrence order**
17/// - `runs`: a list of `(start..end, id)` where `id` indexes into
18///   `table`
19///
20/// Two adjacent elements `a`, `b` belong to the same run iff `same(a,
21/// b)` returns `true`. In practice `same` should behave like an
22/// equivalence relation over adjacent elements (reflexive and
23/// symmetric; transitive is not required globally because only
24/// **adjacency** is consulted).
25///
26/// # Parameters
27/// - `values`: dense sequence; exactly one value per rank (consumed)
28/// - `same`: predicate deciding adjacency-equality (`same(&a, &b)` =>
29///   merge)
30///
31/// # Returns
32/// `(table, runs)` where:
33/// - `table.len() >= 0`
34/// - `runs` is empty iff `values` is empty
35///
36/// # Invariants on the output
37/// - `runs` is sorted by `start` and **non-overlapping**
38/// - `runs` **exactly covers** `0..values.len()` with contiguous,
39///   non-empty half-open intervals
40/// - Adjacent runs always refer to **different** `id`s under `same`
41/// - For every `(r, id)` in `runs`, `id < table.len()`
42/// - For every index `i` in `r`, `values[i]` is `same`-equal to
43///   `table[id]`
44/// - `table` preserves first-appearance order of distinct (under
45///   `same`) values encountered while scanning left-to-right
46///
47/// # Examples
48/// ```
49/// // values: [A, A, B, B, B, A]
50/// // table:  [A, B, A]
51/// // runs:   [(0..2, 0), (2..5, 1), (5..6, 2)]
52/// ```
53///
54/// Note: `table` preserves **first-appearance order of each distinct
55/// contiguous run**, not global uniqueness - the same logical value
56/// may appear multiple times in `table` if it occurs in disjoint
57/// runs.
58pub fn rle_from_dense<T, F>(values: Vec<T>, mut same: F) -> (Vec<T>, Vec<(Range<usize>, u32)>)
59where
60    F: FnMut(&T, &T) -> bool,
61{
62    if values.is_empty() {
63        return (Vec::new(), Vec::new());
64    }
65
66    // Consume `values` so we can move (not clone) elements into the table.
67    let mut iter = values.into_iter();
68    let first = iter.next().unwrap();
69
70    let mut table: Vec<T> = Vec::new();
71    let mut runs: Vec<(Range<usize>, u32)> = Vec::new();
72
73    let mut start = 0usize;
74    table.push(first); // move, not clone
75    let mut cur_id: u32 = 0;
76    let mut len = 1usize;
77
78    // Walk through all subsequent elements, closing and opening runs
79    // whenever the value changes.
80    for (i, v) in iter.enumerate() {
81        let idx = i + 1;
82        if !same(&v, &table[cur_id as usize]) {
83            runs.push((start..idx, cur_id));
84
85            // Start a new run
86            start = idx;
87            table.push(v); // move, not clone
88            cur_id = (table.len() - 1) as u32;
89        }
90        len = idx + 1;
91    }
92
93    // Close the final run
94    runs.push((start..len, cur_id));
95
96    (table, runs)
97}
98
99/// Converts **normalized** value-bearing runs into a deduplicated
100/// table and a run list, coalescing adjacent runs that carry the same
101/// value.
102///
103/// # Input
104///
105/// - `runs`: a vector of `(start..end, value)` pairs that is already
106///   **normalized**:
107///   - sorted by `(start, end)` in ascending order,
108///   - non-empty (`start < end`),
109///   - **non-overlapping** (touching is allowed).
110///
111/// # Behavior
112///
113/// - Builds `table` by **global** deduplication of values in
114///   first-appearance order. The same logical value appearing in
115///   disjoint places will share a single entry in `table`.
116/// - Produces `out` as a list of `(start..end, id)`, where `id`
117///   indexes into `table`.
118/// - If two consecutive input runs are **touching** (`prev.end ==
119///   next.start`) and their values are `==`, they are **coalesced**
120///   into a single output run referencing the same `id`.
121///
122/// # Output invariants
123/// - `out` is sorted by `start` and **non-overlapping**; touching
124///   runs have different `id`s.
125/// - Each `(r, id)` in `out` satisfies `id < table.len()`.
126/// - `table` preserves **first-appearance order** of distinct values
127///   across the entire input.
128/// - The union of ranges in `out` equals the union of ranges in the
129///   input `runs`.
130///
131/// # Preconditions
132/// This function assumes the input `runs` is normalized as described
133/// above; it does **not** revalidate or resort the input. Supplying
134/// unsorted or overlapping ranges results in unspecified behavior.
135///
136/// # Example
137/// ```
138/// // Input runs (already sorted & non-overlapping):
139/// // [(0..2, A), (2..5, A), (5..6, B), (8..10, A)]
140/// // After coalescing equal-adjacent:
141/// // [(0..5, A), (5..6, B), (8..10, A)]
142/// // table  = [A, B]
143/// // out    = [(0..5, 0), (5..6, 1), (8..10, 0)]
144/// ```
145pub fn rle_from_value_runs<T: Clone + PartialEq>(
146    mut runs: Vec<(Range<usize>, T)>,
147) -> (Vec<T>, Vec<(Range<usize>, u32)>) {
148    if runs.is_empty() {
149        return (Vec::new(), Vec::new());
150    }
151    // Runs are already normalized by caller (sorted,
152    // non-overlapping).
153    let mut table: Vec<T> = Vec::new();
154    let mut out: Vec<(Range<usize>, u32)> = Vec::new();
155
156    let mut push_run = |range: Range<usize>, v: &T| {
157        // De-dup table.
158        let id = if let Some(idx) = table.iter().position(|x| x == v) {
159            idx as u32
160        } else {
161            table.push(v.clone());
162            (table.len() - 1) as u32
163        };
164        // Coalesce equal-adjacent ids.
165        if let Some((last_r, last_id)) = out.last_mut()
166            && last_r.end == range.start
167            && *last_id == id
168        {
169            last_r.end = range.end;
170            return;
171        }
172        out.push((range, id));
173    };
174
175    for (r, v) in runs.drain(..) {
176        push_run(r, &v);
177    }
178    (table, out)
179}
180
181/// True iff the two half-open ranges overlap.
182#[inline]
183pub(crate) fn ranges_overlap(a: &Range<usize>, b: &Range<usize>) -> bool {
184    a.start < b.end && b.start < a.end
185}
186
187/// Merge two normalized `(Range, T)` run lists with "right wins"
188/// overwrite semantics.
189///
190/// - Inputs must each be sorted, non-overlapping, coalesced
191///   (equal-adjacent merged).
192/// - Output is sorted, non-overlapping, coalesced.
193/// - On overlaps, `right_in` overwrites `left_in` (last-writer-wins).
194pub fn merge_value_runs<T: Eq + Clone>(
195    left_in: Vec<(Range<usize>, T)>,
196    right_in: Vec<(Range<usize>, T)>,
197) -> Vec<(Range<usize>, T)> {
198    // `out` will hold the merged, coalesced result.
199    let mut out: Vec<(Range<usize>, T)> = Vec::new();
200
201    // Local helper that appends a run butg coalesces equal-adjacent
202    // (like RankedValues::append)
203    let mut append = |range: Range<usize>, value: T| {
204        if let Some((last_r, last_v)) = out.last_mut()
205            && last_r.end == range.start
206            && *last_v == value
207        {
208            last_r.end = range.end;
209            return;
210        }
211        out.push((range, value));
212    };
213
214    // Turn each input into forward iterators.
215    let mut left_iter = left_in.into_iter();
216    let mut right_iter = right_in.into_iter();
217
218    // `left` and `right` are the current cursor items
219    // (`Option<(Range, T)>`).
220    let mut left = left_iter.next();
221    let mut right = right_iter.next();
222
223    // Main merge loop: runs as long as both sides have a current
224    // item.
225    //
226    // VO-1 postcondition: all runs emitted so far are sorted, non-overlapping,
227    // and coalesced; `left` and `right` point to the next unprocessed
228    // run on each side.
229    while left.is_some() && right.is_some() {
230        // Mutable refs to the current (range, val) pairs so we can
231        // adjust their start as we carve off emitted pieces.
232        let (left_ranks, left_value) = left.as_mut().unwrap();
233        let (right_ranks, right_value) = right.as_mut().unwrap();
234
235        if ranges_overlap(left_ranks, right_ranks) {
236            // Overlap case (half-open ranges): a.start < b.end &&
237            // b.start < a.end.
238            if *left_value == *right_value {
239                // Equal-value overlap: merge into one unified run.
240                //
241                // We extend the emitted range up to right.end — not
242                // max(left.end, right.end). This choice ensures:
243                //   - The entire right run is consumed and the right
244                //     iterator advances (guaranteeing progress).
245                //   - If the left run extends further, we just shrink
246                //     its start and handle its tail next iteration.
247                // This avoids overlap or lookahead while keeping
248                // output normalized.
249                let ranks = (left_ranks.start.min(right_ranks.start))..right_ranks.end;
250                // `replace` consumes the whole right run and advances
251                // to the next.
252
253                // Consume the right run entirely and move right
254                // forward.
255                let (_, value) = replace(&mut right, right_iter.next()).unwrap();
256                // Advance `left_ranks.start` to the end of what we
257                // just emitted; if left run is now empty, advance the
258                // left iterator.
259                left_ranks.start = ranks.end;
260                if left_ranks.is_empty() {
261                    left = left_iter.next();
262                }
263                // Append (coalescing if it touches the previous
264                // output with the same value).
265                append(ranks, value);
266            } else if left_ranks.start < right_ranks.start {
267                // Different values; left starts first. Emit the
268                // prefix of left up to right.start — this portion
269                // cannot be overwritten and can be finalized.
270                let ranks = left_ranks.start..right_ranks.start;
271                left_ranks.start = ranks.end;
272                append(ranks, left_value.clone());
273            } else {
274                // Different values; right starts earlier or equal.
275                // "Right wins" — emit the right chunk as-is, consuming it
276                // fully and moving left forward as needed. Then make sure
277                // no leftover left segments still overlap that right range.
278                let (ranks, value) = replace(&mut right, right_iter.next()).unwrap();
279
280                // Clamp the current left start so it never extends into
281                // the just-emitted right region.
282                left_ranks.start = left_ranks.start.max(ranks.end);
283                if left_ranks.is_empty() {
284                    left = left_iter.next();
285                }
286
287                // Trim or skip any following left runs that still overlap
288                // this right run's extent.
289                while let Some((next_r, _)) = left.as_mut() {
290                    if next_r.start < ranks.end {
291                        next_r.start = next_r.start.max(ranks.end);
292                        if next_r.is_empty() {
293                            left = left_iter.next();
294                            continue;
295                        }
296                    }
297                    break;
298                }
299                append(ranks, value);
300            }
301        } else if left_ranks.start < right_ranks.start {
302            // No overlap, left starts earlier.
303            let (ranks, value) = replace(&mut left, left_iter.next()).unwrap();
304            append(ranks, value);
305        } else {
306            // Nov overlap, right starts earlier.
307            let (ranks, value) = replace(&mut right, right_iter.next()).unwrap();
308            append(ranks, value);
309        }
310    }
311
312    // Drain whichever side still has runs. They are guaranteed to
313    // follow all previously emitted ranges.
314    while let Some((r, v)) = left {
315        append(r, v);
316        left = left_iter.next();
317    }
318    while let Some((r, v)) = right {
319        append(r, v);
320        right = right_iter.next();
321    }
322
323    // Postcondition: `out` is sorted, non-overlapping, coalesced. All
324    // overlaps resolved by "right-wins" semantics.
325    out
326}
327
328#[cfg(test)]
329mod tests {
330    use super::*;
331
332    #[test]
333    fn merge_disjoint_right_after_left() {
334        let left = vec![(0..5, 1)];
335        let right = vec![(7..9, 2)];
336        let out = merge_value_runs(left, right);
337        assert_eq!(out, vec![(0..5, 1), (7..9, 2)]);
338    }
339
340    #[test]
341    fn merge_disjoint_right_before_left() {
342        let left = vec![(7..9, 1)];
343        let right = vec![(0..5, 2)];
344        let out = merge_value_runs(left, right);
345        assert_eq!(out, vec![(0..5, 2), (7..9, 1)]);
346    }
347
348    #[test]
349    fn overlap_right_wins_simple() {
350        let left = vec![(0..10, 1)];
351        let right = vec![(3..6, 2)];
352        let out = merge_value_runs(left, right);
353        // left prefix, right overwrite, left suffix
354        assert_eq!(out, vec![(0..3, 1), (3..6, 2), (6..10, 1)]);
355    }
356
357    #[test]
358    fn overlap_equal_values_union_to_right_end() {
359        let left = vec![(0..4, 5)];
360        let right = vec![(2..6, 5)];
361        let out = merge_value_runs(left, right);
362        // same value: union emits [0..6) as two pieces depending on
363        // algorithm's "extend to right.end" rule, but coalescing
364        // should produce a single run:
365        assert_eq!(out, vec![(0..6, 5)]);
366    }
367
368    #[test]
369    fn overlap_equal_values_with_left_longer() {
370        let left = vec![(0..8, 5)];
371        let right = vec![(2..6, 5)];
372        let out = merge_value_runs(left, right);
373        // equal case extends to right.end first, then left tail
374        // remains and should coalesce to one; because they’re
375        // touching & equal:
376        assert_eq!(out, vec![(0..8, 5)]);
377    }
378
379    #[test]
380    fn overlap_right_starts_earlier_right_wins() {
381        let left = vec![(4..10, 1)];
382        let right = vec![(2..6, 2)];
383        let out = merge_value_runs(left, right);
384        // Right chunk first, then left remainder:
385        assert_eq!(out, vec![(2..6, 2), (6..10, 1)]);
386    }
387
388    #[test]
389    fn touching_coalesces_equal() {
390        let left = vec![(0..3, 1), (3..6, 1)];
391        let right = vec![];
392        let out = merge_value_runs(left, right);
393        assert_eq!(out, vec![(0..6, 1)]);
394    }
395
396    #[test]
397    fn multi_overlap_mixed_values() {
398        let left = vec![(0..5, 1), (5..10, 1), (10..15, 3)];
399        let right = vec![(3..7, 2), (12..20, 4)];
400        let out = merge_value_runs(left, right);
401        assert_eq!(
402            out,
403            vec![(0..3, 1), (3..7, 2), (7..10, 1), (10..12, 3), (12..20, 4)]
404        );
405    }
406
407    #[test]
408    fn overlap_mixed_values_right_inside_left() {
409        // Right run sits strictly within a left run of a different
410        // value.
411        let left = vec![(0..10, 1)];
412        let right = vec![(3..7, 2)];
413        let out = merge_value_runs(left, right);
414        assert_eq!(
415            out,
416            vec![(0..3, 1), (3..7, 2), (7..10, 1)],
417            "right overwrites interior portion of left run"
418        );
419    }
420
421    #[test]
422    fn overlap_mixed_values_right_spans_multiple_left_runs() {
423        // Right spans two left runs, overwriting parts of both.
424        let left = vec![(0..5, 1), (5..10, 2)];
425        let right = vec![(3..7, 9)];
426        let out = merge_value_runs(left, right);
427        assert_eq!(
428            out,
429            vec![(0..3, 1), (3..7, 9), (7..10, 2)],
430            "right spans two left runs; left prefix/tail preserved"
431        );
432    }
433
434    #[test]
435    fn overlap_mixed_values_right_starts_before_left() {
436        // Right begins before left and overlaps into it.
437        let left = vec![(5..10, 1)];
438        let right = vec![(0..7, 2)];
439        let out = merge_value_runs(left, right);
440        assert_eq!(
441            out,
442            vec![(0..7, 2), (7..10, 1)],
443            "right overwrites head and extends beyond left start"
444        );
445    }
446
447    #[test]
448    fn overlap_mixed_values_right_ends_after_left() {
449        // Right starts inside left but extends beyond its end.
450        let left = vec![(0..5, 1)];
451        let right = vec![(3..8, 2)];
452        let out = merge_value_runs(left, right);
453        assert_eq!(
454            out,
455            vec![(0..3, 1), (3..8, 2)],
456            "right overwrites tail and extends past left"
457        );
458    }
459
460    #[test]
461    fn overlap_mixed_values_multiple_cascading_overlaps() {
462        // Stress: multiple right runs each cutting through several
463        // lefts.
464        let left = vec![(0..4, 1), (4..8, 1), (8..12, 2), (12..16, 3)];
465        let right = vec![(2..6, 9), (10..14, 9)];
466        let out = merge_value_runs(left, right);
467        assert_eq!(
468            out,
469            vec![
470                (0..2, 1),
471                (2..6, 9),
472                (6..8, 1),
473                (8..10, 2),
474                (10..14, 9),
475                (14..16, 3)
476            ],
477            "multiple right runs spanning multiple left runs, non-overlapping output"
478        );
479    }
480
481    #[test]
482    fn empty_inputs() {
483        let out = merge_value_runs::<i32>(vec![], vec![]);
484        assert!(out.is_empty());
485
486        let out = merge_value_runs(vec![(0..2, 9)], vec![]);
487        assert_eq!(out, vec![(0..2, 9)]);
488
489        let out = merge_value_runs(vec![], vec![(5..7, 4)]);
490        assert_eq!(out, vec![(5..7, 4)]);
491    }
492}