hyperactor_mesh/value_mesh/rle.rs
1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
9use std::mem::replace;
10use std::ops::Range;
11
12/// Run-length encodes a dense sequence into a table of unique values
13/// and a run list, using a caller-provided equivalence predicate.
14///
15/// Consumes the dense `values` vector and produces:
16/// - `table`: the deduplicated values in **first-occurrence order**
17/// - `runs`: a list of `(start..end, id)` where `id` indexes into
18/// `table`
19///
20/// Two adjacent elements `a`, `b` belong to the same run iff `same(a,
21/// b)` returns `true`. In practice `same` should behave like an
22/// equivalence relation over adjacent elements (reflexive and
23/// symmetric; transitive is not required globally because only
24/// **adjacency** is consulted).
25///
26/// # Parameters
27/// - `values`: dense sequence; exactly one value per rank (consumed)
28/// - `same`: predicate deciding adjacency-equality (`same(&a, &b)` =>
29/// merge)
30///
31/// # Returns
32/// `(table, runs)` where:
33/// - `table.len() >= 0`
34/// - `runs` is empty iff `values` is empty
35///
36/// # Invariants on the output
37/// - `runs` is sorted by `start` and **non-overlapping**
38/// - `runs` **exactly covers** `0..values.len()` with contiguous,
39/// non-empty half-open intervals
40/// - Adjacent runs always refer to **different** `id`s under `same`
41/// - For every `(r, id)` in `runs`, `id < table.len()`
42/// - For every index `i` in `r`, `values[i]` is `same`-equal to
43/// `table[id]`
44/// - `table` preserves first-appearance order of distinct (under
45/// `same`) values encountered while scanning left-to-right
46///
47/// # Examples
48/// ```
49/// // values: [A, A, B, B, B, A]
50/// // table: [A, B, A]
51/// // runs: [(0..2, 0), (2..5, 1), (5..6, 2)]
52/// ```
53///
54/// Note: `table` preserves **first-appearance order of each distinct
55/// contiguous run**, not global uniqueness - the same logical value
56/// may appear multiple times in `table` if it occurs in disjoint
57/// runs.
58pub fn rle_from_dense<T, F>(values: Vec<T>, mut same: F) -> (Vec<T>, Vec<(Range<usize>, u32)>)
59where
60 F: FnMut(&T, &T) -> bool,
61{
62 if values.is_empty() {
63 return (Vec::new(), Vec::new());
64 }
65
66 // Consume `values` so we can move (not clone) elements into the table.
67 let mut iter = values.into_iter();
68 let first = iter.next().unwrap();
69
70 let mut table: Vec<T> = Vec::new();
71 let mut runs: Vec<(Range<usize>, u32)> = Vec::new();
72
73 let mut start = 0usize;
74 table.push(first); // move, not clone
75 let mut cur_id: u32 = 0;
76 let mut len = 1usize;
77
78 // Walk through all subsequent elements, closing and opening runs
79 // whenever the value changes.
80 for (i, v) in iter.enumerate() {
81 let idx = i + 1;
82 if !same(&v, &table[cur_id as usize]) {
83 runs.push((start..idx, cur_id));
84
85 // Start a new run
86 start = idx;
87 table.push(v); // move, not clone
88 cur_id = (table.len() - 1) as u32;
89 }
90 len = idx + 1;
91 }
92
93 // Close the final run
94 runs.push((start..len, cur_id));
95
96 (table, runs)
97}
98
99/// Converts **normalized** value-bearing runs into a deduplicated
100/// table and a run list, coalescing adjacent runs that carry the same
101/// value.
102///
103/// # Input
104///
105/// - `runs`: a vector of `(start..end, value)` pairs that is already
106/// **normalized**:
107/// - sorted by `(start, end)` in ascending order,
108/// - non-empty (`start < end`),
109/// - **non-overlapping** (touching is allowed).
110///
111/// # Behavior
112///
113/// - Builds `table` by **global** deduplication of values in
114/// first-appearance order. The same logical value appearing in
115/// disjoint places will share a single entry in `table`.
116/// - Produces `out` as a list of `(start..end, id)`, where `id`
117/// indexes into `table`.
118/// - If two consecutive input runs are **touching** (`prev.end ==
119/// next.start`) and their values are `==`, they are **coalesced**
120/// into a single output run referencing the same `id`.
121///
122/// # Output invariants
123/// - `out` is sorted by `start` and **non-overlapping**; touching
124/// runs have different `id`s.
125/// - Each `(r, id)` in `out` satisfies `id < table.len()`.
126/// - `table` preserves **first-appearance order** of distinct values
127/// across the entire input.
128/// - The union of ranges in `out` equals the union of ranges in the
129/// input `runs`.
130///
131/// # Preconditions
132/// This function assumes the input `runs` is normalized as described
133/// above; it does **not** revalidate or resort the input. Supplying
134/// unsorted or overlapping ranges results in unspecified behavior.
135///
136/// # Example
137/// ```
138/// // Input runs (already sorted & non-overlapping):
139/// // [(0..2, A), (2..5, A), (5..6, B), (8..10, A)]
140/// // After coalescing equal-adjacent:
141/// // [(0..5, A), (5..6, B), (8..10, A)]
142/// // table = [A, B]
143/// // out = [(0..5, 0), (5..6, 1), (8..10, 0)]
144/// ```
145pub fn rle_from_value_runs<T: Clone + PartialEq>(
146 mut runs: Vec<(Range<usize>, T)>,
147) -> (Vec<T>, Vec<(Range<usize>, u32)>) {
148 if runs.is_empty() {
149 return (Vec::new(), Vec::new());
150 }
151 // Runs are already normalized by caller (sorted,
152 // non-overlapping).
153 let mut table: Vec<T> = Vec::new();
154 let mut out: Vec<(Range<usize>, u32)> = Vec::new();
155
156 let mut push_run = |range: Range<usize>, v: &T| {
157 // De-dup table.
158 let id = if let Some(idx) = table.iter().position(|x| x == v) {
159 idx as u32
160 } else {
161 table.push(v.clone());
162 (table.len() - 1) as u32
163 };
164 // Coalesce equal-adjacent ids.
165 if let Some((last_r, last_id)) = out.last_mut()
166 && last_r.end == range.start
167 && *last_id == id
168 {
169 last_r.end = range.end;
170 return;
171 }
172 out.push((range, id));
173 };
174
175 for (r, v) in runs.drain(..) {
176 push_run(r, &v);
177 }
178 (table, out)
179}
180
181/// True iff the two half-open ranges overlap.
182#[inline]
183pub(crate) fn ranges_overlap(a: &Range<usize>, b: &Range<usize>) -> bool {
184 a.start < b.end && b.start < a.end
185}
186
187/// Merge two normalized `(Range, T)` run lists with "right wins"
188/// overwrite semantics.
189///
190/// - Inputs must each be sorted, non-overlapping, coalesced
191/// (equal-adjacent merged).
192/// - Output is sorted, non-overlapping, coalesced.
193/// - On overlaps, `right_in` overwrites `left_in` (last-writer-wins).
194pub fn merge_value_runs<T: Eq + Clone>(
195 left_in: Vec<(Range<usize>, T)>,
196 right_in: Vec<(Range<usize>, T)>,
197) -> Vec<(Range<usize>, T)> {
198 // `out` will hold the merged, coalesced result.
199 let mut out: Vec<(Range<usize>, T)> = Vec::new();
200
201 // Local helper that appends a run butg coalesces equal-adjacent
202 // (like RankedValues::append)
203 let mut append = |range: Range<usize>, value: T| {
204 if let Some((last_r, last_v)) = out.last_mut()
205 && last_r.end == range.start
206 && *last_v == value
207 {
208 last_r.end = range.end;
209 return;
210 }
211 out.push((range, value));
212 };
213
214 // Turn each input into forward iterators.
215 let mut left_iter = left_in.into_iter();
216 let mut right_iter = right_in.into_iter();
217
218 // `left` and `right` are the current cursor items
219 // (`Option<(Range, T)>`).
220 let mut left = left_iter.next();
221 let mut right = right_iter.next();
222
223 // Main merge loop: runs as long as both sides have a current
224 // item.
225 //
226 // VO-1 postcondition: all runs emitted so far are sorted, non-overlapping,
227 // and coalesced; `left` and `right` point to the next unprocessed
228 // run on each side.
229 while left.is_some() && right.is_some() {
230 // Mutable refs to the current (range, val) pairs so we can
231 // adjust their start as we carve off emitted pieces.
232 let (left_ranks, left_value) = left.as_mut().unwrap();
233 let (right_ranks, right_value) = right.as_mut().unwrap();
234
235 if ranges_overlap(left_ranks, right_ranks) {
236 // Overlap case (half-open ranges): a.start < b.end &&
237 // b.start < a.end.
238 if *left_value == *right_value {
239 // Equal-value overlap: merge into one unified run.
240 //
241 // We extend the emitted range up to right.end — not
242 // max(left.end, right.end). This choice ensures:
243 // - The entire right run is consumed and the right
244 // iterator advances (guaranteeing progress).
245 // - If the left run extends further, we just shrink
246 // its start and handle its tail next iteration.
247 // This avoids overlap or lookahead while keeping
248 // output normalized.
249 let ranks = (left_ranks.start.min(right_ranks.start))..right_ranks.end;
250 // `replace` consumes the whole right run and advances
251 // to the next.
252
253 // Consume the right run entirely and move right
254 // forward.
255 let (_, value) = replace(&mut right, right_iter.next()).unwrap();
256 // Advance `left_ranks.start` to the end of what we
257 // just emitted; if left run is now empty, advance the
258 // left iterator.
259 left_ranks.start = ranks.end;
260 if left_ranks.is_empty() {
261 left = left_iter.next();
262 }
263 // Append (coalescing if it touches the previous
264 // output with the same value).
265 append(ranks, value);
266 } else if left_ranks.start < right_ranks.start {
267 // Different values; left starts first. Emit the
268 // prefix of left up to right.start — this portion
269 // cannot be overwritten and can be finalized.
270 let ranks = left_ranks.start..right_ranks.start;
271 left_ranks.start = ranks.end;
272 append(ranks, left_value.clone());
273 } else {
274 // Different values; right starts earlier or equal.
275 // "Right wins" — emit the right chunk as-is, consuming it
276 // fully and moving left forward as needed. Then make sure
277 // no leftover left segments still overlap that right range.
278 let (ranks, value) = replace(&mut right, right_iter.next()).unwrap();
279
280 // Clamp the current left start so it never extends into
281 // the just-emitted right region.
282 left_ranks.start = left_ranks.start.max(ranks.end);
283 if left_ranks.is_empty() {
284 left = left_iter.next();
285 }
286
287 // Trim or skip any following left runs that still overlap
288 // this right run's extent.
289 while let Some((next_r, _)) = left.as_mut() {
290 if next_r.start < ranks.end {
291 next_r.start = next_r.start.max(ranks.end);
292 if next_r.is_empty() {
293 left = left_iter.next();
294 continue;
295 }
296 }
297 break;
298 }
299 append(ranks, value);
300 }
301 } else if left_ranks.start < right_ranks.start {
302 // No overlap, left starts earlier.
303 let (ranks, value) = replace(&mut left, left_iter.next()).unwrap();
304 append(ranks, value);
305 } else {
306 // Nov overlap, right starts earlier.
307 let (ranks, value) = replace(&mut right, right_iter.next()).unwrap();
308 append(ranks, value);
309 }
310 }
311
312 // Drain whichever side still has runs. They are guaranteed to
313 // follow all previously emitted ranges.
314 while let Some((r, v)) = left {
315 append(r, v);
316 left = left_iter.next();
317 }
318 while let Some((r, v)) = right {
319 append(r, v);
320 right = right_iter.next();
321 }
322
323 // Postcondition: `out` is sorted, non-overlapping, coalesced. All
324 // overlaps resolved by "right-wins" semantics.
325 out
326}
327
328#[cfg(test)]
329mod tests {
330 use super::*;
331
332 #[test]
333 fn merge_disjoint_right_after_left() {
334 let left = vec![(0..5, 1)];
335 let right = vec![(7..9, 2)];
336 let out = merge_value_runs(left, right);
337 assert_eq!(out, vec![(0..5, 1), (7..9, 2)]);
338 }
339
340 #[test]
341 fn merge_disjoint_right_before_left() {
342 let left = vec![(7..9, 1)];
343 let right = vec![(0..5, 2)];
344 let out = merge_value_runs(left, right);
345 assert_eq!(out, vec![(0..5, 2), (7..9, 1)]);
346 }
347
348 #[test]
349 fn overlap_right_wins_simple() {
350 let left = vec![(0..10, 1)];
351 let right = vec![(3..6, 2)];
352 let out = merge_value_runs(left, right);
353 // left prefix, right overwrite, left suffix
354 assert_eq!(out, vec![(0..3, 1), (3..6, 2), (6..10, 1)]);
355 }
356
357 #[test]
358 fn overlap_equal_values_union_to_right_end() {
359 let left = vec![(0..4, 5)];
360 let right = vec![(2..6, 5)];
361 let out = merge_value_runs(left, right);
362 // same value: union emits [0..6) as two pieces depending on
363 // algorithm's "extend to right.end" rule, but coalescing
364 // should produce a single run:
365 assert_eq!(out, vec![(0..6, 5)]);
366 }
367
368 #[test]
369 fn overlap_equal_values_with_left_longer() {
370 let left = vec![(0..8, 5)];
371 let right = vec![(2..6, 5)];
372 let out = merge_value_runs(left, right);
373 // equal case extends to right.end first, then left tail
374 // remains and should coalesce to one; because they’re
375 // touching & equal:
376 assert_eq!(out, vec![(0..8, 5)]);
377 }
378
379 #[test]
380 fn overlap_right_starts_earlier_right_wins() {
381 let left = vec![(4..10, 1)];
382 let right = vec![(2..6, 2)];
383 let out = merge_value_runs(left, right);
384 // Right chunk first, then left remainder:
385 assert_eq!(out, vec![(2..6, 2), (6..10, 1)]);
386 }
387
388 #[test]
389 fn touching_coalesces_equal() {
390 let left = vec![(0..3, 1), (3..6, 1)];
391 let right = vec![];
392 let out = merge_value_runs(left, right);
393 assert_eq!(out, vec![(0..6, 1)]);
394 }
395
396 #[test]
397 fn multi_overlap_mixed_values() {
398 let left = vec![(0..5, 1), (5..10, 1), (10..15, 3)];
399 let right = vec![(3..7, 2), (12..20, 4)];
400 let out = merge_value_runs(left, right);
401 assert_eq!(
402 out,
403 vec![(0..3, 1), (3..7, 2), (7..10, 1), (10..12, 3), (12..20, 4)]
404 );
405 }
406
407 #[test]
408 fn overlap_mixed_values_right_inside_left() {
409 // Right run sits strictly within a left run of a different
410 // value.
411 let left = vec![(0..10, 1)];
412 let right = vec![(3..7, 2)];
413 let out = merge_value_runs(left, right);
414 assert_eq!(
415 out,
416 vec![(0..3, 1), (3..7, 2), (7..10, 1)],
417 "right overwrites interior portion of left run"
418 );
419 }
420
421 #[test]
422 fn overlap_mixed_values_right_spans_multiple_left_runs() {
423 // Right spans two left runs, overwriting parts of both.
424 let left = vec![(0..5, 1), (5..10, 2)];
425 let right = vec![(3..7, 9)];
426 let out = merge_value_runs(left, right);
427 assert_eq!(
428 out,
429 vec![(0..3, 1), (3..7, 9), (7..10, 2)],
430 "right spans two left runs; left prefix/tail preserved"
431 );
432 }
433
434 #[test]
435 fn overlap_mixed_values_right_starts_before_left() {
436 // Right begins before left and overlaps into it.
437 let left = vec![(5..10, 1)];
438 let right = vec![(0..7, 2)];
439 let out = merge_value_runs(left, right);
440 assert_eq!(
441 out,
442 vec![(0..7, 2), (7..10, 1)],
443 "right overwrites head and extends beyond left start"
444 );
445 }
446
447 #[test]
448 fn overlap_mixed_values_right_ends_after_left() {
449 // Right starts inside left but extends beyond its end.
450 let left = vec![(0..5, 1)];
451 let right = vec![(3..8, 2)];
452 let out = merge_value_runs(left, right);
453 assert_eq!(
454 out,
455 vec![(0..3, 1), (3..8, 2)],
456 "right overwrites tail and extends past left"
457 );
458 }
459
460 #[test]
461 fn overlap_mixed_values_multiple_cascading_overlaps() {
462 // Stress: multiple right runs each cutting through several
463 // lefts.
464 let left = vec![(0..4, 1), (4..8, 1), (8..12, 2), (12..16, 3)];
465 let right = vec![(2..6, 9), (10..14, 9)];
466 let out = merge_value_runs(left, right);
467 assert_eq!(
468 out,
469 vec![
470 (0..2, 1),
471 (2..6, 9),
472 (6..8, 1),
473 (8..10, 2),
474 (10..14, 9),
475 (14..16, 3)
476 ],
477 "multiple right runs spanning multiple left runs, non-overlapping output"
478 );
479 }
480
481 #[test]
482 fn empty_inputs() {
483 let out = merge_value_runs::<i32>(vec![], vec![]);
484 assert!(out.is_empty());
485
486 let out = merge_value_runs(vec![(0..2, 9)], vec![]);
487 assert_eq!(out, vec![(0..2, 9)]);
488
489 let out = merge_value_runs(vec![], vec![(5..7, 4)]);
490 assert_eq!(out, vec![(5..7, 4)]);
491 }
492}