hyperactor_mesh/value_mesh/rle.rs
1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
9use std::mem::replace;
10use std::ops::Range;
11
12/// Run-length encodes a dense sequence into a table of unique values
13/// and a run list, using a caller-provided equivalence predicate.
14///
15/// Consumes the dense `values` vector and produces:
16/// - `table`: the deduplicated values in **first-occurrence order**
17/// - `runs`: a list of `(start..end, id)` where `id` indexes into
18/// `table`
19///
20/// Two adjacent elements `a`, `b` belong to the same run iff `same(a,
21/// b)` returns `true`. In practice `same` should behave like an
22/// equivalence relation over adjacent elements (reflexive and
23/// symmetric; transitive is not required globally because only
24/// **adjacency** is consulted).
25///
26/// # Parameters
27/// - `values`: dense sequence; exactly one value per rank (consumed)
28/// - `same`: predicate deciding adjacency-equality (`same(&a, &b)` =>
29/// merge)
30///
31/// # Returns
32/// `(table, runs)` where:
33/// - `table.len() >= 0`
34/// - `runs` is empty iff `values` is empty
35///
36/// # Invariants on the output
37/// - `runs` is sorted by `start` and **non-overlapping**
38/// - `runs` **exactly covers** `0..values.len()` with contiguous,
39/// non-empty half-open intervals
40/// - Adjacent runs always refer to **different** `id`s under `same`
41/// - For every `(r, id)` in `runs`, `id < table.len()`
42/// - For every index `i` in `r`, `values[i]` is `same`-equal to
43/// `table[id]`
44/// - `table` preserves first-appearance order of distinct (under
45/// `same`) values encountered while scanning left-to-right
46///
47/// # Examples
48/// ```
49/// // values: [A, A, B, B, B, A]
50/// // table: [A, B, A]
51/// // runs: [(0..2, 0), (2..5, 1), (5..6, 2)]
52/// ```
53///
54/// Note: `table` preserves **first-appearance order of each distinct
55/// contiguous run**, not global uniqueness - the same logical value
56/// may appear multiple times in `table` if it occurs in disjoint
57/// runs.
58pub fn rle_from_dense<T, F>(values: Vec<T>, mut same: F) -> (Vec<T>, Vec<(Range<usize>, u32)>)
59where
60 F: FnMut(&T, &T) -> bool,
61{
62 if values.is_empty() {
63 return (Vec::new(), Vec::new());
64 }
65
66 // Consume `values` so we can move (not clone) elements into the table.
67 let mut iter = values.into_iter();
68 let first = iter.next().unwrap();
69
70 let mut table: Vec<T> = Vec::new();
71 let mut runs: Vec<(Range<usize>, u32)> = Vec::new();
72
73 let mut start = 0usize;
74 table.push(first); // move, not clone
75 let mut cur_id: u32 = 0;
76 let mut len = 1usize;
77
78 // Walk through all subsequent elements, closing and opening runs
79 // whenever the value changes.
80 for (i, v) in iter.enumerate() {
81 let idx = i + 1;
82 if !same(&v, &table[cur_id as usize]) {
83 runs.push((start..idx, cur_id));
84
85 // Start a new run
86 start = idx;
87 table.push(v); // move, not clone
88 cur_id = (table.len() - 1) as u32;
89 }
90 len = idx + 1;
91 }
92
93 // Close the final run
94 runs.push((start..len, cur_id));
95
96 (table, runs)
97}
98
99/// Converts **normalized** value-bearing runs into a deduplicated
100/// table and a run list, coalescing adjacent runs that carry the same
101/// value.
102///
103/// # Input
104///
105/// - `runs`: a vector of `(start..end, value)` pairs that is already
106/// **normalized**:
107/// - sorted by `(start, end)` in ascending order,
108/// - non-empty (`start < end`),
109/// - **non-overlapping** (touching is allowed).
110///
111/// # Behavior
112///
113/// - Builds `table` by **global** deduplication of values in
114/// first-appearance order. The same logical value appearing in
115/// disjoint places will share a single entry in `table`.
116/// - Produces `out` as a list of `(start..end, id)`, where `id`
117/// indexes into `table`.
118/// - If two consecutive input runs are **touching** (`prev.end ==
119/// next.start`) and their values are `==`, they are **coalesced**
120/// into a single output run referencing the same `id`.
121///
122/// # Output invariants
123/// - `out` is sorted by `start` and **non-overlapping**; touching
124/// runs have different `id`s.
125/// - Each `(r, id)` in `out` satisfies `id < table.len()`.
126/// - `table` preserves **first-appearance order** of distinct values
127/// across the entire input.
128/// - The union of ranges in `out` equals the union of ranges in the
129/// input `runs`.
130///
131/// # Preconditions
132/// This function assumes the input `runs` is normalized as described
133/// above; it does **not** revalidate or resort the input. Supplying
134/// unsorted or overlapping ranges results in unspecified behavior.
135///
136/// # Example
137/// ```
138/// // Input runs (already sorted & non-overlapping):
139/// // [(0..2, A), (2..5, A), (5..6, B), (8..10, A)]
140/// // After coalescing equal-adjacent:
141/// // [(0..5, A), (5..6, B), (8..10, A)]
142/// // table = [A, B]
143/// // out = [(0..5, 0), (5..6, 1), (8..10, 0)]
144/// ```
145pub fn rle_from_value_runs<T: Clone + PartialEq>(
146 mut runs: Vec<(Range<usize>, T)>,
147) -> (Vec<T>, Vec<(Range<usize>, u32)>) {
148 if runs.is_empty() {
149 return (Vec::new(), Vec::new());
150 }
151 // Runs are already normalized by caller (sorted,
152 // non-overlapping).
153 let mut table: Vec<T> = Vec::new();
154 let mut out: Vec<(Range<usize>, u32)> = Vec::new();
155
156 let mut push_run = |range: Range<usize>, v: &T| {
157 // De-dup table.
158 let id = if let Some(idx) = table.iter().position(|x| x == v) {
159 idx as u32
160 } else {
161 table.push(v.clone());
162 (table.len() - 1) as u32
163 };
164 // Coalesce equal-adjacent ids.
165 if let Some((last_r, last_id)) = out.last_mut() {
166 if last_r.end == range.start && *last_id == id {
167 last_r.end = range.end;
168 return;
169 }
170 }
171 out.push((range, id));
172 };
173
174 for (r, v) in runs.drain(..) {
175 push_run(r, &v);
176 }
177 (table, out)
178}
179
180/// True iff the two half-open ranges overlap.
181#[inline]
182pub(crate) fn ranges_overlap(a: &Range<usize>, b: &Range<usize>) -> bool {
183 a.start < b.end && b.start < a.end
184}
185
186/// Merge two normalized `(Range, T)` run lists with "right wins"
187/// overwrite semantics.
188///
189/// - Inputs must each be sorted, non-overlapping, coalesced
190/// (equal-adjacent merged).
191/// - Output is sorted, non-overlapping, coalesced.
192/// - On overlaps, `right_in` overwrites `left_in` (last-writer-wins).
193pub fn merge_value_runs<T: Eq + Clone>(
194 left_in: Vec<(Range<usize>, T)>,
195 right_in: Vec<(Range<usize>, T)>,
196) -> Vec<(Range<usize>, T)> {
197 // `out` will hold the merged, coalesced result.
198 let mut out: Vec<(Range<usize>, T)> = Vec::new();
199
200 // Local helper that appends a run butg coalesces equal-adjacent
201 // (like RankedValues::append)
202 let mut append = |range: Range<usize>, value: T| {
203 if let Some((last_r, last_v)) = out.last_mut() {
204 if last_r.end == range.start && *last_v == value {
205 last_r.end = range.end;
206 return;
207 }
208 }
209 out.push((range, value));
210 };
211
212 // Turn each input into forward iterators.
213 let mut left_iter = left_in.into_iter();
214 let mut right_iter = right_in.into_iter();
215
216 // `left` and `right` are the current cursor items
217 // (`Option<(Range, T)>`).
218 let mut left = left_iter.next();
219 let mut right = right_iter.next();
220
221 // Main merge loop: runs as long as both sides have a current
222 // item.
223 //
224 // VO-1 postcondition: all runs emitted so far are sorted, non-overlapping,
225 // and coalesced; `left` and `right` point to the next unprocessed
226 // run on each side.
227 while left.is_some() && right.is_some() {
228 // Mutable refs to the current (range, val) pairs so we can
229 // adjust their start as we carve off emitted pieces.
230 let (left_ranks, left_value) = left.as_mut().unwrap();
231 let (right_ranks, right_value) = right.as_mut().unwrap();
232
233 if ranges_overlap(left_ranks, right_ranks) {
234 // Overlap case (half-open ranges): a.start < b.end &&
235 // b.start < a.end.
236 if *left_value == *right_value {
237 // Equal-value overlap: merge into one unified run.
238 //
239 // We extend the emitted range up to right.end — not
240 // max(left.end, right.end). This choice ensures:
241 // - The entire right run is consumed and the right
242 // iterator advances (guaranteeing progress).
243 // - If the left run extends further, we just shrink
244 // its start and handle its tail next iteration.
245 // This avoids overlap or lookahead while keeping
246 // output normalized.
247 let ranks = (left_ranks.start.min(right_ranks.start))..right_ranks.end;
248 // `replace` consumes the whole right run and advances
249 // to the next.
250
251 // Consume the right run entirely and move right
252 // forward.
253 let (_, value) = replace(&mut right, right_iter.next()).unwrap();
254 // Advance `left_ranks.start` to the end of what we
255 // just emitted; if left run is now empty, advance the
256 // left iterator.
257 left_ranks.start = ranks.end;
258 if left_ranks.is_empty() {
259 left = left_iter.next();
260 }
261 // Append (coalescing if it touches the previous
262 // output with the same value).
263 append(ranks, value);
264 } else if left_ranks.start < right_ranks.start {
265 // Different values; left starts first. Emit the
266 // prefix of left up to right.start — this portion
267 // cannot be overwritten and can be finalized.
268 let ranks = left_ranks.start..right_ranks.start;
269 left_ranks.start = ranks.end;
270 append(ranks, left_value.clone());
271 } else {
272 // Different values; right starts earlier or equal.
273 // "Right wins" — emit the right chunk as-is, consuming it
274 // fully and moving left forward as needed. Then make sure
275 // no leftover left segments still overlap that right range.
276 let (ranks, value) = replace(&mut right, right_iter.next()).unwrap();
277
278 // Clamp the current left start so it never extends into
279 // the just-emitted right region.
280 left_ranks.start = left_ranks.start.max(ranks.end);
281 if left_ranks.is_empty() {
282 left = left_iter.next();
283 }
284
285 // Trim or skip any following left runs that still overlap
286 // this right run's extent.
287 while let Some((next_r, _)) = left.as_mut() {
288 if next_r.start < ranks.end {
289 next_r.start = next_r.start.max(ranks.end);
290 if next_r.is_empty() {
291 left = left_iter.next();
292 continue;
293 }
294 }
295 break;
296 }
297 append(ranks, value);
298 }
299 } else if left_ranks.start < right_ranks.start {
300 // No overlap, left starts earlier.
301 let (ranks, value) = replace(&mut left, left_iter.next()).unwrap();
302 append(ranks, value);
303 } else {
304 // Nov overlap, right starts earlier.
305 let (ranks, value) = replace(&mut right, right_iter.next()).unwrap();
306 append(ranks, value);
307 }
308 }
309
310 // Drain whichever side still has runs. They are guaranteed to
311 // follow all previously emitted ranges.
312 while let Some((r, v)) = left {
313 append(r, v);
314 left = left_iter.next();
315 }
316 while let Some((r, v)) = right {
317 append(r, v);
318 right = right_iter.next();
319 }
320
321 // Postcondition: `out` is sorted, non-overlapping, coalesced. All
322 // overlaps resolved by "right-wins" semantics.
323 out
324}
325
326#[cfg(test)]
327mod tests {
328 use super::*;
329
330 #[test]
331 fn merge_disjoint_right_after_left() {
332 let left = vec![(0..5, 1)];
333 let right = vec![(7..9, 2)];
334 let out = merge_value_runs(left, right);
335 assert_eq!(out, vec![(0..5, 1), (7..9, 2)]);
336 }
337
338 #[test]
339 fn merge_disjoint_right_before_left() {
340 let left = vec![(7..9, 1)];
341 let right = vec![(0..5, 2)];
342 let out = merge_value_runs(left, right);
343 assert_eq!(out, vec![(0..5, 2), (7..9, 1)]);
344 }
345
346 #[test]
347 fn overlap_right_wins_simple() {
348 let left = vec![(0..10, 1)];
349 let right = vec![(3..6, 2)];
350 let out = merge_value_runs(left, right);
351 // left prefix, right overwrite, left suffix
352 assert_eq!(out, vec![(0..3, 1), (3..6, 2), (6..10, 1)]);
353 }
354
355 #[test]
356 fn overlap_equal_values_union_to_right_end() {
357 let left = vec![(0..4, 5)];
358 let right = vec![(2..6, 5)];
359 let out = merge_value_runs(left, right);
360 // same value: union emits [0..6) as two pieces depending on
361 // algorithm's "extend to right.end" rule, but coalescing
362 // should produce a single run:
363 assert_eq!(out, vec![(0..6, 5)]);
364 }
365
366 #[test]
367 fn overlap_equal_values_with_left_longer() {
368 let left = vec![(0..8, 5)];
369 let right = vec![(2..6, 5)];
370 let out = merge_value_runs(left, right);
371 // equal case extends to right.end first, then left tail
372 // remains and should coalesce to one; because they’re
373 // touching & equal:
374 assert_eq!(out, vec![(0..8, 5)]);
375 }
376
377 #[test]
378 fn overlap_right_starts_earlier_right_wins() {
379 let left = vec![(4..10, 1)];
380 let right = vec![(2..6, 2)];
381 let out = merge_value_runs(left, right);
382 // Right chunk first, then left remainder:
383 assert_eq!(out, vec![(2..6, 2), (6..10, 1)]);
384 }
385
386 #[test]
387 fn touching_coalesces_equal() {
388 let left = vec![(0..3, 1), (3..6, 1)];
389 let right = vec![];
390 let out = merge_value_runs(left, right);
391 assert_eq!(out, vec![(0..6, 1)]);
392 }
393
394 #[test]
395 fn multi_overlap_mixed_values() {
396 let left = vec![(0..5, 1), (5..10, 1), (10..15, 3)];
397 let right = vec![(3..7, 2), (12..20, 4)];
398 let out = merge_value_runs(left, right);
399 assert_eq!(
400 out,
401 vec![(0..3, 1), (3..7, 2), (7..10, 1), (10..12, 3), (12..20, 4)]
402 );
403 }
404
405 #[test]
406 fn overlap_mixed_values_right_inside_left() {
407 // Right run sits strictly within a left run of a different
408 // value.
409 let left = vec![(0..10, 1)];
410 let right = vec![(3..7, 2)];
411 let out = merge_value_runs(left, right);
412 assert_eq!(
413 out,
414 vec![(0..3, 1), (3..7, 2), (7..10, 1)],
415 "right overwrites interior portion of left run"
416 );
417 }
418
419 #[test]
420 fn overlap_mixed_values_right_spans_multiple_left_runs() {
421 // Right spans two left runs, overwriting parts of both.
422 let left = vec![(0..5, 1), (5..10, 2)];
423 let right = vec![(3..7, 9)];
424 let out = merge_value_runs(left, right);
425 assert_eq!(
426 out,
427 vec![(0..3, 1), (3..7, 9), (7..10, 2)],
428 "right spans two left runs; left prefix/tail preserved"
429 );
430 }
431
432 #[test]
433 fn overlap_mixed_values_right_starts_before_left() {
434 // Right begins before left and overlaps into it.
435 let left = vec![(5..10, 1)];
436 let right = vec![(0..7, 2)];
437 let out = merge_value_runs(left, right);
438 assert_eq!(
439 out,
440 vec![(0..7, 2), (7..10, 1)],
441 "right overwrites head and extends beyond left start"
442 );
443 }
444
445 #[test]
446 fn overlap_mixed_values_right_ends_after_left() {
447 // Right starts inside left but extends beyond its end.
448 let left = vec![(0..5, 1)];
449 let right = vec![(3..8, 2)];
450 let out = merge_value_runs(left, right);
451 assert_eq!(
452 out,
453 vec![(0..3, 1), (3..8, 2)],
454 "right overwrites tail and extends past left"
455 );
456 }
457
458 #[test]
459 fn overlap_mixed_values_multiple_cascading_overlaps() {
460 // Stress: multiple right runs each cutting through several
461 // lefts.
462 let left = vec![(0..4, 1), (4..8, 1), (8..12, 2), (12..16, 3)];
463 let right = vec![(2..6, 9), (10..14, 9)];
464 let out = merge_value_runs(left, right);
465 assert_eq!(
466 out,
467 vec![
468 (0..2, 1),
469 (2..6, 9),
470 (6..8, 1),
471 (8..10, 2),
472 (10..14, 9),
473 (14..16, 3)
474 ],
475 "multiple right runs spanning multiple left runs, non-overlapping output"
476 );
477 }
478
479 #[test]
480 fn empty_inputs() {
481 let out = merge_value_runs::<i32>(vec![], vec![]);
482 assert!(out.is_empty());
483
484 let out = merge_value_runs(vec![(0..2, 9)], vec![]);
485 assert_eq!(out, vec![(0..2, 9)]);
486
487 let out = merge_value_runs(vec![], vec![(5..7, 4)]);
488 assert_eq!(out, vec![(5..7, 4)]);
489 }
490}