Skip to main content

hyperactor_mesh/
lib.rs

1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
9//! This crate provides hyperactor's mesh abstractions.
10
11#![feature(assert_matches)]
12#![feature(associated_type_defaults)]
13#![feature(exit_status_error)]
14#![feature(impl_trait_in_bindings)]
15#![feature(get_disjoint_mut_helpers)]
16#![feature(exact_size_is_empty)]
17#![feature(async_fn_track_caller)]
18// EnumAsInner generates code that triggers a false positive
19// unused_assignments lint on struct variant fields. #[allow] on the
20// enum itself doesn't propagate into derive-macro-generated code, so
21// the suppression must be at module scope.
22#![allow(unused_assignments)]
23
24pub mod actor_mesh;
25mod assign;
26pub mod bootstrap;
27pub mod casting;
28pub mod comm;
29pub mod config;
30pub mod config_dump;
31pub mod connect;
32pub mod global_context;
33pub mod host;
34pub mod host_mesh;
35pub mod introspect;
36pub mod logging;
37pub mod mesh;
38pub mod mesh_admin;
39pub mod mesh_admin_client;
40pub mod mesh_controller;
41pub mod mesh_id;
42pub mod mesh_selection;
43mod metrics;
44pub mod proc_agent;
45pub mod proc_launcher;
46pub mod proc_mesh;
47pub mod pyspy;
48pub mod reference;
49pub mod resource;
50pub mod shared_cell;
51pub mod shortuuid;
52pub mod supervision;
53#[cfg(target_os = "linux")]
54mod systemd;
55pub mod test_utils;
56pub mod testactor;
57pub mod testing;
58mod testresource;
59pub mod transport;
60pub mod value_mesh;
61
62use std::io;
63
64pub use actor_mesh::ActorMesh;
65pub use actor_mesh::ActorMeshRef;
66pub use bootstrap::Bootstrap;
67pub use bootstrap::bootstrap;
68pub use bootstrap::bootstrap_or_die;
69pub use casting::CastError;
70pub use comm::CommActor;
71pub use dashmap;
72use enum_as_inner::EnumAsInner;
73pub use global_context::GlobalClientActor;
74pub use global_context::GlobalContext;
75pub use global_context::context;
76pub use global_context::this_host;
77pub use global_context::this_proc;
78pub use host_mesh::HostMeshRef;
79use hyperactor::ActorAddr;
80use hyperactor::ActorRef;
81use hyperactor::ProcAddr;
82use hyperactor::mailbox::MailboxSenderError;
83pub use hyperactor_mesh_macros::sel;
84pub use mesh::Mesh;
85// Re-exported for internal test binaries that don't have ndslice as a direct dependency
86pub use ndslice::extent;
87use ndslice::view;
88pub use proc_mesh::ProcMesh;
89pub use proc_mesh::ProcMeshRef;
90pub use value_mesh::ValueMesh;
91
92use crate::host::HostError;
93use crate::host_mesh::HostAgent;
94use crate::host_mesh::HostMeshRefParseError;
95use crate::host_mesh::host_agent::ProcState;
96use crate::resource::RankedValues;
97use crate::resource::Status;
98use crate::supervision::MeshFailure;
99
/// A mesh of per-rank lifecycle statuses.
///
/// `StatusMesh` is an alias for `ValueMesh<Status>` and supports dense
/// or compressed encodings. Updates are applied via sparse overlays
/// with **last-writer-wins** semantics (see
/// [`ValueMesh::merge_from_overlay`]). The mesh's `Region` defines
/// the rank space; all updates must match that region.
pub type StatusMesh = ValueMesh<Status>;
108
/// A sparse set of `(Range<usize>, Status)` updates for a
/// [`StatusMesh`].
///
/// `StatusOverlay` is an alias for `value_mesh::ValueOverlay<Status>`.
/// It carries **normalized** runs (sorted, non-overlapping, and
/// coalesced). Applying an overlay to a `StatusMesh` uses
/// **right-wins** semantics on overlap and preserves
/// first-appearance order in the compressed table.
/// Construct via `ValueOverlay::try_from_runs` after normalizing.
pub type StatusOverlay = value_mesh::ValueOverlay<Status>;
118
/// Errors that occur during mesh operations.
///
/// This is the error half of the crate-wide [`Result`] alias. Variants
/// marked `#[from]` can be produced with `?` from the wrapped error
/// type; the boxed `ChannelError` and `MailboxError` variants, and the
/// codec errors, additionally have hand-written `From` impls in this
/// file that accept the unboxed error. `EnumAsInner` is derived, so
/// per-variant accessor methods are generated for this enum.
#[derive(Debug, EnumAsInner, thiserror::Error)]
pub enum Error {
    /// A mesh ref contains a different number of ranks than expected.
    /// Also produced from `view::InvalidCardinality`.
    #[error("invalid mesh ref: expected {expected} ranks, but contains {actual} ranks")]
    InvalidRankCardinality { expected: usize, actual: usize },

    /// Wraps [`mesh_id::ResourceIdParseError`]; displayed transparently.
    #[error(transparent)]
    ResourceIdParseError(#[from] mesh_id::ResourceIdParseError),

    /// Wraps [`HostMeshRefParseError`]; displayed transparently.
    #[error(transparent)]
    HostMeshRefParseError(#[from] HostMeshRefParseError),

    /// A boxed `hyperactor::channel::ChannelError`; displayed
    /// transparently.
    #[error(transparent)]
    ChannelError(#[from] Box<hyperactor::channel::ChannelError>),

    /// A boxed `hyperactor::mailbox::MailboxError`; displayed
    /// transparently.
    #[error(transparent)]
    MailboxError(#[from] Box<hyperactor::mailbox::MailboxError>),

    /// A serialization or deserialization failure; see [`CodecError`].
    #[error(transparent)]
    CodecError(#[from] CodecError),

    /// An error raised during mesh configuration. There is no `#[from]`
    /// here, so this variant must be constructed explicitly.
    #[error("error during mesh configuration: {0}")]
    ConfigurationError(anyhow::Error),

    // This is a temporary error to ensure we don't create unroutable
    // meshes.
    #[error("configuration error: mesh is unroutable")]
    UnroutableMesh(),

    /// A call to the actor at the given address failed with the given
    /// error.
    #[error("error while calling actor {0}: {1}")]
    CallError(ActorAddr, anyhow::Error),

    /// No actor is registered for the named type.
    #[error("actor not registered for type {0}")]
    ActorTypeNotRegistered(String),

    /// Spawning an actor failed; carries the actor mesh id and a
    /// textual description of the failure.
    // TODO: this should be a valuemesh of statuses
    #[error("error while spawning actor {0}: {1}")]
    GspawnError(mesh_id::ActorMeshId, String),

    /// Sending a message to the actor at the given address failed.
    #[error("error while sending message to actor {0}: {1}")]
    SendingError(ActorAddr, Box<MailboxSenderError>),

    /// Casting a message to the given actor mesh failed.
    #[error("error while casting message to {0}: {1}")]
    CastingError(mesh_id::ActorMeshId, anyhow::Error),

    /// Configuring the host mesh agent at the given address failed;
    /// the string carries the failure detail.
    #[error("error configuring host mesh agent {0}: {1}")]
    HostMeshAgentConfigurationError(ActorAddr, String),

    /// HM-2 / HM-3 / HM-4: structured per-host failure from
    /// `HostMeshRef::push_config()`. See the HM-* invariant block in
    /// `host_mesh.rs` for the contract this surfaces.
    #[error(transparent)]
    ConfigPushFailed(#[from] crate::host_mesh::ConfigPushError),

    /// Creating a proc failed. Carries the (boxed) resource state
    /// reported for the proc, the host rank, and a reference to the
    /// host mesh agent involved.
    #[error(
        "error creating proc (host rank {host_rank}) on host mesh agent {mesh_agent}, state: {state}"
    )]
    ProcCreationError {
        state: Box<resource::State<ProcState>>,
        host_rank: usize,
        mesh_agent: ActorRef<HostAgent>,
    },

    /// Spawning a proc mesh failed. The message renders
    /// `RankedValues::invert(statuses)`.
    #[error(
        "error spawning proc mesh: statuses: {}",
        RankedValues::invert(statuses)
    )]
    ProcSpawnError { statuses: RankedValues<Status> },

    /// Spawning an actor mesh failed. The message renders
    /// `RankedValues::invert(statuses)`.
    #[error(
        "error spawning actor mesh: statuses: {}",
        RankedValues::invert(statuses)
    )]
    ActorSpawnError { statuses: RankedValues<Status> },

    /// Stopping an actor mesh failed. The message renders
    /// `RankedValues::invert(statuses)`.
    #[error(
        "error stopping actor mesh: statuses: {}",
        RankedValues::invert(statuses)
    )]
    ActorStopError { statuses: RankedValues<Status> },

    /// Stopping a proc mesh failed. The message renders
    /// `RankedValues::invert(statuses)`.
    #[error(
        "error stopping proc mesh: statuses: {}",
        RankedValues::invert(statuses)
    )]
    ProcMeshStopError { statuses: RankedValues<Status> },

    /// Spawning a single actor failed.
    #[error("error spawning actor: {0}")]
    SingletonActorSpawnError(anyhow::Error),

    /// Spawning the controller actor for the identified mesh failed.
    #[error("error spawning controller actor for mesh {0}: {1}")]
    ControllerActorSpawnError(mesh_id::ResourceId, anyhow::Error),

    /// The given proc was required to be direct-addressable.
    #[error("proc {0} must be direct-addressable")]
    RankedProc(ProcAddr),

    /// A boxed [`MeshFailure`]; the message is the failure's own
    /// `Display` output.
    #[error("{0}")]
    Supervision(Box<MeshFailure>),

    /// The identified resource does not exist.
    #[error("error: {0} does not exist")]
    NotExist(mesh_id::ResourceId),

    /// Wraps a `std::io::Error`; displayed transparently.
    #[error(transparent)]
    Io(#[from] io::Error),

    /// Wraps a [`HostError`]; displayed transparently.
    #[error(transparent)]
    Host(#[from] HostError),

    /// Catch-all for any `anyhow::Error`; displayed transparently.
    #[error(transparent)]
    Other(#[from] anyhow::Error),
}
230
/// Errors that occur during serialization and deserialization.
///
/// Every variant holds its source error boxed, and the `#[from]`
/// conversions therefore accept only the boxed form. Each variant is
/// displayed transparently, i.e. as the wrapped error.
#[derive(Debug, thiserror::Error)]
pub enum CodecError {
    /// A bincode encoding failure.
    #[error(transparent)]
    BincodeEncodeError(#[from] Box<bincode::error::EncodeError>),
    /// A bincode decoding failure.
    #[error(transparent)]
    BincodeDecodeError(#[from] Box<bincode::error::DecodeError>),
    /// A `serde_json` error.
    #[error(transparent)]
    JsonError(#[from] Box<serde_json::Error>),
    /// A base64 decoding failure.
    #[error(transparent)]
    Base64Error(#[from] Box<base64::DecodeError>),
    /// A `std::str::Utf8Error`.
    #[error(transparent)]
    Utf8Error(#[from] Box<std::str::Utf8Error>),
}
245
246impl From<bincode::error::EncodeError> for Error {
247    fn from(e: bincode::error::EncodeError) -> Self {
248        Error::CodecError(Box::new(e).into())
249    }
250}
251
252impl From<bincode::error::DecodeError> for Error {
253    fn from(e: bincode::error::DecodeError) -> Self {
254        Error::CodecError(Box::new(e).into())
255    }
256}
257
258impl From<serde_json::Error> for Error {
259    fn from(e: serde_json::Error) -> Self {
260        Error::CodecError(Box::new(e).into())
261    }
262}
263
264impl From<base64::DecodeError> for Error {
265    fn from(e: base64::DecodeError) -> Self {
266        Error::CodecError(Box::new(e).into())
267    }
268}
269
270impl From<std::str::Utf8Error> for Error {
271    fn from(e: std::str::Utf8Error) -> Self {
272        Error::CodecError(Box::new(e).into())
273    }
274}
275
276impl From<hyperactor::channel::ChannelError> for Error {
277    fn from(e: hyperactor::channel::ChannelError) -> Self {
278        Error::ChannelError(Box::new(e))
279    }
280}
281
282impl From<hyperactor::mailbox::MailboxError> for Error {
283    fn from(e: hyperactor::mailbox::MailboxError) -> Self {
284        Error::MailboxError(Box::new(e))
285    }
286}
287
288impl From<view::InvalidCardinality> for Error {
289    fn from(e: view::InvalidCardinality) -> Self {
290        Error::InvalidRankCardinality {
291            expected: e.expected,
292            actual: e.actual,
293        }
294    }
295}
296
/// The type of result used in `hyperactor_mesh`: a
/// `std::result::Result` whose error type is this crate's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
299
300/// Construct a per-actor display name from a mesh-level base name and a
301/// rank's coordinates. Inserts `point.format_as_dict()` before the last
302/// `>` in `base`, or appends it if no `>` is found. Returns `base`
303/// unchanged for scalar (empty) points.
304pub(crate) fn actor_display_name(base: &str, point: &view::Point) -> String {
305    if point.is_empty() {
306        return base.to_string();
307    }
308    let coords = point.format_as_dict();
309    if let Some(pos) = base.rfind('>') {
310        format!("{}{}{}", &base[..pos], coords, &base[pos..])
311    } else {
312        format!("{}{}", base, coords)
313    }
314}
315
// Tests for the re-exported `sel!` macro. Each case builds a selection
// with the macro and compares it against one built with the
// `ndslice::selection::dsl` constructors.
#[cfg(test)]
mod tests {

    /// Smoke test: a three-dimensional `sel!` expression is
    /// structurally equal to the equivalent DSL construction.
    #[test]
    fn basic() {
        use ndslice::selection::dsl;
        use ndslice::selection::structurally_equal;

        let actual = sel!(*, 0:4, *);
        let expected = dsl::all(dsl::range(
            ndslice::shape::Range(0, Some(4), 1),
            dsl::all(dsl::true_()),
        ));
        assert!(structurally_equal(&actual, &expected));
    }

    // Compiled out via `cfg(false)`: `sel!(foobar)` is not a valid
    // selection, so enabling this test would break the build. The
    // compiler diagnostic recorded below shows the expected failure.
    #[cfg(false)]
    #[test]
    fn shouldnt_compile() {
        let _ = sel!(foobar);
    }
    // error: sel! parse failed: unexpected token: Ident { sym: foobar, span: #0 bytes(605..611) }
    //   --> fbcode/monarch/hyperactor_mesh_macros/tests/basic.rs:19:13
    //    |
    // 19 |     let _ = sel!(foobar);
    //    |             ^^^^^^^^^^^^ in this macro invocation
    //   --> fbcode/monarch/hyperactor_mesh_macros/src/lib.rs:12:1
    //    |
    //    = note: in this expansion of `sel!`

    // Module-level imports; `use` items are visible throughout the
    // module, including in `basic` above.
    use hyperactor_mesh_macros::sel;
    use ndslice::assert_round_trip;
    use ndslice::assert_structurally_eq;
    use ndslice::selection::Selection;

    // Asserts that `$left` and `$right` are structurally equal and
    // then applies `assert_round_trip!` to each of them. Both operand
    // expressions are expanded twice.
    // NOTE(review): `assert_round_trip!` is defined in `ndslice`;
    // presumably it checks a format/parse round trip — confirm there.
    macro_rules! assert_round_trip_match {
        ($left:expr, $right:expr) => {{
            assert_structurally_eq!($left, $right);
            assert_round_trip!($left);
            assert_round_trip!($right);
        }};
    }

    /// Table of `sel!` inputs paired with the DSL expression each one
    /// is expected to match. Token spacing inside the `sel!(...)`
    /// invocations varies from case to case and is kept verbatim.
    #[test]
    fn token_parser() {
        use ndslice::selection::dsl::*;
        use ndslice::shape;

        // In this first group the DSL form is on the left and the
        // macro form on the right.
        assert_round_trip_match!(all(true_()), sel!(*));
        assert_round_trip_match!(range(3, true_()), sel!(3));
        assert_round_trip_match!(range(1..4, true_()), sel!(1:4));
        assert_round_trip_match!(all(range(1..4, true_())), sel!(*, 1:4));
        assert_round_trip_match!(range(shape::Range(0, None, 1), true_()), sel!(:));
        assert_round_trip_match!(any(true_()), sel!(?));
        assert_round_trip_match!(any(range(1..4, all(true_()))), sel!(?, 1:4, *));
        assert_round_trip_match!(union(range(0, true_()), range(1, true_())), sel!(0 | 1));
        assert_round_trip_match!(
            intersection(range(0..4, true_()), range(2..6, true_())),
            sel!(0:4 & 2:6)
        );
        assert_round_trip_match!(range(shape::Range(0, None, 1), true_()), sel!(:));
        assert_round_trip_match!(all(true_()), sel!(*));
        assert_round_trip_match!(any(true_()), sel!(?));
        assert_round_trip_match!(all(all(all(true_()))), sel!(*, *, *));
        assert_round_trip_match!(intersection(all(true_()), all(true_())), sel!(* & *));
        assert_round_trip_match!(
            all(all(union(
                range(0..2, true_()),
                range(shape::Range(6, None, 1), true_())
            ))),
            sel!(*, *, (:2|6:))
        );
        assert_round_trip_match!(
            all(all(range(shape::Range(1, None, 2), true_()))),
            sel!(*, *, 1::2)
        );
        assert_round_trip_match!(
            range(
                shape::Range(0, Some(1), 1),
                any(range(shape::Range(0, Some(4), 1), true_()))
            ),
            sel!(0, ?, :4)
        );
        assert_round_trip_match!(range(shape::Range(1, Some(4), 2), true_()), sel!(1:4:2));
        assert_round_trip_match!(range(shape::Range(0, None, 2), true_()), sel!(::2));
        assert_round_trip_match!(
            union(range(0..4, true_()), range(4..8, true_())),
            sel!(0:4 | 4:8)
        );
        assert_round_trip_match!(
            intersection(range(0..4, true_()), range(2..6, true_())),
            sel!(0:4 & 2:6)
        );
        assert_round_trip_match!(
            all(union(range(1..4, all(true_())), range(5..6, all(true_())))),
            sel!(*, (1:4 | 5:6), *)
        );
        assert_round_trip_match!(
            range(
                0,
                intersection(
                    range(1..4, range(7, true_())),
                    range(2..5, range(7, true_()))
                )
            ),
            sel!(0, (1:4 & 2:5), 7)
        );
        assert_round_trip_match!(
            all(all(union(
                union(range(0..2, true_()), range(4..6, true_())),
                range(shape::Range(6, None, 1), true_())
            ))),
            sel!(*, *, (:2 | 4:6 | 6:))
        );
        assert_round_trip_match!(intersection(all(true_()), all(true_())), sel!(* & *));
        assert_round_trip_match!(union(all(true_()), all(true_())), sel!(* | *));
        assert_round_trip_match!(
            intersection(
                range(0..2, true_()),
                union(range(1, true_()), range(2, true_()))
            ),
            sel!(0:2 & (1 | 2))
        );
        assert_round_trip_match!(
            all(all(intersection(
                range(1..2, true_()),
                range(2..3, true_())
            ))),
            sel!(*,*,(1:2&2:3))
        );
        assert_round_trip_match!(
            intersection(all(all(all(true_()))), all(all(all(true_())))),
            sel!((*,*,*) & (*,*,*))
        );
        assert_round_trip_match!(
            intersection(
                range(0, all(all(true_()))),
                range(0, union(range(1, all(true_())), range(3, all(true_()))))
            ),
            sel!((0, *, *) & (0, (1 | 3), *))
        );
        assert_round_trip_match!(
            intersection(
                range(0, all(all(true_()))),
                range(
                    0,
                    union(
                        range(1, range(2..5, true_())),
                        range(3, range(2..5, true_()))
                    )
                )
            ),
            sel!((0, *, *) & (0, (1 | 3), 2:5))
        );
        // Redundant parentheses are expected not to change the result.
        assert_round_trip_match!(all(true_()), sel!((*)));
        assert_round_trip_match!(range(1..4, range(2, true_())), sel!(((1:4), 2)));
        // Unparenthesized `a & b | c` is expected to group as
        // `(a & b) | c`.
        assert_round_trip_match!(sel!(1:4 & 5:6 | 7:8), sel!((1:4 & 5:6) | 7:8));
        assert_round_trip_match!(
            union(
                intersection(all(all(true_())), all(all(true_()))),
                all(all(true_()))
            ),
            sel!((*,*) & (*,*) | (*,*))
        );
        assert_round_trip_match!(all(true_()), sel!(*));
        assert_round_trip_match!(sel!(((1:4))), sel!(1:4));
        assert_round_trip_match!(sel!(*, (*)), sel!(*, *));
        assert_round_trip_match!(
            intersection(
                range(0, range(1..4, true_())),
                range(0, union(range(2, all(true_())), range(3, all(true_()))))
            ),
            sel!((0,1:4)&(0,(2|3),*))
        );

        //assert_round_trip_match!(true_(), sel!(foo)); // sel! macro: parse error: Parsing Error: Error { input: "foo", code: Tag }

        // From here on the operands are swapped: macro form on the
        // left, DSL form on the right. Where a case appears twice with
        // its union operands reversed, the DSL side is reversed too.
        assert_round_trip_match!(
            sel!(0 & (0, (1|3), *)),
            intersection(
                range(0, true_()),
                range(0, union(range(1, all(true_())), range(3, all(true_()))))
            )
        );
        assert_round_trip_match!(
            sel!(0 & (0, (3|1), *)),
            intersection(
                range(0, true_()),
                range(0, union(range(3, all(true_())), range(1, all(true_()))))
            )
        );
        assert_round_trip_match!(
            sel!((*, *, *) & (*, *, (2 | 4))),
            intersection(
                all(all(all(true_()))),
                all(all(union(range(2, true_()), range(4, true_()))))
            )
        );
        assert_round_trip_match!(
            sel!((*, *, *) & (*, *, (4 | 2))),
            intersection(
                all(all(all(true_()))),
                all(all(union(range(4, true_()), range(2, true_()))))
            )
        );
        assert_round_trip_match!(
            sel!((*, (1|2)) & (*, (2|1))),
            intersection(
                all(union(range(1, true_()), range(2, true_()))),
                all(union(range(2, true_()), range(1, true_())))
            )
        );
        // Operands of differing depth.
        assert_round_trip_match!(
            sel!((*, *, *) & *),
            intersection(all(all(all(true_()))), all(true_()))
        );
        assert_round_trip_match!(
            sel!(* & (*, *, *)),
            intersection(all(true_()), all(all(all(true_()))))
        );

        assert_round_trip_match!(
            sel!( (*, *, *) & ((*, *, *) & (*, *, *)) ),
            intersection(
                all(all(all(true_()))),
                intersection(all(all(all(true_()))), all(all(all(true_()))))
            )
        );
        assert_round_trip_match!(
            sel!((1, *, *) | (0 & (0, 3, *))),
            union(
                range(1, all(all(true_()))),
                intersection(range(0, true_()), range(0, range(3, all(true_()))))
            )
        );
        assert_round_trip_match!(
            sel!(((0, *)| (1, *)) & ((1, *) | (0, *))),
            intersection(
                union(range(0, all(true_())), range(1, all(true_()))),
                union(range(1, all(true_())), range(0, all(true_())))
            )
        );
        // Cases involving the empty range `8:8` (`8..8`).
        assert_round_trip_match!(sel!(*, 8:8), all(range(8..8, true_())));
        assert_round_trip_match!(
            sel!((*, 1) & (*, 8 : 8)),
            intersection(all(range(1..2, true_())), all(range(8..8, true_())))
        );
        assert_round_trip_match!(
            sel!((*, 8 : 8) | (*, 1)),
            union(all(range(8..8, true_())), all(range(1..2, true_())))
        );
        assert_round_trip_match!(
            sel!((*, 1) | (*, 2:8)),
            union(all(range(1..2, true_())), all(range(2..8, true_())))
        );
        assert_round_trip_match!(
            sel!((*, *, *) & (*, *, 2:8)),
            intersection(all(all(all(true_()))), all(all(range(2..8, true_()))))
        );
    }
}