hyperactor_mesh/
v1.rs

1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
9//! A temporary holding space for APIv1 of the Hyperactor Mesh.
10//! This will be moved down to the base module when we graduate
11//! the APIs and fully deprecate the "v0" APIs.
12
13pub mod actor_mesh;
14pub mod host_mesh;
15pub mod mesh_controller;
16pub mod proc_mesh;
17pub mod testactor;
18pub mod testing;
19pub mod value_mesh;
20
21use std::io;
22use std::str::FromStr;
23
24pub use actor_mesh::ActorMesh;
25pub use actor_mesh::ActorMeshRef;
26use enum_as_inner::EnumAsInner;
27pub use host_mesh::HostMeshRef;
28use hyperactor::ActorId;
29use hyperactor::ActorRef;
30use hyperactor::host::HostError;
31use hyperactor::mailbox::MailboxSenderError;
32use ndslice::view;
33pub use proc_mesh::ProcMesh;
34pub use proc_mesh::ProcMeshRef;
35use serde::Deserialize;
36use serde::Serialize;
37pub use value_mesh::ValueMesh;
38
/// A mesh of per-rank lifecycle statuses.
///
/// `StatusMesh` is `ValueMesh<Status>` and supports dense or
/// compressed encodings. Updates are applied via sparse overlays with
/// **last-writer-wins** semantics (see
/// [`ValueMesh::merge_from_overlay`]). The mesh's `Region` defines
/// the rank space; all updates must match that region.
///
/// `Status` here is [`crate::resource::Status`], imported further
/// down in this module.
pub type StatusMesh = ValueMesh<Status>;
47
/// A sparse set of `(Range<usize>, Status)` updates for a
/// [`StatusMesh`].
///
/// `StatusOverlay` carries **normalized** runs (sorted,
/// non-overlapping, and coalesced). Applying an overlay to a
/// `StatusMesh` uses **right-wins** semantics on overlap and
/// preserves first-appearance order in the compressed table.
/// Construct via `ValueOverlay::try_from_runs` after normalizing.
///
/// This is an alias for `value_mesh::ValueOverlay<Status>`, where
/// `Status` is [`crate::resource::Status`].
pub type StatusOverlay = value_mesh::ValueOverlay<Status>;
57
58use crate::resource;
59use crate::resource::RankedValues;
60use crate::resource::Status;
61use crate::shortuuid::ShortUuid;
62use crate::v1::host_mesh::HostMeshAgent;
63use crate::v1::host_mesh::HostMeshRefParseError;
64use crate::v1::host_mesh::mesh_agent::ProcState;
65
/// Errors that occur during mesh operations.
///
/// Variants marked `#[from]` can be produced with `?` from the wrapped
/// error type. Several wrapped errors are stored boxed; this module
/// also provides `From` impls from the unboxed error types so that
/// callers do not need to box them manually.
#[derive(Debug, EnumAsInner, thiserror::Error)]
pub enum Error {
    /// A mesh ref contained a different number of ranks than expected.
    /// Also produced from [`view::InvalidCardinality`].
    #[error("invalid mesh ref: expected {expected} ranks, but contains {actual} ranks")]
    InvalidRankCardinality { expected: usize, actual: usize },

    /// A [`Name`] could not be parsed from its string form.
    #[error(transparent)]
    NameParseError(#[from] NameParseError),

    /// A host mesh ref could not be parsed.
    #[error(transparent)]
    HostMeshRefParseError(#[from] HostMeshRefParseError),

    /// An error reported by the allocator (boxed).
    #[error(transparent)]
    AllocatorError(#[from] Box<crate::alloc::AllocatorError>),

    /// An error reported by a hyperactor channel (boxed).
    #[error(transparent)]
    ChannelError(#[from] Box<hyperactor::channel::ChannelError>),

    /// An error reported by a hyperactor mailbox (boxed).
    #[error(transparent)]
    MailboxError(#[from] Box<hyperactor::mailbox::MailboxError>),

    /// A serialization or deserialization failure; see [`CodecError`].
    #[error(transparent)]
    CodecError(#[from] CodecError),

    /// An error raised during mesh configuration.
    #[error("error during mesh configuration: {0}")]
    ConfigurationError(anyhow::Error),

    // This is a temporary error to ensure we don't create unroutable
    // meshes.
    /// The mesh configuration would produce an unroutable mesh.
    #[error("configuration error: mesh is unroutable")]
    UnroutableMesh(),

    /// Calling the given actor failed with the given error.
    #[error("error while calling actor {0}: {1}")]
    CallError(ActorId, anyhow::Error),

    /// No actor is registered for the named actor type.
    #[error("actor not registered for type {0}")]
    ActorTypeNotRegistered(String),

    // TODO: this should be a valuemesh of statuses
    /// Spawning the named actor failed; the `String` carries the
    /// failure description.
    #[error("error while spawning actor {0}: {1}")]
    GspawnError(Name, String),

    /// Sending a message to the given actor failed.
    #[error("error while sending message to actor {0}: {1}")]
    SendingError(ActorId, Box<MailboxSenderError>),

    /// Casting a message to the named target failed.
    #[error("error while casting message to {0}: {1}")]
    CastingError(Name, anyhow::Error),

    /// Configuring the given host mesh agent failed; the `String`
    /// carries the failure description.
    #[error("error configuring host mesh agent {0}: {1}")]
    HostMeshAgentConfigurationError(ActorId, String),

    /// Creating a proc on a host failed. Carries the reported proc
    /// state, the rank of the host, and the host mesh agent involved.
    #[error(
        "error creating proc (host rank {host_rank}) on host mesh agent {mesh_agent}, state: {state}"
    )]
    ProcCreationError {
        state: resource::State<ProcState>,
        host_rank: usize,
        mesh_agent: ActorRef<HostMeshAgent>,
    },

    /// Spawning a proc mesh failed. The message renders
    /// `RankedValues::invert(statuses)`.
    #[error(
        "error spawning proc mesh: statuses: {}",
        RankedValues::invert(statuses)
    )]
    ProcSpawnError { statuses: RankedValues<Status> },

    /// Spawning an actor mesh failed. The message renders
    /// `RankedValues::invert(statuses)`.
    #[error(
        "error spawning actor mesh: statuses: {}",
        RankedValues::invert(statuses)
    )]
    ActorSpawnError { statuses: RankedValues<Status> },

    /// Stopping an actor mesh failed. The message renders
    /// `RankedValues::invert(statuses)`.
    #[error(
        "error stopping actor mesh: statuses: {}",
        RankedValues::invert(statuses)
    )]
    ActorStopError { statuses: RankedValues<Status> },

    /// Spawning a single actor failed.
    #[error("error spawning actor: {0}")]
    SingletonActorSpawnError(anyhow::Error),

    /// Spawning the controller actor for the named mesh failed.
    #[error("error spawning controller actor for mesh {0}: {1}")]
    ControllerActorSpawnError(Name, anyhow::Error),

    /// The named object does not exist.
    #[error("error: {0} does not exist")]
    NotExist(Name),

    /// An underlying I/O error.
    #[error(transparent)]
    Io(#[from] io::Error),

    /// An error reported by a hyperactor host.
    #[error(transparent)]
    Host(#[from] HostError),
}
159
/// Errors that occur during serialization and deserialization.
///
/// Each variant wraps the boxed error of one codec and forwards its
/// `Display` output unchanged (`#[error(transparent)]`).
#[derive(Debug, thiserror::Error)]
pub enum CodecError {
    /// A bincode encoding or decoding error.
    #[error(transparent)]
    BincodeError(#[from] Box<bincode::Error>),
    /// A JSON encoding or decoding error.
    #[error(transparent)]
    JsonError(#[from] Box<serde_json::Error>),
    /// A base64 decoding error.
    #[error(transparent)]
    Base64Error(#[from] Box<base64::DecodeError>),
    /// A UTF-8 validation error.
    #[error(transparent)]
    Utf8Error(#[from] Box<std::str::Utf8Error>),
}
172
173impl From<bincode::Error> for Error {
174    fn from(e: bincode::Error) -> Self {
175        Error::CodecError(Box::new(e).into())
176    }
177}
178
179impl From<serde_json::Error> for Error {
180    fn from(e: serde_json::Error) -> Self {
181        Error::CodecError(Box::new(e).into())
182    }
183}
184
185impl From<base64::DecodeError> for Error {
186    fn from(e: base64::DecodeError) -> Self {
187        Error::CodecError(Box::new(e).into())
188    }
189}
190
191impl From<std::str::Utf8Error> for Error {
192    fn from(e: std::str::Utf8Error) -> Self {
193        Error::CodecError(Box::new(e).into())
194    }
195}
196
197impl From<crate::alloc::AllocatorError> for Error {
198    fn from(e: crate::alloc::AllocatorError) -> Self {
199        Error::AllocatorError(Box::new(e))
200    }
201}
202
203impl From<hyperactor::channel::ChannelError> for Error {
204    fn from(e: hyperactor::channel::ChannelError) -> Self {
205        Error::ChannelError(Box::new(e))
206    }
207}
208
209impl From<hyperactor::mailbox::MailboxError> for Error {
210    fn from(e: hyperactor::mailbox::MailboxError) -> Self {
211        Error::MailboxError(Box::new(e))
212    }
213}
214
215impl From<view::InvalidCardinality> for crate::v1::Error {
216    fn from(e: view::InvalidCardinality) -> Self {
217        crate::v1::Error::InvalidRankCardinality {
218            expected: e.expected,
219            actual: e.actual,
220        }
221    }
222}
223
/// The type of result used in `hyperactor_mesh::v1`.
///
/// A shorthand for `std::result::Result` with the error type fixed to
/// this module's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
226
/// Names are used to identify objects in the system. They have a user-provided name,
/// and (except for reserved names) a unique UUID.
///
/// Names have a concrete syntax--`{name}_{uuid}`--printed by `Display` and parsed by `FromStr`.
/// Reserved names carry no UUID and are printed as just `{name}`.
#[derive(
    Debug,
    Clone,
    PartialEq,
    Eq,
    PartialOrd,
    Ord,
    Hash,
    Serialize,
    Deserialize,
    EnumAsInner
)]
pub enum Name {
    /// Normal names for most actors.
    Suffixed(String, ShortUuid),
    /// Reserved names for system actors without UUIDs.
    Reserved(String),
}
249
// The delimiter between the name and the uuid when a Name::Suffixed is stringified.
// Actor names must be parseable as a Rust identifier, so this delimiter must be
// something that is part of a valid Rust identifier.
// Parsing splits on the *last* occurrence of this delimiter, so the base name
// itself may contain it.
static NAME_SUFFIX_DELIMITER: &str = "_";
254
255impl Name {
256    /// Create a new `Name` from a user-provided base name.
257    pub fn new(name: impl Into<String>) -> Self {
258        Self::new_with_uuid(name, Some(ShortUuid::generate()))
259    }
260
261    /// Create a Reserved `Name` with no uuid. Only for use by system actors.
262    pub(crate) fn new_reserved(name: impl Into<String>) -> Self {
263        Self::new_with_uuid(name, None)
264    }
265
266    fn new_with_uuid(name: impl Into<String>, uuid: Option<ShortUuid>) -> Self {
267        let mut name = name.into();
268        if name.is_empty() {
269            name = "unnamed".to_string();
270        }
271        if let Some(uuid) = uuid {
272            Self::Suffixed(name, uuid)
273        } else {
274            Self::Reserved(name)
275        }
276    }
277
278    /// The name portion of this `Name`.
279    pub fn name(&self) -> &str {
280        match self {
281            Self::Suffixed(n, _) => n,
282            Self::Reserved(n) => n,
283        }
284    }
285
286    /// The UUID portion of this `Name`.
287    /// Only Some for Name::Suffixed, if called on Name::Reserved it'll be None.
288    pub fn uuid(&self) -> Option<&ShortUuid> {
289        match self {
290            Self::Suffixed(_, uuid) => Some(uuid),
291            Self::Reserved(_) => None,
292        }
293    }
294}
295
/// Errors that occur when parsing names.
#[derive(thiserror::Error, Debug)]
pub enum NameParseError {
    /// The name portion of the input is empty.
    #[error("invalid name: missing name")]
    MissingName,

    /// The uuid portion of the input is empty.
    #[error("invalid name: missing uuid")]
    MissingUuid,

    /// The uuid portion is present but is not a valid `ShortUuid`.
    #[error(transparent)]
    InvalidUuid(#[from] <ShortUuid as FromStr>::Err),

    /// The input lacks the separator between name and uuid.
    #[error("invalid name: missing separator")]
    MissingSeparator,
}
311
312impl FromStr for Name {
313    type Err = NameParseError;
314
315    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
316        // Split from the last in case the name has underscores in it.
317        if let Some((name, uuid)) = s.rsplit_once(NAME_SUFFIX_DELIMITER) {
318            if name.is_empty() {
319                return Err(NameParseError::MissingName);
320            }
321            if uuid.is_empty() {
322                return Err(NameParseError::MissingName);
323            }
324
325            Ok(Name::new_with_uuid(name.to_string(), Some(uuid.parse()?)))
326        } else {
327            if s.is_empty() {
328                return Err(NameParseError::MissingName);
329            }
330            Ok(Name::new_reserved(s))
331        }
332    }
333}
334
335impl std::fmt::Display for Name {
336    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
337        match self {
338            Self::Suffixed(n, uuid) => {
339                write!(f, "{}{}", n, NAME_SUFFIX_DELIMITER)?;
340                uuid.format(f, true /*raw*/)
341            }
342            Self::Reserved(n) => write!(f, "{}", n),
343        }
344    }
345}
346
#[cfg(test)]
mod tests {
    use super::*;

    /// Names built from the same base differ; a name equals itself.
    #[test]
    fn test_name_unique() {
        let (lhs, rhs) = (Name::new("foo"), Name::new("foo"));
        assert_ne!(lhs, rhs);
        let name = Name::new("foo");
        assert_eq!(name, name);
    }

    /// A suffixed name with a fixed uuid survives print-then-parse.
    #[test]
    fn test_name_roundtrip() {
        let uuid: ShortUuid = "111111111111".parse().unwrap();
        let name = Name::new_with_uuid("foo", Some(uuid));
        let rendered = name.to_string();
        assert_eq!(rendered, "foo_111111111111");
        let reparsed: Name = rendered.parse().unwrap();
        assert_eq!(name, reparsed);
    }

    #[test]
    fn test_name_roundtrip_with_underscore() {
        // A ShortUuid may have an underscore prefix if the first character is a digit.
        // Make sure this doesn't impact parsing.
        let uuid: ShortUuid = "_1a2b3c4d5e6f".parse().unwrap();
        let name = Name::new_with_uuid("foo", Some(uuid));
        let rendered = name.to_string();
        // Leading underscore is stripped as not needed.
        assert_eq!(rendered, "foo_1a2b3c4d5e6f");
        let reparsed: Name = rendered.parse().unwrap();
        assert_eq!(name, reparsed);
    }

    /// A name with a generated uuid survives print-then-parse.
    #[test]
    fn test_name_roundtrip_random() {
        let name = Name::new("foo");
        let reparsed: Name = name.to_string().parse().unwrap();
        assert_eq!(name, reparsed);
    }

    /// A reserved name prints without a suffix and parses back unchanged.
    #[test]
    fn test_name_roundtrip_reserved() {
        let name = Name::new_reserved("foo");
        let rendered = name.to_string();
        assert_eq!(rendered, "foo");
        let reparsed: Name = rendered.parse().unwrap();
        assert_eq!(name, reparsed);
    }

    #[test]
    fn test_name_parse() {
        // Multiple underscores are allowed in the name, as ShortUuid will choose
        // the part after the last underscore.
        let name: Name = "foo_bar_1a2b3c4d5e6f".parse().unwrap();
        assert_eq!(name.to_string(), "foo_bar_1a2b3c4d5e6f");
    }
}