hyperactor_mesh/
v1.rs

1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
9//! A temporary holding space for APIv1 of the Hyperactor Mesh.
10//! This will be moved down to the base module when we graduate
11//! the APIs and fully deprecate the "v0" APIs.
12
13pub mod actor_mesh;
14pub mod host_mesh;
15pub mod mesh_controller;
16pub mod proc_mesh;
17pub mod testactor;
18pub mod testing;
19pub mod value_mesh;
20
21use std::io;
22use std::str::FromStr;
23
24pub use actor_mesh::ActorMesh;
25pub use actor_mesh::ActorMeshRef;
26use enum_as_inner::EnumAsInner;
27pub use host_mesh::HostMeshRef;
28use hyperactor::ActorId;
29use hyperactor::ActorRef;
30use hyperactor::ProcId;
31use hyperactor::host::HostError;
32use hyperactor::mailbox::MailboxSenderError;
33use hyperactor::reference;
34use ndslice::view;
35pub use proc_mesh::ProcMesh;
36pub use proc_mesh::ProcMeshRef;
37use serde::Deserialize;
38use serde::Serialize;
39use typeuri::Named;
40pub use value_mesh::ValueMesh;
41
/// A mesh of per-rank lifecycle statuses.
///
/// `StatusMesh` is a type alias for `ValueMesh<Status>` and supports
/// dense or compressed encodings. Updates are applied via sparse
/// overlays with **last-writer-wins** semantics (see
/// [`ValueMesh::merge_from_overlay`]). The mesh's `Region` defines
/// the rank space; all updates must match that region.
pub type StatusMesh = ValueMesh<Status>;
50
/// A sparse set of `(Range<usize>, Status)` updates for a
/// [`StatusMesh`].
///
/// `StatusOverlay` is a type alias for
/// `value_mesh::ValueOverlay<Status>`. It carries **normalized** runs
/// (sorted, non-overlapping, and coalesced). Applying an overlay to a
/// `StatusMesh` uses **right-wins** semantics on overlap and
/// preserves first-appearance order in the compressed table.
/// Construct via `ValueOverlay::try_from_runs` after normalizing.
pub type StatusOverlay = value_mesh::ValueOverlay<Status>;
60
61use crate::resource;
62use crate::resource::RankedValues;
63use crate::resource::Status;
64use crate::shortuuid::ShortUuid;
65use crate::v1::host_mesh::HostMeshAgent;
66use crate::v1::host_mesh::HostMeshRefParseError;
67use crate::v1::host_mesh::mesh_agent::ProcState;
68
/// Errors that occur during mesh operations.
///
/// Variants marked `#[error(transparent)]` delegate their `Display` output
/// (and error source) to the wrapped error. Payloads marked `#[from]` also
/// get a generated `From` impl, so `?` converts them into this type.
#[derive(Debug, EnumAsInner, thiserror::Error)]
pub enum Error {
    /// A mesh ref contains a different number of ranks than expected.
    #[error("invalid mesh ref: expected {expected} ranks, but contains {actual} ranks")]
    InvalidRankCardinality { expected: usize, actual: usize },

    /// A [`Name`] could not be parsed or validated.
    #[error(transparent)]
    NameParseError(#[from] NameParseError),

    /// Wraps a [`HostMeshRefParseError`].
    #[error(transparent)]
    HostMeshRefParseError(#[from] HostMeshRefParseError),

    /// Wraps a boxed `crate::alloc::AllocatorError`.
    #[error(transparent)]
    AllocatorError(#[from] Box<crate::alloc::AllocatorError>),

    /// Wraps a boxed `hyperactor::channel::ChannelError`.
    #[error(transparent)]
    ChannelError(#[from] Box<hyperactor::channel::ChannelError>),

    /// Wraps a boxed `hyperactor::mailbox::MailboxError`.
    #[error(transparent)]
    MailboxError(#[from] Box<hyperactor::mailbox::MailboxError>),

    /// Serialization or deserialization failed; see [`CodecError`].
    #[error(transparent)]
    CodecError(#[from] CodecError),

    /// Mesh configuration failed; carries the underlying error.
    #[error("error during mesh configuration: {0}")]
    ConfigurationError(anyhow::Error),

    // This is a temporary error to ensure we don't create unroutable
    // meshes.
    /// The mesh configuration would produce an unroutable mesh.
    #[error("configuration error: mesh is unroutable")]
    UnroutableMesh(),

    /// A call to the given actor failed with the given error.
    #[error("error while calling actor {0}: {1}")]
    CallError(ActorId, anyhow::Error),

    /// No actor is registered for the named type.
    #[error("actor not registered for type {0}")]
    ActorTypeNotRegistered(String),

    // TODO: this should be a valuemesh of statuses
    /// Spawning the named actor failed; the `String` carries the detail that
    /// is rendered after the name in the message.
    #[error("error while spawning actor {0}: {1}")]
    GspawnError(Name, String),

    /// Sending a message to the given actor failed.
    #[error("error while sending message to actor {0}: {1}")]
    SendingError(ActorId, Box<MailboxSenderError>),

    /// Casting a message to the named target failed.
    #[error("error while casting message to {0}: {1}")]
    CastingError(Name, anyhow::Error),

    /// Configuring the given host mesh agent failed; the `String` carries
    /// the detail that is rendered after the actor id in the message.
    #[error("error configuring host mesh agent {0}: {1}")]
    HostMeshAgentConfigurationError(ActorId, String),

    /// Creating a proc through a host mesh agent failed.
    #[error(
        "error creating proc (host rank {host_rank}) on host mesh agent {mesh_agent}, state: {state}"
    )]
    ProcCreationError {
        /// The resource state reported for the proc (rendered in the message).
        state: Box<resource::State<ProcState>>,
        /// Rank of the host on which creation was attempted.
        host_rank: usize,
        /// The host mesh agent that was asked to create the proc.
        mesh_agent: ActorRef<HostMeshAgent>,
    },

    /// Spawning a proc mesh failed. The message renders
    /// `RankedValues::invert(statuses)`.
    #[error(
        "error spawning proc mesh: statuses: {}",
        RankedValues::invert(statuses)
    )]
    ProcSpawnError { statuses: RankedValues<Status> },

    /// Spawning an actor mesh failed. The message renders
    /// `RankedValues::invert(statuses)`.
    #[error(
        "error spawning actor mesh: statuses: {}",
        RankedValues::invert(statuses)
    )]
    ActorSpawnError { statuses: RankedValues<Status> },

    /// Stopping an actor mesh failed. The message renders
    /// `RankedValues::invert(statuses)`.
    #[error(
        "error stopping actor mesh: statuses: {}",
        RankedValues::invert(statuses)
    )]
    ActorStopError { statuses: RankedValues<Status> },

    /// Spawning a single actor failed.
    #[error("error spawning actor: {0}")]
    SingletonActorSpawnError(anyhow::Error),

    /// Spawning the controller actor for the named mesh failed.
    #[error("error spawning controller actor for mesh {0}: {1}")]
    ControllerActorSpawnError(Name, anyhow::Error),

    /// The given proc must be direct-addressable.
    /// NOTE(review): the variant name suggests a ranked proc id was supplied
    /// where a direct one is required -- confirm against the call sites.
    #[error("proc {0} must be direct-addressable")]
    RankedProc(ProcId),

    /// The named object does not exist.
    #[error("error: {0} does not exist")]
    NotExist(Name),

    /// Wraps a standard I/O error.
    #[error(transparent)]
    Io(#[from] io::Error),

    /// Wraps a `hyperactor::host::HostError`.
    #[error(transparent)]
    Host(#[from] HostError),

    /// Catch-all for any other `anyhow::Error`.
    #[error(transparent)]
    Other(#[from] anyhow::Error),
}
168
/// Errors that occur during serialization and deserialization.
///
/// Every variant is transparent over a boxed underlying error and has a
/// generated `From<Box<_>>` impl via `#[from]`.
#[derive(Debug, thiserror::Error)]
pub enum CodecError {
    /// Wraps a boxed `bincode::Error`.
    #[error(transparent)]
    BincodeError(#[from] Box<bincode::Error>),
    /// Wraps a boxed `serde_json::Error`.
    #[error(transparent)]
    JsonError(#[from] Box<serde_json::Error>),
    /// Wraps a boxed `base64::DecodeError`.
    #[error(transparent)]
    Base64Error(#[from] Box<base64::DecodeError>),
    /// Wraps a boxed `std::str::Utf8Error`.
    #[error(transparent)]
    Utf8Error(#[from] Box<std::str::Utf8Error>),
}
181
182impl From<bincode::Error> for Error {
183    fn from(e: bincode::Error) -> Self {
184        Error::CodecError(Box::new(e).into())
185    }
186}
187
188impl From<serde_json::Error> for Error {
189    fn from(e: serde_json::Error) -> Self {
190        Error::CodecError(Box::new(e).into())
191    }
192}
193
194impl From<base64::DecodeError> for Error {
195    fn from(e: base64::DecodeError) -> Self {
196        Error::CodecError(Box::new(e).into())
197    }
198}
199
200impl From<std::str::Utf8Error> for Error {
201    fn from(e: std::str::Utf8Error) -> Self {
202        Error::CodecError(Box::new(e).into())
203    }
204}
205
206impl From<crate::alloc::AllocatorError> for Error {
207    fn from(e: crate::alloc::AllocatorError) -> Self {
208        Error::AllocatorError(Box::new(e))
209    }
210}
211
212impl From<hyperactor::channel::ChannelError> for Error {
213    fn from(e: hyperactor::channel::ChannelError) -> Self {
214        Error::ChannelError(Box::new(e))
215    }
216}
217
218impl From<hyperactor::mailbox::MailboxError> for Error {
219    fn from(e: hyperactor::mailbox::MailboxError) -> Self {
220        Error::MailboxError(Box::new(e))
221    }
222}
223
224impl From<view::InvalidCardinality> for crate::v1::Error {
225    fn from(e: view::InvalidCardinality) -> Self {
226        crate::v1::Error::InvalidRankCardinality {
227            expected: e.expected,
228            actual: e.actual,
229        }
230    }
231}
232
/// The type of result used in `hyperactor_mesh::v1`: a
/// [`std::result::Result`] whose error type is fixed to [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
235
/// Names are used to identify objects in the system. They have a user-provided name,
/// and a unique UUID.
///
/// Names have a concrete syntax--`{name}-{uuid}`--printed by `Display` and parsed by `FromStr`.
/// Reserved names carry no UUID and are printed as just `{name}`.
///
/// `Serialize` and `Deserialize` are implemented by hand in this module (using the
/// `Display`/`FromStr` string form) rather than derived.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Named, EnumAsInner)]
pub enum Name {
    /// Normal names for most actors: the user-provided name plus its UUID.
    Suffixed(String, ShortUuid),
    /// Reserved names for system actors without UUIDs.
    Reserved(String),
}
// Registers `Name` with `wirevalue`.
// NOTE(review): presumably needed so `Name` can travel as a typed wire value -- confirm.
wirevalue::register_type!(Name);
248
// The delimiter between the name and the uuid when a `Name::Suffixed` is
// stringified (`{name}-{uuid}`). Actor names must be parseable as an actor
// identifier. We do not allow this delimiter in reserved names so that these
// names parse unambiguously: `Name::from_str` splits on the first occurrence
// of the delimiter, and a string without it is parsed as a reserved name.
static NAME_SUFFIX_DELIMITER: &str = "-";
253
254impl Name {
255    /// Create a new `Name` from a user-provided base name.
256    pub fn new(name: impl Into<String>) -> Result<Self> {
257        Ok(Self::new_with_uuid(name, Some(ShortUuid::generate()))?)
258    }
259
260    /// Create a Reserved `Name` with no uuid. Only for use by system actors.
261    pub fn new_reserved(name: impl Into<String>) -> Result<Self> {
262        Ok(Self::new_with_uuid(name, None)?)
263    }
264
265    fn new_with_uuid(
266        name: impl Into<String>,
267        uuid: Option<ShortUuid>,
268    ) -> std::result::Result<Self, NameParseError> {
269        let mut name = name.into();
270        if name.is_empty() {
271            name = "unnamed".to_string();
272        }
273        if !reference::is_valid_ident(&name) {
274            return Err(NameParseError::InvalidName(name));
275        }
276        if let Some(uuid) = uuid {
277            Ok(Self::Suffixed(name, uuid))
278        } else {
279            Ok(Self::Reserved(name))
280        }
281    }
282
283    /// The name portion of this `Name`.
284    pub fn name(&self) -> &str {
285        match self {
286            Self::Suffixed(n, _) => n,
287            Self::Reserved(n) => n,
288        }
289    }
290
291    /// The UUID portion of this `Name`.
292    /// Only Some for Name::Suffixed, if called on Name::Reserved it'll be None.
293    pub fn uuid(&self) -> Option<&ShortUuid> {
294        match self {
295            Self::Suffixed(_, uuid) => Some(uuid),
296            Self::Reserved(_) => None,
297        }
298    }
299}
300
301impl Serialize for Name {
302    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
303    where
304        S: serde::Serializer,
305    {
306        // Consider doing this only when `serializer.is_human_readable()`:
307        serializer.serialize_str(&self.to_string())
308    }
309}
310
311impl<'de> Deserialize<'de> for Name {
312    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
313    where
314        D: serde::Deserializer<'de>,
315    {
316        let s = String::deserialize(deserializer)?;
317        Name::from_str(&s).map_err(serde::de::Error::custom)
318    }
319}
320
/// Errors that occur when parsing names.
#[derive(thiserror::Error, Debug)]
pub enum NameParseError {
    /// The name portion is missing (empty).
    #[error("invalid name: missing name")]
    MissingName,

    /// The uuid portion is missing (empty).
    #[error("invalid name: missing uuid")]
    MissingUuid,

    /// The name portion is not a valid identifier; carries the offending name.
    ///
    /// Strictly speaking Monarch identifier also supports other characters. But
    /// to avoid confusion for the user, we only state intuitive characters here
    /// so the error message is more actionable.
    #[error(
        "invalid name '{0}': names must contain only alphanumeric characters \
        and underscores, and must start with a letter or underscore"
    )]
    InvalidName(String),

    /// The uuid portion failed to parse as a [`ShortUuid`]; wraps the
    /// parser's own error.
    #[error(transparent)]
    InvalidUuid(#[from] <ShortUuid as FromStr>::Err),

    /// The separator between name and uuid is missing.
    /// NOTE(review): not constructed anywhere in the visible part of this
    /// module -- confirm whether it is still used.
    #[error("invalid name: missing separator")]
    MissingSeparator,
}
345
346impl FromStr for Name {
347    type Err = NameParseError;
348
349    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
350        // The delimiter ('-') is allowable in elements, but not identifiers;
351        // thus splitting on this unambiguously parses suffixed and reserved names.
352        if let Some((name, uuid)) = s.split_once(NAME_SUFFIX_DELIMITER) {
353            if name.is_empty() {
354                return Err(NameParseError::MissingName);
355            }
356            if uuid.is_empty() {
357                return Err(NameParseError::MissingName);
358            }
359
360            Name::new_with_uuid(name.to_string(), Some(uuid.parse()?))
361        } else {
362            if s.is_empty() {
363                return Err(NameParseError::MissingName);
364            }
365            Name::new_with_uuid(s, None)
366        }
367    }
368}
369
370impl std::fmt::Display for Name {
371    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
372        match self {
373            Self::Suffixed(n, uuid) => {
374                write!(f, "{}{}", n, NAME_SUFFIX_DELIMITER)?;
375                uuid.format(f, true /*raw*/)
376            }
377            Self::Reserved(n) => write!(f, "{}", n),
378        }
379    }
380}
381
#[cfg(test)]
mod tests {
    use super::*;

    /// Two names built from the same base differ because each gets its own uuid.
    #[test]
    fn test_name_unique() {
        assert_ne!(Name::new("foo").unwrap(), Name::new("foo").unwrap());
        let name = Name::new("foo").unwrap();
        assert_eq!(name, name);
    }

    /// A suffixed name with a fixed uuid survives `Display` -> `FromStr`.
    #[test]
    fn test_name_roundtrip() {
        let uuid = "111111111111".parse::<ShortUuid>().unwrap();
        let name = Name::new_with_uuid("foo", Some(uuid)).unwrap();
        let rendered = name.to_string();
        assert_eq!(rendered, "foo-111111111111");
        assert_eq!(name, Name::from_str(&rendered).unwrap());
    }

    #[test]
    fn test_name_roundtrip_with_underscore() {
        // A ShortUuid may have an underscore prefix if the first character is a digit.
        // Make sure this doesn't impact parsing.
        let uuid = "_1a2b3c4d5e6f".parse::<ShortUuid>().unwrap();
        let name = Name::new_with_uuid("foo", Some(uuid)).unwrap();
        let rendered = name.to_string();
        // Leading underscore is stripped as not needed.
        assert_eq!(rendered, "foo-1a2b3c4d5e6f");
        assert_eq!(name, Name::from_str(&rendered).unwrap());
    }

    /// A name with a randomly generated uuid survives `Display` -> `FromStr`.
    #[test]
    fn test_name_roundtrip_random() {
        let name = Name::new("foo").unwrap();
        assert_eq!(name, Name::from_str(&name.to_string()).unwrap());
    }

    /// Reserved names render without a suffix and parse back unchanged.
    #[test]
    fn test_name_roundtrip_reserved() {
        let name = Name::new_reserved("foo").unwrap();
        let rendered = name.to_string();
        assert_eq!(rendered, "foo");
        assert_eq!(name, Name::from_str(&rendered).unwrap());
    }

    #[test]
    fn test_name_parse() {
        // Without a `-` delimiter the whole string, underscores included, is
        // parsed as a reserved name rather than as name + uuid.
        let reserved = Name::from_str("foo_bar_1a2b3c4d5e6f").unwrap();
        assert_eq!(reserved.name(), "foo_bar_1a2b3c4d5e6f");
        assert!(reserved.uuid().is_none());
        assert_eq!(format!("{}", reserved), "foo_bar_1a2b3c4d5e6f");

        // Underscores in the name portion do not interfere with the `-` split.
        let suffixed = Name::from_str("foo_bar-1a2b3c4d5e6f").unwrap();
        assert_eq!(suffixed.name(), "foo_bar");
        assert!(suffixed.uuid().is_some());
        assert_eq!(suffixed.to_string(), "foo_bar-1a2b3c4d5e6f");
    }

    /// Inputs that lack a name or a uuid are rejected by the parser.
    #[test]
    fn test_parse_rejects_missing_parts() {
        assert!(Name::from_str("").is_err());
        assert!(Name::from_str("-111111111111").is_err());
        assert!(Name::from_str("foo-").is_err());
    }

    #[test]
    fn test_invalid() {
        // We assign "unnamed" to empty names.
        assert!(Name::new("").is_ok());
        // These are not valid identifiers:
        assert!(Name::new("foo-").is_err());
        assert!(Name::new("foo-bar").is_err());
    }
}