hyperactor_mesh/
supervision.rs

1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
9//! Messages used in supervision of actor meshes.
10
11use hyperactor::Bind;
12use hyperactor::Unbind;
13use hyperactor::actor::ActorErrorKind;
14use hyperactor::actor::ActorStatus;
15use hyperactor::context;
16use hyperactor::supervision::ActorSupervisionEvent;
17use serde::Deserialize;
18use serde::Serialize;
19use typeuri::Named;
20
21/// Message about a supervision failure on a mesh of actors instead of a single
22/// actor.
23#[derive(Clone, Debug, Serialize, Deserialize, Named, PartialEq, Bind, Unbind)]
24pub struct MeshFailure {
25    /// Name of the mesh which the event originated from.
26    pub actor_mesh_name: Option<String>,
27    /// Rank of the mesh from which the event originated.
28    /// TODO: Point instead?
29    pub rank: Option<usize>,
30    /// The supervision event on an actor located at mesh + rank.
31    pub event: ActorSupervisionEvent,
32}
33wirevalue::register_type!(MeshFailure);
34
35impl MeshFailure {
36    /// Helper function to handle a message to an actor that just wants to forward
37    /// it to the next owner.
38    pub fn default_handler(&self, cx: &impl context::Actor) -> Result<(), anyhow::Error> {
39        // If an actor spawned by this one fails, we can't handle it. We fail
40        // ourselves with a chained error and bubble up to the next owner.
41        let err = ActorErrorKind::UnhandledSupervisionEvent(Box::new(ActorSupervisionEvent::new(
42            cx.instance().self_id().clone(),
43            None,
44            ActorStatus::Failed(ActorErrorKind::UnhandledSupervisionEvent(Box::new(
45                self.event.clone(),
46            ))),
47            None,
48        )));
49        Err(anyhow::Error::new(err))
50    }
51}
52
53impl std::fmt::Display for MeshFailure {
54    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
55        write!(
56            f,
57            "Supervision failure on mesh {:?} at rank {:?} with event: {}",
58            self.actor_mesh_name, self.rank, self.event
59        )
60    }
61}
62
63// Shared between mesh types.
64#[derive(Debug, Clone)]
65pub(crate) enum Unhealthy {
66    StreamClosed(MeshFailure), // Event stream closed
67    Crashed(MeshFailure),      // Bad health event received
68}