hyperactor/metrics.rs
1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
9//! Hyperactor metrics.
10//!
11//! This module contains metrics definitions for various components of hyperactor.
12
13use hyperactor_telemetry::declare_static_counter;
14use hyperactor_telemetry::declare_static_histogram;
15use hyperactor_telemetry::declare_static_timer;
16use hyperactor_telemetry::declare_static_up_down_counter;
17
18// MAILBOX
19// Tracks messages that couldn't be delivered to their destination and were returned as undeliverable
20declare_static_counter!(
21 MAILBOX_UNDELIVERABLE_MESSAGES,
22 "mailbox.undeliverable_messages"
23);
24// Tracks the number of messages that were posted.
25hyperactor_telemetry::declare_static_counter!(MAILBOX_POSTS, "mailbox.posts");
26
27// ACTOR
28// Tracks the current size of the message queue for actors (increases when messages are queued, decreases when processed)
29declare_static_up_down_counter!(ACTOR_MESSAGE_QUEUE_SIZE, "actor.message_queue_size");
30// Tracks the total number of messages sent by actors
31declare_static_counter!(ACTOR_MESSAGES_SENT, "actor.messages_sent");
32// Tracks the total number of messages received by actors
33declare_static_counter!(ACTOR_MESSAGES_RECEIVED, "actor.messages_received");
34// Tracks errors that occur when receiving messages
35declare_static_counter!(ACTOR_MESSAGE_RECEIVE_ERRORS, "actor.message_receive_errors");
36// Measures the time taken to handle messages by actors
37declare_static_timer!(
38 ACTOR_MESSAGE_HANDLER_DURATION,
39 "actor.message_handler_duration",
40 hyperactor_telemetry::TimeUnit::Nanos
41);
42
43// CHANNEL
44declare_static_histogram!(REMOTE_MESSAGE_SEND_SIZE, "channel.remote_message_send_size");
45// Tracks the number of new channel connections established (client and server)
46declare_static_counter!(CHANNEL_CONNECTIONS, "channel.connections");
47// Tracks errors that occur when establishing channel connections
48declare_static_counter!(CHANNEL_CONNECTION_ERRORS, "channel.connection_errors");
49// Tracks the number of channel reconnection attempts
50declare_static_counter!(CHANNEL_RECONNECTIONS, "channel.reconnections");
51// Tracks the number of NetRx encountering full buffer, i.e. its mspc channel.
52
53// This metric counts how often the NetRx→client mpsc channel remains full,
54// incrementing once per CHANNEL_NET_RX_BUFFER_FULL_CHECK_INTERVAL while blocked.
55declare_static_counter!(CHANNEL_NET_RX_BUFFER_FULL, "channel.net_rx_buffer_full");
56
57// PROC MESH
58// Tracks the number of active processes in the process mesh
59declare_static_counter!(PROC_MESH_ALLOCATION, "proc_mesh.active_procs");
60// Tracks the number of process failures in the process mesh
61declare_static_counter!(PROC_MESH_PROC_STOPPED, "proc_mesh.proc_failures");
62// Tracks the number of actor failures within the process mesh
63declare_static_counter!(PROC_MESH_ACTOR_FAILURES, "proc_mesh.actor_failures");
64
65// MESSAGE LATENCY
66// Tracks end-to-end message latency in microseconds (sampled at 1% by default)
67declare_static_histogram!(MESSAGE_LATENCY_MICROS, "message.e2e_latency.us");