monarch_rdma/backend/
ibverbs.rs

1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
9//! ibverbs backend implementation for RDMA operations.
10
11use std::sync::Arc;
12
13use hyperactor::reference;
14use serde::Deserialize;
15use serde::Serialize;
16use typeuri::Named;
17
18pub mod device_selection;
19pub(crate) mod domain;
20pub mod manager_actor;
21pub mod primitives;
22pub mod queue_pair;
23
24use manager_actor::IbvManagerActor;
25pub use queue_pair::IbvQueuePair;
26pub use queue_pair::PollTarget;
27
28#[cfg(test)]
29mod ibv_manager_actor_tests;
30#[cfg(test)]
31mod mlx5dv_tests;
32#[cfg(test)]
33mod test_utils;
34
35use crate::RdmaOpType;
36use crate::local_memory::RdmaLocalMemory;
37
38/// Lazily-initialized ibverbs transport details for a registered memory
39/// region. Retrieved on demand from the [`IbvManagerActor`] via
40/// [`IbvManagerMessage::RequestBuffer`].
///
/// This is plain data: every field is an integer or a `String`, and the type
/// derives `Serialize`/`Deserialize` in addition to `Clone`.
41#[derive(Debug, Clone, Serialize, Deserialize, Named)]
42pub struct IbvBuffer {
    /// Identifier of the memory region.
    /// NOTE(review): presumably assigned by the manager that registered the
    /// region -- confirm against `manager_actor`.
43    pub mr_id: usize,
    /// ibverbs local key (`lkey`) of the memory region.
    /// NOTE(review): by ibverbs convention this is the key used for local
    /// access to the region -- confirm usage in `queue_pair`.
44    pub lkey: u32,
    /// ibverbs remote key (`rkey`) of the memory region.
    /// NOTE(review): by ibverbs convention this is the key a peer presents for
    /// remote access to the region -- confirm usage in `queue_pair`.
45    pub rkey: u32,
46    /// RDMA address (may differ from virtual address for CUDA memory).
47    pub addr: usize,
    /// Size of the region.
    /// NOTE(review): presumably in bytes -- confirm.
48    pub size: usize,
49    /// Name of the RDMA device this buffer is associated with (e.g., "mlx5_0").
50    pub device_name: String,
51}
52
53/// A single RDMA op for the [`IbvSubmit`] message.
///
/// Pairs a local memory handle with the ibverbs details of a remote buffer.
/// The type derives `Debug`, `Clone` and `Named` only; it does not derive the
/// serde traits.
54#[derive(Debug, Clone, Named)]
55pub struct IbvOp {
    /// Kind of RDMA operation to perform.
56    pub op_type: RdmaOpType,
    /// Local memory taking part in the operation, held as a shared
    /// (`Arc`) trait object.
57    pub local_memory: Arc<dyn RdmaLocalMemory>,
    /// ibverbs transport details of the remote buffer.
58    pub remote_buffer: IbvBuffer,
    /// Reference to an [`IbvManagerActor`].
    /// NOTE(review): presumably the manager that owns `remote_buffer` --
    /// confirm against the `IbvSubmit` handler.
59    pub remote_manager: reference::ActorRef<IbvManagerActor>,
60}