monarch_rdma/
efa.rs

1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
9//! EFA (Elastic Fabric Adapter) specific RDMA operations.
10//!
11//! This module contains EFA-specific helpers for device detection and configuration.
12//! Connect and post operations are handled by C functions in rdmaxcel.c.
13
14use std::sync::OnceLock;
15
16use crate::backend::ibverbs::primitives::IbvConfig;
17
18/// Cached result of EFA device check.
19static EFA_DEVICE_CACHE: OnceLock<bool> = OnceLock::new();
20
21/// Checks if any EFA device is available in the system.
22///
23/// Uses `efadv_query_device()` to detect EFA hardware.
24/// The result is cached after the first call.
25pub fn is_efa_device() -> bool {
26    *EFA_DEVICE_CACHE.get_or_init(is_efa_device_impl)
27}
28
29fn is_efa_device_impl() -> bool {
30    // SAFETY: We are calling C functions from libibverbs and libefa.
31    unsafe {
32        let mut num_devices = 0;
33        let device_list = rdmaxcel_sys::ibv_get_device_list(&mut num_devices);
34        if device_list.is_null() || num_devices == 0 {
35            return false;
36        }
37        let mut found = false;
38        for i in 0..num_devices {
39            let device = *device_list.add(i as usize);
40            if device.is_null() {
41                continue;
42            }
43            let context = rdmaxcel_sys::ibv_open_device(device);
44            if context.is_null() {
45                continue;
46            }
47            if rdmaxcel_sys::rdmaxcel_is_efa_dev(context) != 0 {
48                found = true;
49                rdmaxcel_sys::ibv_close_device(context);
50                break;
51            }
52            rdmaxcel_sys::ibv_close_device(context);
53        }
54        rdmaxcel_sys::ibv_free_device_list(device_list);
55        found
56    }
57}
58
59/// Applies EFA-specific defaults to an `IbvConfig`.
60///
61/// EFA devices have different capabilities than standard InfiniBand/RoCE devices:
62/// - GID index 0 (instead of 3)
63/// - Max 1 SGE per work request
64/// - No RDMA atomics support
65pub fn apply_efa_defaults(config: &mut IbvConfig) {
66    config.gid_index = 0;
67    config.max_send_sge = 1;
68    config.max_recv_sge = 1;
69    config.max_dest_rd_atomic = 0;
70    config.max_rd_atomic = 0;
71}
72
73/// Returns the MR access flags appropriate for EFA devices.
74///
75/// EFA does not support `IBV_ACCESS_REMOTE_ATOMIC`, so this returns only
76/// local write, remote write, and remote read flags.
77pub fn mr_access_flags() -> rdmaxcel_sys::ibv_access_flags {
78    rdmaxcel_sys::ibv_access_flags::IBV_ACCESS_LOCAL_WRITE
79        | rdmaxcel_sys::ibv_access_flags::IBV_ACCESS_REMOTE_WRITE
80        | rdmaxcel_sys::ibv_access_flags::IBV_ACCESS_REMOTE_READ
81}