monarch_rdma/
test_utils.rs

1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
9use std::sync::Once;
10use std::sync::atomic::AtomicBool;
11use std::sync::atomic::Ordering;
12
/// One-time guard: `is_cuda_available` runs the CUDA probe inside `INIT.call_once`.
static INIT: Once = Once::new();
/// Cached outcome of the CUDA probe. Starts as `false` and is overwritten with
/// the probe result the first time `is_cuda_available` is called.
static CUDA_AVAILABLE: AtomicBool = AtomicBool::new(false);
16
17/// Safely checks if CUDA is available on the system.
18///
19/// This function attempts to initialize CUDA and determine if it's available.
20/// The result is cached after the first call, so subsequent calls are very fast.
21///
22/// # Returns
23///
24/// `true` if CUDA is available and can be initialized, `false` otherwise.
25///
26/// # Examples
27///
28/// ```
29/// use monarch_rdma::is_cuda_available;
30///
31/// if is_cuda_available() {
32///     println!("CUDA is available, can use GPU features");
33/// } else {
34///     println!("CUDA is not available, falling back to CPU-only mode");
35/// }
36/// ```
37pub fn is_cuda_available() -> bool {
38    INIT.call_once(|| {
39        let available = check_cuda_available();
40        CUDA_AVAILABLE.store(available, Ordering::SeqCst);
41    });
42    CUDA_AVAILABLE.load(Ordering::SeqCst)
43}
44
45/// Internal function that performs the actual CUDA availability check
46fn check_cuda_available() -> bool {
47    unsafe {
48        // Try to initialize CUDA
49        let result = rdmaxcel_sys::rdmaxcel_cuInit(0);
50
51        if result != rdmaxcel_sys::CUDA_SUCCESS {
52            return false;
53        }
54
55        // Check if there are any CUDA devices
56        let mut device_count: i32 = 0;
57        let count_result = rdmaxcel_sys::rdmaxcel_cuDeviceGetCount(&mut device_count);
58
59        if count_result != rdmaxcel_sys::CUDA_SUCCESS || device_count <= 0 {
60            return false;
61        }
62
63        // Try to get the first device to verify it's actually accessible
64        let mut device: rdmaxcel_sys::CUdevice = std::mem::zeroed();
65        let device_result = rdmaxcel_sys::rdmaxcel_cuDeviceGet(&mut device, 0);
66
67        if device_result != rdmaxcel_sys::CUDA_SUCCESS {
68            return false;
69        }
70
71        true
72    }
73}