monarch_rdma/test_utils.rs
1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
9use std::sync::Once;
10use std::sync::atomic::AtomicBool;
11use std::sync::atomic::Ordering;
12
/// One-time guard used to run the CUDA availability check at most once.
static INIT: Once = Once::new();
/// Cached result of the CUDA availability check; starts out as `false`.
static CUDA_AVAILABLE: AtomicBool = AtomicBool::new(false);
16
17/// Safely checks if CUDA is available on the system.
18///
19/// This function attempts to initialize CUDA and determine if it's available.
20/// The result is cached after the first call, so subsequent calls are very fast.
21///
22/// # Returns
23///
24/// `true` if CUDA is available and can be initialized, `false` otherwise.
25///
26/// # Examples
27///
28/// ```
29/// use monarch_rdma::is_cuda_available;
30///
31/// if is_cuda_available() {
32/// println!("CUDA is available, can use GPU features");
33/// } else {
34/// println!("CUDA is not available, falling back to CPU-only mode");
35/// }
36/// ```
37pub fn is_cuda_available() -> bool {
38 INIT.call_once(|| {
39 let available = check_cuda_available();
40 CUDA_AVAILABLE.store(available, Ordering::SeqCst);
41 });
42 CUDA_AVAILABLE.load(Ordering::SeqCst)
43}
44
45/// Internal function that performs the actual CUDA availability check
46fn check_cuda_available() -> bool {
47 unsafe {
48 // Try to initialize CUDA
49 let result = rdmaxcel_sys::rdmaxcel_cuInit(0);
50
51 if result != rdmaxcel_sys::CUDA_SUCCESS {
52 return false;
53 }
54
55 // Check if there are any CUDA devices
56 let mut device_count: i32 = 0;
57 let count_result = rdmaxcel_sys::rdmaxcel_cuDeviceGetCount(&mut device_count);
58
59 if count_result != rdmaxcel_sys::CUDA_SUCCESS || device_count <= 0 {
60 return false;
61 }
62
63 // Try to get the first device to verify it's actually accessible
64 let mut device: rdmaxcel_sys::CUdevice = std::mem::zeroed();
65 let device_result = rdmaxcel_sys::rdmaxcel_cuDeviceGet(&mut device, 0);
66
67 if device_result != rdmaxcel_sys::CUDA_SUCCESS {
68 return false;
69 }
70
71 true
72 }
73}