Skip to main content

torch_sys_cuda/
nccl_stubs.rs

1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
9//! Stub types for NCCL when the `cuda` feature is disabled.
10//!
11//! These types match the public API of the real `nccl` module so that
12//! downstream crates (e.g., `monarch_tensor_worker`) compile without CUDA.
13//! Constructors and operations panic at runtime; the types exist solely for
14//! type-level compatibility, and callers are expected to gate usage on a
15//! tensor-engine capability check before reaching this code.
16
17pub use monarch_types::ReduceOp;
18pub use monarch_types::UniqueId;
19use torch_sys2::CudaDevice;
20use torch_sys2::TensorCell;
21
22use crate::cuda::Stream;
23pub use crate::nccl_common::DataType;
24pub use crate::nccl_common::NcclError;
25pub use crate::nccl_common::NcclGroupTicket;
26pub use crate::nccl_common::NcclStatus;
27pub use crate::nccl_common::RawNcclError;
28
/// Panic message shared by the stubs in this module that cannot do any work
/// without the `cuda` feature.
const UNAVAILABLE: &str = "NCCL requires the `cuda` feature";
30
31pub fn group_start() -> Result<NcclGroupTicket, NcclError> {
32    panic!("{}", UNAVAILABLE)
33}
34
35pub fn group_end(_ticket: NcclGroupTicket) -> Result<(), NcclError> {
36    panic!("{}", UNAVAILABLE)
37}
38
/// Extension trait providing NCCL-specific operations on `UniqueId`.
pub trait UniqueIdExt {
    /// Produces a new `UniqueId`, or a `RawNcclError` if that fails.
    fn new_nccl() -> Result<UniqueId, RawNcclError>;
    /// Returns this id as a 128-element `c_char` array.
    ///
    /// NOTE(review): presumably this is the byte layout NCCL itself expects;
    /// confirm against the real `nccl` module.
    fn to_nccl(&self) -> [std::os::raw::c_char; 128];
}
44
45impl UniqueIdExt for UniqueId {
46    fn new_nccl() -> Result<UniqueId, RawNcclError> {
47        panic!("{}", UNAVAILABLE)
48    }
49
50    fn to_nccl(&self) -> [std::os::raw::c_char; 128] {
51        *self.internal()
52    }
53}
54
/// Stand-in for the NCCL communicator when the `cuda` feature is disabled.
///
/// The type carries no state; it exists so that code naming `Communicator`
/// still compiles in non-CUDA builds.
#[derive(Debug)]
pub struct Communicator {
    // Private zero-sized field: prevents code outside this module from
    // building a `Communicator` with a struct literal.
    _private: (),
}
59
60impl Communicator {
61    pub fn new(
62        _device: CudaDevice,
63        _world_size: i32,
64        _unique_id: UniqueId,
65        _rank: i32,
66    ) -> Result<Self, NcclError> {
67        panic!("{}", UNAVAILABLE)
68    }
69
70    pub fn split_all(&mut self) -> Result<Self, NcclError> {
71        panic!("{}", UNAVAILABLE)
72    }
73
74    pub fn split_from(&mut self, _ranks: Vec<i32>) -> Result<Option<Self>, NcclError> {
75        panic!("{}", UNAVAILABLE)
76    }
77
78    pub fn all_reduce(
79        &mut self,
80        _tensor: &TensorCell,
81        _reduce_op: ReduceOp,
82        _stream: &Stream,
83    ) -> Result<NcclStatus, NcclError> {
84        panic!("{}", UNAVAILABLE)
85    }
86
87    pub fn broadcast(
88        &mut self,
89        _tensor: &TensorCell,
90        _root: i32,
91        _stream: &Stream,
92    ) -> Result<NcclStatus, NcclError> {
93        panic!("{}", UNAVAILABLE)
94    }
95
96    pub fn reduce(
97        &mut self,
98        _tensor: &TensorCell,
99        _reduce_op: ReduceOp,
100        _root: i32,
101        _stream: &Stream,
102    ) -> Result<NcclStatus, NcclError> {
103        panic!("{}", UNAVAILABLE)
104    }
105
106    pub fn all_gather(
107        &mut self,
108        _output_cells: &[TensorCell],
109        _input_cell: &TensorCell,
110        _stream: &Stream,
111    ) -> Result<NcclStatus, NcclError> {
112        panic!("{}", UNAVAILABLE)
113    }
114
115    pub fn all_gather_into_tensor(
116        &mut self,
117        _output_cell: &TensorCell,
118        _input_cell: &TensorCell,
119        _stream: &Stream,
120    ) -> Result<NcclStatus, NcclError> {
121        panic!("{}", UNAVAILABLE)
122    }
123
124    pub fn reduce_scatter_tensor(
125        &mut self,
126        _output_cell: &TensorCell,
127        _input_cell: &TensorCell,
128        _reduce_op: ReduceOp,
129        _stream: &Stream,
130    ) -> Result<NcclStatus, NcclError> {
131        panic!("{}", UNAVAILABLE)
132    }
133
134    pub fn send(
135        &mut self,
136        _tensor_cell: &TensorCell,
137        _dst: i32,
138        _stream: &Stream,
139    ) -> Result<NcclStatus, NcclError> {
140        panic!("{}", UNAVAILABLE)
141    }
142
143    pub fn recv(
144        &mut self,
145        _tensor_cell: &TensorCell,
146        _src: i32,
147        _stream: &Stream,
148    ) -> Result<NcclStatus, NcclError> {
149        panic!("{}", UNAVAILABLE)
150    }
151
152    pub fn all_to_all_single(
153        &mut self,
154        _output_cell: &TensorCell,
155        _input_cell: &TensorCell,
156        _stream: &Stream,
157    ) -> Result<NcclStatus, NcclError> {
158        panic!("{}", UNAVAILABLE)
159    }
160
161    pub fn barrier(&mut self, _stream: &Stream) -> Result<NcclStatus, NcclError> {
162        panic!("{}", UNAVAILABLE)
163    }
164}
165
166// SAFETY: the stub Communicator carries no data; all methods panic.
167unsafe impl Send for Communicator {}
168// SAFETY: the stub Communicator carries no data; all methods panic.
169unsafe impl Sync for Communicator {}