monarch_rdma/
local_memory.rs

/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */
8
//! Local memory abstractions for RDMA operations.
//!
//! [`KeepaliveLocalMemory`] wraps a raw pointer with a [`Keepalive`]
//! guard and dispatches reads/writes to CPU or CUDA paths.
13
14use std::fmt::Debug;
15use std::sync::Arc;
16use std::sync::Condvar;
17use std::sync::Mutex;
18use std::sync::OnceLock;
19
20use crate::backend::ibverbs::memory_region::IbvMemoryRegionView;
21
22/// Returns `true` when `addr` is a CUDA device pointer.
23///
24/// Probes the CUDA driver via `cuPointerGetAttribute`; returns `false`
25/// when CUDA is unavailable or the pointer is not device memory.
26pub fn is_device_ptr(addr: usize) -> bool {
27    // SAFETY: FFI call that queries pointer metadata without accessing
28    // the pointed-to memory.
29    unsafe {
30        let mut mem_type: u32 = 0;
31        let err = rdmaxcel_sys::rdmaxcel_cuPointerGetAttribute(
32            &mut mem_type as *mut _ as *mut std::ffi::c_void,
33            rdmaxcel_sys::CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
34            addr as rdmaxcel_sys::CUdeviceptr,
35        );
36        err == rdmaxcel_sys::CUDA_SUCCESS && mem_type == rdmaxcel_sys::CU_MEMORYTYPE_DEVICE
37    }
38}
39
40/// RAII guard that restores the previous CUDA context on drop and, if a
41/// primary context was retained, releases it.
42pub(crate) struct CudaCtxGuard {
43    prev: rdmaxcel_sys::CUcontext,
44    /// Set when the fallback path called `cuDevicePrimaryCtxRetain`.
45    retained_device: Option<rdmaxcel_sys::CUdevice>,
46}
47
48impl Drop for CudaCtxGuard {
49    fn drop(&mut self) {
50        unsafe {
51            rdmaxcel_sys::rdmaxcel_cuCtxSetCurrent(self.prev);
52            if let Some(device) = self.retained_device {
53                rdmaxcel_sys::rdmaxcel_cuDevicePrimaryCtxRelease(device);
54            }
55        }
56    }
57}
58
59/// Make the CUDA context that owns `addr` current on the calling
60/// thread, returning a guard that restores the previous context on
61/// drop.
62///
63/// First tries `CU_POINTER_ATTRIBUTE_CONTEXT` to get the exact context
64/// the allocation belongs to.  When that returns null (runtime-API or
65/// memory-pool allocations such as PyTorch's caching allocator), falls
66/// back to the device's primary context via
67/// `CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL` + `cuDevicePrimaryCtxRetain`.
68///
69/// # Safety
70///
71/// `addr` must be a valid CUDA device pointer.
72pub(crate) unsafe fn set_ctx_for_ptr(addr: usize) -> Result<CudaCtxGuard, anyhow::Error> {
73    let mut prev: rdmaxcel_sys::CUcontext = std::ptr::null_mut();
74    unsafe {
75        rdmaxcel_sys::rdmaxcel_cuCtxGetCurrent(&mut prev);
76    }
77
78    let mut ctx: rdmaxcel_sys::CUcontext = std::ptr::null_mut();
79    let rc = unsafe {
80        rdmaxcel_sys::rdmaxcel_cuPointerGetAttribute(
81            &mut ctx as *mut _ as *mut std::ffi::c_void,
82            rdmaxcel_sys::CU_POINTER_ATTRIBUTE_CONTEXT,
83            addr as rdmaxcel_sys::CUdeviceptr,
84        )
85    };
86
87    // Null context: allocation came from the runtime API or a memory
88    // pool.  Fall back to the owning device's primary context.
89    let mut retained_device = None;
90    if rc != rdmaxcel_sys::CUDA_SUCCESS || ctx.is_null() {
91        let mut ordinal: i32 = -1;
92        let rc = unsafe {
93            rdmaxcel_sys::rdmaxcel_cuPointerGetAttribute(
94                &mut ordinal as *mut _ as *mut std::ffi::c_void,
95                rdmaxcel_sys::CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL,
96                addr as rdmaxcel_sys::CUdeviceptr,
97            )
98        };
99        anyhow::ensure!(
100            rc == rdmaxcel_sys::CUDA_SUCCESS,
101            "cuPointerGetAttribute(DEVICE_ORDINAL) failed with error code {rc}"
102        );
103
104        let mut device: rdmaxcel_sys::CUdevice = 0;
105        let rc = unsafe { rdmaxcel_sys::rdmaxcel_cuDeviceGet(&mut device, ordinal) };
106        anyhow::ensure!(
107            rc == rdmaxcel_sys::CUDA_SUCCESS,
108            "cuDeviceGet({ordinal}) failed with error code {rc}"
109        );
110
111        let rc = unsafe { rdmaxcel_sys::rdmaxcel_cuDevicePrimaryCtxRetain(&mut ctx, device) };
112        anyhow::ensure!(
113            rc == rdmaxcel_sys::CUDA_SUCCESS,
114            "cuDevicePrimaryCtxRetain failed with error code {rc}"
115        );
116        retained_device = Some(device);
117    }
118
119    let rc = unsafe { rdmaxcel_sys::rdmaxcel_cuCtxSetCurrent(ctx) };
120    anyhow::ensure!(
121        rc == rdmaxcel_sys::CUDA_SUCCESS,
122        "cuCtxSetCurrent failed with error code {rc}"
123    );
124
125    Ok(CudaCtxGuard {
126        prev,
127        retained_device,
128    })
129}
130
131/// Verify that an access at `offset` with `len` bytes fits within `size`.
132fn check_bounds(offset: usize, len: usize, size: usize) -> Result<(), anyhow::Error> {
133    anyhow::ensure!(
134        offset.checked_add(len).is_some_and(|end| end <= size),
135        "access at offset {offset} with length {len} exceeds region size {size}"
136    );
137    Ok(())
138}
139
/// Fills `dst` with the `dst.len()` bytes of host memory that start at
/// `addr + offset`.
///
/// # Safety
///
/// `addr` must point to a valid host allocation that is at least
/// `offset + dst.len()` bytes long.
unsafe fn read_cpu(addr: usize, offset: usize, dst: &mut [u8]) {
    let source = (addr + offset) as *const u8;
    let count = dst.len();
    // SAFETY: the caller guarantees `source..source + count` is readable
    // host memory; `dst` is an exclusive borrow, so the ranges are
    // distinct.
    unsafe {
        dst.as_mut_ptr().copy_from_nonoverlapping(source, count);
    }
}
151
/// Stores the `src.len()` bytes of `src` into host memory starting at
/// `addr + offset`.
///
/// # Safety
///
/// `addr` must point to a valid host allocation that is at least
/// `offset + src.len()` bytes long.
unsafe fn write_cpu(addr: usize, offset: usize, src: &[u8]) {
    let destination = (addr + offset) as *mut u8;
    let count = src.len();
    // SAFETY: the caller guarantees `destination..destination + count`
    // is writable host memory of sufficient size.
    unsafe {
        src.as_ptr().copy_to_nonoverlapping(destination, count);
    }
}
163
164/// Copy `dst.len()` bytes from device memory at `addr + offset` into `dst`.
165///
166/// # Safety
167///
168/// The caller must ensure that `addr` is a valid CUDA device pointer to an
169/// allocation of at least `offset + dst.len()` bytes.
170unsafe fn read_gpu(addr: usize, offset: usize, dst: &mut [u8]) -> Result<(), anyhow::Error> {
171    let _guard = unsafe { set_ctx_for_ptr(addr)? };
172    let rc = unsafe {
173        rdmaxcel_sys::rdmaxcel_cuMemcpyDtoH_v2(
174            dst.as_mut_ptr() as *mut std::ffi::c_void,
175            (addr + offset) as rdmaxcel_sys::CUdeviceptr,
176            dst.len(),
177        )
178    };
179    anyhow::ensure!(
180        rc == rdmaxcel_sys::CUDA_SUCCESS,
181        "cuMemcpyDtoH failed with error code {rc}"
182    );
183    Ok(())
184}
185
186/// Copy `src.len()` bytes from `src` into device memory at `addr + offset`.
187///
188/// # Safety
189///
190/// The caller must ensure that `addr` is a valid CUDA device pointer to an
191/// allocation of at least `offset + src.len()` bytes.
192unsafe fn write_gpu(addr: usize, offset: usize, src: &[u8]) -> Result<(), anyhow::Error> {
193    let _guard = unsafe { set_ctx_for_ptr(addr)? };
194    let rc = unsafe {
195        rdmaxcel_sys::rdmaxcel_cuMemcpyHtoD_v2(
196            (addr + offset) as rdmaxcel_sys::CUdeviceptr,
197            src.as_ptr() as *const std::ffi::c_void,
198            src.len(),
199        )
200    };
201    anyhow::ensure!(
202        rc == rdmaxcel_sys::CUDA_SUCCESS,
203        "cuMemcpyHtoD failed with error code {rc}"
204    );
205    Ok(())
206}
207
/// Lock with three access modes, used by [`KeepaliveLocalMemory`] to
/// arbitrate between concurrent readers, a single exclusive writer, and
/// groups of "disjoint" writers (writers whose caller promises they
/// touch non-overlapping ranges).
///
/// - [`AccessLock::read`] is granted while the lock is idle or already
///   in read mode; any number of readers may hold it together.
/// - [`AccessLock::disjoint_write`] is granted while the lock is idle
///   or already in disjoint-write mode; any number of disjoint writers
///   may hold it together.
/// - [`AccessLock::exclusive`] is granted only while the lock is idle.
///
/// Read mode and disjoint-write mode never coexist, which is what lets
/// a reader see memory free of tearing even when disjoint writers run
/// in parallel with each other.
#[derive(Debug, Default)]
struct AccessLock {
    /// Current mode plus holder count, protected by the mutex.
    state: Mutex<AccessState>,
    /// Signalled every time the lock returns to [`AccessState::Idle`].
    cond: Condvar,
}

/// Mode the [`AccessLock`] is currently in.
#[derive(Debug, Default)]
enum AccessState {
    /// Nobody holds the lock.
    #[default]
    Idle,
    /// Held by this many readers.
    Reading(usize),
    /// Held by this many disjoint writers.
    DisjointWriting(usize),
    /// Held by one exclusive writer.
    Exclusive,
}

impl AccessLock {
    /// Creates an idle lock.
    fn new() -> Self {
        Self::default()
    }

    /// Locks the state mutex, panicking if it has been poisoned.
    fn lock_state(&self) -> std::sync::MutexGuard<'_, AccessState> {
        self.state.lock().expect("AccessLock poisoned")
    }

    /// Blocks on the condition variable for as long as `blocked`
    /// reports `true`, returning the state guard once it reports
    /// `false`. The predicate is evaluated before the first wait.
    fn wait_while_blocked(
        &self,
        blocked: impl FnMut(&mut AccessState) -> bool,
    ) -> std::sync::MutexGuard<'_, AccessState> {
        self.cond
            .wait_while(self.lock_state(), blocked)
            .expect("AccessLock poisoned")
    }

    /// Marks the lock idle and wakes every waiter. Must be called with
    /// the state mutex held (enforced by the `&mut AccessState`).
    fn release_to_idle(&self, state: &mut AccessState) {
        *state = AccessState::Idle;
        self.cond.notify_all();
    }

    /// Acquires the lock in read mode, waiting while a disjoint or
    /// exclusive writer holds it.
    fn read(&self) -> AccessReadGuard<'_> {
        let mut state = self.wait_while_blocked(|current| {
            matches!(
                current,
                AccessState::DisjointWriting(_) | AccessState::Exclusive
            )
        });
        if let AccessState::Reading(readers) = &mut *state {
            *readers += 1;
        } else {
            // The wait above only returns in `Idle` or `Reading`, so
            // this branch is the idle case: become the first reader.
            *state = AccessState::Reading(1);
        }
        AccessReadGuard(self)
    }

    /// Acquires the lock in disjoint-write mode, waiting while a reader
    /// or an exclusive writer holds it.
    fn disjoint_write(&self) -> AccessDisjointWriteGuard<'_> {
        let mut state = self.wait_while_blocked(|current| {
            matches!(current, AccessState::Reading(_) | AccessState::Exclusive)
        });
        if let AccessState::DisjointWriting(writers) = &mut *state {
            *writers += 1;
        } else {
            // Only `Idle` can reach here: become the first disjoint
            // writer.
            *state = AccessState::DisjointWriting(1);
        }
        AccessDisjointWriteGuard(self)
    }

    /// Acquires the lock exclusively, waiting until it is idle.
    fn exclusive(&self) -> AccessExclusiveGuard<'_> {
        let mut state = self.wait_while_blocked(|current| !matches!(current, AccessState::Idle));
        *state = AccessState::Exclusive;
        AccessExclusiveGuard(self)
    }
}

/// Proof of one read-mode hold on an [`AccessLock`]; dropping it gives
/// the hold back.
struct AccessReadGuard<'a>(&'a AccessLock);
impl Drop for AccessReadGuard<'_> {
    fn drop(&mut self) {
        let lock = self.0;
        let mut state = lock.lock_state();
        match &mut *state {
            // Last reader out: return to idle and wake waiters.
            AccessState::Reading(1) => lock.release_to_idle(&mut state),
            AccessState::Reading(remaining) => *remaining -= 1,
            other => unreachable!("AccessReadGuard dropped in non-Reading state: {other:?}"),
        }
    }
}

/// Proof of one disjoint-write hold on an [`AccessLock`]; dropping it
/// gives the hold back.
struct AccessDisjointWriteGuard<'a>(&'a AccessLock);
impl Drop for AccessDisjointWriteGuard<'_> {
    fn drop(&mut self) {
        let lock = self.0;
        let mut state = lock.lock_state();
        match &mut *state {
            // Last disjoint writer out: return to idle and wake waiters.
            AccessState::DisjointWriting(1) => lock.release_to_idle(&mut state),
            AccessState::DisjointWriting(remaining) => *remaining -= 1,
            other => unreachable!(
                "AccessDisjointWriteGuard dropped in non-DisjointWriting state: {other:?}"
            ),
        }
    }
}

/// Proof of the exclusive hold on an [`AccessLock`]; dropping it
/// returns the lock to idle.
struct AccessExclusiveGuard<'a>(&'a AccessLock);
impl Drop for AccessExclusiveGuard<'_> {
    fn drop(&mut self) {
        let lock = self.0;
        let mut state = lock.lock_state();
        debug_assert!(matches!(*state, AccessState::Exclusive));
        lock.release_to_idle(&mut state);
    }
}
335
/// A value that pins a backing memory allocation and can report where
/// that allocation lives and how large it is.
///
/// The described memory region is guaranteed to stay valid for as long
/// as a value implementing this trait exists.
pub trait Keepalive: Send + Sync {
    /// Address of the first byte of the pinned memory region.
    fn addr(&self) -> usize;

    /// Length of the pinned memory region, in bytes.
    fn size(&self) -> usize;

    /// Hands out a [`WeakKeepalive`] that refers to the same underlying
    /// resource without pinning it. The default implementation returns
    /// `None`, for implementors that have no weak form.
    fn downgrade(&self) -> Option<Arc<dyn WeakKeepalive>> {
        None
    }
}

/// The non-pinning counterpart of [`Keepalive`]: a reference to the
/// same underlying resource that can be promoted back to a
/// [`Keepalive`] while the resource is still alive.
pub trait WeakKeepalive: Send + Sync {
    /// Tries to obtain a strong [`Keepalive`] for the underlying
    /// resource; yields `None` once the referent has gone away.
    fn upgrade(&self) -> Option<Arc<dyn Keepalive>>;
}

/// A boxed byte slice pins its own heap buffer: the region starts at the
/// slice's data pointer and spans the slice's length.
impl Keepalive for Box<[u8]> {
    fn addr(&self) -> usize {
        let base: *const u8 = self.as_ptr();
        base as usize
    }

    fn size(&self) -> usize {
        let bytes: &[u8] = self;
        bytes.len()
    }
}
373
374/// Backing state of a [`KeepaliveLocalMemory`].
375///
376/// Holds the addressing/bandwidth metadata, the access-coordination
377/// lock, and a single-slot home for an [`IbvMemoryRegionView`]
378/// registered against this region. Cloning shares the slot and the
379/// access lock by `Arc`, so every handle derived from the same
380/// allocation observes the same registered MR and the same
381/// reader/writer coordination.
382///
383/// All access goes through methods on [`KeepaliveLocalMemory`];
384/// nothing outside the module pokes at these fields directly.
385#[derive(Clone)]
386pub(crate) struct LocalMemoryInner {
387    addr: usize,
388    size: usize,
389    /// Bandwidth (bytes/s) for direct host-thread pointer access, or `None`
390    /// if the memory is not host-accessible.
391    direct_access_host_bandwidth: Option<u64>,
392    /// Bandwidth (bytes/s) for direct device-thread pointer access, or
393    /// `None` if the memory is not device-accessible.
394    direct_access_device_bandwidth: Option<u64>,
395    /// Per-allocation slot for the [`IbvMemoryRegionView`] registered
396    /// against this region. Populated lazily by
397    /// `IbvManagerActor::resolve_local_mr` on first use.
398    mr_slot: Arc<OnceLock<IbvMemoryRegionView>>,
399    /// Coordinates concurrent reads, exclusive writes, and parallel
400    /// disjoint writes against this region.
401    access: Arc<AccessLock>,
402}
403
404impl LocalMemoryInner {
405    fn new(addr: usize, size: usize) -> Self {
406        // TODO(slurye): Using placeholder values for now. Fill in with real values.
407        let (host_bw, device_bw) = if is_device_ptr(addr) {
408            (None, Some(1))
409        } else {
410            (Some(1), None)
411        };
412        Self {
413            addr,
414            size,
415            direct_access_host_bandwidth: host_bw,
416            direct_access_device_bandwidth: device_bw,
417            mr_slot: Arc::new(OnceLock::new()),
418            access: Arc::new(AccessLock::new()),
419        }
420    }
421}
422
423/// Local memory handle that keeps its backing allocation alive via an
424/// [`Arc<dyn Keepalive>`].
425///
426/// Detects at construction time whether the address is a CUDA device
427/// pointer and dispatches `read_at`/`write_at` accordingly.
428///
429/// All three access methods are `unsafe`: the [`Keepalive`] only
430/// guarantees the allocation stays mapped, not that this handle has
431/// unique ownership. The internal [`AccessLock`] coordinates concurrent
432/// callers that share the same clone of this handle (readers run in
433/// parallel, exclusive writers run alone, disjoint writers run in
434/// parallel with one another but exclude readers and exclusive
435/// writers), but callers must additionally rule out concurrent access
436/// through other views of the same allocation.
437///
438/// The `direct_access_host_bandwidth` and `direct_access_device_bandwidth`
439/// fields indicate the speed of reading the memory via pointer dereference
440/// on a host or device thread, respectively. A value of `None` means the
441/// memory is not directly accessible from that context.
442#[derive(Clone)]
443pub struct KeepaliveLocalMemory {
444    inner: LocalMemoryInner,
445    _keepalive: Arc<dyn Keepalive>,
446}
447
448impl Debug for KeepaliveLocalMemory {
449    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
450        f.debug_struct("KeepaliveLocalMemory")
451            .field("addr", &self.inner.addr)
452            .field("size", &self.inner.size)
453            .field(
454                "direct_access_host_bandwidth",
455                &self.inner.direct_access_host_bandwidth,
456            )
457            .field(
458                "direct_access_device_bandwidth",
459                &self.inner.direct_access_device_bandwidth,
460            )
461            .finish_non_exhaustive()
462    }
463}
464
465impl KeepaliveLocalMemory {
466    /// Create a new handle. Derives `addr` and `size` from the
467    /// `keepalive` via [`Keepalive::addr`] /
468    /// [`Keepalive::size`], then probes the CUDA driver to
469    /// determine whether the address is a device pointer and sets the
470    /// bandwidth fields accordingly.
471    pub fn new(keepalive: Arc<dyn Keepalive>) -> Self {
472        let addr = keepalive.addr();
473        let size = keepalive.size();
474        Self {
475            inner: LocalMemoryInner::new(addr, size),
476            _keepalive: keepalive,
477        }
478    }
479
480    /// Starting virtual address of the memory region.
481    pub fn addr(&self) -> usize {
482        self.inner.addr
483    }
484
485    /// Size of the memory region in bytes.
486    pub fn size(&self) -> usize {
487        self.inner.size
488    }
489
490    /// Shared slot for the [`IbvMemoryRegionView`] registered against
491    /// this region. Populated lazily by
492    /// [`IbvManagerActor::resolve_local_mr`] on first use; the slot
493    /// is cloned `Arc` so every handle derived from the same
494    /// allocation sees the same registered MR.
495    pub fn mr_slot(&self) -> &Arc<OnceLock<IbvMemoryRegionView>> {
496        &self.inner.mr_slot
497    }
498
499    /// Copy `dst.len()` bytes from this memory region starting at `offset`
500    /// into `dst`.
501    ///
502    /// Mutually exclusive with both `write_at` and `write_at_disjoint`
503    /// *across clones of this handle*: the [`AccessLock`] guarantees a
504    /// reader and any writer (exclusive or disjoint) that share the
505    /// same lock never observe each other's partial state. Multiple
506    /// concurrent `read_at` calls on shared clones are permitted and
507    /// run in parallel.
508    ///
509    /// # Safety
510    ///
511    /// The [`Keepalive`] guarantees the allocation stays mapped, but it
512    /// does *not* imply unique ownership: another component may hold its
513    /// own view of the same allocation and read or write it concurrently
514    /// outside this handle's [`AccessLock`]. The caller must ensure that
515    /// no such external access produces a torn read of
516    /// `offset..offset + dst.len()` for the duration of this call.
517    pub unsafe fn read_at(&self, offset: usize, dst: &mut [u8]) -> Result<(), anyhow::Error> {
518        let _guard = self.inner.access.read();
519        check_bounds(offset, dst.len(), self.inner.size)?;
520        // SAFETY: the `_keepalive` field keeps the allocation live, the
521        // read guard above excludes concurrent exclusive and disjoint
522        // writers that share this lock, `check_bounds` verified the access
523        // is in range, and the caller upholds the no-external-writer
524        // obligation documented on this method.
525        unsafe {
526            if self.inner.direct_access_host_bandwidth.is_some() {
527                read_cpu(self.inner.addr, offset, dst);
528                Ok(())
529            } else {
530                read_gpu(self.inner.addr, offset, dst)
531            }
532        }
533    }
534
535    /// Copy `src.len()` bytes from `src` into this memory region starting
536    /// at `offset`.
537    ///
538    /// Mutually exclusive with every other read and write against this
539    /// region *across clones of this handle*: the [`AccessLock`] blocks
540    /// concurrent readers and writers that share the same lock. Use
541    /// [`KeepaliveLocalMemory::write_at_disjoint`] when multiple writers
542    /// can be proven to target disjoint byte ranges.
543    ///
544    /// # Safety
545    ///
546    /// See [`KeepaliveLocalMemory::read_at`]. The [`Keepalive`] guarantee
547    /// covers liveness only; the caller must ensure no concurrent
548    /// external reader or writer observes an overlapping byte range.
549    pub unsafe fn write_at(&self, offset: usize, src: &[u8]) -> Result<(), anyhow::Error> {
550        let _guard = self.inner.access.exclusive();
551        check_bounds(offset, src.len(), self.inner.size)?;
552        // SAFETY: the `_keepalive` field keeps the allocation live, the
553        // exclusive guard above excludes every other reader and writer
554        // that shares this lock, `check_bounds` verified the access is
555        // in range, and the caller upholds the no-external-access
556        // obligation documented on this method.
557        unsafe {
558            if self.inner.direct_access_host_bandwidth.is_some() {
559                write_cpu(self.inner.addr, offset, src);
560                Ok(())
561            } else {
562                write_gpu(self.inner.addr, offset, src)
563            }
564        }
565    }
566
567    /// Like [`KeepaliveLocalMemory::write_at`], but allows other
568    /// concurrent `write_at_disjoint` calls (across clones of this
569    /// handle) to proceed in parallel. Still mutually exclusive with
570    /// `read_at` and `write_at` through the [`AccessLock`].
571    ///
572    /// # Safety
573    ///
574    /// In addition to the obligations of
575    /// [`KeepaliveLocalMemory::write_at`] (no external concurrent
576    /// reader or writer of the same byte range), the caller must
577    /// ensure that no other concurrent call to this method targets a
578    /// byte range that overlaps `offset..offset + src.len()`. Disjoint
579    /// byte ranges across concurrent disjoint callers are sound.
580    pub unsafe fn write_at_disjoint(&self, offset: usize, src: &[u8]) -> Result<(), anyhow::Error> {
581        let _guard = self.inner.access.disjoint_write();
582        check_bounds(offset, src.len(), self.inner.size)?;
583        // SAFETY: the `_keepalive` field keeps the allocation live, the
584        // disjoint-write guard above excludes concurrent readers and
585        // exclusive writers that share this lock, `check_bounds`
586        // verified the access is in range, and the caller upholds both
587        // safety obligations documented on this method (no external access,
588        // no overlap with other concurrent disjoint writers).
589        unsafe {
590            if self.inner.direct_access_host_bandwidth.is_some() {
591                write_cpu(self.inner.addr, offset, src);
592                Ok(())
593            } else {
594                write_gpu(self.inner.addr, offset, src)
595            }
596        }
597    }
598
599    /// Pair off a [`WeakLocalMemory`] that shares this handle's
600    /// [`LocalMemoryInner`] (and therefore the same MR slot and
601    /// access lock). Returns `None` when the underlying [`Keepalive`]
602    /// does not provide a weak form.
603    pub fn downgrade(&self) -> Option<WeakLocalMemory> {
604        let weak_keepalive = self._keepalive.downgrade()?;
605        Some(WeakLocalMemory {
606            inner: self.inner.clone(),
607            weak_keepalive,
608        })
609    }
610}
611
612/// Non-pinning counterpart of [`KeepaliveLocalMemory`].
613///
614/// Holds the shared [`LocalMemoryInner`] (so a re-promoted strong
615/// handle sees the same MR slot and access lock) plus a
616/// [`WeakKeepalive`] that can be upgraded to a fresh
617/// [`Arc<dyn Keepalive>`] as long as the referent is still alive.
618#[derive(Clone)]
619pub struct WeakLocalMemory {
620    inner: LocalMemoryInner,
621    weak_keepalive: Arc<dyn WeakKeepalive>,
622}
623
624impl WeakLocalMemory {
625    /// Starting virtual address of the memory region.
626    pub fn addr(&self) -> usize {
627        self.inner.addr
628    }
629
630    /// Size of the memory region in bytes.
631    pub fn size(&self) -> usize {
632        self.inner.size
633    }
634
635    /// Materialize a strong [`KeepaliveLocalMemory`] sharing this
636    /// handle's [`LocalMemoryInner`]. Returns `None` if the
637    /// referent has gone away **or** if its currently-computed
638    /// `(addr, size)` no longer matches the values stored on this
639    /// handle — the latter guarding against the live referent
640    /// describing a different memory region than the one this weak
641    /// handle was paired with at downgrade time.
642    pub fn upgrade(&self) -> Option<KeepaliveLocalMemory> {
643        let keepalive = self.weak_keepalive.upgrade()?;
644        let new_addr = keepalive.addr();
645        let new_size = keepalive.size();
646        if new_addr != self.inner.addr || new_size != self.inner.size {
647            tracing::warn!(
648                expected_addr = self.inner.addr,
649                actual_addr = new_addr,
650                expected_size = self.inner.size,
651                actual_size = new_size,
652                "WeakLocalMemory upgrade rejected: backing keepalive's (addr, size) changed since downgrade",
653            );
654            return None;
655        }
656        Some(KeepaliveLocalMemory {
657            inner: self.inner.clone(),
658            _keepalive: keepalive,
659        })
660    }
661}
662
#[cfg(test)]
mod tests {
    use super::*;

    // -- KeepaliveLocalMemory (host) --

    /// Wraps a host byte buffer in a [`KeepaliveLocalMemory`].
    fn host_keepalive_mem(data: Box<[u8]>) -> KeepaliveLocalMemory {
        let keepalive: Arc<dyn Keepalive> = Arc::new(data);
        KeepaliveLocalMemory::new(keepalive)
    }

    #[test]
    fn keepalive_host_read_at() {
        let mem = host_keepalive_mem(vec![1, 2, 3, 4, 5].into_boxed_slice());
        let mut window = [0u8; 3];
        // SAFETY: `mem` is the only handle to the allocation; no other
        // thread or component holds a view of it.
        let outcome = unsafe { mem.read_at(1, &mut window) };
        outcome.unwrap();
        assert_eq!(window, [2, 3, 4]);
    }

    #[test]
    fn keepalive_host_write_then_read() {
        let mem = host_keepalive_mem(vec![0; 5].into_boxed_slice());
        // SAFETY: `mem` is the only handle to the allocation; no other
        // thread or component holds a view of it.
        let written = unsafe { mem.write_at(1, &[7, 8, 9]) };
        written.unwrap();

        let mut snapshot = [0u8; 5];
        // SAFETY: same as above.
        let read_back = unsafe { mem.read_at(0, &mut snapshot) };
        read_back.unwrap();
        assert_eq!(snapshot, [0, 7, 8, 9, 0]);
    }

    #[test]
    fn keepalive_host_out_of_bounds() {
        let mem = host_keepalive_mem(vec![0; 3].into_boxed_slice());
        let mut window = [0u8; 3];
        // SAFETY: `mem` is the only handle to the allocation; the bounds
        // check fires before any pointer dereference.
        let read_outcome = unsafe { mem.read_at(1, &mut window) };
        assert!(read_outcome.is_err());
        // SAFETY: same as above.
        let write_outcome = unsafe { mem.write_at(1, &[7, 8, 9]) };
        assert!(write_outcome.is_err());
    }
}
monarch_rdma/local_memory.rs

monarch_rdma/
local_memory.rs