monarch_hyperactor/
runtime.rs

1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
9use std::cell::OnceCell as UnsyncOnceCell;
10use std::future::Future;
11use std::pin::Pin;
12use std::sync::Mutex;
13use std::sync::OnceLock;
14use std::sync::atomic::AtomicUsize;
15use std::sync::atomic::Ordering;
16use std::time::Duration;
17
18use anyhow::Result;
19use hyperactor::runtime_identity::RuntimeKind;
20use hyperactor::runtime_identity::shutdown_data_plane_runtimes;
21use hyperactor::runtime_identity::tag_current_thread;
22pub use monarch_gil::GilSite;
23pub use monarch_gil::force_unsanctioned_gil_on_control_plane;
24pub use monarch_gil::get_gil_on_control_plane;
25pub use monarch_gil::monarch_with_gil;
26pub use monarch_gil::monarch_with_gil_blocking;
27pub use monarch_gil::reset_gil_on_control_plane;
28use pyo3::PyResult;
29use pyo3::Python;
30use pyo3::exceptions::PyRuntimeError;
31use pyo3::prelude::*;
32use pyo3::types::PyAnyMethods;
33use pyo3_async_runtimes::TaskLocals;
34use tokio::runtime::Handle;
35use tokio::task;
36
37use crate::config::TOKIO_WORKER_THREADS;
38
39/// Global tokio runtime container.
40///
41/// `handle` is cheap to clone and is what callers receive from
42/// `get_tokio_runtime()`. Holding a `Handle` does not lock anything, so
43/// concurrent block_on calls from different threads do not contend.
44///
45/// `runtime` exists only so the atexit handler can take ownership and
46/// call `shutdown_timeout`. Under normal operation nothing locks it; the
47/// mutex is uncontended at shutdown.
48struct GlobalRuntime {
49    handle: Handle,
50    runtime: Mutex<Option<tokio::runtime::Runtime>>,
51}
52
53static INSTANCE: OnceLock<GlobalRuntime> = OnceLock::new();
54
55fn global_runtime() -> &'static GlobalRuntime {
56    INSTANCE.get_or_init(|| {
57        let worker_threads = hyperactor_config::global::get(TOKIO_WORKER_THREADS);
58        let mut builder = tokio::runtime::Builder::new_multi_thread();
59        if let Some(worker_threads) = worker_threads {
60            builder.worker_threads(worker_threads.get());
61        }
62        let runtime = builder
63            .thread_name_fn(|| {
64                static ATOMIC_ID: AtomicUsize = AtomicUsize::new(0);
65                let id = ATOMIC_ID.fetch_add(1, Ordering::SeqCst);
66                format!("monarch-pytokio-worker-{}", id)
67            })
68            // The shared control-plane runtime: stamp its workers (and
69            // blocking-pool threads) so GIL-entry sites can tell they are on the
70            // control plane. See `hyperactor::runtime_identity`.
71            .on_thread_start(|| tag_current_thread(RuntimeKind::ControlPlane))
72            .enable_all()
73            .build()
74            .unwrap();
75        let handle = runtime.handle().clone();
76        GlobalRuntime {
77            handle,
78            runtime: Mutex::new(Some(runtime)),
79        }
80    })
81}
82
83pub fn get_tokio_runtime() -> Handle {
84    global_runtime().handle.clone()
85}
86
87/// atexit handler that tears down the data-plane runtimes and the global Tokio runtime.
88///
89/// Callers obtain a cloned `Handle` from `get_tokio_runtime()` rather
90/// than a guard, so the `runtime` mutex is uncontended at shutdown. We
91/// can take ownership of the `Runtime` and call `shutdown_timeout`
92/// directly. If a worker thread is still inside `Handle::block_on` on a
93/// future that never resolves (e.g. a non-main thread that cannot
94/// observe SIGINT), `shutdown_timeout` aborts spawned tasks and returns
95/// after at most one second; the stuck worker is then a daemon thread
96/// that CPython kills on interpreter exit.
97#[pyfunction]
98pub fn shutdown_tokio_runtime(py: Python<'_>) {
99    // Called from Python's atexit, which holds the GIL. Release it so tokio
100    // worker threads can acquire it to complete their Python work.
101    py.detach(|| {
102        // Tear down the data-plane runtimes (e.g. rdma) first, while the
103        // control-plane runtime is still intact, so their GIL-taking workers
104        // stop before Py_Finalize.
105        shutdown_data_plane_runtimes(Duration::from_secs(1));
106        let Some(global) = INSTANCE.get() else {
107            return;
108        };
109        let Some(rt) = global.runtime.lock().unwrap().take() else {
110            return;
111        };
112        rt.shutdown_timeout(Duration::from_secs(1));
113    });
114}
115
116/// Stores the native thread ID of the main Python thread.
117/// This is lazily initialized on first call to `is_main_thread`.
118static MAIN_THREAD_NATIVE_ID: OnceLock<i64> = OnceLock::new();
119
120/// Returns the native thread ID of the main Python thread.
121/// On first call, looks it up via `threading.main_thread().native_id`.
122fn get_main_thread_native_id() -> i64 {
123    *MAIN_THREAD_NATIVE_ID.get_or_init(|| {
124        monarch_with_gil_blocking(GilSite::Bootstrap, |py| {
125            let threading = py.import("threading").expect("failed to import threading");
126            let main_thread = threading
127                .call_method0("main_thread")
128                .expect("failed to get main_thread");
129            main_thread
130                .getattr("native_id")
131                .expect("failed to get native_id")
132                .extract::<i64>()
133                .expect("native_id is not an i64")
134        })
135    })
136}
137
138/// Returns the current thread's native ID in a cross-platform way.
139#[cfg(target_os = "linux")]
140fn get_current_thread_id() -> i64 {
141    nix::unistd::gettid().as_raw() as i64
142}
143
/// Returns the calling thread's native ID (macOS: the value reported by
/// `pthread_threadid_np`), converted to `i64`.
///
/// Debug builds assert that the call succeeded and that the ID fits in an
/// `i64`; release builds perform neither check.
#[cfg(target_os = "macos")]
fn get_current_thread_id() -> i64 {
    let mut raw_id: u64 = 0;
    // SAFETY: `raw_id` is a live, writable `u64` for the duration of the call.
    // Passing thread=0 (null pthread_t) asks for the current thread's ID.
    let status = unsafe { libc::pthread_threadid_np(0, &mut raw_id) };
    debug_assert_eq!(
        status, 0,
        "pthread_threadid_np failed with error code: {}",
        status
    );
    // macOS thread IDs are u64, so they have to be narrowed to i64.
    debug_assert!(
        raw_id <= i64::MAX as u64,
        "thread ID {} exceeds i64::MAX",
        raw_id
    );
    raw_id as i64
}
161
// `get_current_thread_id` has no implementation for other platforms; fail the
// build with a clear message instead of an unresolved-name error at its call
// sites. (A plain comment is used here because doc comments are not rendered
// for macro invocations.)
#[cfg(not(any(target_os = "linux", target_os = "macos")))]
compile_error!("get_current_thread_id is only implemented for Linux and macOS");
165
166/// Returns true if the current thread is the main Python thread.
167/// Compares the current thread's native ID against the main Python thread's native ID.
168pub fn is_main_thread() -> bool {
169    let current_tid = get_current_thread_id();
170    current_tid == get_main_thread_native_id()
171}
172
173pub fn initialize(py: Python) -> Result<()> {
174    // Eagerly initialize the main thread ID while we're on the main thread
175    // with the GIL held. If this were lazily initialized on a background
176    // tokio thread during shutdown, the `py.import("threading")` call inside
177    // get_main_thread_native_id() would trigger module_from_spec on a
178    // partially-finalized interpreter, causing a segfault.
179    let _ = get_main_thread_native_id();
180
181    let atexit = py.import("atexit")?;
182    let shutdown_fn = wrap_pyfunction!(shutdown_tokio_runtime, py)?;
183    atexit.call_method1("register", (shutdown_fn,))?;
184    Ok(())
185}
186
187/// Block the current thread on a future, but make sure to check for signals
188/// originating from the Python signal handler.
189///
190/// Python's signal handler just sets a flag that it expects the Python
191/// interpreter to handle later via a call to `PyErr_CheckSignals`. When we
192/// enter into potentially long-running native code, we need to make sure to be
193/// checking for signals frequently, otherwise we will ignore them. This will
194/// manifest as `ctrl-C` not doing anything.
195///
196/// One additional wrinkle is that `PyErr_CheckSignals` only works on the main
197/// Python thread; if it's called on any other thread it silently does nothing.
198/// So, we check if we're on the main thread by comparing native thread IDs.
199pub fn signal_safe_block_on<F>(py: Python, future: F) -> PyResult<F::Output>
200where
201    F: Future + Send + 'static,
202    F::Output: Send + 'static,
203{
204    let runtime = get_tokio_runtime();
205    // Release the GIL, otherwise the work in `future` that tries to acquire the
206    // GIL on another thread may deadlock.
207    py.detach(|| {
208        if is_main_thread() {
209            // Spawn the future onto the tokio runtime
210            let handle = runtime.spawn(future);
211            // Block the current thread on waiting for *either* the future to
212            // complete or a signal.
213            runtime.block_on(async {
214                tokio::select! {
215                    result = handle => result.map_err(|e| PyRuntimeError::new_err(format!("JoinErr: {:?}", e))),
216                    signal = async {
217                        let sleep_for = std::time::Duration::from_millis(100);
218                        loop {
219                            // Acquiring the GIL in a loop is sad, hopefully once
220                            // every 100ms is fine.
221                            monarch_with_gil_blocking(GilSite::AwaitDrive, |py| py.check_signals())?;
222                            tokio::time::sleep(sleep_for).await;
223                        }
224                    } => signal
225                }
226            })
227        } else {
228            // If we're not on the main thread, we can just block it. We've
229            // released the GIL, so the Python main thread will continue on, and
230            // `PyErr_CheckSignals` doesn't do anything anyway.
231            Ok(runtime.block_on(future))
232        }
233    })
234}
235
236/// A test function that sleeps indefinitely in a loop.
237/// This is used for testing signal handling in signal_safe_block_on.
238/// The function will sleep forever until interrupted by a signal.
239#[pyfunction]
240pub fn sleep_indefinitely_for_unit_tests(py: Python) -> PyResult<()> {
241    // Create a future that sleeps indefinitely
242    let future = async {
243        loop {
244            tracing::info!("idef sleeping for 100ms");
245            tokio::time::sleep(Duration::from_millis(100)).await;
246        }
247    };
248
249    // Use signal_safe_block_on to run the future, which should make it
250    // interruptible by signals like SIGINT
251    signal_safe_block_on(py, future)
252}
253
254/// Initialize the runtime module and expose Python functions
255pub fn register_python_bindings(runtime_mod: &Bound<'_, PyModule>) -> PyResult<()> {
256    let sleep_indefinitely_fn =
257        wrap_pyfunction!(sleep_indefinitely_for_unit_tests, runtime_mod.py())?;
258    sleep_indefinitely_fn.setattr(
259        "__module__",
260        "monarch._rust_bindings.monarch_hyperactor.runtime",
261    )?;
262    runtime_mod.add_function(sleep_indefinitely_fn)?;
263
264    let get_gil_on_control_plane_fn = wrap_pyfunction!(get_gil_on_control_plane, runtime_mod.py())?;
265    get_gil_on_control_plane_fn.setattr(
266        "__module__",
267        "monarch._rust_bindings.monarch_hyperactor.runtime",
268    )?;
269    runtime_mod.add_function(get_gil_on_control_plane_fn)?;
270
271    let reset_gil_on_control_plane_fn =
272        wrap_pyfunction!(reset_gil_on_control_plane, runtime_mod.py())?;
273    reset_gil_on_control_plane_fn.setattr(
274        "__module__",
275        "monarch._rust_bindings.monarch_hyperactor.runtime",
276    )?;
277    runtime_mod.add_function(reset_gil_on_control_plane_fn)?;
278
279    let force_unsanctioned_gil_on_control_plane_fn =
280        wrap_pyfunction!(force_unsanctioned_gil_on_control_plane, runtime_mod.py())?;
281    force_unsanctioned_gil_on_control_plane_fn.setattr(
282        "__module__",
283        "monarch._rust_bindings.monarch_hyperactor.runtime",
284    )?;
285    runtime_mod.add_function(force_unsanctioned_gil_on_control_plane_fn)?;
286
287    Ok(())
288}
289
290struct SimpleRuntime;
291
292impl pyo3_async_runtimes::generic::Runtime for SimpleRuntime {
293    type JoinError = task::JoinError;
294    type JoinHandle = task::JoinHandle<()>;
295
296    fn spawn<F>(fut: F) -> Self::JoinHandle
297    where
298        F: Future<Output = ()> + Send + 'static,
299    {
300        get_tokio_runtime().spawn(async move {
301            fut.await;
302        })
303    }
304}
305
306tokio::task_local! {
307    static TASK_LOCALS: UnsyncOnceCell<TaskLocals>;
308}
309
310impl pyo3_async_runtimes::generic::ContextExt for SimpleRuntime {
311    fn scope<F, R>(locals: TaskLocals, fut: F) -> Pin<Box<dyn Future<Output = R> + Send>>
312    where
313        F: Future<Output = R> + Send + 'static,
314    {
315        let cell = UnsyncOnceCell::new();
316        cell.set(locals).unwrap();
317
318        Box::pin(TASK_LOCALS.scope(cell, fut))
319    }
320
321    fn get_task_locals() -> Option<TaskLocals> {
322        TASK_LOCALS
323            .try_with(|c| {
324                c.get().map(|locals| {
325                    monarch_with_gil_blocking(GilSite::TaskLocals, |py| locals.clone_ref(py))
326                })
327            })
328            .unwrap_or_default()
329    }
330}
331
332pub fn future_into_py<F, T>(py: Python, fut: F) -> PyResult<Bound<PyAny>>
333where
334    F: Future<Output = PyResult<T>> + Send + 'static,
335    T: for<'py> IntoPyObject<'py>,
336{
337    pyo3_async_runtimes::generic::future_into_py::<SimpleRuntime, F, T>(py, fut)
338}
339
#[cfg(test)]
mod tests {
    use hyperactor::runtime_identity::RuntimeKind;
    use hyperactor::runtime_identity::current_runtime_kind;

    use super::*;

    /// Tasks spawned on the shared control-plane runtime must run on threads
    /// stamped `ControlPlane`.
    #[test]
    fn global_runtime_workers_are_control_plane() {
        let rt = get_tokio_runtime();
        let observed = rt
            .block_on(rt.spawn(async { current_runtime_kind() }))
            .unwrap();
        assert_eq!(observed, Some(RuntimeKind::ControlPlane));
    }

    /// `on_thread_start` also covers the blocking pool, so GIL work on a
    /// `spawn_blocking` thread is still seen as control-plane.
    #[test]
    fn global_runtime_blocking_pool_is_control_plane() {
        let rt = get_tokio_runtime();
        let observed = rt
            .block_on(rt.spawn_blocking(current_runtime_kind))
            .unwrap();
        assert_eq!(observed, Some(RuntimeKind::ControlPlane));
    }
}
monarch_hyperactor/runtime.rs

monarch_hyperactor/
runtime.rs