hyperactor/
panic_handler.rs

1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
9//! Captures the backtrace of a panic and stores it in a task_local, so that
10//! it can be retrieved later, once the panic has been caught.
11
12use std::backtrace::Backtrace;
13use std::cell::RefCell;
14use std::future::Future;
15use std::ops::Deref;
16use std::panic;
17
18tokio::task_local! {
19    /// Task-local slot holding the formatted backtrace of the latest panic
20    /// recorded in the current scope; `None` when no backtrace is stored.
21    static BACKTRACE: RefCell<Option<String>>;
22}
23
24/// Call this from the main method of your application, and use it in conjunction
25/// with [[with_backtrace_tracking]] and [[take_panic_backtrace]], in order to
26/// capture the backtrace from a panic.
27pub fn set_panic_hook() {
28    panic::update_hook(move |prev, info| {
29        // Ignore AccessError, which would happen if panic occurred outside of
30        // BACKTRACE's scope.
31        let backtrace = Backtrace::force_capture();
32        let loc = info.location().map_or_else(
33            || "unavailable".to_owned(),
34            |loc: &panic::Location<'_>| format!("{}:{}:{}", loc.file(), loc.line(), loc.column()),
35        );
36        let _result = BACKTRACE.try_with(|entry| match entry.try_borrow_mut() {
37            Ok(mut entry_ref) => {
38                *entry_ref = Some(format!("panicked at {loc}\n{backtrace}"));
39            }
40            Err(borrow_mut_error) => {
41                eprintln!(
42                    "failed to store backtrace to task_local: {:?}",
43                    borrow_mut_error
44                );
45            }
46        });
47        tracing::error!("stacktrace"=%backtrace, "panic at {loc}");
48
49        // Execute the default hood to preserve the default behavior.
50        prev(info);
51    });
52}
53
54/// Set a task_local variable for this future f, so any panic occurred in f can
55/// be stored and retrieved later.
56pub async fn with_backtrace_tracking<F>(f: F) -> F::Output
57where
58    F: Future,
59{
60    BACKTRACE.scope(RefCell::new(None), f).await
61}
62
63/// Take the backtrace from the task_local variable, and reset the task_local to
64/// None. Return error if the backtrace is not stored, or cannot be retrieved.
65pub fn take_panic_backtrace() -> Result<String, anyhow::Error> {
66    BACKTRACE.try_with(|entry| {
67        entry.try_borrow_mut().map(|mut entry_ref| {
68            let result = match entry_ref.deref() {
69                Some(bt) => Ok(bt.to_string()),
70                None => Err(anyhow::anyhow!("nothing is stored in task_local")),
71            };
72            // Clear the task_local because the backtrace has been retrieve.
73            if result.is_ok() {
74                *entry_ref = None;
75            }
76            result
77        })
78    })??
79}
80
#[cfg(test)]
mod tests {
    use futures::FutureExt;

    use super::*;

    /// Panics inside a future and catches the unwind, so the calling test keeps
    /// running after the panic has happened.
    async fn execute_panic() {
        let exploding = async {
            panic!("boom!");
        };
        let outcome = exploding.catch_unwind().await;
        assert!(outcome.is_err());
    }

    /// With the hook installed and tracking enabled, the backtrace can be taken
    /// exactly once, and only from inside the tracking scope.
    #[tokio::test]
    async fn test_with_tracking() {
        set_panic_hook();

        let tracked = async {
            execute_panic().await;
            // The first take succeeds.
            assert!(take_panic_backtrace().is_ok());
            // The second take fails because the first one reset the task_local.
            assert!(take_panic_backtrace().is_err());
        };
        with_backtrace_tracking(tracked).await;

        // We are outside of the task_local's scope here, so nothing can be
        // taken.
        assert!(take_panic_backtrace().is_err());
    }

    /// With the hook installed but no tracking scope, nothing can be taken.
    #[tokio::test]
    async fn test_without_tracking() {
        set_panic_hook();

        execute_panic().await;
        // The task_local is not set, so there is no backtrace to take.
        assert!(take_panic_backtrace().is_err());
    }

    /// With a tracking scope but without the custom hook, nothing is stored.
    ///
    /// NOTE(review): the panic hook is process-global, so this expectation
    /// presumably relies on no other test in the same process having called
    /// `set_panic_hook()` beforehand — confirm against the test runner setup.
    #[tokio::test]
    async fn test_without_init() {
        // set_panic_hook() is deliberately not called.
        let tracked = async {
            execute_panic().await;
            // No custom panic hook means no backtrace was stored.
            assert!(take_panic_backtrace().is_err());
        };
        with_backtrace_tracking(tracked).await;
    }

    /// Spawned tasks only see a backtrace when they set up their own tracking
    /// scope, and the outer scope keeps the backtrace of its own panic.
    #[tokio::test]
    async fn test_nested_tasks() {
        /// Panics, catches the unwind, then checks whether a backtrace naming
        /// this function is available (`backtrace_captured == true`) or not.
        async fn verify_inner_panic(backtrace_captured: bool) {
            let exploding = async {
                panic!("wow!");
            };
            let outcome = exploding.catch_unwind().await;
            assert!(outcome.is_err());

            if !backtrace_captured {
                assert!(take_panic_backtrace().is_err());
                return;
            }
            let captured = take_panic_backtrace().unwrap();
            assert!(captured.contains("verify_inner_panic"));
        }

        set_panic_hook();

        let outer = async {
            execute_panic().await;

            // A nested task without tracking cannot get a backtrace.
            let untracked = tokio::task::spawn(verify_inner_panic(false));
            assert!(untracked.await.is_ok());

            // A nested task with tracking gets its own backtrace.
            let tracked =
                tokio::task::spawn(with_backtrace_tracking(verify_inner_panic(true)));
            assert!(tracked.await.is_ok());

            // The outer task still gets the backtrace of its own panic.
            let captured = take_panic_backtrace().unwrap();
            assert!(captured.contains("test_nested_tasks"));
        };
        with_backtrace_tracking(outer).await;
    }
}