// monarch_distributed_telemetry/pyspy_table.rs

/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

//! DataFusion table schemas for py-spy stack trace data.
//!
//! Four normalized tables matching the structures in `hyperactor_mesh::pyspy`:
//! - `pyspy_dumps`: one row per dump (top-level `PySpyResult::Ok` metadata)
//! - `pyspy_stack_traces`: one row per thread (matches `PySpyStackTrace`)
//! - `pyspy_frames`: one row per frame (matches `PySpyFrame`)
//! - `pyspy_local_variables`: one row per local variable (matches `PySpyLocalVariable`)

use monarch_record_batch::RecordBatchRow;

19/// Row data for the pyspy_dumps table.
20#[derive(RecordBatchRow)]
21pub struct PySpyDump {
22    /// Caller-provided identifier. Uniqueness and semantics are the caller's
23    /// responsibility (typically a UUID).
24    pub dump_id: String,
25    /// Ingestion timestamp, not the py-spy capture time. We record when the
26    /// result was stored rather than when the snapshot was taken because the
27    /// py-spy JSON does not carry a capture timestamp.
28    pub timestamp_us: i64,
29    pub pid: i32,
30    pub binary: String,
31    pub proc_ref: String,
32}
33
34/// Row data for the pyspy_stack_traces table.
35/// Matches `hyperactor_mesh::pyspy::PySpyStackTrace`.
36#[derive(RecordBatchRow)]
37pub struct PySpyStackTrace {
38    pub dump_id: String,
39    pub pid: i32,
40    pub thread_id: u64,
41    pub thread_name: Option<String>,
42    pub os_thread_id: Option<u64>,
43    pub active: bool,
44    pub owns_gil: bool,
45}
46
47/// Row data for the pyspy_frames table.
48/// Matches `hyperactor_mesh::pyspy::PySpyFrame`.
49#[derive(RecordBatchRow)]
50pub struct PySpyFrame {
51    pub dump_id: String,
52    pub thread_id: u64,
53    pub frame_depth: i32,
54    pub name: String,
55    pub filename: String,
56    pub module: Option<String>,
57    pub short_filename: Option<String>,
58    pub line: i32,
59    pub is_entry: bool,
60}
61
62/// Row data for the pyspy_local_variables table.
63/// Matches `hyperactor_mesh::pyspy::PySpyLocalVariable`.
64#[derive(RecordBatchRow)]
65pub struct PySpyLocalVariable {
66    pub dump_id: String,
67    pub thread_id: u64,
68    pub frame_depth: i32,
69    pub name: String,
70    pub addr: u64,
71    pub arg: bool,
72    pub repr: Option<String>,
73}