hyperactor_config/
global.rs

1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
9//! Global layered configuration for Hyperactor.
10//!
11//! This module provides the process-wide configuration store and APIs
12//! to access it. Configuration values are resolved via a **layered
13//! model**: `TestOverride → Env → Runtime → File → ClientOverride →
14//! Default`.
15//!
16//! - Reads (`get`, `get_cloned`) consult layers in that order, falling
17//!   back to defaults if no explicit value is set.
18//! - `attrs()` returns a complete snapshot of all CONFIG-marked keys at
19//!   call time: it materializes defaults for keys not set in any layer.
20//!   Keys without @meta(CONFIG = …) are excluded.
21//! - In tests, `lock()` and `override_key` allow temporary overrides
22//!   that are removed automatically when the guard drops.
23//! - In normal operation, a parent process can capture its effective
24//!   config via `attrs()` and pass that snapshot to a child during
25//!   bootstrap. The child installs it as a `ClientOverride` layer.
26//!   Note that Env and Runtime layers will take precedence over this
27//!   inherited configuration.
28//!
29//! This design provides flexibility (easy test overrides, runtime
30//! updates, YAML/Env baselines) while ensuring type safety and
31//! predictable resolution order.
32//!
33//! # Testing
34//!
35//! Tests can override global configuration using [`lock`]. This
36//! ensures such tests are serialized (and cannot clobber each other's
37//! overrides).
38//!
39//! ```ignore
40//! #[test]
41//! fn test_my_feature() {
42//!     let config = hyperactor::config::global::lock();
43//!     let _guard = config.override_key(SOME_CONFIG_KEY, test_value);
44//!     // ... test logic here ...
45//! }
46//! ```
47use std::collections::HashMap;
48use std::marker::PhantomData;
49use std::path::Path;
50use std::sync::Arc;
51use std::sync::LazyLock;
52use std::sync::RwLock;
53use std::sync::atomic::AtomicU64;
54use std::sync::atomic::Ordering;
55
56use arc_swap::ArcSwap;
57
58use crate::CONFIG;
59use crate::attrs::AttrKeyInfo;
60use crate::attrs::AttrValue;
61use crate::attrs::Attrs;
62use crate::attrs::Key;
63use crate::from_env;
64use crate::from_yaml;
65
/// Identifies which configuration layer a value belongs to.
///
/// Lookups consult sources from highest to lowest precedence:
/// **TestOverride -> Env -> Runtime -> File -> ClientOverride**, and
/// fall back to the key's declared default when no source sets it.
///
/// Note that the declaration order of the variants below is *not* the
/// precedence order. Precedence is defined solely by [`priority`]
/// (smaller number = higher precedence).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Source {
    /// The configuration snapshot a client sends to a proc during
    /// bootstrap. Lowest precedence of all explicit sources.
    ClientOverride,
    /// Values read from a configuration file (e.g., YAML).
    File,
    /// Values taken from process environment variables. Beats
    /// Runtime, File, and ClientOverride; loses only to
    /// TestOverride.
    Env,
    /// Values installed programmatically while the process runs.
    /// Beats File and ClientOverride; loses to Env and TestOverride.
    Runtime,
    /// Short-lived values installed by tests through
    /// `ConfigLock::override_key`. Highest precedence; removed when
    /// the override guard is dropped.
    TestOverride,
}
91
92/// Return the numeric priority for a source.
93///
94/// Smaller number = higher precedence. Matches the documented order:
95/// TestOverride (0) -> Env (1) -> Runtime (2) -> File (3) ->
96/// ClientOverride (4).
97fn priority(s: Source) -> u8 {
98    match s {
99        Source::TestOverride => 0,
100        Source::Env => 1,
101        Source::Runtime => 2,
102        Source::File => 3,
103        Source::ClientOverride => 4,
104    }
105}
106
107/// The full set of configuration layers in priority order.
108///
109/// `Layers` wraps a vector of [`Layer`]s, always kept sorted by
110/// [`priority`] (lowest number = highest precedence).
111///
112/// Resolution (`get`, `get_cloned`, `attrs`) consults `ordered` from
113/// front to back, returning the first value found for each key and
114/// falling back to defaults if none are set in any layer.
115struct Layers {
116    /// Kept sorted by `priority` (lowest number first = highest
117    /// priority).
118    ordered: Vec<Layer>,
119}
120
121/// A single configuration layer in the global configuration model.
122///
123/// Layers are consulted in priority order (`TestOverride → Env →
124/// Runtime → File → ClientOverride → Default`) when resolving
125/// configuration values. Each variant holds an [`Attrs`] map of
126/// key/value pairs.
127///
128/// The `TestOverride` variant additionally maintains per-key override
129/// stacks to support nested and out-of-order test overrides.
130///
131/// Variants:
132/// - [`Layer::ClientOverride`] - Values set by the config snapshot
133///   sent from the client during proc bootstrap.
134/// - [`Layer::File`] — Values loaded from configuration files.
135/// - [`Layer::Env`] — Values sourced from process environment
136///   variables.
137/// - [`Layer::Runtime`] — Programmatically set runtime overrides.
138/// - [`Layer::TestOverride`] — Temporary in-test overrides applied
139///   under [`ConfigLock`].
140///
141/// Layers are stored in [`Layers::ordered`], kept sorted by their
142/// effective [`Source`] priority (`TestOverride` first, `Default`
143/// last).
144enum Layer {
145    /// Values set by the config snapshot sent from the client
146    /// during proc bootstrap.
147    ClientOverride(Attrs),
148
149    /// Values loaded from configuration files.
150    File(Attrs),
151
152    /// Values read from process environment variables. Typically
153    /// installed at startup via [`init_from_env`].
154    Env(Attrs),
155
156    /// Values set programmatically at runtime. High-priority layer
157    /// used for dynamic updates (e.g., Python `configure()` API), but
158    /// overridden by Env and TestOverride layers.
159    Runtime(Attrs),
160
161    /// Ephemeral values inserted during tests via
162    /// [`ConfigLock::override_key`]. Always takes precedence over all
163    /// other layers. Holds both the active `attrs` map and a per-key
164    /// `stacks` table to support nested and out-of-order test
165    /// overrides.
166    TestOverride {
167        attrs: Attrs,
168        stacks: HashMap<&'static str, OverrideStack>,
169    },
170}
171
172/// A per-key stack of test overrides used by the
173/// [`Layer::TestOverride`] layer.
174///
175/// Each stack tracks the sequence of active overrides applied to a
176/// single configuration key. The topmost frame represents the
177/// currently effective override; earlier frames represent older
178/// (still live) guards that may drop out of order.
179///
180/// Fields:
181/// - `env_var`: The associated process environment variable name, if
182///   any.
183/// - `saved_env`: The original environment variable value before the
184///   first override was applied (or `None` if it did not exist).
185/// - `frames`: The stack of active override frames, with the top
186///   being the last element in the vector.
187struct OverrideStack {
188    /// The name of the process environment variable associated with
189    /// this configuration key, if any.
190    ///
191    /// Used to mirror changes to the environment when overrides are
192    /// applied or removed. `None` if the key has no
193    /// `CONFIG.env_name`.
194    env_var: Option<String>,
195
196    /// The original value of the environment variable before the
197    /// first override was applied.
198    ///
199    /// Stored so it can be restored once the last frame is dropped.
200    /// `None` means the variable did not exist prior to overriding.
201    saved_env: Option<String>,
202
203    /// The sequence of active override frames for this key.
204    ///
205    /// Each frame represents one active test override; the last
206    /// element (`frames.last()`) is the current top-of-stack and
207    /// defines the effective value seen in the configuration and
208    /// environment.
209    frames: Vec<OverrideFrame>,
210}
211
212/// A single entry in a per-key override stack.
213///
214/// Each `OverrideFrame` represents one active test override applied
215/// via [`ConfigLock::override_key`]. Frames are uniquely identified
216/// by a monotonically increasing token and record both the value
217/// being overridden and its string form for environment mirroring.
218///
219/// When a guard drops, its frame is removed from the stack; if it was
220/// the top, the next frame (if any) becomes active, or the original
221/// environment is restored when the stack becomes empty.
222struct OverrideFrame {
223    /// A unique, monotonically increasing identifier for this
224    /// override frame.
225    ///
226    /// Used to associate a dropping [`ConfigValueGuard`] with its
227    /// corresponding entry in the stack, even if drops occur out of
228    /// order.
229    token: u64,
230
231    /// The serialized configuration value active while this frame is
232    /// on top of its stack.
233    ///
234    /// Stored as a boxed [`SerializableValue`] to match how values
235    /// are kept within [`Attrs`].
236    value: Box<dyn crate::attrs::SerializableValue>,
237
238    /// Pre-rendered string form of the value, used for environment
239    /// variable updates when this frame becomes active.
240    ///
241    /// Avoids recomputing `value.display()` on every push or pop.
242    env_str: String,
243}
244
245/// Return the [`Source`] corresponding to a given [`Layer`].
246///
247/// This provides a uniform way to retrieve a layer's logical source
248/// (File, Env, Runtime, TestOverride, or ClientOverride) regardless
249/// of its internal representation. Used for sorting layers by
250/// priority and for source-based lookups or removals.
251fn layer_source(l: &Layer) -> Source {
252    match l {
253        Layer::File(_) => Source::File,
254        Layer::Env(_) => Source::Env,
255        Layer::Runtime(_) => Source::Runtime,
256        Layer::TestOverride { .. } => Source::TestOverride,
257        Layer::ClientOverride(_) => Source::ClientOverride,
258    }
259}
260
261/// Return an immutable reference to the [`Attrs`] contained in a
262/// [`Layer`].
263///
264/// This abstracts over the specific layer variant so callers can read
265/// configuration values uniformly without needing to pattern-match on
266/// the layer type. For `TestOverride`, this returns the current
267/// top-level attributes reflecting the active overrides.
268fn layer_attrs(l: &Layer) -> &Attrs {
269    match l {
270        Layer::File(a) | Layer::Env(a) | Layer::Runtime(a) | Layer::ClientOverride(a) => a,
271        Layer::TestOverride { attrs, .. } => attrs,
272    }
273}
274
275/// Return a mutable reference to the [`Attrs`] contained in a
276/// [`Layer`].
277///
278/// This allows callers to modify configuration values within any
279/// layer without needing to pattern-match on its variant. For
280/// `TestOverride`, the returned [`Attrs`] always reflect the current
281/// top-of-stack overrides for each key.
282fn layer_attrs_mut(l: &mut Layer) -> &mut Attrs {
283    match l {
284        Layer::File(a) | Layer::Env(a) | Layer::Runtime(a) | Layer::ClientOverride(a) => a,
285        Layer::TestOverride { attrs, .. } => attrs,
286    }
287}
288
289impl Layers {
290    // Mutation methods:
291
292    /// Insert or replace a configuration layer for the given source.
293    ///
294    /// If a layer with the same [`Source`] already exists, its contents
295    /// are replaced with the provided `attrs`. Otherwise a new layer is
296    /// added. After insertion, layers are re-sorted so that
297    /// higher-priority sources (e.g. [`Source::TestOverride`],
298    /// [`Source::Env`]) appear before lower-priority ones
299    /// ([`Source::Runtime`], [`Source::File`]).
300    fn set(&mut self, source: Source, attrs: Attrs) {
301        if let Some(l) = self.ordered.iter_mut().find(|l| layer_source(l) == source) {
302            *layer_attrs_mut(l) = attrs;
303        } else {
304            self.ordered.push(make_layer(source, attrs));
305        }
306        self.ordered.sort_by_key(|l| priority(layer_source(l)));
307    }
308
309    /// Insert or update a configuration layer for the given [`Source`].
310    ///
311    /// If a layer with the same [`Source`] already exists, its attributes
312    /// are **updated in place**: all keys present in `attrs` are absorbed
313    /// into the existing layer, overwriting any previous values for those
314    /// keys while leaving all other keys in that layer unchanged.
315    ///
316    /// If no layer for `source` exists yet, this behaves like [`set`]: a
317    /// new layer is created with the provided `attrs`.
318    fn merge(&mut self, source: Source, attrs: Attrs) {
319        if let Some(layer) = self.ordered.iter_mut().find(|l| layer_source(l) == source) {
320            layer_attrs_mut(layer).merge(attrs);
321        } else {
322            self.ordered.push(make_layer(source, attrs));
323        }
324        self.ordered.sort_by_key(|l| priority(layer_source(l)));
325    }
326
327    /// Remove the configuration layer for the given [`Source`], if
328    /// present.
329    ///
330    /// After this call, values from that source will no longer contribute
331    /// to resolution in [`get`], [`get_cloned`], or [`attrs`]. Defaults
332    /// and any remaining layers continue to apply in their normal
333    /// priority order.
334    fn clear(&mut self, source: Source) {
335        self.ordered.retain(|l| layer_source(l) != source);
336    }
337
338    /// Reset the global configuration to only Defaults (for testing).
339    ///
340    /// This clears all explicit layers (`File`, `Env`, `Runtime`,
341    /// `ClientOverride`, and `TestOverride`). Subsequent lookups will
342    /// resolve keys entirely from their declared defaults.
343    fn reset(&mut self) {
344        self.ordered.clear();
345    }
346
347    // Read methods:
348
349    /// Return a complete, merged snapshot of the effective configuration
350    /// **(only keys marked with `@meta(CONFIG = ...)`)**.
351    ///
352    /// Resolution per key:
353    /// 1) First explicit value found in layers (TestOverride → Env →
354    ///    Runtime → File → ClientOverride).
355    /// 2) Otherwise, the key's default (if any).
356    fn materialize(&self) -> Attrs {
357        let mut merged = Attrs::new();
358        for info in inventory::iter::<AttrKeyInfo>() {
359            if info.meta.get(CONFIG).is_none() {
360                continue;
361            }
362            let name = info.name;
363            let mut chosen: Option<Box<dyn crate::attrs::SerializableValue>> = None;
364            for layer in &self.ordered {
365                if let Some(v) = layer_attrs(layer).get_value_by_name(name) {
366                    chosen = Some(v.cloned());
367                    break;
368                }
369            }
370            let boxed = match chosen {
371                Some(b) => b,
372                None => {
373                    if let Some(default) = info.default {
374                        default.cloned()
375                    } else {
376                        continue;
377                    }
378                }
379            };
380            merged.insert_value_by_name_unchecked(name, boxed);
381        }
382        merged
383    }
384
385    /// Return a complete, merged snapshot of the effective configuration
386    /// **(only keys marked with `@meta(CONFIG = ...)` and `propagate: true`)**.
387    ///
388    /// This is similar to [`materialize`] but excludes keys with
389    /// `propagate: false`. Use this when sending config to child processes
390    /// via `BootstrapProcManager`.
391    fn materialize_propagatable(&self) -> Attrs {
392        let mut merged = Attrs::new();
393        for info in inventory::iter::<AttrKeyInfo>() {
394            let Some(cfg_meta) = info.meta.get(CONFIG) else {
395                continue;
396            };
397            if !cfg_meta.propagate {
398                continue;
399            }
400            let name = info.name;
401            let mut chosen: Option<Box<dyn crate::attrs::SerializableValue>> = None;
402            for layer in &self.ordered {
403                if let Some(v) = layer_attrs(layer).get_value_by_name(name) {
404                    chosen = Some(v.cloned());
405                    break;
406                }
407            }
408            let boxed = match chosen {
409                Some(b) => b,
410                None => {
411                    if let Some(default) = info.default {
412                        default.cloned()
413                    } else {
414                        continue;
415                    }
416                }
417            };
418            merged.insert_value_by_name_unchecked(name, boxed);
419        }
420        merged
421    }
422
423    /// Return a snapshot of the attributes for a specific configuration
424    /// source.
425    ///
426    /// If a layer with the given [`Source`] exists, this clones and
427    /// returns its [`Attrs`]. Otherwise an empty [`Attrs`] is returned.
428    fn layer_attrs_for(&self, source: Source) -> Attrs {
429        if let Some(layer) = self.ordered.iter().find(|l| layer_source(l) == source) {
430            layer_attrs(layer).clone()
431        } else {
432            Attrs::new()
433        }
434    }
435
436    // Test override support:
437
438    /// Ensure TestOverride layer exists, return mutable access to its
439    /// attrs and stacks.
440    fn ensure_test_override(&mut self) -> (&mut Attrs, &mut HashMap<&'static str, OverrideStack>) {
441        let idx = if let Some(i) = self
442            .ordered
443            .iter()
444            .position(|l| matches!(l, Layer::TestOverride { .. }))
445        {
446            i
447        } else {
448            self.ordered.push(Layer::TestOverride {
449                attrs: Attrs::new(),
450                stacks: HashMap::new(),
451            });
452            self.ordered.sort_by_key(|l| priority(layer_source(l)));
453            self.ordered
454                .iter()
455                .position(|l| matches!(l, Layer::TestOverride { .. }))
456                .expect("just inserted TestOverride layer")
457        };
458        match &mut self.ordered[idx] {
459            Layer::TestOverride { attrs, stacks } => (attrs, stacks),
460            _ => unreachable!(),
461        }
462    }
463
464    /// Get mutable access to TestOverride layer if it exists.
465    fn test_override_mut(
466        &mut self,
467    ) -> Option<(&mut Attrs, &mut HashMap<&'static str, OverrideStack>)> {
468        let idx = self
469            .ordered
470            .iter()
471            .position(|l| matches!(l, Layer::TestOverride { .. }))?;
472        match &mut self.ordered[idx] {
473            Layer::TestOverride { attrs, stacks } => Some((attrs, stacks)),
474            _ => None,
475        }
476    }
477
478    /// Remove the TestOverride layer entirely.
479    fn remove_test_override(&mut self) {
480        self.ordered
481            .retain(|l| !matches!(l, Layer::TestOverride { .. }));
482    }
483}
484
485/// Global configuration state combining layers and materialized
486/// snapshot.
487///
488/// This struct holds both the mutable layer stack (protected by
489/// RwLock) and the pre-materialized snapshot (in ArcSwap) together to
490/// avoid initialization order dependencies.
491struct GlobalConfig {
492    /// The layered configuration store.
493    layers: RwLock<Layers>,
494    /// Pre-materialized snapshot for lock-free reads.
495    materialized: ArcSwap<Attrs>,
496}
497
498/// Global layered configuration store.
499///
500/// This is the single authoritative store for configuration in the
501/// process. It is always present, protected by an `RwLock`, and holds
502/// a [`Layers`] struct containing all active sources.
503///
504/// On startup it is seeded with a single [`Source::Env`] layer
505/// (values loaded from process environment variables). Additional
506/// layers can be installed later via [`set`] or cleared with
507/// [`clear`]. Reads (`get`, `get_cloned`, `attrs`) consult the layers
508/// in priority order.
509///
510/// In tests, a [`Source::TestOverride`] layer is pushed on demand by
511/// [`ConfigLock::override_key`]. This layer always takes precedence
512/// and is automatically removed when the guard drops.
513///
514/// In normal operation, a parent process may capture its config with
515/// [`attrs`] and pass it to a child during bootstrap. The child
516/// installs this snapshot as its [`Source::ClientOverride`] layer,
517/// which has the lowest precedence among explicit layers.
518static GLOBAL: LazyLock<GlobalConfig> = LazyLock::new(|| {
519    let env = from_env();
520    let layers = Layers {
521        ordered: vec![Layer::Env(env)],
522    };
523    let materialized = ArcSwap::new(Arc::new(layers.materialize()));
524    GlobalConfig {
525        layers: RwLock::new(layers),
526        materialized,
527    }
528});
529
530/// Update the materialized snapshot from the current layers.
531///
532/// Must be called while holding `GLOBAL.layers.write()` to ensure the
533/// snapshot is consistent with the layers.
534fn rematerialize(layers: &Layers) {
535    GLOBAL.materialized.store(Arc::new(layers.materialize()));
536}
537
/// Counter that hands out a unique token to every test override
/// frame.
///
/// A token ties a [`ConfigValueGuard`] to its frame in a key's
/// override stack, so the right frame can be removed even when guards
/// are dropped out of order. The counter starts at 1. It is advanced
/// with relaxed ordering, because only uniqueness of the tokens
/// matters, not their exact sequencing across threads.
static OVERRIDE_TOKEN_SEQ: AtomicU64 = AtomicU64::new(1);
547
548/// Acquire the global configuration lock.
549///
550/// This lock serializes all mutations of the global configuration,
551/// ensuring they cannot clobber each other. It returns a
552/// [`ConfigLock`] guard, which must be held for the duration of any
553/// mutation (e.g. inserting or overriding values).
554///
555/// Most commonly used in tests, where it provides exclusive access to
556/// push a [`Source::TestOverride`] layer via
557/// [`ConfigLock::override_key`]. The override layer is automatically
558/// removed when the guard drops, restoring the original state.
559///
560/// # Example
561/// ```rust,ignore
562/// let lock = hyperactor::config::global::lock();
563/// let _guard = lock.override_key(CONFIG_KEY, "test_value");
564/// // Code under test sees the overridden config.
565/// // On drop, the key is restored.
566/// ```
567pub fn lock() -> ConfigLock {
568    static MUTEX: LazyLock<std::sync::Mutex<()>> = LazyLock::new(|| std::sync::Mutex::new(()));
569    ConfigLock {
570        _guard: MUTEX.lock().unwrap_or_else(|e| e.into_inner()),
571    }
572}
573
574/// Initialize the global configuration from environment variables.
575///
576/// Reads values from process environment variables, using each key's
577/// `CONFIG.env_name` (from `@meta(CONFIG = ConfigAttr { … })`) to
578/// determine its mapping. The resulting values are installed as the
579/// [`Source::Env`] layer. Keys without a corresponding environment
580/// variable fall back to lower-priority sources or defaults.
581///
582/// Typically invoked once at process startup to overlay config values
583/// from the environment. Repeated calls replace the existing Env
584/// layer.
585pub fn init_from_env() {
586    set(Source::Env, from_env());
587}
588
589/// Initialize the global configuration from a YAML file.
590///
591/// Loads values from the specified YAML file and installs them as the
592/// [`Source::File`] layer. During resolution, File is consulted after
593/// TestOverride, Env, and Runtime layers, and before ClientOverride
594/// and defaults.
595///
596/// Typically invoked once at process startup to provide a baseline
597/// configuration. Repeated calls replace the existing File layer.
598pub fn init_from_yaml<P: AsRef<Path>>(path: P) -> Result<(), anyhow::Error> {
599    let file = from_yaml(path)?;
600    set(Source::File, file);
601    Ok(())
602}
603
604/// Get a key from the global configuration (Copy types).
605///
606/// Resolution order: TestOverride -> Env -> Runtime -> File ->
607/// ClientOverride -> Default. Panics if the key has no default and is
608/// not set in any layer.
609///
610/// This function reads from a pre-materialized snapshot for lock-free
611/// performance. The snapshot is updated atomically whenever layers
612/// change.
613pub fn get<T: AttrValue + Copy>(key: Key<T>) -> T {
614    let snapshot = GLOBAL.materialized.load();
615    *snapshot.get(key).expect("key must have a default")
616}
617
618/// Return the override value for `key` if it is explicitly present in
619/// `overrides`, otherwise fall back to the global value for that key.
620pub fn override_or_global<T: AttrValue + Copy>(overrides: &Attrs, key: Key<T>) -> T {
621    if overrides.contains_key(key) {
622        *overrides.get(key).unwrap()
623    } else {
624        get(key)
625    }
626}
627
628/// Get a key by cloning the value.
629///
630/// Resolution order: TestOverride -> Env -> Runtime -> File ->
631/// ClientOverride -> Default. Panics if the key has no default and
632/// is not set in any layer.
633pub fn get_cloned<T: AttrValue>(key: Key<T>) -> T {
634    try_get_cloned(key)
635        .expect("key must have a default")
636        .clone()
637}
638
639/// Try to get a key by cloning the value.
640///
641/// Resolution order: TestOverride -> Env -> Runtime -> File ->
642/// ClientOverride -> Default. Returns None if the key has no default
643/// and is not set in any layer.
644///
645/// This function reads from a pre-materialized snapshot for lock-free
646/// performance.
647pub fn try_get_cloned<T: AttrValue>(key: Key<T>) -> Option<T> {
648    let snapshot = GLOBAL.materialized.load();
649    snapshot.get(key).cloned()
650}
651
652/// Construct a [`Layer`] for the given [`Source`] using the provided
653/// `attrs`.
654///
655/// Used by [`set`] and [`create_or_merge`] when installing a new
656/// configuration layer.
657fn make_layer(source: Source, attrs: Attrs) -> Layer {
658    match source {
659        Source::File => Layer::File(attrs),
660        Source::Env => Layer::Env(attrs),
661        Source::Runtime => Layer::Runtime(attrs),
662        Source::TestOverride => Layer::TestOverride {
663            attrs,
664            stacks: HashMap::new(),
665        },
666        Source::ClientOverride => Layer::ClientOverride(attrs),
667    }
668}
669
670/// Insert or replace a configuration layer for the given source.
671///
672/// If a layer with the same [`Source`] already exists, its contents
673/// are replaced with the provided `attrs`. Otherwise a new layer is
674/// added. After insertion, layers are re-sorted so that
675/// higher-priority sources (e.g. [`Source::TestOverride`],
676/// [`Source::Env`]) appear before lower-priority ones
677/// ([`Source::Runtime`], [`Source::File`]).
678///
679/// This function is used by initialization routines (e.g.
680/// `init_from_env`, `init_from_yaml`) and by tests when overriding
681/// configuration values.
682pub fn set(source: Source, attrs: Attrs) {
683    let mut g = GLOBAL.layers.write().unwrap();
684    g.set(source, attrs);
685    rematerialize(&g);
686}
687
688/// Insert or update a configuration layer for the given [`Source`].
689///
690/// If a layer with the same [`Source`] already exists, its attributes
691/// are **updated in place**: all keys present in `attrs` are absorbed
692/// into the existing layer, overwriting any previous values for those
693/// keys while leaving all other keys in that layer unchanged.
694///
695/// If no layer for `source` exists yet, this behaves like [`set`]: a
696/// new layer is created with the provided `attrs`.
697///
698/// This is useful for incremental / additive updates (for example,
699/// runtime configuration driven by a Python API), where callers want
700/// to change a subset of keys without discarding previously installed
701/// values in the same layer.
702///
703/// By contrast, [`set`] replaces the entire layer for `source` with
704/// `attrs`, discarding any existing values in that layer.
705pub fn create_or_merge(source: Source, attrs: Attrs) {
706    let mut g = GLOBAL.layers.write().unwrap();
707    g.merge(source, attrs);
708    rematerialize(&g);
709}
710
711/// Remove the configuration layer for the given [`Source`], if
712/// present.
713///
714/// After this call, values from that source will no longer contribute
715/// to resolution in [`get`], [`get_cloned`], or [`attrs`]. Defaults
716/// and any remaining layers continue to apply in their normal
717/// priority order.
718pub fn clear(source: Source) {
719    let mut g = GLOBAL.layers.write().unwrap();
720    g.clear(source);
721    rematerialize(&g);
722}
723
724/// Return a complete, merged snapshot of the effective configuration
725/// **(only keys marked with `@meta(CONFIG = ...)`)**.
726///
727/// Resolution per key:
728/// 1) First explicit value found in layers (TestOverride → Env →
729///    Runtime → File → ClientOverride).
730/// 2) Otherwise, the key's default (if any).
731///
732/// Notes:
733/// - This materializes defaults into the returned Attrs for all
734///   CONFIG-marked keys, so it's self-contained.
735/// - Keys without `CONFIG` meta are excluded.
736pub fn attrs() -> Attrs {
737    GLOBAL.layers.read().unwrap().materialize()
738}
739
740/// Return a complete, merged snapshot of the effective configuration
741/// **(only keys marked with `@meta(CONFIG = ...)` and `propagate: true`)**.
742///
743/// Resolution per key:
744/// 1) First explicit value found in layers (TestOverride → Env →
745///    Runtime → File → ClientOverride).
746/// 2) Otherwise, the key's default (if any).
747///
748/// Notes:
749/// - This materializes defaults into the returned Attrs for all
750///   CONFIG-marked keys with `propagate: true`.
751/// - Keys without `CONFIG` meta are excluded.
752/// - Keys with `propagate: false` are excluded.
753///
754/// Use this when sending config to child processes via
755/// `BootstrapProcManager`. Process-local configs (like TLS cert paths)
756/// should have `propagate: false` and will not be included.
757pub fn propagatable_attrs() -> Attrs {
758    GLOBAL.layers.read().unwrap().materialize_propagatable()
759}
760
761/// Return a snapshot of the attributes for a specific configuration
762/// source.
763///
764/// If a layer with the given [`Source`] exists, this clones and
765/// returns its [`Attrs`]. Otherwise an empty [`Attrs`] is returned.
766/// The returned map is detached from the global store – mutating it
767/// does **not** affect the underlying layer; use [`set`] or
768/// [`create_or_merge`] to modify layers.
769fn layer_attrs_for(source: Source) -> Attrs {
770    GLOBAL.layers.read().unwrap().layer_attrs_for(source)
771}
772
773/// Snapshot the current attributes in the **Runtime** configuration
774/// layer.
775///
776/// This returns a cloned [`Attrs`] containing only values explicitly
777/// set in the [`Source::Runtime`] layer (no merging with
778/// Env/File/Defaults). If no Runtime layer is present, an empty
779/// [`Attrs`] is returned.
780pub fn runtime_attrs() -> Attrs {
781    layer_attrs_for(Source::Runtime)
782}
783
784/// Reset the global configuration to only Defaults (for testing).
785///
786/// This clears all explicit layers (`File`, `Env`, `Runtime`,
787/// `ClientOverride`, and `TestOverride`). Subsequent lookups will
788/// resolve keys entirely from their declared defaults.
789///
790/// Note: Should be called while holding [`global::lock`] in tests, to
791/// ensure no concurrent modifications happen.
792pub fn reset_to_defaults() {
793    let mut g = GLOBAL.layers.write().unwrap();
794    g.reset();
795    rematerialize(&g);
796}
797
/// A guard that holds the global configuration lock and provides
/// override functionality.
///
/// This struct acts as both a lock guard (preventing other tests from
/// modifying global config) and as the only way to create
/// configuration overrides. Override guards cannot outlive this
/// ConfigLock, ensuring proper synchronization.
///
/// Overrides are created with [`ConfigLock::override_key`]; the
/// guards it returns borrow this lock. Dropping the lock removes the
/// whole [`Source::TestOverride`] layer.
pub struct ConfigLock {
    // Held purely for its lifetime: the mutex stays locked for as long
    // as this `ConfigLock` is alive. The guarded value is `()`.
    _guard: std::sync::MutexGuard<'static, ()>,
}
808
809impl ConfigLock {
810    /// Create a configuration override that is active until the
811    /// returned guard is dropped.
812    ///
813    /// Each call pushes a new frame onto a per-key override stack
814    /// within the [`Source::TestOverride`] layer. The topmost frame
815    /// defines the effective value seen by `get()` and in the
816    /// mirrored environment variable (if any). When a guard is
817    /// dropped, its frame is removed: if it was the top, the previous
818    /// frame (if any) becomes active or the key and env var are
819    /// restored to their prior state.
820    ///
821    /// The returned guard must not outlive this [`ConfigLock`].
822    pub fn override_key<'a, T: AttrValue>(
823        &'a self,
824        key: crate::attrs::Key<T>,
825        value: T,
826    ) -> ConfigValueGuard<'a, T> {
827        let token = OVERRIDE_TOKEN_SEQ.fetch_add(1, Ordering::Relaxed);
828
829        let mut g = GLOBAL.layers.write().unwrap();
830
831        {
832            // Ensure TestOverride layer exists and get mutable access.
833            let (attrs, stacks) = g.ensure_test_override();
834
835            // Compute env var (if any) for this key once.
836            let (env_var, env_str) = if let Some(cfg) = key.attrs().get(crate::CONFIG) {
837                if let Some(name) = &cfg.env_name {
838                    (Some(name.clone()), value.display())
839                } else {
840                    (None, String::new())
841                }
842            } else {
843                (None, String::new())
844            };
845
846            // Get per-key stack (by declared name).
847            let key_name = key.name();
848            let stack = stacks.entry(key_name).or_insert_with(|| OverrideStack {
849                env_var: env_var.clone(),
850                saved_env: env_var.as_ref().and_then(|n| std::env::var(n).ok()),
851                frames: Vec::new(),
852            });
853
854            // Push the new frame.
855            let boxed: Box<dyn crate::attrs::SerializableValue> = Box::new(value.clone());
856            stack.frames.push(OverrideFrame {
857                token,
858                value: boxed,
859                env_str,
860            });
861
862            // Make this frame the active value in TestOverride attrs.
863            attrs.set(key, value.clone());
864
865            // Update process env to reflect new top-of-stack.
866            if let (Some(var), Some(top)) = (stack.env_var.as_ref(), stack.frames.last()) {
867                // SAFETY: Under global ConfigLock during tests.
868                unsafe { std::env::set_var(var, &top.env_str) }
869            }
870        }
871
872        rematerialize(&g);
873
874        ConfigValueGuard {
875            key,
876            token,
877            _phantom: PhantomData,
878        }
879    }
880}
881
882/// When a [`ConfigLock`] is dropped, the special
883/// [`Source::TestOverride`] layer (if present) is removed entirely.
884/// This discards all temporary overrides created under the lock,
885/// ensuring they cannot leak into subsequent tests or callers. Other
886/// layers (`Runtime`, `Env`, `File`, `ClientOverride`, and defaults)
887/// are left untouched.
888///
889/// Note: individual values within the TestOverride layer may already
890/// have been restored by [`ConfigValueGuard`]s as they drop. This
891/// final removal guarantees no residual layer remains once the lock
892/// itself is released.
893impl Drop for ConfigLock {
894    fn drop(&mut self) {
895        let mut g = GLOBAL.layers.write().unwrap();
896        g.remove_test_override();
897        rematerialize(&g);
898    }
899}
900
/// A guard that restores a single configuration value when dropped.
///
/// Created by [`ConfigLock::override_key`]. The lifetime `'a` is the
/// borrow of the [`ConfigLock`] the override was created under.
pub struct ConfigValueGuard<'a, T: 'static> {
    // The overridden key; its name selects the per-key override stack.
    key: crate::attrs::Key<T>,
    // Identifies this guard's frame within that stack.
    token: u64,
    // This is here so we can hold onto a 'a lifetime.
    _phantom: PhantomData<&'a ()>,
}
908
909/// When a [`ConfigValueGuard`] is dropped, it restores configuration
910/// state for the key it was guarding.
911///
912/// Behavior:
913/// - Each key maintains a stack of override frames. The most recent
914///   frame (top of stack) defines the effective value in
915///   [`Source::TestOverride`].
916/// - Dropping a guard removes its frame. If it was the top frame, the
917///   next frame (if any) becomes active and both the config and
918///   mirrored env var are updated accordingly.
919/// - If the dropped frame was not on top, no changes occur until the
920///   active frame is dropped.
921/// - When the last frame for a key is removed, the key is deleted
922///   from the TestOverride layer and its associated environment
923///   variable (if any) is restored to its original value or removed
924///   if it did not exist.
925///
926/// This guarantees that nested or out-of-order test overrides are
927/// restored deterministically and without leaking state into
928/// subsequent tests.
929impl<T: 'static> Drop for ConfigValueGuard<'_, T> {
930    fn drop(&mut self) {
931        let mut g = GLOBAL.layers.write().unwrap();
932
933        // Track whether the config actually changed (for rematerialization).
934        let mut config_changed = false;
935
936        // Env var restoration info (captured inside the block, applied
937        // outside).
938        let mut restore_env_var: Option<String> = None;
939        let mut restore_env_to: Option<String> = None;
940
941        if let Some((attrs, stacks)) = g.test_override_mut() {
942            let key_name = self.key.name();
943
944            // We need a tiny scope for the &mut borrow of the stack so
945            // we can call `stacks.remove(key_name)` afterward if it
946            // becomes empty.
947            let mut remove_empty_stack = false;
948
949            if let Some(stack) = stacks.get_mut(key_name) {
950                // Find this guard's frame by token.
951                if let Some(pos) = stack.frames.iter().position(|f| f.token == self.token) {
952                    let is_top = pos + 1 == stack.frames.len();
953
954                    if is_top {
955                        // Pop the active frame
956                        stack.frames.pop();
957                        config_changed = true;
958
959                        if let Some(new_top) = stack.frames.last() {
960                            // New top becomes active: update attrs and env.
961                            attrs.insert_value(self.key, (*new_top.value).cloned());
962                            if let Some(var) = stack.env_var.as_ref() {
963                                // SAFETY: Under global ConfigLock during tests.
964                                unsafe { std::env::set_var(var, &new_top.env_str) }
965                            }
966                        } else {
967                            // Stack empty: remove the key now, then after
968                            // releasing the &mut borrow of the stack,
969                            // restore the env var and remove the stack
970                            // entry.
971                            let _ = attrs.remove_value(self.key);
972
973                            // Capture restoration details while we still
974                            // have access to the stack.
975                            if let Some(var) = stack.env_var.as_ref() {
976                                restore_env_var = Some(var.clone());
977                                restore_env_to = stack.saved_env.clone(); // None => unset
978                            }
979                            remove_empty_stack = true
980                        }
981                    } else {
982                        // Out-of-order drop: remove only that frame:
983                        // active top stays
984                        stack.frames.remove(pos);
985                        // No changes to attrs or env here (and no
986                        // rematerialization needed).
987                    }
988                } // else: token already handled; nothing to do
989            } // &mut stack borrow ends here
990
991            // If we emptied the stack for this key, remove the stack
992            // entry.
993            if remove_empty_stack {
994                let _ = stacks.remove(key_name);
995            }
996        }
997
998        // Restore env var outside the borrow scope.
999        if let Some(var) = restore_env_var.as_ref() {
1000            // SAFETY: Under global ConfigLock during tests.
1001            unsafe {
1002                if let Some(val) = restore_env_to.as_ref() {
1003                    std::env::set_var(var, val);
1004                } else {
1005                    std::env::remove_var(var);
1006                }
1007            }
1008        }
1009
1010        // Rematerialize if the config actually changed.
1011        if config_changed {
1012            rematerialize(&g);
1013        }
1014    }
1015}
1016
1017#[cfg(test)]
1018mod tests {
1019    use std::time::Duration;
1020
1021    use super::*;
1022    use crate::ConfigAttr;
1023    use crate::attrs::declare_attrs;
1024
1025    // Test configuration keys used to exercise the layered config
1026    // infrastructure. These mirror hyperactor's config keys but are
1027    // declared locally to keep hyperactor_config independent.
1028
    // Each key below carries `@meta(CONFIG = ...)`. The first argument
    // to `ConfigAttr::new` is the name of the environment variable the
    // key is mirrored to by test overrides (`None` for no mapping).
    declare_attrs! {
        /// Maximum frame length for codec
        @meta(CONFIG = ConfigAttr::new(
            Some("HYPERACTOR_CODEC_MAX_FRAME_LENGTH".to_string()),
            None,
        ))
        pub attr CODEC_MAX_FRAME_LENGTH: usize = 10 * 1024 * 1024 * 1024; // 10 GiB

        /// Message delivery timeout
        @meta(CONFIG = ConfigAttr::new(
            Some("HYPERACTOR_MESSAGE_DELIVERY_TIMEOUT".to_string()),
            None,
        ))
        pub attr MESSAGE_DELIVERY_TIMEOUT: Duration = Duration::from_secs(30);

        /// Number of messages after which to send an acknowledgment
        @meta(CONFIG = ConfigAttr::new(
            Some("HYPERACTOR_MESSAGE_ACK_EVERY_N_MESSAGES".to_string()),
            None,
        ))
        pub attr MESSAGE_ACK_EVERY_N_MESSAGES: u64 = 1000;

        /// Maximum buffer size for split port messages
        @meta(CONFIG = ConfigAttr::new(
            Some("HYPERACTOR_SPLIT_MAX_BUFFER_SIZE".to_string()),
            None,
        ))
        pub attr SPLIT_MAX_BUFFER_SIZE: usize = 5;

        /// Whether to use multipart encoding for network channel communications
        @meta(CONFIG = ConfigAttr::new(
            Some("HYPERACTOR_CHANNEL_MULTIPART".to_string()),
            None,
        ))
        pub attr CHANNEL_MULTIPART: bool = true;

        /// Default hop Time-To-Live for message envelopes
        @meta(CONFIG = ConfigAttr::new(
            Some("HYPERACTOR_MESSAGE_TTL_DEFAULT".to_string()),
            None,
        ))
        pub attr MESSAGE_TTL_DEFAULT: u8 = 64;

        /// A test key with no environment variable mapping
        @meta(CONFIG = ConfigAttr::new(
            None,
            None,
        ))
        pub attr CONFIG_KEY_NO_ENV: u32 = 100;
    }
1079
1080    #[test]
1081    fn test_global_config() {
1082        let config = lock();
1083
1084        // Reset global config to defaults to avoid interference from
1085        // other tests
1086        reset_to_defaults();
1087
1088        assert_eq!(get(CODEC_MAX_FRAME_LENGTH), CODEC_MAX_FRAME_LENGTH_DEFAULT);
1089        {
1090            let _guard = config.override_key(CODEC_MAX_FRAME_LENGTH, 1024);
1091            assert_eq!(get(CODEC_MAX_FRAME_LENGTH), 1024);
1092            // The configuration will be automatically restored when
1093            // _guard goes out of scope
1094        }
1095
1096        assert_eq!(get(CODEC_MAX_FRAME_LENGTH), CODEC_MAX_FRAME_LENGTH_DEFAULT);
1097    }
1098
1099    #[test]
1100    fn test_overrides() {
1101        let config = lock();
1102
1103        // Reset global config to defaults to avoid interference from
1104        // other tests
1105        reset_to_defaults();
1106
1107        // Test the new lock/override API for individual config values
1108        assert_eq!(get(CODEC_MAX_FRAME_LENGTH), CODEC_MAX_FRAME_LENGTH_DEFAULT);
1109        assert_eq!(get(MESSAGE_DELIVERY_TIMEOUT), Duration::from_secs(30));
1110
1111        // Test single value override
1112        {
1113            let _guard = config.override_key(CODEC_MAX_FRAME_LENGTH, 2048);
1114            assert_eq!(get(CODEC_MAX_FRAME_LENGTH), 2048);
1115            assert_eq!(get(MESSAGE_DELIVERY_TIMEOUT), Duration::from_secs(30)); // Unchanged
1116        }
1117
1118        // Values should be restored after guard is dropped
1119        assert_eq!(get(CODEC_MAX_FRAME_LENGTH), CODEC_MAX_FRAME_LENGTH_DEFAULT);
1120
1121        // Test multiple overrides
1122        let orig_value = std::env::var("HYPERACTOR_MESSAGE_DELIVERY_TIMEOUT").ok();
1123        {
1124            let _guard1 = config.override_key(CODEC_MAX_FRAME_LENGTH, 4096);
1125            let _guard2 = config.override_key(MESSAGE_DELIVERY_TIMEOUT, Duration::from_mins(1));
1126
1127            assert_eq!(get(CODEC_MAX_FRAME_LENGTH), 4096);
1128            assert_eq!(get(MESSAGE_DELIVERY_TIMEOUT), Duration::from_mins(1));
1129            // This was overridden:
1130            assert_eq!(
1131                std::env::var("HYPERACTOR_MESSAGE_DELIVERY_TIMEOUT").unwrap(),
1132                "1m"
1133            );
1134        }
1135        assert_eq!(
1136            std::env::var("HYPERACTOR_MESSAGE_DELIVERY_TIMEOUT").ok(),
1137            orig_value
1138        );
1139
1140        // All values should be restored
1141        assert_eq!(get(CODEC_MAX_FRAME_LENGTH), CODEC_MAX_FRAME_LENGTH_DEFAULT);
1142        assert_eq!(get(MESSAGE_DELIVERY_TIMEOUT), Duration::from_secs(30));
1143    }
1144
1145    #[test]
1146    fn test_layer_precedence_env_over_file_and_replacement() {
1147        let _lock = lock();
1148        reset_to_defaults();
1149
1150        // File sets a value.
1151        let mut file = Attrs::new();
1152        file[CODEC_MAX_FRAME_LENGTH] = 1111;
1153        set(Source::File, file);
1154
1155        // Env sets a different value.
1156        let mut env = Attrs::new();
1157        env[CODEC_MAX_FRAME_LENGTH] = 2222;
1158        set(Source::Env, env);
1159
1160        // Env should win over File.
1161        assert_eq!(get(CODEC_MAX_FRAME_LENGTH), 2222);
1162
1163        // Replace Env layer with a new value.
1164        let mut env2 = Attrs::new();
1165        env2[CODEC_MAX_FRAME_LENGTH] = 3333;
1166        set(Source::Env, env2);
1167
1168        assert_eq!(get(CODEC_MAX_FRAME_LENGTH), 3333);
1169    }
1170
1171    #[test]
1172    fn test_layer_precedence_read_file_if_not_found_in_env() {
1173        let _lock = lock();
1174        reset_to_defaults();
1175
1176        // Read the default value because no layers have been set.
1177        assert_eq!(get(CODEC_MAX_FRAME_LENGTH), 10737418240);
1178
1179        // File sets a value.
1180        let mut file = Attrs::new();
1181        file[CODEC_MAX_FRAME_LENGTH] = 1111;
1182        set(Source::File, file);
1183
1184        // Env does not have any attribute.
1185        let env = Attrs::new();
1186        set(Source::Env, env);
1187
1188        // Should read from File.
1189        assert_eq!(get(CODEC_MAX_FRAME_LENGTH), 1111);
1190
1191        // Replace Env layer with a new value.
1192        let mut env2 = Attrs::new();
1193        env2[CODEC_MAX_FRAME_LENGTH] = 2222;
1194        set(Source::Env, env2);
1195
1196        // Env should win over File.
1197        assert_eq!(get(CODEC_MAX_FRAME_LENGTH), 2222);
1198    }
1199
1200    #[test]
1201    fn test_runtime_overrides_and_clear_restores_lower_layers() {
1202        let _lock = lock();
1203        reset_to_defaults();
1204
1205        // File baseline.
1206        let mut file = Attrs::new();
1207        file[MESSAGE_DELIVERY_TIMEOUT] = Duration::from_secs(30);
1208        set(Source::File, file);
1209
1210        // Env override.
1211        let mut env = Attrs::new();
1212        env[MESSAGE_DELIVERY_TIMEOUT] = Duration::from_secs(40);
1213        set(Source::Env, env);
1214
1215        // Runtime layer (but Env beats it).
1216        let mut rt = Attrs::new();
1217        rt[MESSAGE_DELIVERY_TIMEOUT] = Duration::from_secs(50);
1218        set(Source::Runtime, rt);
1219
1220        assert_eq!(get(MESSAGE_DELIVERY_TIMEOUT), Duration::from_secs(40));
1221
1222        // Clearing Env should reveal Runtime.
1223        clear(Source::Env);
1224
1225        // With the Env layer gone, Runtime wins over File.
1226        assert_eq!(get(MESSAGE_DELIVERY_TIMEOUT), Duration::from_secs(50));
1227    }
1228
1229    #[test]
1230    fn test_attrs_snapshot_materializes_defaults_and_omits_meta() {
1231        let _lock = lock();
1232        reset_to_defaults();
1233
1234        // No explicit layers: values should come from Defaults.
1235        let snap = attrs();
1236
1237        // A few representative defaults are materialized:
1238        assert_eq!(snap[CODEC_MAX_FRAME_LENGTH], 10 * 1024 * 1024 * 1024);
1239        assert_eq!(snap[MESSAGE_DELIVERY_TIMEOUT], Duration::from_secs(30));
1240
1241        // CONFIG has no default and wasn't explicitly set: should be
1242        // omitted.
1243        let json = serde_json::to_string(&snap).unwrap();
1244        assert!(
1245            !json.contains("hyperactor::config::config"),
1246            "CONFIG must not appear in snapshot unless explicitly set"
1247        );
1248    }
1249
1250    #[test]
1251    fn test_parent_child_snapshot_as_clientoverride_layer() {
1252        let _lock = lock();
1253        reset_to_defaults();
1254
1255        // Parent effective config (pretend it's a parent process).
1256        let mut parent_env = Attrs::new();
1257        parent_env[MESSAGE_ACK_EVERY_N_MESSAGES] = 12345;
1258        set(Source::Env, parent_env);
1259
1260        let parent_snap = attrs();
1261
1262        // "Child" process: start clean, install parent snapshot as
1263        // ClientOverride.
1264        reset_to_defaults();
1265        set(Source::ClientOverride, parent_snap);
1266
1267        // Child should observe parent's effective value from the
1268        // ClientOverride layer (since child has no Env/Runtime/File
1269        // layers set).
1270        assert_eq!(get(MESSAGE_ACK_EVERY_N_MESSAGES), 12345);
1271    }
1272
1273    #[test]
1274    fn test_testoverride_layer_override_and_env_restore() {
1275        let lock = lock();
1276        reset_to_defaults();
1277
1278        assert_eq!(get(MESSAGE_DELIVERY_TIMEOUT), Duration::from_secs(30));
1279
1280        // SAFETY: single-threaded test.
1281        unsafe {
1282            std::env::remove_var("HYPERACTOR_MESSAGE_DELIVERY_TIMEOUT");
1283        }
1284
1285        {
1286            let _guard = lock.override_key(MESSAGE_DELIVERY_TIMEOUT, Duration::from_secs(99));
1287            // Override wins:
1288            assert_eq!(get(MESSAGE_DELIVERY_TIMEOUT), Duration::from_secs(99));
1289
1290            // Env should be mirrored to the same duration (string may
1291            // be "1m 39s")
1292            let s = std::env::var("HYPERACTOR_MESSAGE_DELIVERY_TIMEOUT").unwrap();
1293            let parsed = humantime::parse_duration(&s).unwrap();
1294            assert_eq!(parsed, Duration::from_secs(99));
1295        }
1296
1297        // After drop, value and env restored:
1298        assert_eq!(get(MESSAGE_DELIVERY_TIMEOUT), Duration::from_secs(30));
1299        assert!(std::env::var("HYPERACTOR_MESSAGE_DELIVERY_TIMEOUT").is_err());
1300    }
1301
1302    #[test]
1303    fn test_reset_to_defaults_clears_all_layers() {
1304        let _lock = lock();
1305        reset_to_defaults();
1306
1307        // Seed multiple layers.
1308        let mut file = Attrs::new();
1309        file[SPLIT_MAX_BUFFER_SIZE] = 7;
1310        set(Source::File, file);
1311
1312        let mut env = Attrs::new();
1313        env[SPLIT_MAX_BUFFER_SIZE] = 8;
1314        set(Source::Env, env);
1315
1316        let mut rt = Attrs::new();
1317        rt[SPLIT_MAX_BUFFER_SIZE] = 9;
1318        set(Source::Runtime, rt);
1319
1320        // Sanity: Env wins over Runtime and File.
1321        assert_eq!(get(SPLIT_MAX_BUFFER_SIZE), 8);
1322
1323        // Reset clears all explicit layers; defaults apply.
1324        reset_to_defaults();
1325        assert_eq!(get(SPLIT_MAX_BUFFER_SIZE), 5); // default
1326    }
1327
1328    #[test]
1329    fn test_get_cloned_resolution_matches_get() {
1330        let _lock = lock();
1331        reset_to_defaults();
1332
1333        let mut env = Attrs::new();
1334        env[MESSAGE_DELIVERY_TIMEOUT] = Duration::from_mins(2);
1335        set(Source::Env, env);
1336
1337        assert_eq!(get(MESSAGE_DELIVERY_TIMEOUT), Duration::from_mins(2));
1338        let v = get_cloned(MESSAGE_DELIVERY_TIMEOUT);
1339        assert_eq!(v, Duration::from_mins(2));
1340    }
1341
1342    #[test]
1343    fn test_attrs_snapshot_respects_layer_precedence_per_key() {
1344        let _lock = lock();
1345        reset_to_defaults();
1346
1347        let mut file = Attrs::new();
1348        file[MESSAGE_TTL_DEFAULT] = 10;
1349        set(Source::File, file);
1350
1351        let mut env = Attrs::new();
1352        env[MESSAGE_TTL_DEFAULT] = 20;
1353        set(Source::Env, env);
1354
1355        let snap = attrs();
1356        assert_eq!(snap[MESSAGE_TTL_DEFAULT], 20); // Env beats File
1357    }
1358
    // Keys used by the snapshot-filtering tests below.
    declare_attrs! {
      // Has CONFIG meta: the tests expect it in both `attrs()` and
      // `propagatable_attrs()`.
      @meta(CONFIG = ConfigAttr::new(
          None,
          None,
      ))
      pub attr CONFIG_KEY: bool = true;

      // No CONFIG meta: the tests expect it to be absent from `attrs()`.
      pub attr NON_CONFIG_KEY: bool = true;

      // CONFIG meta marked `process_local()`: the tests expect it in
      // `attrs()` but not in `propagatable_attrs()`.
      @meta(CONFIG = ConfigAttr::new(
          None,
          None,
      ).process_local())
      pub attr NON_PROPAGATE_KEY: bool = true;
    }
1374
1375    #[test]
1376    fn test_attrs_excludes_non_config_keys() {
1377        let _lock = lock();
1378        reset_to_defaults();
1379
1380        let snap = attrs();
1381        let json = serde_json::to_string(&snap).unwrap();
1382
1383        // Expect our CONFIG_KEY to be present.
1384        assert!(
1385            json.contains("hyperactor_config::global::tests::config_key"),
1386            "attrs() should include keys with @meta(CONFIG = ...)"
1387        );
1388        // Expect our NON_CONFIG_KEY to be omitted.
1389        assert!(
1390            !json.contains("hyperactor_config::global::tests::non_config_key"),
1391            "attrs() should exclude keys without @meta(CONFIG = ...)"
1392        );
1393    }
1394
1395    #[test]
1396    fn test_propagatable_attrs_excludes_non_propagate_keys() {
1397        let _lock = lock();
1398        reset_to_defaults();
1399
1400        // attrs() should include NON_PROPAGATE_KEY (it has CONFIG meta)
1401        let snap = attrs();
1402        let json = serde_json::to_string(&snap).unwrap();
1403        assert!(
1404            json.contains("hyperactor_config::global::tests::non_propagate_key"),
1405            "attrs() should include keys with propagate: false"
1406        );
1407
1408        // propagatable_attrs() should exclude NON_PROPAGATE_KEY
1409        let propagatable = propagatable_attrs();
1410        let json_propagatable = serde_json::to_string(&propagatable).unwrap();
1411        assert!(
1412            !json_propagatable.contains("hyperactor_config::global::tests::non_propagate_key"),
1413            "propagatable_attrs() should exclude keys with propagate: false"
1414        );
1415
1416        // propagatable_attrs() should still include CONFIG_KEY (propagate: true)
1417        assert!(
1418            json_propagatable.contains("hyperactor_config::global::tests::config_key"),
1419            "propagatable_attrs() should include keys with propagate: true"
1420        );
1421    }
1422
1423    #[test]
1424    fn test_testoverride_multiple_stacked_overrides_lifo() {
1425        let lock = lock();
1426        reset_to_defaults();
1427
1428        // Baseline sanity.
1429        assert_eq!(get(MESSAGE_DELIVERY_TIMEOUT), Duration::from_secs(30));
1430
1431        // Start from a clean env so we can assert restoration to "unset".
1432        // SAFETY: single-threaded tests.
1433        unsafe {
1434            std::env::remove_var("HYPERACTOR_MESSAGE_DELIVERY_TIMEOUT");
1435        }
1436        assert!(std::env::var("HYPERACTOR_MESSAGE_DELIVERY_TIMEOUT").is_err());
1437
1438        // Stack A: 40s (becomes top)
1439        let guard_a = lock.override_key(MESSAGE_DELIVERY_TIMEOUT, Duration::from_secs(40));
1440        assert_eq!(get(MESSAGE_DELIVERY_TIMEOUT), Duration::from_secs(40));
1441        {
1442            let s = std::env::var("HYPERACTOR_MESSAGE_DELIVERY_TIMEOUT").unwrap();
1443            assert_eq!(
1444                humantime::parse_duration(&s).unwrap(),
1445                Duration::from_secs(40)
1446            );
1447        }
1448
1449        // Stack B: 50s (new top)
1450        let guard_b = lock.override_key(MESSAGE_DELIVERY_TIMEOUT, Duration::from_secs(50));
1451        assert_eq!(get(MESSAGE_DELIVERY_TIMEOUT), Duration::from_secs(50));
1452        {
1453            let s = std::env::var("HYPERACTOR_MESSAGE_DELIVERY_TIMEOUT").unwrap();
1454            assert_eq!(
1455                humantime::parse_duration(&s).unwrap(),
1456                Duration::from_secs(50)
1457            );
1458        }
1459
1460        // Drop B first → should reveal A (LIFO)
1461        std::mem::drop(guard_b);
1462        assert_eq!(get(MESSAGE_DELIVERY_TIMEOUT), Duration::from_secs(40));
1463        {
1464            let s = std::env::var("HYPERACTOR_MESSAGE_DELIVERY_TIMEOUT").unwrap();
1465            assert_eq!(
1466                humantime::parse_duration(&s).unwrap(),
1467                Duration::from_secs(40)
1468            );
1469        }
1470
1471        // Drop A → should restore default and unset env.
1472        std::mem::drop(guard_a);
1473        assert_eq!(get(MESSAGE_DELIVERY_TIMEOUT), Duration::from_secs(30));
1474        assert!(std::env::var("HYPERACTOR_MESSAGE_DELIVERY_TIMEOUT").is_err());
1475    }
1476
1477    #[test]
1478    fn test_testoverride_out_of_order_drop_keeps_top_stable() {
1479        let lock = lock();
1480        reset_to_defaults();
1481
1482        // Clean env baseline.
1483        // SAFETY: single-threaded tests.
1484        unsafe {
1485            std::env::remove_var("HYPERACTOR_MESSAGE_DELIVERY_TIMEOUT");
1486        }
1487        assert!(std::env::var("HYPERACTOR_MESSAGE_DELIVERY_TIMEOUT").is_err());
1488
1489        // Push three frames in order: A=40s, B=50s, C=70s (C is top).
1490        let guard_a = lock.override_key(MESSAGE_DELIVERY_TIMEOUT, Duration::from_secs(40));
1491        let guard_b = lock.override_key(MESSAGE_DELIVERY_TIMEOUT, Duration::from_secs(50));
1492        let guard_c = lock.override_key(MESSAGE_DELIVERY_TIMEOUT, Duration::from_secs(70));
1493
1494        // Top is C.
1495        assert_eq!(get(MESSAGE_DELIVERY_TIMEOUT), Duration::from_secs(70));
1496        {
1497            let s = std::env::var("HYPERACTOR_MESSAGE_DELIVERY_TIMEOUT").unwrap();
1498            assert_eq!(
1499                humantime::parse_duration(&s).unwrap(),
1500                Duration::from_secs(70)
1501            );
1502        }
1503
1504        // Drop the *middle* frame (B) first → top must remain C, env unchanged.
1505        std::mem::drop(guard_b);
1506        assert_eq!(get(MESSAGE_DELIVERY_TIMEOUT), Duration::from_secs(70));
1507        {
1508            let s = std::env::var("HYPERACTOR_MESSAGE_DELIVERY_TIMEOUT").unwrap();
1509            assert_eq!(
1510                humantime::parse_duration(&s).unwrap(),
1511                Duration::from_secs(70)
1512            );
1513        }
1514
1515        // Now drop C → A becomes top, env follows A.
1516        std::mem::drop(guard_c);
1517        assert_eq!(get(MESSAGE_DELIVERY_TIMEOUT), Duration::from_secs(40));
1518        {
1519            let s = std::env::var("HYPERACTOR_MESSAGE_DELIVERY_TIMEOUT").unwrap();
1520            assert_eq!(
1521                humantime::parse_duration(&s).unwrap(),
1522                Duration::from_secs(40)
1523            );
1524        }
1525
1526        // Drop A → restore default and clear env.
1527        std::mem::drop(guard_a);
1528        assert_eq!(get(MESSAGE_DELIVERY_TIMEOUT), Duration::from_secs(30));
1529        assert!(std::env::var("HYPERACTOR_MESSAGE_DELIVERY_TIMEOUT").is_err());
1530    }
1531
1532    #[test]
1533    fn test_priority_order() {
1534        use Source::*;
1535        assert!(priority(TestOverride) < priority(Env));
1536        assert!(priority(Env) < priority(Runtime));
1537        assert!(priority(Runtime) < priority(File));
1538        assert!(priority(File) < priority(ClientOverride));
1539    }
1540
1541    #[test]
1542    fn test_create_or_merge_runtime_merges_keys() {
1543        let _lock = lock();
1544        reset_to_defaults();
1545
1546        // Seed Runtime with one key.
1547        let mut rt = Attrs::new();
1548        rt[MESSAGE_TTL_DEFAULT] = 10;
1549        set(Source::Runtime, rt);
1550
1551        // Now update Runtime with a different key via
1552        // `create_or_merge`.
1553        let mut update = Attrs::new();
1554        update[MESSAGE_ACK_EVERY_N_MESSAGES] = 123;
1555        create_or_merge(Source::Runtime, update);
1556
1557        // Both keys should now be visible from Runtime.
1558        assert_eq!(get(MESSAGE_TTL_DEFAULT), 10);
1559        assert_eq!(get(MESSAGE_ACK_EVERY_N_MESSAGES), 123);
1560    }
1561
1562    #[test]
1563    fn test_create_or_merge_runtime_creates_layer_if_missing() {
1564        let _lock = lock();
1565        reset_to_defaults();
1566
1567        let mut rt = Attrs::new();
1568        rt[MESSAGE_TTL_DEFAULT] = 42;
1569        create_or_merge(Source::Runtime, rt);
1570
1571        assert_eq!(get(MESSAGE_TTL_DEFAULT), 42);
1572    }
1573
1574    #[test]
1575    fn test_clientoverride_precedence_loses_to_all_other_layers() {
1576        let _lock = lock();
1577        reset_to_defaults();
1578
1579        // ClientOverride sets a baseline value.
1580        let mut client = Attrs::new();
1581        client[MESSAGE_TTL_DEFAULT] = 10;
1582        set(Source::ClientOverride, client);
1583        assert_eq!(get(MESSAGE_TTL_DEFAULT), 10);
1584
1585        // File should beat ClientOverride.
1586        let mut file = Attrs::new();
1587        file[MESSAGE_TTL_DEFAULT] = 20;
1588        set(Source::File, file);
1589        assert_eq!(get(MESSAGE_TTL_DEFAULT), 20);
1590
1591        // Runtime should beat both File and ClientOverride.
1592        let mut runtime = Attrs::new();
1593        runtime[MESSAGE_TTL_DEFAULT] = 30;
1594        set(Source::Runtime, runtime);
1595        assert_eq!(get(MESSAGE_TTL_DEFAULT), 30);
1596
1597        // Env should beat Runtime, File, and ClientOverride.
1598        let mut env = Attrs::new();
1599        env[MESSAGE_TTL_DEFAULT] = 40;
1600        set(Source::Env, env);
1601        assert_eq!(get(MESSAGE_TTL_DEFAULT), 40);
1602
1603        // Clear higher layers one by one to verify fallback.
1604        clear(Source::Env);
1605        assert_eq!(get(MESSAGE_TTL_DEFAULT), 30); // Runtime
1606
1607        clear(Source::Runtime);
1608        assert_eq!(get(MESSAGE_TTL_DEFAULT), 20); // File
1609
1610        clear(Source::File);
1611        assert_eq!(get(MESSAGE_TTL_DEFAULT), 10); // ClientOverride
1612    }
1613
1614    #[test]
1615    fn test_create_or_merge_clientoverride() {
1616        let _lock = lock();
1617        reset_to_defaults();
1618
1619        // Seed ClientOverride with one key.
1620        let mut client = Attrs::new();
1621        client[MESSAGE_TTL_DEFAULT] = 10;
1622        set(Source::ClientOverride, client);
1623
1624        // Merge in a different key.
1625        let mut update = Attrs::new();
1626        update[MESSAGE_ACK_EVERY_N_MESSAGES] = 123;
1627        create_or_merge(Source::ClientOverride, update);
1628
1629        // Both keys should now be visible.
1630        assert_eq!(get(MESSAGE_TTL_DEFAULT), 10);
1631        assert_eq!(get(MESSAGE_ACK_EVERY_N_MESSAGES), 123);
1632    }
1633
1634    #[test]
1635    fn test_override_or_global_returns_override_when_present() {
1636        let _lock = lock();
1637        reset_to_defaults();
1638
1639        // Set a global value via Env.
1640        let mut env = Attrs::new();
1641        env[MESSAGE_TTL_DEFAULT] = 99;
1642        set(Source::Env, env);
1643
1644        // Create an override Attrs with a different value.
1645        let mut overrides = Attrs::new();
1646        overrides[MESSAGE_TTL_DEFAULT] = 42;
1647
1648        // Should return the override value, not global.
1649        assert_eq!(override_or_global(&overrides, MESSAGE_TTL_DEFAULT), 42);
1650    }
1651
1652    #[test]
1653    fn test_override_or_global_returns_global_when_not_present() {
1654        let _lock = lock();
1655        reset_to_defaults();
1656
1657        // Set a global value via Env.
1658        let mut env = Attrs::new();
1659        env[MESSAGE_TTL_DEFAULT] = 99;
1660        set(Source::Env, env);
1661
1662        // Empty overrides.
1663        let overrides = Attrs::new();
1664
1665        // Should return the global value.
1666        assert_eq!(override_or_global(&overrides, MESSAGE_TTL_DEFAULT), 99);
1667    }
1668
1669    #[test]
1670    fn test_runtime_attrs_returns_only_runtime_layer() {
1671        let _lock = lock();
1672        reset_to_defaults();
1673
1674        // Set values in multiple layers.
1675        let mut file = Attrs::new();
1676        file[MESSAGE_TTL_DEFAULT] = 10;
1677        set(Source::File, file);
1678
1679        let mut env = Attrs::new();
1680        env[SPLIT_MAX_BUFFER_SIZE] = 20;
1681        set(Source::Env, env);
1682
1683        let mut runtime = Attrs::new();
1684        runtime[MESSAGE_ACK_EVERY_N_MESSAGES] = 123;
1685        set(Source::Runtime, runtime);
1686
1687        // runtime_attrs() should return only Runtime layer contents.
1688        let rt = runtime_attrs();
1689
1690        // Should have the Runtime key.
1691        assert_eq!(rt[MESSAGE_ACK_EVERY_N_MESSAGES], 123);
1692
1693        // Should NOT have File or Env keys.
1694        assert!(!rt.contains_key(MESSAGE_TTL_DEFAULT));
1695        assert!(!rt.contains_key(SPLIT_MAX_BUFFER_SIZE));
1696    }
1697
1698    #[test]
1699    fn test_override_key_without_env_name_does_not_mirror_to_env() {
1700        let lock = lock();
1701        reset_to_defaults();
1702
1703        // Verify default value.
1704        assert_eq!(get(CONFIG_KEY_NO_ENV), 100);
1705
1706        // Override the key (which has no env_name).
1707        let _guard = lock.override_key(CONFIG_KEY_NO_ENV, 999);
1708
1709        // Should see the override value.
1710        assert_eq!(get(CONFIG_KEY_NO_ENV), 999);
1711
1712        // No env var should have been set (test doesn't crash,
1713        // behavior is clean). This test mainly ensures no panic
1714        // occurs during override/restore.
1715
1716        drop(_guard);
1717
1718        // Should restore to default.
1719        assert_eq!(get(CONFIG_KEY_NO_ENV), 100);
1720    }
1721
1722    #[test]
1723    fn test_multiple_different_keys_overridden_simultaneously() {
1724        let lock = lock();
1725        reset_to_defaults();
1726
1727        // SAFETY: single-threaded test.
1728        unsafe {
1729            std::env::remove_var("HYPERACTOR_CODEC_MAX_FRAME_LENGTH");
1730            std::env::remove_var("HYPERACTOR_MESSAGE_TTL_DEFAULT");
1731        }
1732
1733        // Override multiple different keys at once.
1734        let guard1 = lock.override_key(CODEC_MAX_FRAME_LENGTH, 1111);
1735        let guard2 = lock.override_key(MESSAGE_TTL_DEFAULT, 42);
1736        let guard3 = lock.override_key(CHANNEL_MULTIPART, false);
1737
1738        // All should reflect their override values.
1739        assert_eq!(get(CODEC_MAX_FRAME_LENGTH), 1111);
1740        assert_eq!(get(MESSAGE_TTL_DEFAULT), 42);
1741        assert!(!get(CHANNEL_MULTIPART));
1742
1743        // Env vars should be mirrored.
1744        assert_eq!(
1745            std::env::var("HYPERACTOR_CODEC_MAX_FRAME_LENGTH").unwrap(),
1746            "1111"
1747        );
1748        assert_eq!(
1749            std::env::var("HYPERACTOR_MESSAGE_TTL_DEFAULT").unwrap(),
1750            "42"
1751        );
1752
1753        // Drop guards in arbitrary order.
1754        drop(guard2); // Drop MESSAGE_TTL_DEFAULT first
1755
1756        // MESSAGE_TTL_DEFAULT should restore, others should remain.
1757        assert_eq!(get(MESSAGE_TTL_DEFAULT), MESSAGE_TTL_DEFAULT_DEFAULT);
1758        assert_eq!(get(CODEC_MAX_FRAME_LENGTH), 1111);
1759        assert!(!get(CHANNEL_MULTIPART));
1760
1761        // Env for MESSAGE_TTL_DEFAULT should be cleared.
1762        assert!(std::env::var("HYPERACTOR_MESSAGE_TTL_DEFAULT").is_err());
1763
1764        drop(guard1);
1765        drop(guard3);
1766
1767        // All should be restored.
1768        assert_eq!(get(CODEC_MAX_FRAME_LENGTH), CODEC_MAX_FRAME_LENGTH_DEFAULT);
1769        assert_eq!(get(CHANNEL_MULTIPART), CHANNEL_MULTIPART_DEFAULT);
1770    }
1771
1772    #[test]
1773    fn test_lock_recovers_after_panic() {
1774        let handle = std::thread::spawn(|| {
1775            let _lock = lock();
1776            panic!("intentional panic while holding ConfigLock");
1777        });
1778
1779        let result = handle.join();
1780        assert!(result.is_err(), "thread should have panicked");
1781
1782        let lock = lock();
1783        reset_to_defaults();
1784
1785        let _guard = lock.override_key(CODEC_MAX_FRAME_LENGTH, 9999);
1786        assert_eq!(get(CODEC_MAX_FRAME_LENGTH), 9999);
1787    }
1788}