wirevalue/
lib.rs

1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
9//! Wirevalue provides an erased serialization format. [`Any`] is a type-erased
10//! envelope containing a serialized value identified by a [`typeuri::Named`].
11//!
12//! Wirevalues also provide encoding polymorphism, allowing the same representation
13//! to carry multiple serialization formats, and to transcode between them for
14//! types that are registered through [`register_type!`].
15
16use std::any::TypeId;
17use std::collections::HashMap;
18use std::fmt;
19use std::io::Cursor;
20use std::sync::LazyLock;
21
22use enum_as_inner::EnumAsInner;
23use hyperactor_config::AttrValue;
24use serde::Deserialize;
25use serde::Serialize;
26use serde::de::DeserializeOwned;
27pub use typeuri::Named;
28pub use typeuri::intern_typename;
29
30pub mod config;
31
32#[doc(hidden)]
33/// Dump trait for Named types that are also serializable/deserializable.
34/// This is a utility used by [`Any::dump`], and is not intended
35/// for direct use.
36pub trait NamedDumpable: Named + Serialize + for<'de> Deserialize<'de> {
37    /// Dump the data in Any to a JSON value.
38    fn dump(data: Any) -> Result<serde_json::Value, anyhow::Error>;
39}
40
41impl<T: Named + Serialize + for<'de> Deserialize<'de>> NamedDumpable for T {
42    fn dump(data: Any) -> Result<serde_json::Value, anyhow::Error> {
43        let value = data.deserialized::<Self>()?;
44        Ok(serde_json::to_value(value)?)
45    }
46}
47
48#[doc(hidden)]
49#[derive(Debug)]
50pub struct TypeInfo {
51    /// Named::typename()
52    pub typename: fn() -> &'static str,
53    /// Named::typehash()
54    pub typehash: fn() -> u64,
55    /// Named::typeid()
56    pub typeid: fn() -> TypeId,
57    /// Named::typehash()
58    pub port: fn() -> u64,
59    /// A function that can transcode a serialized value to JSON.
60    pub dump: Option<fn(Any) -> Result<serde_json::Value, anyhow::Error>>,
61    /// Return the arm for this type, if available.
62    pub arm_unchecked: unsafe fn(*const ()) -> Option<&'static str>,
63}
64
65#[allow(dead_code)]
66impl TypeInfo {
67    /// Get the typeinfo for the provided type hash.
68    pub fn get(typehash: u64) -> Option<&'static TypeInfo> {
69        TYPE_INFO.get(&typehash).map(|v| &**v)
70    }
71
72    /// Get the typeinfo for the provided type id.
73    pub fn get_by_typeid(typeid: TypeId) -> Option<&'static TypeInfo> {
74        TYPE_INFO_BY_TYPE_ID.get(&typeid).map(|v| &**v)
75    }
76
77    /// Get the typeinfo for the provided type.
78    pub fn of<T: ?Sized + 'static>() -> Option<&'static TypeInfo> {
79        Self::get_by_typeid(TypeId::of::<T>())
80    }
81
82    /// Get the typename for this type.
83    pub fn typename(&self) -> &'static str {
84        (self.typename)()
85    }
86
87    /// Get the typehash for this type.
88    pub fn typehash(&self) -> u64 {
89        (self.typehash)()
90    }
91
92    /// Get the typeid for this type.
93    pub fn typeid(&self) -> TypeId {
94        (self.typeid)()
95    }
96
97    /// Get the port for this type.
98    pub fn port(&self) -> u64 {
99        (self.port)()
100    }
101
102    /// Dump the serialized data to a JSON value.
103    pub fn dump(&self, data: Any) -> Result<serde_json::Value, anyhow::Error> {
104        if let Some(dump) = self.dump {
105            (dump)(data)
106        } else {
107            anyhow::bail!("binary does not have dumper for {}", self.typehash())
108        }
109    }
110
111    /// Get the arm name for an enum value.
112    ///
113    /// # Safety
114    /// The caller must ensure the value pointer is valid for this type.
115    pub unsafe fn arm_unchecked(&self, value: *const ()) -> Option<&'static str> {
116        // SAFETY: This isn't safe, we're passing it on.
117        unsafe { (self.arm_unchecked)(value) }
118    }
119}
120
121inventory::collect!(TypeInfo);
122
123/// Type infos for all types that have been linked into the binary, keyed by typehash.
124static TYPE_INFO: LazyLock<HashMap<u64, &'static TypeInfo>> = LazyLock::new(|| {
125    inventory::iter::<TypeInfo>()
126        .map(|entry| (entry.typehash(), entry))
127        .collect()
128});
129
130/// Type infos for all types that have been linked into the binary, keyed by typeid.
131static TYPE_INFO_BY_TYPE_ID: LazyLock<HashMap<std::any::TypeId, &'static TypeInfo>> =
132    LazyLock::new(|| {
133        TYPE_INFO
134            .values()
135            .map(|info| (info.typeid(), &**info))
136            .collect()
137    });
138
/// Register a concrete type so that its [`TypeInfo`] can be found by typehash
/// or type id at runtime.
///
/// Registration lets a binary introspect serialized payloads under type
/// erasure, for example to dump them as JSON for diagnostics.
///
/// The type passed in must be concrete, and must implement
/// [`typeuri::Named`] as well as serde's `Serialize` and `Deserialize`
/// (the latter two are needed for the JSON dumper).
#[macro_export]
macro_rules! register_type {
    ($ty:ty) => {
        $crate::submit! {
            $crate::TypeInfo {
                typename: <$ty as $crate::Named>::typename,
                typehash: <$ty as $crate::Named>::typehash,
                typeid: <$ty as $crate::Named>::typeid,
                port: <$ty as $crate::Named>::port,
                dump: Some(<$ty as $crate::NamedDumpable>::dump),
                arm_unchecked: <$ty as $crate::Named>::arm_unchecked,
            }
        }
    };
}
159
160// Re-export inventory::submit for the register_type! macro
161#[doc(hidden)]
162pub use inventory::submit;
163
164/// An enumeration containing the supported encodings of serialized values.
165#[derive(
166    Debug,
167    Clone,
168    Copy,
169    Serialize,
170    Deserialize,
171    PartialEq,
172    Eq,
173    AttrValue,
174    typeuri::Named,
175    strum::EnumIter,
176    strum::Display,
177    strum::EnumString
178)]
179pub enum Encoding {
180    /// Serde bincode encoding.
181    #[strum(to_string = "bincode")]
182    Bincode,
183    /// Serde JSON encoding.
184    #[strum(to_string = "serde_json")]
185    Json,
186    /// Serde multipart encoding.
187    #[strum(to_string = "serde_multipart")]
188    Multipart,
189}
190
191/// The encoding used for a serialized value.
192#[derive(Clone, Serialize, Deserialize, PartialEq, EnumAsInner)]
193enum Encoded {
194    Bincode(bytes::Bytes),
195    Json(bytes::Bytes),
196    Multipart(serde_multipart::Message),
197}
198
199impl Encoded {
200    /// The length of the underlying serialized message
201    pub fn len(&self) -> usize {
202        match &self {
203            Encoded::Bincode(data) => data.len(),
204            Encoded::Json(data) => data.len(),
205            Encoded::Multipart(message) => message.len(),
206        }
207    }
208
209    /// Is the message empty. This should always return false.
210    pub fn is_empty(&self) -> bool {
211        match &self {
212            Encoded::Bincode(data) => data.is_empty(),
213            Encoded::Json(data) => data.is_empty(),
214            Encoded::Multipart(message) => message.is_empty(),
215        }
216    }
217
218    /// Returns the encoding of this serialized value.
219    pub fn encoding(&self) -> Encoding {
220        match &self {
221            Encoded::Bincode(_) => Encoding::Bincode,
222            Encoded::Json(_) => Encoding::Json,
223            Encoded::Multipart(_) => Encoding::Multipart,
224        }
225    }
226
227    /// Computes the 32bit crc of the encoded data
228    pub fn crc(&self) -> u32 {
229        match &self {
230            Encoded::Bincode(data) => crc32fast::hash(data),
231            Encoded::Json(data) => crc32fast::hash(data),
232            Encoded::Multipart(message) => {
233                let mut hasher = crc32fast::Hasher::new();
234                for fragment in message.body().iter() {
235                    hasher.update(fragment);
236                }
237                for part in message.parts() {
238                    for fragment in part.iter() {
239                        hasher.update(fragment);
240                    }
241                }
242                hasher.finalize()
243            }
244        }
245    }
246}
247
248impl std::fmt::Debug for Encoded {
249    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
250        match self {
251            Encoded::Bincode(data) => write!(f, "Encoded::Bincode({})", HexFmt(data)),
252            Encoded::Json(data) => write!(f, "Encoded::Json({})", HexFmt(data)),
253            Encoded::Multipart(message) => {
254                write!(
255                    f,
256                    "Encoded::Multipart(body={}",
257                    HexFmt(&message.body().to_bytes())
258                )?;
259                for (index, part) in message.parts().iter().enumerate() {
260                    write!(f, ", part[{}]={}", index, HexFmt(&part.to_bytes()))?;
261                }
262                write!(f, ")")
263            }
264        }
265    }
266}
267
268/// The type of error returned by operations on [`Any`].
269#[derive(Debug, thiserror::Error)]
270pub enum Error {
271    /// Errors returned from serde bincode.
272    #[error(transparent)]
273    Bincode(#[from] bincode::Error),
274
275    /// Errors returned from serde JSON.
276    #[error(transparent)]
277    Json(#[from] serde_json::Error),
278
279    /// The encoding was not recognized.
280    #[error("unknown encoding: {0}")]
281    InvalidEncoding(String),
282}
283
284/// Represents a serialized value, wrapping the underlying serialization
285/// and deserialization details, while ensuring that we pass correctly-serialized
286/// message throughout the system.
287///
288/// Currently, Any passes through to bincode, but in the future we may include
289/// content-encoding information to allow for other codecs as well.
290#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
291pub struct Any {
292    /// The encoded data
293    encoded: Encoded,
294    /// The typehash of the serialized value. This is used to provide
295    /// typed introspection of the value.
296    typehash: u64,
297}
298
299impl std::fmt::Display for Any {
300    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
301        match self.dump() {
302            Ok(value) => {
303                // unwrap okay, self.dump() would return Err otherwise.
304                let typename = self.typename().unwrap();
305                // take the basename of the type (e.g. "foo::bar::baz" -> "baz")
306                let basename = typename.split("::").last().unwrap_or(typename);
307                write!(f, "{}{}", basename, JsonFmt(&value))
308            }
309            Err(_) => write!(f, "{:?}", self.encoded),
310        }
311    }
312}
313
314impl Any {
315    /// Construct a new serialized value by serializing the provided T-typed value.
316    /// Serialize uses the default encoding defined by the configuration key
317    /// [`config::DEFAULT_ENCODING`] in the global configuration; use [`serialize_with_encoding`]
318    /// to serialize values with a specific encoding.
319    pub fn serialize<T: Serialize + Named>(value: &T) -> Result<Self, Error> {
320        Self::serialize_with_encoding(
321            hyperactor_config::global::get(config::DEFAULT_ENCODING),
322            value,
323        )
324    }
325
326    /// Serialize U-typed value as a T-typed value. This should be used with care
327    /// (typically only in testing), as the value's representation may be illegally
328    /// coerced.
329    pub fn serialize_as<T: Named, U: Serialize>(value: &U) -> Result<Self, Error> {
330        Self::serialize_with_encoding_as::<T, U>(
331            hyperactor_config::global::get(config::DEFAULT_ENCODING),
332            value,
333        )
334    }
335
336    /// Serialize the value with the using the provided encoding.
337    pub fn serialize_with_encoding<T: Serialize + Named>(
338        encoding: Encoding,
339        value: &T,
340    ) -> Result<Self, Error> {
341        Self::serialize_with_encoding_as::<T, T>(encoding, value)
342    }
343
344    /// Serialize U-typed value as a T-typed value. This should be used with care
345    /// (typically only in testing), as the value's representation may be illegally
346    /// coerced.
347    pub fn serialize_with_encoding_as<T: Named, U: Serialize>(
348        encoding: Encoding,
349        value: &U,
350    ) -> Result<Self, Error> {
351        Ok(Self {
352            encoded: match encoding {
353                Encoding::Bincode => Encoded::Bincode(bincode::serialize(value)?.into()),
354                Encoding::Json => Encoded::Json(serde_json::to_vec(value)?.into()),
355                Encoding::Multipart => {
356                    Encoded::Multipart(serde_multipart::serialize_bincode(value)?)
357                }
358            },
359            typehash: T::typehash(),
360        })
361    }
362
363    /// Deserialize a value to the provided type T.
364    pub fn deserialized<T: DeserializeOwned + Named>(&self) -> Result<T, anyhow::Error> {
365        anyhow::ensure!(
366            self.is::<T>(),
367            "attempted to serialize {}-typed serialized into type {}",
368            self.typename().unwrap_or("unknown"),
369            T::typename()
370        );
371        self.deserialized_unchecked()
372    }
373
374    /// Deserialize a value to the provided type T, without checking for type conformance.
375    /// This should be used carefully, only when you know that the dynamic type check is
376    /// not needed.
377    pub fn deserialized_unchecked<T: DeserializeOwned>(&self) -> Result<T, anyhow::Error> {
378        match &self.encoded {
379            Encoded::Bincode(data) => bincode::deserialize(data).map_err(anyhow::Error::from),
380            Encoded::Json(data) => serde_json::from_slice(data).map_err(anyhow::Error::from),
381            Encoded::Multipart(message) => {
382                serde_multipart::deserialize_bincode(message.clone()).map_err(anyhow::Error::from)
383            }
384        }
385    }
386
387    /// Transcode the serialized value to JSON. This operation will succeed if the type hash
388    /// is embedded in the value, and the corresponding type is available in this binary.
389    pub fn transcode_to_json(self) -> Result<Self, Self> {
390        match self.encoded {
391            Encoded::Bincode(_) | Encoded::Multipart(_) => {
392                let json_value = match self.dump() {
393                    Ok(json_value) => json_value,
394                    Err(_) => return Err(self),
395                };
396                let json_data = match serde_json::to_vec(&json_value) {
397                    Ok(json_data) => json_data,
398                    Err(_) => return Err(self),
399                };
400                Ok(Self {
401                    encoded: Encoded::Json(json_data.into()),
402                    typehash: self.typehash,
403                })
404            }
405            Encoded::Json(_) => Ok(self),
406        }
407    }
408
409    /// Dump the Any message into a JSON value. This will succeed if: 1) the typehash is embedded
410    /// in the serialized value; 2) the named type is linked into the binary.
411    pub fn dump(&self) -> Result<serde_json::Value, anyhow::Error> {
412        match &self.encoded {
413            Encoded::Bincode(_) | Encoded::Multipart(_) => {
414                let Some(typeinfo) = TYPE_INFO.get(&self.typehash) else {
415                    anyhow::bail!("binary does not have typeinfo for {}", self.typehash);
416                };
417                typeinfo.dump(self.clone())
418            }
419            Encoded::Json(data) => serde_json::from_slice(data).map_err(anyhow::Error::from),
420        }
421    }
422
423    /// The encoding used by this serialized value.
424    pub fn encoding(&self) -> Encoding {
425        self.encoded.encoding()
426    }
427
428    /// The typehash of the serialized value.
429    pub fn typehash(&self) -> u64 {
430        self.typehash
431    }
432
433    /// The typename of the serialized value, if available.
434    pub fn typename(&self) -> Option<&'static str> {
435        TYPE_INFO
436            .get(&self.typehash)
437            .map(|typeinfo| typeinfo.typename())
438    }
439
440    /// Deserialize a prefix of the value. This is currently only supported
441    /// for bincode-serialized values.
442    // TODO: we should support this by formalizing the notion of a 'prefix'
443    // serialization, and generalize it to other codecs as well.
444    pub fn prefix<T: DeserializeOwned>(&self) -> Result<T, anyhow::Error> {
445        match &self.encoded {
446            Encoded::Bincode(data) => bincode::deserialize(data).map_err(anyhow::Error::from),
447            _ => anyhow::bail!("only bincode supports prefix emplacement"),
448        }
449    }
450
451    /// Emplace a new prefix to this value. This is currently only supported
452    /// for bincode-serialized values.
453    pub fn emplace_prefix<T: Serialize + DeserializeOwned>(
454        &mut self,
455        prefix: T,
456    ) -> Result<(), anyhow::Error> {
457        let data = match &self.encoded {
458            Encoded::Bincode(data) => data,
459            _ => anyhow::bail!("only bincode supports prefix emplacement"),
460        };
461
462        // This is a bit ugly, but: we first deserialize out the old prefix,
463        // then serialize the new prefix, then splice the two together.
464        // This is safe because we know that the prefix is the first thing
465        // in the serialized value, and that the serialization format is stable.
466        let mut cursor = Cursor::new(data.clone());
467        let _prefix: T = bincode::deserialize_from(&mut cursor).unwrap();
468        let position = cursor.position() as usize;
469        let suffix = &cursor.into_inner()[position..];
470        let mut data = bincode::serialize(&prefix)?;
471        data.extend_from_slice(suffix);
472        self.encoded = Encoded::Bincode(data.into());
473
474        Ok(())
475    }
476
477    /// The length of the underlying serialized message
478    pub fn len(&self) -> usize {
479        self.encoded.len()
480    }
481
482    /// Is the message empty. This should always return false.
483    pub fn is_empty(&self) -> bool {
484        self.encoded.is_empty()
485    }
486
487    /// Returns the 32bit crc of the serialized data
488    pub fn crc(&self) -> u32 {
489        self.encoded.crc()
490    }
491
492    /// Returns whether this value contains a serialized M-typed value. Returns None
493    /// when type information is unavailable.
494    pub fn is<M: Named>(&self) -> bool {
495        self.typehash == M::typehash()
496    }
497}
498
499const MAX_BYTE_PREVIEW_LENGTH: usize = 8;
500
501fn display_bytes_as_hash(f: &mut impl std::fmt::Write, bytes: &[u8]) -> std::fmt::Result {
502    let hash = crc32fast::hash(bytes);
503    write!(f, "CRC:{:x}", hash)?;
504    // Implementing in this way lets us print without allocating a new intermediate string.
505    for &byte in bytes.iter().take(MAX_BYTE_PREVIEW_LENGTH) {
506        write!(f, " {:x}", byte)?;
507    }
508    if bytes.len() > MAX_BYTE_PREVIEW_LENGTH {
509        write!(f, " [...{} bytes]", bytes.len() - MAX_BYTE_PREVIEW_LENGTH)?;
510    }
511    Ok(())
512}
513
514/// Formats a binary slice as hex when its display function is called.
515pub struct HexFmt<'a>(pub &'a [u8]);
516
517impl std::fmt::Display for HexFmt<'_> {
518    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
519        // calculate a 2 byte checksum to prepend to the message
520        display_bytes_as_hash(f, self.0)
521    }
522}
523
524/// Formats a JSON value for display, printing all keys but
525/// truncating and displaying a hash if the content is too long.
526pub struct JsonFmt<'a>(pub &'a serde_json::Value);
527
528const MAX_JSON_VALUE_DISPLAY_LENGTH: usize = 8;
529
530impl std::fmt::Display for JsonFmt<'_> {
531    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
532        /// Truncate the input string to MAX_JSON_VALUE_DISPLAY_LENGTH and append
533        /// the truncated hash of the full value for easy comparison.
534        fn truncate_and_hash(value_str: &str) -> String {
535            let truncate_at = MAX_JSON_VALUE_DISPLAY_LENGTH.min(value_str.len());
536
537            // Respect UTF-8 boundaries (multi-byte chars like emojis can be up to 4 bytes)
538            let mut safe_truncate_at = truncate_at;
539            while safe_truncate_at > 0 && !value_str.is_char_boundary(safe_truncate_at) {
540                safe_truncate_at -= 1;
541            }
542
543            let truncated_str = &value_str[..safe_truncate_at];
544            let mut result = truncated_str.to_string();
545            result.push_str(&format!("[...{} chars] ", value_str.len()));
546            display_bytes_as_hash(&mut result, value_str.as_bytes()).unwrap();
547            result
548        }
549
550        /// Recursively truncate a serde_json::Value object.
551        fn truncate_json_values(value: &serde_json::Value) -> serde_json::Value {
552            match value {
553                serde_json::Value::String(s) => {
554                    if s.len() > MAX_JSON_VALUE_DISPLAY_LENGTH {
555                        serde_json::Value::String(truncate_and_hash(s))
556                    } else {
557                        value.clone()
558                    }
559                }
560                serde_json::Value::Array(arr) => {
561                    let array_str = serde_json::to_string(arr).unwrap();
562                    if array_str.len() > MAX_JSON_VALUE_DISPLAY_LENGTH {
563                        serde_json::Value::String(truncate_and_hash(&array_str))
564                    } else {
565                        value.clone()
566                    }
567                }
568                serde_json::Value::Object(obj) => {
569                    let truncated_obj: serde_json::Map<_, _> = obj
570                        .iter()
571                        .map(|(k, v)| (k.clone(), truncate_json_values(v)))
572                        .collect();
573                    serde_json::Value::Object(truncated_obj)
574                }
575                _ => value.clone(),
576            }
577        }
578
579        let truncated = truncate_json_values(self.0);
580        write!(f, "{}", truncated)
581    }
582}
583
#[cfg(test)]
mod tests {
    use serde::Deserialize;
    use serde::Serialize;
    use serde_multipart::Part;
    use strum::IntoEnumIterator;
    use typeuri::Named;

    use super::*;

    #[derive(typeuri::Named, Serialize, Deserialize)]
    struct TestStruct;

    /// Type names of std types are bare; derived names are module-qualified.
    #[test]
    fn test_names() {
        assert_eq!(String::typename(), "String");
        assert_eq!(Option::<String>::typename(), "Option<String>");
        assert_eq!(Vec::<String>::typename(), "Vec<String>");
        assert_eq!(Vec::<Vec::<String>>::typename(), "Vec<Vec<String>>");
        assert_eq!(
            Vec::<Vec::<Vec::<String>>>::typename(),
            "Vec<Vec<Vec<String>>>"
        );
        assert_eq!(
            <(u64, String, Option::<isize>)>::typename(),
            "(u64, String, Option<isize>)"
        );
        assert_eq!(TestStruct::typename(), "wirevalue::tests::TestStruct");
        assert_eq!(
            Vec::<TestStruct>::typename(),
            "Vec<wirevalue::tests::TestStruct>"
        );
    }

    /// Pins the typehash and port of `String`, and checks that nesting depth
    /// changes the hash.
    #[test]
    fn test_ports() {
        assert_eq!(String::typehash(), 3947244799002047352u64);
        assert_eq!(String::port(), 13170616835856823160u64);
        assert_ne!(
            Vec::<Vec::<Vec::<String>>>::typehash(),
            Vec::<Vec::<Vec::<Vec::<String>>>>::typehash(),
        );
    }

    #[derive(typeuri::Named, Serialize, Deserialize, PartialEq, Eq, Debug)]
    struct TestDumpStruct {
        a: String,
        b: u64,
        c: Option<i32>,
        d: Option<Part>,
    }
    crate::register_type!(TestDumpStruct);

    /// A registered type can be transcoded to JSON, dumped, and displayed.
    #[test]
    fn test_dump_struct() {
        // This test depends on the global default encoding. Hold the config
        // lock so that a test overriding DEFAULT_ENCODING (see
        // `test_emplace_prefix`) cannot run concurrently and change it.
        // NOTE(review): assumes the lock serializes all tests that take it.
        let _config = hyperactor_config::global::lock();

        let data = TestDumpStruct {
            a: "hello".to_string(),
            b: 1234,
            c: Some(5678),
            d: None,
        };
        let serialized = Any::serialize(&data).unwrap();
        let serialized_json = serialized.clone().transcode_to_json().unwrap();

        assert!(serialized.encoded.is_multipart());
        assert!(serialized_json.encoded.is_json());

        let json_string =
            String::from_utf8(serialized_json.encoded.as_json().unwrap().to_vec()).unwrap();
        // The serialized data for JSON is just the (compact) JSON string.
        assert_eq!(
            json_string,
            "{\"a\":\"hello\",\"b\":1234,\"c\":5678,\"d\":null}"
        );

        for serialized in [serialized, serialized_json] {
            // Note, at this point, serialized has no knowledge other than its embedded typehash.

            assert_eq!(
                serialized.typename(),
                Some("wirevalue::tests::TestDumpStruct")
            );

            let json = serialized.dump().unwrap();
            assert_eq!(
                json,
                serde_json::json!({
                    "a": "hello",
                    "b": 1234,
                    "c": 5678,
                    "d": null,
                })
            );

            assert_eq!(
                format!("{}", serialized),
                "TestDumpStruct{\"a\":\"hello\",\"b\":1234,\"c\":5678,\"d\":null}",
            );
        }
    }

    /// Replacing the leading field of a bincode payload leaves the rest intact.
    #[test]
    fn test_emplace_prefix() {
        let config = hyperactor_config::global::lock();
        let _guard = config.override_key(config::DEFAULT_ENCODING, Encoding::Bincode);
        let data = TestDumpStruct {
            a: "hello".to_string(),
            b: 1234,
            c: Some(5678),
            d: None,
        };

        let mut ser = Any::serialize(&data).unwrap();
        assert_eq!(ser.prefix::<String>().unwrap(), "hello".to_string());

        ser.emplace_prefix("hello, world, 123!".to_string())
            .unwrap();

        assert_eq!(
            ser.deserialized::<TestDumpStruct>().unwrap(),
            TestDumpStruct {
                a: "hello, world, 123!".to_string(),
                b: 1234,
                c: Some(5678),
                d: None,
            }
        );
    }

    /// `arm()` reports the variant name for every variant shape.
    #[test]
    fn test_arms() {
        #[derive(typeuri::Named, Serialize, Deserialize)]
        enum TestArm {
            #[allow(dead_code)]
            A(u32),
            B,
            C(),
            D {
                #[allow(dead_code)]
                a: u32,
                #[allow(dead_code)]
                b: String,
            },
        }

        assert_eq!(TestArm::A(1234).arm(), Some("A"));
        assert_eq!(TestArm::B.arm(), Some("B"));
        assert_eq!(TestArm::C().arm(), Some("C"));
        assert_eq!(
            TestArm::D {
                a: 1234,
                b: "hello".to_string()
            }
            .arm(),
            Some("D")
        );
    }

    #[test]
    fn display_hex() {
        assert_eq!(
            format!("{}", HexFmt("hello world".as_bytes())),
            "CRC:d4a1185 68 65 6c 6c 6f 20 77 6f [...3 bytes]"
        );
        assert_eq!(format!("{}", HexFmt("".as_bytes())), "CRC:0");
        assert_eq!(
            format!("{}", HexFmt("a very long string that is long".as_bytes())),
            "CRC:c7e24f62 61 20 76 65 72 79 20 6c [...23 bytes]"
        );
    }

    #[test]
    fn test_json_fmt() {
        let json_value = serde_json::json!({
            "name": "test",
            "number": 42,
            "nested": {
                "key": "value"
            }
        });
        // JSON values with short values should print normally
        assert_eq!(
            format!("{}", JsonFmt(&json_value)),
            "{\"name\":\"test\",\"nested\":{\"key\":\"value\"},\"number\":42}",
        );

        let empty_json = serde_json::json!({});
        assert_eq!(format!("{}", JsonFmt(&empty_json)), "{}");

        let simple_array = serde_json::json!([1, 2, 3]);
        assert_eq!(format!("{}", JsonFmt(&simple_array)), "[1,2,3]");

        // JSON values with very long strings should be truncated
        let long_string_json = serde_json::json!({
            "long_string": "a".repeat(MAX_JSON_VALUE_DISPLAY_LENGTH * 5)
        });
        assert_eq!(
            format!("{}", JsonFmt(&long_string_json)),
            "{\"long_string\":\"aaaaaaaa[...40 chars] CRC:c95b8a25 61 61 61 61 61 61 61 61 [...32 bytes]\"}"
        );

        // JSON values with very long arrays should be truncated
        let long_array_json =
            serde_json::json!((1..=(MAX_JSON_VALUE_DISPLAY_LENGTH + 4)).collect::<Vec<_>>());
        assert_eq!(
            format!("{}", JsonFmt(&long_array_json)),
            "\"[1,2,3,4[...28 chars] CRC:e5c881af 5b 31 2c 32 2c 33 2c 34 [...20 bytes]\""
        );

        // Test for truncation within nested blocks
        let nested_json = serde_json::json!({
            "simple_number": 42,
            "simple_bool": true,
            "outer": {
                "long_string": "a".repeat(MAX_JSON_VALUE_DISPLAY_LENGTH + 10),
                "long_array": (1..=(MAX_JSON_VALUE_DISPLAY_LENGTH + 4)).collect::<Vec<_>>(),
                "inner": {
                    "simple_value": "short",
                }
            }
        });
        assert_eq!(
            format!("{}", JsonFmt(&nested_json)),
            "{\"outer\":{\"inner\":{\"simple_value\":\"short\"},\"long_array\":\"[1,2,3,4[...28 chars] CRC:e5c881af 5b 31 2c 32 2c 33 2c 34 [...20 bytes]\",\"long_string\":\"aaaaaaaa[...18 chars] CRC:b8ac0e31 61 61 61 61 61 61 61 61 [...10 bytes]\"},\"simple_bool\":true,\"simple_number\":42}",
        );
    }

    #[test]
    fn test_json_fmt_utf8_truncation() {
        // Test that UTF-8 character boundaries are respected during truncation
        // Create a string with multi-byte characters that would be truncated

        // String with 7 ASCII chars + 4-byte emoji (total 11 bytes, truncates at 8)
        let utf8_json = serde_json::json!({
            "emoji": "1234567🦀"  // 7 + 4 = 11 bytes, MAX is 8
        });

        // Should truncate at byte 7 (before the emoji) to respect UTF-8 boundary
        let result = format!("{}", JsonFmt(&utf8_json));

        // Verify it doesn't panic and produces valid output
        assert!(result.contains("1234567"));
        assert!(!result.contains("🦀")); // Emoji should be truncated away

        // Test with all multi-byte characters
        let all_multibyte = serde_json::json!({
            "chinese": "你好世界"  // Each char is 3 bytes = 12 bytes total
        });
        let result = format!("{}", JsonFmt(&all_multibyte));
        assert!(!result.is_empty());
    }

    /// Every encoding round-trips a value, including one carrying a `Part`.
    #[test]
    fn test_encodings() {
        let value = TestDumpStruct {
            a: "hello, world".to_string(),
            b: 123,
            c: Some(321),
            d: Some(Part::from("hello, world, again")),
        };
        for enc in Encoding::iter() {
            let ser = Any::serialize_with_encoding(enc, &value).unwrap();
            assert_eq!(ser.encoding(), enc);
            assert_eq!(ser.deserialized::<TestDumpStruct>().unwrap(), value);
        }
    }
}