wirevalue/
lib.rs

1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
9//! Wirevalue provides an erased serialization format. [`Any`] is a type-erased
10//! envelope containing a serialized value identified by a [`typeuri::Named`].
11//!
12//! Wirevalues also provide encoding polymorphism, allowing the same representation
13//! to carry multiple serialization formats, and to transcode between them for
14//! types that are registered through [`register_type!`].
15
16use std::any::TypeId;
17use std::collections::HashMap;
18use std::fmt;
19use std::io::Cursor;
20use std::sync::LazyLock;
21
22use enum_as_inner::EnumAsInner;
23use hyperactor_config::AttrValue;
24use serde::Deserialize;
25use serde::Serialize;
26use serde::de::DeserializeOwned;
27pub use typeuri::Named;
28pub use typeuri::intern_typename;
29
30pub mod config;
31
32/// Typehash value indicating a broken (unknown type, no value) Any.
33pub const BROKEN_TYPEHASH: u64 = 0;
34
#[doc(hidden)]
/// Dump trait for [`Named`] types that are also serializable and deserializable.
///
/// This is a utility used by [`Any::dump`]: [`register_type!`] stores
/// `NamedDumpable::dump` as the `dump` function pointer of a [`TypeInfo`].
/// It is not intended for direct use.
pub trait NamedDumpable: Named + Serialize + for<'de> Deserialize<'de> {
    /// Dump the data in `Any` to a JSON value.
    ///
    /// Returns an error if the payload cannot be decoded as `Self` or the
    /// decoded value cannot be converted to JSON.
    fn dump(data: Any) -> Result<serde_json::Value>;
}
43
44impl<T: Named + Serialize + for<'de> Deserialize<'de>> NamedDumpable for T {
45    fn dump(data: Any) -> Result<serde_json::Value> {
46        let value = data.deserialized::<Self>()?;
47        Ok(serde_json::to_value(value)?)
48    }
49}
50
/// Runtime type information for a registered [`Named`] type.
///
/// Entries are created by [`register_type!`]; each field is a function
/// pointer to the corresponding [`Named`] (or [`NamedDumpable`]) function of
/// the registered type.
#[doc(hidden)]
#[derive(Debug)]
pub struct TypeInfo {
    /// Named::typename()
    pub typename: fn() -> &'static str,
    /// Named::typehash()
    pub typehash: fn() -> u64,
    /// Named::typeid()
    pub typeid: fn() -> TypeId,
    /// Named::port()
    pub port: fn() -> u64,
    /// A function that can transcode a serialized value to JSON.
    pub dump: Option<fn(Any) -> Result<serde_json::Value>>,
    /// Return the arm for this type, if available.
    pub arm_unchecked: unsafe fn(*const ()) -> Option<&'static str>,
}
67
68#[allow(dead_code)]
69impl TypeInfo {
70    /// Get the typeinfo for the provided type hash.
71    pub fn get(typehash: u64) -> Option<&'static TypeInfo> {
72        TYPE_INFO.get(&typehash).map(|v| &**v)
73    }
74
75    /// Get the typeinfo for the provided type id.
76    pub fn get_by_typeid(typeid: TypeId) -> Option<&'static TypeInfo> {
77        TYPE_INFO_BY_TYPE_ID.get(&typeid).map(|v| &**v)
78    }
79
80    /// Get the typeinfo for the provided type.
81    pub fn of<T: ?Sized + 'static>() -> Option<&'static TypeInfo> {
82        Self::get_by_typeid(TypeId::of::<T>())
83    }
84
85    /// Get the typename for this type.
86    pub fn typename(&self) -> &'static str {
87        (self.typename)()
88    }
89
90    /// Get the typehash for this type.
91    pub fn typehash(&self) -> u64 {
92        (self.typehash)()
93    }
94
95    /// Get the typeid for this type.
96    pub fn typeid(&self) -> TypeId {
97        (self.typeid)()
98    }
99
100    /// Get the port for this type.
101    pub fn port(&self) -> u64 {
102        (self.port)()
103    }
104
105    /// Dump the serialized data to a JSON value.
106    pub fn dump(&self, data: Any) -> Result<serde_json::Value> {
107        if let Some(dump) = self.dump {
108            (dump)(data)
109        } else {
110            Err(Error::MissingDumper(self.typehash()))
111        }
112    }
113
114    /// Get the arm name for an enum value.
115    ///
116    /// # Safety
117    /// The caller must ensure the value pointer is valid for this type.
118    pub unsafe fn arm_unchecked(&self, value: *const ()) -> Option<&'static str> {
119        // SAFETY: This isn't safe, we're passing it on.
120        unsafe { (self.arm_unchecked)(value) }
121    }
122}
123
// Make `TypeInfo` an `inventory`-collected type: entries are submitted by
// `register_type!` and iterated when `TYPE_INFO` is first built.
inventory::collect!(TypeInfo);
125
126/// Type infos for all types that have been linked into the binary, keyed by typehash.
127static TYPE_INFO: LazyLock<HashMap<u64, &'static TypeInfo>> = LazyLock::new(|| {
128    inventory::iter::<TypeInfo>()
129        .map(|entry| (entry.typehash(), entry))
130        .collect()
131});
132
133/// Type infos for all types that have been linked into the binary, keyed by typeid.
134static TYPE_INFO_BY_TYPE_ID: LazyLock<HashMap<std::any::TypeId, &'static TypeInfo>> =
135    LazyLock::new(|| {
136        TYPE_INFO
137            .values()
138            .map(|info| (info.typeid(), &**info))
139            .collect()
140    });
141
/// Register a (concrete) type so that it may be looked up by typehash or type id.
/// Registration lets a binary introspect serialized payloads under type erasure:
/// [`Any::typename`] and, for non-JSON encodings, [`Any::dump`] only work for
/// registered types.
///
/// The provided type must implement [`typeuri::Named`], and must be concrete.
/// Because the macro also stores `NamedDumpable::dump`, the type must be
/// serializable and deserializable as well.
#[macro_export]
macro_rules! register_type {
    ($type:ty) => {
        $crate::submit! {
            $crate::TypeInfo {
                typename: <$type as $crate::Named>::typename,
                typehash: <$type as $crate::Named>::typehash,
                typeid: <$type as $crate::Named>::typeid,
                port: <$type as $crate::Named>::port,
                dump: Some(<$type as $crate::NamedDumpable>::dump),
                arm_unchecked: <$type as $crate::Named>::arm_unchecked,
            }
        }
    };
}
162
163// Re-export inventory::submit for the register_type! macro
164#[doc(hidden)]
165pub use inventory::submit;
166
/// An enumeration containing the supported encodings of serialized values.
///
/// Each variant carries a `strum` string name (`"bincode"`, `"serde_json"`,
/// `"serde_multipart"`).
#[derive(
    Debug,
    Clone,
    Copy,
    Serialize,
    Deserialize,
    PartialEq,
    Eq,
    AttrValue,
    typeuri::Named,
    strum::EnumIter,
    strum::Display,
    strum::EnumString
)]
pub enum Encoding {
    /// Serde bincode encoding.
    #[strum(to_string = "bincode")]
    Bincode,
    /// Serde JSON encoding.
    #[strum(to_string = "serde_json")]
    Json,
    /// Serde multipart encoding.
    #[strum(to_string = "serde_multipart")]
    Multipart,
}
193
/// A serialized payload together with the encoding that produced it.
/// There is one variant per [`Encoding`].
#[derive(Clone, Serialize, Deserialize, PartialEq, EnumAsInner)]
enum Encoded {
    /// Bytes produced by `bincode`.
    Bincode(bytes::Bytes),
    /// Bytes produced by `serde_json`.
    Json(bytes::Bytes),
    /// A `serde_multipart` message (a body plus zero or more parts).
    Multipart(serde_multipart::Message),
}
201
202impl Encoded {
203    /// The length of the underlying serialized message
204    pub fn len(&self) -> usize {
205        match &self {
206            Encoded::Bincode(data) => data.len(),
207            Encoded::Json(data) => data.len(),
208            Encoded::Multipart(message) => message.len(),
209        }
210    }
211
212    /// Is the message empty. This should always return false.
213    pub fn is_empty(&self) -> bool {
214        match &self {
215            Encoded::Bincode(data) => data.is_empty(),
216            Encoded::Json(data) => data.is_empty(),
217            Encoded::Multipart(message) => message.is_empty(),
218        }
219    }
220
221    /// Returns the encoding of this serialized value.
222    pub fn encoding(&self) -> Encoding {
223        match &self {
224            Encoded::Bincode(_) => Encoding::Bincode,
225            Encoded::Json(_) => Encoding::Json,
226            Encoded::Multipart(_) => Encoding::Multipart,
227        }
228    }
229
230    /// Computes the 32bit crc of the encoded data
231    pub fn crc(&self) -> u32 {
232        match &self {
233            Encoded::Bincode(data) => crc32fast::hash(data),
234            Encoded::Json(data) => crc32fast::hash(data),
235            Encoded::Multipart(message) => {
236                let mut hasher = crc32fast::Hasher::new();
237                for fragment in message.body().iter() {
238                    hasher.update(fragment);
239                }
240                for part in message.parts() {
241                    for fragment in part.iter() {
242                        hasher.update(fragment);
243                    }
244                }
245                hasher.finalize()
246            }
247        }
248    }
249}
250
251impl std::fmt::Debug for Encoded {
252    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
253        match self {
254            Encoded::Bincode(data) => write!(f, "Encoded::Bincode({})", HexFmt(data)),
255            Encoded::Json(data) => write!(f, "Encoded::Json({})", HexFmt(data)),
256            Encoded::Multipart(message) => {
257                write!(
258                    f,
259                    "Encoded::Multipart(body={}",
260                    HexFmt(&message.body().to_bytes())
261                )?;
262                for (index, part) in message.parts().iter().enumerate() {
263                    write!(f, ", part[{}]={}", index, HexFmt(&part.to_bytes()))?;
264                }
265                write!(f, ")")
266            }
267        }
268    }
269}
270
/// The type of error returned by operations on [`Any`].
#[derive(Debug, thiserror::Error)]
pub enum Error {
    /// Errors returned from serde bincode.
    #[error(transparent)]
    Bincode(#[from] bincode::Error),

    /// Errors returned from serde JSON.
    #[error(transparent)]
    Json(#[from] serde_json::Error),

    /// The encoding was not recognized.
    #[error("unknown encoding: {0}")]
    InvalidEncoding(String),

    /// Attempted to deserialize a broken Any value.
    #[error("attempted to deserialize a broken Any value")]
    BrokenAny,

    /// Type mismatch during deserialization.
    #[error("type mismatch: expected {expected}, found {actual}")]
    TypeMismatch {
        /// The typename of the type that was requested.
        expected: &'static str,
        /// The typename of the type the value actually carries.
        /// [`Any::deserialized`] uses `"unknown"` when the typehash is not
        /// registered in this binary.
        actual: String,
    },

    /// Type info not available for the given typehash.
    #[error("binary does not have typeinfo for typehash {0}")]
    MissingTypeInfo(u64),

    /// Dumper not available for the given typehash.
    #[error("binary does not have dumper for typehash {0}")]
    MissingDumper(u64),

    /// Operation requires bincode encoding.
    #[error("only bincode encoding supports prefix operations")]
    PrefixNotSupported,
}
309
/// A specialized Result type for wirevalue operations, with the error type
/// fixed to [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
312
/// Represents a serialized value, wrapping the underlying serialization
/// and deserialization details, while ensuring that we pass correctly-serialized
/// message throughout the system.
///
/// An `Any` carries the encoded payload (in one of the encodings listed in
/// [`Encoding`]) together with the typehash of the value that was serialized.
/// A typehash of [`BROKEN_TYPEHASH`] marks a broken value; see
/// [`Any::new_broken`].
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct Any {
    /// The encoded data
    encoded: Encoded,
    /// The typehash of the serialized value. This is used to provide
    /// typed introspection of the value.
    typehash: u64,
}
327
328impl std::fmt::Display for Any {
329    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
330        match self.dump() {
331            Ok(value) => {
332                // unwrap okay, self.dump() would return Err otherwise.
333                let typename = self.typename().unwrap();
334                // take the basename of the type (e.g. "foo::bar::baz" -> "baz")
335                let basename = typename.split("::").last().unwrap_or(typename);
336                write!(f, "{}{}", basename, JsonFmt(&value))
337            }
338            Err(_) => write!(f, "{:?}", self.encoded),
339        }
340    }
341}
342
343impl Any {
344    /// Construct a new serialized value by serializing the provided T-typed value.
345    /// Serialize uses the default encoding defined by the configuration key
346    /// [`config::DEFAULT_ENCODING`] in the global configuration; use [`serialize_with_encoding`]
347    /// to serialize values with a specific encoding.
348    pub fn serialize<T: Serialize + Named>(value: &T) -> Result<Self> {
349        Self::serialize_with_encoding(
350            hyperactor_config::global::get(config::DEFAULT_ENCODING),
351            value,
352        )
353    }
354
355    /// Serialize U-typed value as a T-typed value. This should be used with care
356    /// (typically only in testing), as the value's representation may be illegally
357    /// coerced.
358    pub fn serialize_as<T: Named, U: Serialize>(value: &U) -> Result<Self> {
359        Self::serialize_with_encoding_as::<T, U>(
360            hyperactor_config::global::get(config::DEFAULT_ENCODING),
361            value,
362        )
363    }
364
365    /// Serialize the value with the using the provided encoding.
366    pub fn serialize_with_encoding<T: Serialize + Named>(
367        encoding: Encoding,
368        value: &T,
369    ) -> Result<Self> {
370        Self::serialize_with_encoding_as::<T, T>(encoding, value)
371    }
372
373    /// Serialize U-typed value as a T-typed value. This should be used with care
374    /// (typically only in testing), as the value's representation may be illegally
375    /// coerced.
376    pub fn serialize_with_encoding_as<T: Named, U: Serialize>(
377        encoding: Encoding,
378        value: &U,
379    ) -> Result<Self> {
380        Ok(Self {
381            encoded: match encoding {
382                Encoding::Bincode => Encoded::Bincode(bincode::serialize(value)?.into()),
383                Encoding::Json => Encoded::Json(serde_json::to_vec(value)?.into()),
384                Encoding::Multipart => {
385                    Encoded::Multipart(serde_multipart::serialize_bincode(value)?)
386                }
387            },
388            typehash: T::typehash(),
389        })
390    }
391
392    /// Create a new broken Any value. A broken value has unknown type and
393    /// no valid data. Attempting to deserialize a broken value will fail.
394    pub fn new_broken() -> Self {
395        Self {
396            encoded: Encoded::Bincode(bytes::Bytes::new()),
397            typehash: BROKEN_TYPEHASH,
398        }
399    }
400
401    /// Returns true if this Any is broken (unknown type, no value).
402    pub fn is_broken(&self) -> bool {
403        self.typehash == BROKEN_TYPEHASH
404    }
405
406    /// Deserialize a value to the provided type T.
407    pub fn deserialized<T: DeserializeOwned + Named>(&self) -> Result<T> {
408        if self.is_broken() {
409            return Err(Error::BrokenAny);
410        }
411        if !self.is::<T>() {
412            return Err(Error::TypeMismatch {
413                expected: T::typename(),
414                actual: self.typename().unwrap_or("unknown").to_string(),
415            });
416        }
417        self.deserialized_unchecked()
418    }
419
420    /// Deserialize a value to the provided type T, without checking for type conformance.
421    /// This should be used carefully, only when you know that the dynamic type check is
422    /// not needed.
423    pub fn deserialized_unchecked<T: DeserializeOwned>(&self) -> Result<T> {
424        match &self.encoded {
425            Encoded::Bincode(data) => Ok(bincode::deserialize(data)?),
426            Encoded::Json(data) => Ok(serde_json::from_slice(data)?),
427            Encoded::Multipart(message) => {
428                Ok(serde_multipart::deserialize_bincode(message.clone())?)
429            }
430        }
431    }
432
433    /// Transcode the serialized value to JSON. This operation will succeed if the type hash
434    /// is embedded in the value, and the corresponding type is available in this binary.
435    pub fn transcode_to_json(self) -> std::result::Result<Self, Self> {
436        match self.encoded {
437            Encoded::Bincode(_) | Encoded::Multipart(_) => {
438                let json_value = match self.dump() {
439                    Ok(json_value) => json_value,
440                    Err(_) => return Err(self),
441                };
442                let json_data = match serde_json::to_vec(&json_value) {
443                    Ok(json_data) => json_data,
444                    Err(_) => return Err(self),
445                };
446                Ok(Self {
447                    encoded: Encoded::Json(json_data.into()),
448                    typehash: self.typehash,
449                })
450            }
451            Encoded::Json(_) => Ok(self),
452        }
453    }
454
455    /// Dump the Any message into a JSON value. This will succeed if: 1) the typehash is embedded
456    /// in the serialized value; 2) the named type is linked into the binary.
457    pub fn dump(&self) -> Result<serde_json::Value> {
458        match &self.encoded {
459            Encoded::Bincode(_) | Encoded::Multipart(_) => {
460                let Some(typeinfo) = TYPE_INFO.get(&self.typehash) else {
461                    return Err(Error::MissingTypeInfo(self.typehash));
462                };
463                typeinfo.dump(self.clone())
464            }
465            Encoded::Json(data) => Ok(serde_json::from_slice(data)?),
466        }
467    }
468
469    /// The encoding used by this serialized value.
470    pub fn encoding(&self) -> Encoding {
471        self.encoded.encoding()
472    }
473
474    /// The typehash of the serialized value.
475    pub fn typehash(&self) -> u64 {
476        self.typehash
477    }
478
479    /// The typename of the serialized value, if available.
480    pub fn typename(&self) -> Option<&'static str> {
481        TYPE_INFO
482            .get(&self.typehash)
483            .map(|typeinfo| typeinfo.typename())
484    }
485
486    /// Deserialize a prefix of the value. This is currently only supported
487    /// for bincode-serialized values.
488    // TODO: we should support this by formalizing the notion of a 'prefix'
489    // serialization, and generalize it to other codecs as well.
490    pub fn prefix<T: DeserializeOwned>(&self) -> Result<T> {
491        match &self.encoded {
492            Encoded::Bincode(data) => Ok(bincode::deserialize(data)?),
493            _ => Err(Error::PrefixNotSupported),
494        }
495    }
496
497    /// Emplace a new prefix to this value. This is currently only supported
498    /// for bincode-serialized values.
499    pub fn emplace_prefix<T: Serialize + DeserializeOwned>(&mut self, prefix: T) -> Result<()> {
500        let data = match &self.encoded {
501            Encoded::Bincode(data) => data,
502            _ => return Err(Error::PrefixNotSupported),
503        };
504
505        // This is a bit ugly, but: we first deserialize out the old prefix,
506        // then serialize the new prefix, then splice the two together.
507        // This is safe because we know that the prefix is the first thing
508        // in the serialized value, and that the serialization format is stable.
509        let mut cursor = Cursor::new(data.clone());
510        let _prefix: T = bincode::deserialize_from(&mut cursor).unwrap();
511        let position = cursor.position() as usize;
512        let suffix = &cursor.into_inner()[position..];
513        let mut data = bincode::serialize(&prefix)?;
514        data.extend_from_slice(suffix);
515        self.encoded = Encoded::Bincode(data.into());
516
517        Ok(())
518    }
519
520    /// The length of the underlying serialized message
521    pub fn len(&self) -> usize {
522        self.encoded.len()
523    }
524
525    /// Is the message empty. This should always return false.
526    pub fn is_empty(&self) -> bool {
527        self.encoded.is_empty()
528    }
529
530    /// Returns the 32bit crc of the serialized data
531    pub fn crc(&self) -> u32 {
532        self.encoded.crc()
533    }
534
535    /// Returns whether this value contains a serialized M-typed value. Returns None
536    /// when type information is unavailable.
537    pub fn is<M: Named>(&self) -> bool {
538        self.typehash == M::typehash()
539    }
540}
541
542const MAX_BYTE_PREVIEW_LENGTH: usize = 8;
543
544fn display_bytes_as_hash(f: &mut impl std::fmt::Write, bytes: &[u8]) -> std::fmt::Result {
545    let hash = crc32fast::hash(bytes);
546    write!(f, "CRC:{:x}", hash)?;
547    // Implementing in this way lets us print without allocating a new intermediate string.
548    for &byte in bytes.iter().take(MAX_BYTE_PREVIEW_LENGTH) {
549        write!(f, " {:x}", byte)?;
550    }
551    if bytes.len() > MAX_BYTE_PREVIEW_LENGTH {
552        write!(f, " [...{} bytes]", bytes.len() - MAX_BYTE_PREVIEW_LENGTH)?;
553    }
554    Ok(())
555}
556
/// Formats a binary slice as a CRC32 checksum followed by a short hex preview
/// of its leading bytes when its display function is called.
pub struct HexFmt<'a>(pub &'a [u8]);

impl std::fmt::Display for HexFmt<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Print the CRC32 of the whole slice, then the byte preview.
        display_bytes_as_hash(f, self.0)
    }
}
566
/// Formats a JSON value for display, printing all keys but
/// truncating and displaying a hash if the content is too long.
pub struct JsonFmt<'a>(pub &'a serde_json::Value);

/// Strings and (JSON-serialized) arrays longer than this many bytes are
/// truncated by [`JsonFmt`].
const MAX_JSON_VALUE_DISPLAY_LENGTH: usize = 8;
572
573impl std::fmt::Display for JsonFmt<'_> {
574    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
575        /// Truncate the input string to MAX_JSON_VALUE_DISPLAY_LENGTH and append
576        /// the truncated hash of the full value for easy comparison.
577        fn truncate_and_hash(value_str: &str) -> String {
578            let truncate_at = MAX_JSON_VALUE_DISPLAY_LENGTH.min(value_str.len());
579
580            // Respect UTF-8 boundaries (multi-byte chars like emojis can be up to 4 bytes)
581            let mut safe_truncate_at = truncate_at;
582            while safe_truncate_at > 0 && !value_str.is_char_boundary(safe_truncate_at) {
583                safe_truncate_at -= 1;
584            }
585
586            let truncated_str = &value_str[..safe_truncate_at];
587            let mut result = truncated_str.to_string();
588            result.push_str(&format!("[...{} chars] ", value_str.len()));
589            display_bytes_as_hash(&mut result, value_str.as_bytes()).unwrap();
590            result
591        }
592
593        /// Recursively truncate a serde_json::Value object.
594        fn truncate_json_values(value: &serde_json::Value) -> serde_json::Value {
595            match value {
596                serde_json::Value::String(s) => {
597                    if s.len() > MAX_JSON_VALUE_DISPLAY_LENGTH {
598                        serde_json::Value::String(truncate_and_hash(s))
599                    } else {
600                        value.clone()
601                    }
602                }
603                serde_json::Value::Array(arr) => {
604                    let array_str = serde_json::to_string(arr).unwrap();
605                    if array_str.len() > MAX_JSON_VALUE_DISPLAY_LENGTH {
606                        serde_json::Value::String(truncate_and_hash(&array_str))
607                    } else {
608                        value.clone()
609                    }
610                }
611                serde_json::Value::Object(obj) => {
612                    let truncated_obj: serde_json::Map<_, _> = obj
613                        .iter()
614                        .map(|(k, v)| (k.clone(), truncate_json_values(v)))
615                        .collect();
616                    serde_json::Value::Object(truncated_obj)
617                }
618                _ => value.clone(),
619            }
620        }
621
622        let truncated = truncate_json_values(self.0);
623        write!(f, "{}", truncated)
624    }
625}
626
627#[cfg(test)]
628mod tests {
629    use serde::Deserialize;
630    use serde::Serialize;
631    use serde_multipart::Part;
632    use strum::IntoEnumIterator;
633    use typeuri::Named;
634
635    use super::*;
636
    /// Minimal named type used to check derived typenames.
    #[derive(typeuri::Named, Serialize, Deserialize)]
    struct TestStruct;
639
    /// Typenames of built-in, generic, tuple, and derived types.
    #[test]
    fn test_names() {
        // Built-in and nested generic types.
        assert_eq!(String::typename(), "String");
        assert_eq!(Option::<String>::typename(), "Option<String>");
        assert_eq!(Vec::<String>::typename(), "Vec<String>");
        assert_eq!(Vec::<Vec::<String>>::typename(), "Vec<Vec<String>>");
        assert_eq!(
            Vec::<Vec::<Vec::<String>>>::typename(),
            "Vec<Vec<Vec<String>>>"
        );
        // Tuples.
        assert_eq!(
            <(u64, String, Option::<isize>)>::typename(),
            "(u64, String, Option<isize>)"
        );
        // Derived types are named by their full module path.
        assert_eq!(TestStruct::typename(), "wirevalue::tests::TestStruct");
        assert_eq!(
            Vec::<TestStruct>::typename(),
            "Vec<wirevalue::tests::TestStruct>"
        );
    }
660
    /// Pins the typehash and port of `String`, and checks that differently
    /// nested generic types get different typehashes.
    #[test]
    fn test_ports() {
        assert_eq!(String::typehash(), 3947244799002047352u64);
        assert_eq!(String::port(), 13170616835856823160u64);
        assert_ne!(
            Vec::<Vec::<Vec::<String>>>::typehash(),
            Vec::<Vec::<Vec::<Vec::<String>>>>::typehash(),
        );
    }
670
    /// Registered test type with a string, an integer, an optional integer,
    /// and an optional multipart `Part`; used by the dump, prefix, and
    /// encoding tests.
    #[derive(typeuri::Named, Serialize, Deserialize, PartialEq, Eq, Debug)]
    struct TestDumpStruct {
        a: String,
        b: u64,
        c: Option<i32>,
        d: Option<Part>,
    }
    // Registration makes the typename and JSON dumper available by typehash.
    crate::register_type!(TestDumpStruct);
679
680    #[test]
681    fn test_dump_struct() {
682        let data = TestDumpStruct {
683            a: "hello".to_string(),
684            b: 1234,
685            c: Some(5678),
686            d: None,
687        };
688        let serialized = Any::serialize(&data).unwrap();
689        let serialized_json = serialized.clone().transcode_to_json().unwrap();
690
691        assert!(serialized.encoded.is_multipart());
692        assert!(serialized_json.encoded.is_json());
693
694        let json_string =
695            String::from_utf8(serialized_json.encoded.as_json().unwrap().to_vec().clone()).unwrap();
696        // The serialized data for JSON is just the (compact) JSON string.
697        assert_eq!(
698            json_string,
699            "{\"a\":\"hello\",\"b\":1234,\"c\":5678,\"d\":null}"
700        );
701
702        for serialized in [serialized, serialized_json] {
703            // Note, at this point, serialized has no knowledge other than its embedded typehash.
704
705            assert_eq!(
706                serialized.typename(),
707                Some("wirevalue::tests::TestDumpStruct")
708            );
709
710            let json = serialized.dump().unwrap();
711            assert_eq!(
712                json,
713                serde_json::json!({
714                    "a": "hello",
715                    "b": 1234,
716                    "c": 5678,
717                    "d": null,
718                })
719            );
720
721            assert_eq!(
722                format!("{}", serialized),
723                "TestDumpStruct{\"a\":\"hello\",\"b\":1234,\"c\":5678,\"d\":null}",
724            );
725        }
726    }
727
    /// Replacing the leading field of a bincode payload with a longer value
    /// leaves the remaining fields intact.
    #[test]
    fn test_emplace_prefix() {
        // Prefix operations are only supported for bincode, so override the
        // default encoding while this test runs.
        let config = hyperactor_config::global::lock();
        let _guard = config.override_key(config::DEFAULT_ENCODING, Encoding::Bincode);
        let data = TestDumpStruct {
            a: "hello".to_string(),
            b: 1234,
            c: Some(5678),
            d: None,
        };

        let mut ser = Any::serialize(&data).unwrap();
        // The first field (`a`) is readable as the prefix.
        assert_eq!(ser.prefix::<String>().unwrap(), "hello".to_string());

        ser.emplace_prefix("hello, world, 123!".to_string())
            .unwrap();

        // Only `a` changed; `b`, `c`, and `d` are preserved.
        assert_eq!(
            ser.deserialized::<TestDumpStruct>().unwrap(),
            TestDumpStruct {
                a: "hello, world, 123!".to_string(),
                b: 1234,
                c: Some(5678),
                d: None,
            }
        );
    }
755
    /// `arm()` reports the variant name for tuple, unit, empty-tuple, and
    /// struct-like enum variants.
    #[test]
    fn test_arms() {
        #[derive(typeuri::Named, Serialize, Deserialize)]
        enum TestArm {
            #[allow(dead_code)]
            A(u32),
            B,
            C(),
            D {
                #[allow(dead_code)]
                a: u32,
                #[allow(dead_code)]
                b: String,
            },
        }

        assert_eq!(TestArm::A(1234).arm(), Some("A"));
        assert_eq!(TestArm::B.arm(), Some("B"));
        assert_eq!(TestArm::C().arm(), Some("C"));
        assert_eq!(
            TestArm::D {
                a: 1234,
                b: "hello".to_string()
            }
            .arm(),
            Some("D")
        );
    }
784
    /// `HexFmt` prints the CRC, at most eight leading bytes in hex, and the
    /// number of bytes left out.
    #[test]
    fn display_hex() {
        // 11 bytes: 8 shown, 3 elided.
        assert_eq!(
            format!("{}", HexFmt("hello world".as_bytes())),
            "CRC:d4a1185 68 65 6c 6c 6f 20 77 6f [...3 bytes]"
        );
        // Empty input: only the CRC is printed.
        assert_eq!(format!("{}", HexFmt("".as_bytes())), "CRC:0");
        // 31 bytes: 8 shown, 23 elided.
        assert_eq!(
            format!("{}", HexFmt("a very long string that is long".as_bytes())),
            "CRC:c7e24f62 61 20 76 65 72 79 20 6c [...23 bytes]"
        );
    }
797
798    #[test]
799    fn test_json_fmt() {
800        let json_value = serde_json::json!({
801            "name": "test",
802            "number": 42,
803            "nested": {
804                "key": "value"
805            }
806        });
807        // JSON values with short values should print normally
808        assert_eq!(
809            format!("{}", JsonFmt(&json_value)),
810            "{\"name\":\"test\",\"nested\":{\"key\":\"value\"},\"number\":42}",
811        );
812
813        let empty_json = serde_json::json!({});
814        assert_eq!(format!("{}", JsonFmt(&empty_json)), "{}");
815
816        let simple_array = serde_json::json!([1, 2, 3]);
817        assert_eq!(format!("{}", JsonFmt(&simple_array)), "[1,2,3]");
818
819        // JSON values with very long strings should be truncated
820        let long_string_json = serde_json::json!({
821            "long_string": "a".repeat(MAX_JSON_VALUE_DISPLAY_LENGTH * 5)
822        });
823        assert_eq!(
824            format!("{}", JsonFmt(&long_string_json)),
825            "{\"long_string\":\"aaaaaaaa[...40 chars] CRC:c95b8a25 61 61 61 61 61 61 61 61 [...32 bytes]\"}"
826        );
827
828        // JSON values with very long arrays should be truncated
829        let long_array_json =
830            serde_json::json!((1..=(MAX_JSON_VALUE_DISPLAY_LENGTH + 4)).collect::<Vec<_>>());
831        assert_eq!(
832            format!("{}", JsonFmt(&long_array_json)),
833            "\"[1,2,3,4[...28 chars] CRC:e5c881af 5b 31 2c 32 2c 33 2c 34 [...20 bytes]\""
834        );
835
836        // Test for truncation within nested blocks
837        let nested_json = serde_json::json!({
838            "simple_number": 42,
839            "simple_bool": true,
840            "outer": {
841                "long_string": "a".repeat(MAX_JSON_VALUE_DISPLAY_LENGTH + 10),
842                "long_array": (1..=(MAX_JSON_VALUE_DISPLAY_LENGTH + 4)).collect::<Vec<_>>(),
843                "inner": {
844                    "simple_value": "short",
845                }
846            }
847        });
848        println!("{}", JsonFmt(&nested_json));
849        assert_eq!(
850            format!("{}", JsonFmt(&nested_json)),
851            "{\"outer\":{\"inner\":{\"simple_value\":\"short\"},\"long_array\":\"[1,2,3,4[...28 chars] CRC:e5c881af 5b 31 2c 32 2c 33 2c 34 [...20 bytes]\",\"long_string\":\"aaaaaaaa[...18 chars] CRC:b8ac0e31 61 61 61 61 61 61 61 61 [...10 bytes]\"},\"simple_bool\":true,\"simple_number\":42}",
852        );
853    }
854
855    #[test]
856    fn test_json_fmt_utf8_truncation() {
857        // Test that UTF-8 character boundaries are respected during truncation
858        // Create a string with multi-byte characters that would be truncated
859
860        // String with 7 ASCII chars + 4-byte emoji (total 11 bytes, truncates at 8)
861        let utf8_json = serde_json::json!({
862            "emoji": "1234567🦀"  // 7 + 4 = 11 bytes, MAX is 8
863        });
864
865        // Should truncate at byte 7 (before the emoji) to respect UTF-8 boundary
866        let result = format!("{}", JsonFmt(&utf8_json));
867
868        // Verify it doesn't panic and produces valid output
869        assert!(result.contains("1234567"));
870        assert!(!result.contains("🦀")); // Emoji should be truncated away
871
872        // Test with all multi-byte characters
873        let all_multibyte = serde_json::json!({
874            "chinese": "你好世界"  // Each char is 3 bytes = 12 bytes total
875        });
876        let result3 = format!("{}", JsonFmt(&all_multibyte));
877        assert!(!result3.is_empty());
878    }
879
    /// Every encoding round-trips a value, including one that carries a
    /// multipart `Part`.
    #[test]
    fn test_encodings() {
        let value = TestDumpStruct {
            a: "hello, world".to_string(),
            b: 123,
            c: Some(321),
            d: Some(Part::from("hello, world, again")),
        };
        // `Encoding::iter()` comes from `strum::EnumIter`, so any encoding
        // added later is covered automatically.
        for enc in Encoding::iter() {
            let ser = Any::serialize_with_encoding(enc, &value).unwrap();
            assert_eq!(ser.encoding(), enc);
            assert_eq!(ser.deserialized::<TestDumpStruct>().unwrap(), value);
        }
    }
894
    /// A broken `Any` reports itself as broken, carries `BROKEN_TYPEHASH`,
    /// and refuses to deserialize.
    #[test]
    fn test_broken_any() {
        let broken = Any::new_broken();
        assert!(broken.is_broken());
        assert_eq!(broken.typehash(), BROKEN_TYPEHASH);

        // Normal values are not broken
        let normal = Any::serialize(&"hello".to_string()).unwrap();
        assert!(!normal.is_broken());

        // deserialized() should fail for broken values
        let err = broken.deserialized::<String>().unwrap_err();
        // The `BrokenAny` error message contains the word "broken".
        assert!(err.to_string().contains("broken"));
    }
909}