wirevalue/
lib.rs

1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
9//! Wirevalue provides an erased serialization format. [`Any`] is a type-erased
10//! envelope containing a serialized value identified by a [`typeuri::Named`].
11//!
12//! Wirevalues also provide encoding polymorphism, allowing the same representation
13//! to carry multiple serialization formats, and to transcode between them for
14//! types that are registered through [`register_type!`].
15
16use std::any::TypeId;
17use std::collections::HashMap;
18use std::fmt;
19use std::io::Cursor;
20use std::sync::LazyLock;
21
22use enum_as_inner::EnumAsInner;
23use hyperactor_config::AttrValue;
24use serde::Deserialize;
25use serde::Serialize;
26use serde::de::DeserializeOwned;
27pub use typeuri::Named;
28pub use typeuri::intern_typename;
29
30pub mod config;
31
/// Typehash value indicating a broken (unknown type, no value) Any.
///
/// [`Any::new_broken`] stamps values with this hash and [`Any::is_broken`]
/// tests for it.
pub const BROKEN_TYPEHASH: u64 = 0;
34
#[doc(hidden)]
/// Dump trait for Named types that are also serializable/deserializable.
/// This is a utility used by [`Any::dump`], and is not intended
/// for direct use.
///
/// A blanket implementation in this file covers every type that satisfies the
/// supertrait bounds, so the trait never needs to be implemented by hand.
pub trait NamedDumpable: Named + Serialize + for<'de> Deserialize<'de> {
    /// Dump the data in Any to a JSON value.
    ///
    /// # Errors
    /// The blanket implementation fails when `data` cannot be deserialized as
    /// `Self`, or when the decoded value cannot be converted to JSON.
    fn dump(data: Any) -> Result<serde_json::Value>;
}
43
44impl<T: Named + Serialize + for<'de> Deserialize<'de>> NamedDumpable for T {
45    fn dump(data: Any) -> Result<serde_json::Value> {
46        let value = data.deserialized::<Self>()?;
47        Ok(serde_json::to_value(value)?)
48    }
49}
50
#[doc(hidden)]
#[derive(Debug)]
/// A table of function pointers describing one registered type. Entries are
/// submitted through [`register_type!`] and collected with `inventory`, so
/// that a serialized value can be introspected knowing only its typehash.
pub struct TypeInfo {
    /// Named::typename()
    pub typename: fn() -> &'static str,
    /// Named::typehash()
    pub typehash: fn() -> u64,
    /// Named::typeid()
    pub typeid: fn() -> TypeId,
    /// Named::port()
    pub port: fn() -> u64,
    /// A function that can transcode a serialized value to JSON.
    /// `None` means the type cannot be dumped; [`TypeInfo::dump`] then
    /// reports `Error::MissingDumper`.
    pub dump: Option<fn(Any) -> Result<serde_json::Value>>,
    /// Return the arm for this type, if available.
    /// The pointer argument must point to a valid value of the described type.
    pub arm_unchecked: unsafe fn(*const ()) -> Option<&'static str>,
    /// Return the endpoint name for this message, if available.
    /// Separate from `arm_unchecked` because struct-typed messages (e.g.,
    /// PythonMessage) have no enum arm but do carry an endpoint name inside
    /// their payload. Types that use `register_type!` get a default that
    /// delegates to `arm_unchecked`, which works for Rust enum handlers.
    /// The pointer argument must point to a valid value of the described type.
    pub endpoint_name: unsafe fn(*const ()) -> Option<String>,
}
73
74#[allow(dead_code)]
75impl TypeInfo {
76    /// Get the typeinfo for the provided type hash.
77    pub fn get(typehash: u64) -> Option<&'static TypeInfo> {
78        TYPE_INFO.get(&typehash).map(|v| &**v)
79    }
80
81    /// Get the typeinfo for the provided type id.
82    pub fn get_by_typeid(typeid: TypeId) -> Option<&'static TypeInfo> {
83        TYPE_INFO_BY_TYPE_ID.get(&typeid).map(|v| &**v)
84    }
85
86    /// Get the typeinfo for the provided type.
87    pub fn of<T: ?Sized + 'static>() -> Option<&'static TypeInfo> {
88        Self::get_by_typeid(TypeId::of::<T>())
89    }
90
91    /// Get the typename for this type.
92    pub fn typename(&self) -> &'static str {
93        (self.typename)()
94    }
95
96    /// Get the typehash for this type.
97    pub fn typehash(&self) -> u64 {
98        (self.typehash)()
99    }
100
101    /// Get the typeid for this type.
102    pub fn typeid(&self) -> TypeId {
103        (self.typeid)()
104    }
105
106    /// Get the port for this type.
107    pub fn port(&self) -> u64 {
108        (self.port)()
109    }
110
111    /// Dump the serialized data to a JSON value.
112    pub fn dump(&self, data: Any) -> Result<serde_json::Value> {
113        if let Some(dump) = self.dump {
114            (dump)(data)
115        } else {
116            Err(Error::MissingDumper(self.typehash()))
117        }
118    }
119
120    /// Get the arm name for an enum value.
121    ///
122    /// # Safety
123    /// The caller must ensure the value pointer is valid for this type.
124    pub unsafe fn arm_unchecked(&self, value: *const ()) -> Option<&'static str> {
125        // SAFETY: This isn't safe, we're passing it on.
126        unsafe { (self.arm_unchecked)(value) }
127    }
128
129    /// Get the endpoint name for a message value.
130    ///
131    /// # Safety
132    /// The caller must ensure the value pointer is valid for this type.
133    pub unsafe fn endpoint_name(&self, value: *const ()) -> Option<String> {
134        // SAFETY: This isn't safe, we're passing it on.
135        unsafe { (self.endpoint_name)(value) }
136    }
137}
138
// Declare `TypeInfo` as an `inventory`-collected type; the entries submitted by
// `register_type!` are what the lookup tables below iterate over.
inventory::collect!(TypeInfo);
140
141/// Type infos for all types that have been linked into the binary, keyed by typehash.
142static TYPE_INFO: LazyLock<HashMap<u64, &'static TypeInfo>> = LazyLock::new(|| {
143    inventory::iter::<TypeInfo>()
144        .map(|entry| (entry.typehash(), entry))
145        .collect()
146});
147
148/// Type infos for all types that have been linked into the binary, keyed by typeid.
149static TYPE_INFO_BY_TYPE_ID: LazyLock<HashMap<std::any::TypeId, &'static TypeInfo>> =
150    LazyLock::new(|| {
151        TYPE_INFO
152            .values()
153            .map(|info| (info.typeid(), &**info))
154            .collect()
155    });
156
/// Register a (concrete) type so that it may be looked up by name or hash. Type registration
/// is required only to improve diagnostics, as it allows a binary to introspect serialized
/// payloads under type erasure.
///
/// The provided type must implement [`typeuri::Named`], and must be concrete.
///
/// The expansion submits a [`TypeInfo`] whose `dump` entry is
/// `<$type as NamedDumpable>::dump`, so the type must also satisfy the
/// `NamedDumpable` bounds. Its `endpoint_name` entry converts the result of the
/// type's `arm_unchecked` into an owned `String`.
#[macro_export]
macro_rules! register_type {
    ($type:ty) => {
        // `$crate::submit!` is this crate's re-export of `inventory::submit!`.
        $crate::submit! {
            $crate::TypeInfo {
                typename: <$type as $crate::Named>::typename,
                typehash: <$type as $crate::Named>::typehash,
                typeid: <$type as $crate::Named>::typeid,
                port: <$type as $crate::Named>::port,
                dump: Some(<$type as $crate::NamedDumpable>::dump),
                arm_unchecked: <$type as $crate::Named>::arm_unchecked,
                // Default endpoint name: the enum arm, if any, as an owned string.
                endpoint_name: |ptr| {
                    // SAFETY: ptr points to a value of type $type, as guaranteed by the caller.
                    unsafe { <$type as $crate::Named>::arm_unchecked(ptr).map(|s| s.to_string()) }
                },
            }
        }
    };
}
181
182// Re-export inventory::submit for the register_type! macro
183#[doc(hidden)]
184pub use inventory::submit;
185
/// An enumeration containing the supported encodings of serialized values.
///
/// Each variant carries a `strum` `to_string` attribute that fixes its textual
/// name (`bincode`, `serde_json`, `serde_multipart`).
#[derive(
    Debug,
    Clone,
    Copy,
    Serialize,
    Deserialize,
    PartialEq,
    Eq,
    AttrValue,
    typeuri::Named,
    strum::EnumIter,
    strum::Display,
    strum::EnumString
)]
pub enum Encoding {
    /// Serde bincode encoding.
    #[strum(to_string = "bincode")]
    Bincode,
    /// Serde JSON encoding.
    #[strum(to_string = "serde_json")]
    Json,
    /// Serde multipart encoding.
    #[strum(to_string = "serde_multipart")]
    Multipart,
}
212
/// A serialized payload tagged with the encoding that produced it.
/// There is one variant per [`Encoding`].
#[derive(Clone, Serialize, Deserialize, PartialEq, EnumAsInner)]
enum Encoded {
    /// Bytes produced by `bincode::serialize`.
    Bincode(bytes::Bytes),
    /// Bytes produced by `serde_json::to_vec`.
    Json(bytes::Bytes),
    /// Message produced by `serde_multipart::serialize_bincode`.
    Multipart(serde_multipart::Message),
}
220
221impl Encoded {
222    /// The length of the underlying serialized message
223    pub fn len(&self) -> usize {
224        match &self {
225            Encoded::Bincode(data) => data.len(),
226            Encoded::Json(data) => data.len(),
227            Encoded::Multipart(message) => message.len(),
228        }
229    }
230
231    /// Is the message empty. This should always return false.
232    pub fn is_empty(&self) -> bool {
233        match &self {
234            Encoded::Bincode(data) => data.is_empty(),
235            Encoded::Json(data) => data.is_empty(),
236            Encoded::Multipart(message) => message.is_empty(),
237        }
238    }
239
240    /// Returns the encoding of this serialized value.
241    pub fn encoding(&self) -> Encoding {
242        match &self {
243            Encoded::Bincode(_) => Encoding::Bincode,
244            Encoded::Json(_) => Encoding::Json,
245            Encoded::Multipart(_) => Encoding::Multipart,
246        }
247    }
248
249    /// Computes the 32bit crc of the encoded data
250    pub fn crc(&self) -> u32 {
251        match &self {
252            Encoded::Bincode(data) => crc32fast::hash(data),
253            Encoded::Json(data) => crc32fast::hash(data),
254            Encoded::Multipart(message) => {
255                let mut hasher = crc32fast::Hasher::new();
256                for fragment in message.body().iter() {
257                    hasher.update(fragment);
258                }
259                for part in message.parts() {
260                    for fragment in part.iter() {
261                        hasher.update(fragment);
262                    }
263                }
264                hasher.finalize()
265            }
266        }
267    }
268}
269
270impl std::fmt::Debug for Encoded {
271    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
272        match self {
273            Encoded::Bincode(data) => write!(f, "Encoded::Bincode({})", HexFmt(data)),
274            Encoded::Json(data) => write!(f, "Encoded::Json({})", HexFmt(data)),
275            Encoded::Multipart(message) => {
276                write!(
277                    f,
278                    "Encoded::Multipart(body={}",
279                    HexFmt(&message.body().to_bytes())
280                )?;
281                for (index, part) in message.parts().iter().enumerate() {
282                    write!(f, ", part[{}]={}", index, HexFmt(&part.to_bytes()))?;
283                }
284                write!(f, ")")
285            }
286        }
287    }
288}
289
/// The type of error returned by operations on [`Any`].
#[derive(Debug, thiserror::Error)]
pub enum Error {
    /// Errors returned from serde bincode.
    #[error(transparent)]
    Bincode(#[from] bincode::Error),

    /// Errors returned from serde JSON.
    #[error(transparent)]
    Json(#[from] serde_json::Error),

    /// The encoding was not recognized.
    #[error("unknown encoding: {0}")]
    InvalidEncoding(String),

    /// Attempted to deserialize a broken Any value.
    #[error("attempted to deserialize a broken Any value")]
    BrokenAny,

    /// Type mismatch during deserialization.
    #[error("type mismatch: expected {expected}, found {actual}")]
    TypeMismatch {
        /// Name of the type the caller asked to deserialize.
        expected: &'static str,
        /// Name of the type actually stored, or `unknown` when no typeinfo
        /// is registered for the stored typehash.
        actual: String,
    },

    /// Type info not available for the given typehash.
    #[error("binary does not have typeinfo for typehash {0}")]
    MissingTypeInfo(u64),

    /// Dumper not available for the given typehash.
    #[error("binary does not have dumper for typehash {0}")]
    MissingDumper(u64),

    /// Operation requires bincode encoding.
    #[error("only bincode encoding supports prefix operations")]
    PrefixNotSupported,
}
328
/// A specialized Result type for wirevalue operations, with the error type
/// fixed to this crate's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
331
/// Represents a serialized value, wrapping the underlying serialization
/// and deserialization details, while ensuring that we pass correctly-serialized
/// message throughout the system.
///
/// The payload is stored together with the encoding that produced it (see
/// [`Encoding`]) and the typehash of the type it was serialized as.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct Any {
    /// The encoded data
    encoded: Encoded,
    /// The typehash of the serialized value. This is used to provide
    /// typed introspection of the value.
    typehash: u64,
}
346
347impl std::fmt::Display for Any {
348    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
349        match self.dump() {
350            Ok(value) => {
351                // unwrap okay, self.dump() would return Err otherwise.
352                let typename = self.typename().unwrap();
353                // take the basename of the type (e.g. "foo::bar::baz" -> "baz")
354                let basename = typename.split("::").last().unwrap_or(typename);
355                write!(f, "{}{}", basename, JsonFmt(&value))
356            }
357            Err(_) => write!(f, "{:?}", self.encoded),
358        }
359    }
360}
361
362impl Any {
363    /// Construct a new serialized value by serializing the provided T-typed value.
364    /// Serialize uses the default encoding defined by the configuration key
365    /// [`config::DEFAULT_ENCODING`] in the global configuration; use [`serialize_with_encoding`]
366    /// to serialize values with a specific encoding.
367    pub fn serialize<T: Serialize + Named>(value: &T) -> Result<Self> {
368        Self::serialize_with_encoding(
369            hyperactor_config::global::get(config::DEFAULT_ENCODING),
370            value,
371        )
372    }
373
374    /// Serialize U-typed value as a T-typed value. This should be used with care
375    /// (typically only in testing), as the value's representation may be illegally
376    /// coerced.
377    pub fn serialize_as<T: Named, U: Serialize>(value: &U) -> Result<Self> {
378        Self::serialize_with_encoding_as::<T, U>(
379            hyperactor_config::global::get(config::DEFAULT_ENCODING),
380            value,
381        )
382    }
383
384    /// Serialize the value with the using the provided encoding.
385    pub fn serialize_with_encoding<T: Serialize + Named>(
386        encoding: Encoding,
387        value: &T,
388    ) -> Result<Self> {
389        Self::serialize_with_encoding_as::<T, T>(encoding, value)
390    }
391
392    /// Serialize U-typed value as a T-typed value. This should be used with care
393    /// (typically only in testing), as the value's representation may be illegally
394    /// coerced.
395    pub fn serialize_with_encoding_as<T: Named, U: Serialize>(
396        encoding: Encoding,
397        value: &U,
398    ) -> Result<Self> {
399        Ok(Self {
400            encoded: match encoding {
401                Encoding::Bincode => Encoded::Bincode(bincode::serialize(value)?.into()),
402                Encoding::Json => Encoded::Json(serde_json::to_vec(value)?.into()),
403                Encoding::Multipart => {
404                    Encoded::Multipart(serde_multipart::serialize_bincode(value)?)
405                }
406            },
407            typehash: T::typehash(),
408        })
409    }
410
411    /// Create a new broken Any value. A broken value has unknown type and
412    /// no valid data. Attempting to deserialize a broken value will fail.
413    pub fn new_broken() -> Self {
414        Self {
415            encoded: Encoded::Bincode(bytes::Bytes::new()),
416            typehash: BROKEN_TYPEHASH,
417        }
418    }
419
420    /// Returns true if this Any is broken (unknown type, no value).
421    pub fn is_broken(&self) -> bool {
422        self.typehash == BROKEN_TYPEHASH
423    }
424
425    /// Deserialize a value to the provided type T.
426    pub fn deserialized<T: DeserializeOwned + Named>(&self) -> Result<T> {
427        if self.is_broken() {
428            return Err(Error::BrokenAny);
429        }
430        if !self.is::<T>() {
431            return Err(Error::TypeMismatch {
432                expected: T::typename(),
433                actual: self.typename().unwrap_or("unknown").to_string(),
434            });
435        }
436        self.deserialized_unchecked()
437    }
438
439    /// Deserialize a value to the provided type T, without checking for type conformance.
440    /// This should be used carefully, only when you know that the dynamic type check is
441    /// not needed.
442    pub fn deserialized_unchecked<T: DeserializeOwned>(&self) -> Result<T> {
443        match &self.encoded {
444            Encoded::Bincode(data) => Ok(bincode::deserialize(data)?),
445            Encoded::Json(data) => Ok(serde_json::from_slice(data)?),
446            Encoded::Multipart(message) => {
447                Ok(serde_multipart::deserialize_bincode(message.clone())?)
448            }
449        }
450    }
451
452    /// Transcode the serialized value to JSON. This operation will succeed if the type hash
453    /// is embedded in the value, and the corresponding type is available in this binary.
454    pub fn transcode_to_json(self) -> std::result::Result<Self, Self> {
455        match self.encoded {
456            Encoded::Bincode(_) | Encoded::Multipart(_) => {
457                let json_value = match self.dump() {
458                    Ok(json_value) => json_value,
459                    Err(_) => return Err(self),
460                };
461                let json_data = match serde_json::to_vec(&json_value) {
462                    Ok(json_data) => json_data,
463                    Err(_) => return Err(self),
464                };
465                Ok(Self {
466                    encoded: Encoded::Json(json_data.into()),
467                    typehash: self.typehash,
468                })
469            }
470            Encoded::Json(_) => Ok(self),
471        }
472    }
473
474    /// Dump the Any message into a JSON value. This will succeed if: 1) the typehash is embedded
475    /// in the serialized value; 2) the named type is linked into the binary.
476    pub fn dump(&self) -> Result<serde_json::Value> {
477        match &self.encoded {
478            Encoded::Bincode(_) | Encoded::Multipart(_) => {
479                let Some(typeinfo) = TYPE_INFO.get(&self.typehash) else {
480                    return Err(Error::MissingTypeInfo(self.typehash));
481                };
482                typeinfo.dump(self.clone())
483            }
484            Encoded::Json(data) => Ok(serde_json::from_slice(data)?),
485        }
486    }
487
488    /// The encoding used by this serialized value.
489    pub fn encoding(&self) -> Encoding {
490        self.encoded.encoding()
491    }
492
493    /// The typehash of the serialized value.
494    pub fn typehash(&self) -> u64 {
495        self.typehash
496    }
497
498    /// The typename of the serialized value, if available.
499    pub fn typename(&self) -> Option<&'static str> {
500        TYPE_INFO
501            .get(&self.typehash)
502            .map(|typeinfo| typeinfo.typename())
503    }
504
505    /// Deserialize a prefix of the value. This is currently only supported
506    /// for bincode-serialized values.
507    // TODO: we should support this by formalizing the notion of a 'prefix'
508    // serialization, and generalize it to other codecs as well.
509    pub fn prefix<T: DeserializeOwned>(&self) -> Result<T> {
510        match &self.encoded {
511            Encoded::Bincode(data) => Ok(bincode::deserialize(data)?),
512            _ => Err(Error::PrefixNotSupported),
513        }
514    }
515
516    /// Emplace a new prefix to this value. This is currently only supported
517    /// for bincode-serialized values.
518    pub fn emplace_prefix<T: Serialize + DeserializeOwned>(&mut self, prefix: T) -> Result<()> {
519        let data = match &self.encoded {
520            Encoded::Bincode(data) => data,
521            _ => return Err(Error::PrefixNotSupported),
522        };
523
524        // This is a bit ugly, but: we first deserialize out the old prefix,
525        // then serialize the new prefix, then splice the two together.
526        // This is safe because we know that the prefix is the first thing
527        // in the serialized value, and that the serialization format is stable.
528        let mut cursor = Cursor::new(data.clone());
529        let _prefix: T = bincode::deserialize_from(&mut cursor).unwrap();
530        let position = cursor.position() as usize;
531        let suffix = &cursor.into_inner()[position..];
532        let mut data = bincode::serialize(&prefix)?;
533        data.extend_from_slice(suffix);
534        self.encoded = Encoded::Bincode(data.into());
535
536        Ok(())
537    }
538
539    /// The length of the underlying serialized message
540    pub fn len(&self) -> usize {
541        self.encoded.len()
542    }
543
544    /// Is the message empty. This should always return false.
545    pub fn is_empty(&self) -> bool {
546        self.encoded.is_empty()
547    }
548
549    /// Returns the 32bit crc of the serialized data
550    pub fn crc(&self) -> u32 {
551        self.encoded.crc()
552    }
553
554    /// Returns whether this value contains a serialized M-typed value. Returns None
555    /// when type information is unavailable.
556    pub fn is<M: Named>(&self) -> bool {
557        self.typehash == M::typehash()
558    }
559}
560
/// Maximum number of leading bytes that `display_bytes_as_hash` prints
/// individually before summarizing the remainder as a byte count.
const MAX_BYTE_PREVIEW_LENGTH: usize = 8;
562
563fn display_bytes_as_hash(f: &mut impl std::fmt::Write, bytes: &[u8]) -> std::fmt::Result {
564    let hash = crc32fast::hash(bytes);
565    write!(f, "CRC:{:x}", hash)?;
566    // Implementing in this way lets us print without allocating a new intermediate string.
567    for &byte in bytes.iter().take(MAX_BYTE_PREVIEW_LENGTH) {
568        write!(f, " {:x}", byte)?;
569    }
570    if bytes.len() > MAX_BYTE_PREVIEW_LENGTH {
571        write!(f, " [...{} bytes]", bytes.len() - MAX_BYTE_PREVIEW_LENGTH)?;
572    }
573    Ok(())
574}
575
576/// Formats a binary slice as hex when its display function is called.
577pub struct HexFmt<'a>(pub &'a [u8]);
578
579impl std::fmt::Display for HexFmt<'_> {
580    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
581        // calculate a 2 byte checksum to prepend to the message
582        display_bytes_as_hash(f, self.0)
583    }
584}
585
/// Formats a JSON value for display, printing all keys but
/// truncating and displaying a hash if the content is too long.
pub struct JsonFmt<'a>(pub &'a serde_json::Value);

/// Length threshold, in bytes of the rendered text, above which a JSON string
/// or array is truncated by [`JsonFmt`]; also the number of leading bytes kept.
const MAX_JSON_VALUE_DISPLAY_LENGTH: usize = 8;
591
592impl std::fmt::Display for JsonFmt<'_> {
593    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
594        /// Truncate the input string to MAX_JSON_VALUE_DISPLAY_LENGTH and append
595        /// the truncated hash of the full value for easy comparison.
596        fn truncate_and_hash(value_str: &str) -> String {
597            let truncate_at = MAX_JSON_VALUE_DISPLAY_LENGTH.min(value_str.len());
598
599            // Respect UTF-8 boundaries (multi-byte chars like emojis can be up to 4 bytes)
600            let mut safe_truncate_at = truncate_at;
601            while safe_truncate_at > 0 && !value_str.is_char_boundary(safe_truncate_at) {
602                safe_truncate_at -= 1;
603            }
604
605            let truncated_str = &value_str[..safe_truncate_at];
606            let mut result = truncated_str.to_string();
607            result.push_str(&format!("[...{} chars] ", value_str.len()));
608            display_bytes_as_hash(&mut result, value_str.as_bytes()).unwrap();
609            result
610        }
611
612        /// Recursively truncate a serde_json::Value object.
613        fn truncate_json_values(value: &serde_json::Value) -> serde_json::Value {
614            match value {
615                serde_json::Value::String(s) => {
616                    if s.len() > MAX_JSON_VALUE_DISPLAY_LENGTH {
617                        serde_json::Value::String(truncate_and_hash(s))
618                    } else {
619                        value.clone()
620                    }
621                }
622                serde_json::Value::Array(arr) => {
623                    let array_str = serde_json::to_string(arr).unwrap();
624                    if array_str.len() > MAX_JSON_VALUE_DISPLAY_LENGTH {
625                        serde_json::Value::String(truncate_and_hash(&array_str))
626                    } else {
627                        value.clone()
628                    }
629                }
630                serde_json::Value::Object(obj) => {
631                    let truncated_obj: serde_json::Map<_, _> = obj
632                        .iter()
633                        .map(|(k, v)| (k.clone(), truncate_json_values(v)))
634                        .collect();
635                    serde_json::Value::Object(truncated_obj)
636                }
637                _ => value.clone(),
638            }
639        }
640
641        let truncated = truncate_json_values(self.0);
642        write!(f, "{}", truncated)
643    }
644}
645
/// Unit tests for type naming, registration-based dumping, prefix handling,
/// display formatting, encodings, and broken values.
#[cfg(test)]
mod tests {
    use serde::Deserialize;
    use serde::Serialize;
    use serde_multipart::Part;
    use strum::IntoEnumIterator;
    use typeuri::Named;

    use super::*;

    #[derive(typeuri::Named, Serialize, Deserialize)]
    struct TestStruct;

    /// Typenames of builtin, generic, tuple, and derived types.
    #[test]
    fn test_names() {
        assert_eq!(String::typename(), "String");
        assert_eq!(Option::<String>::typename(), "Option<String>");
        assert_eq!(Vec::<String>::typename(), "Vec<String>");
        assert_eq!(Vec::<Vec::<String>>::typename(), "Vec<Vec<String>>");
        assert_eq!(
            Vec::<Vec::<Vec::<String>>>::typename(),
            "Vec<Vec<Vec<String>>>"
        );
        assert_eq!(
            <(u64, String, Option::<isize>)>::typename(),
            "(u64, String, Option<isize>)"
        );
        assert_eq!(TestStruct::typename(), "wirevalue::tests::TestStruct");
        assert_eq!(
            Vec::<TestStruct>::typename(),
            "Vec<wirevalue::tests::TestStruct>"
        );
    }

    /// Typehash and port values are pinned, and differ across nesting depth.
    #[test]
    fn test_ports() {
        assert_eq!(String::typehash(), 3947244799002047352u64);
        assert_eq!(String::port(), 13170616835856823160u64);
        assert_ne!(
            Vec::<Vec::<Vec::<String>>>::typehash(),
            Vec::<Vec::<Vec::<Vec::<String>>>>::typehash(),
        );
    }

    #[derive(typeuri::Named, Serialize, Deserialize, PartialEq, Eq, Debug)]
    struct TestDumpStruct {
        a: String,
        b: u64,
        c: Option<i32>,
        d: Option<Part>,
    }
    crate::register_type!(TestDumpStruct);

    /// A registered type can be transcoded to JSON, dumped, and displayed.
    #[test]
    fn test_dump_struct() {
        let data = TestDumpStruct {
            a: "hello".to_string(),
            b: 1234,
            c: Some(5678),
            d: None,
        };
        // NOTE(review): this relies on the global default encoding being
        // multipart while `test_emplace_prefix` overrides that key; confirm the
        // override cannot be observed from this test when tests run in parallel.
        let serialized = Any::serialize(&data).unwrap();
        let serialized_json = serialized.clone().transcode_to_json().unwrap();

        assert!(serialized.encoded.is_multipart());
        assert!(serialized_json.encoded.is_json());

        let json_string =
            String::from_utf8(serialized_json.encoded.as_json().unwrap().to_vec()).unwrap();
        // The serialized data for JSON is just the (compact) JSON string.
        assert_eq!(
            json_string,
            "{\"a\":\"hello\",\"b\":1234,\"c\":5678,\"d\":null}"
        );

        for serialized in [serialized, serialized_json] {
            // Note, at this point, serialized has no knowledge other than its embedded typehash.

            assert_eq!(
                serialized.typename(),
                Some("wirevalue::tests::TestDumpStruct")
            );

            let json = serialized.dump().unwrap();
            assert_eq!(
                json,
                serde_json::json!({
                    "a": "hello",
                    "b": 1234,
                    "c": 5678,
                    "d": null,
                })
            );

            assert_eq!(
                format!("{}", serialized),
                "TestDumpStruct{\"a\":\"hello\",\"b\":1234,\"c\":5678,\"d\":null}",
            );
        }
    }

    /// The leading field of a bincode payload can be read and replaced in place.
    #[test]
    fn test_emplace_prefix() {
        // Prefix operations require bincode, so pin the default encoding.
        let config = hyperactor_config::global::lock();
        let _guard = config.override_key(config::DEFAULT_ENCODING, Encoding::Bincode);
        let data = TestDumpStruct {
            a: "hello".to_string(),
            b: 1234,
            c: Some(5678),
            d: None,
        };

        let mut ser = Any::serialize(&data).unwrap();
        assert_eq!(ser.prefix::<String>().unwrap(), "hello".to_string());

        ser.emplace_prefix("hello, world, 123!".to_string())
            .unwrap();

        assert_eq!(
            ser.deserialized::<TestDumpStruct>().unwrap(),
            TestDumpStruct {
                a: "hello, world, 123!".to_string(),
                b: 1234,
                c: Some(5678),
                d: None,
            }
        );
    }

    /// Derived `Named` reports the arm name for every enum variant shape.
    #[test]
    fn test_arms() {
        #[derive(typeuri::Named, Serialize, Deserialize)]
        enum TestArm {
            #[allow(dead_code)]
            A(u32),
            B,
            C(),
            D {
                #[allow(dead_code)]
                a: u32,
                #[allow(dead_code)]
                b: String,
            },
        }

        assert_eq!(TestArm::A(1234).arm(), Some("A"));
        assert_eq!(TestArm::B.arm(), Some("B"));
        assert_eq!(TestArm::C().arm(), Some("C"));
        assert_eq!(
            TestArm::D {
                a: 1234,
                b: "hello".to_string()
            }
            .arm(),
            Some("D")
        );
    }

    /// `HexFmt` prints the CRC, a byte preview, and the omitted byte count.
    #[test]
    fn display_hex() {
        assert_eq!(
            format!("{}", HexFmt("hello world".as_bytes())),
            "CRC:d4a1185 68 65 6c 6c 6f 20 77 6f [...3 bytes]"
        );
        assert_eq!(format!("{}", HexFmt("".as_bytes())), "CRC:0");
        assert_eq!(
            format!("{}", HexFmt("a very long string that is long".as_bytes())),
            "CRC:c7e24f62 61 20 76 65 72 79 20 6c [...23 bytes]"
        );
    }

    /// `JsonFmt` leaves short values alone and truncates long strings and arrays.
    #[test]
    fn test_json_fmt() {
        let json_value = serde_json::json!({
            "name": "test",
            "number": 42,
            "nested": {
                "key": "value"
            }
        });
        // JSON values with short values should print normally
        assert_eq!(
            format!("{}", JsonFmt(&json_value)),
            "{\"name\":\"test\",\"nested\":{\"key\":\"value\"},\"number\":42}",
        );

        let empty_json = serde_json::json!({});
        assert_eq!(format!("{}", JsonFmt(&empty_json)), "{}");

        let simple_array = serde_json::json!([1, 2, 3]);
        assert_eq!(format!("{}", JsonFmt(&simple_array)), "[1,2,3]");

        // JSON values with very long strings should be truncated
        let long_string_json = serde_json::json!({
            "long_string": "a".repeat(MAX_JSON_VALUE_DISPLAY_LENGTH * 5)
        });
        assert_eq!(
            format!("{}", JsonFmt(&long_string_json)),
            "{\"long_string\":\"aaaaaaaa[...40 chars] CRC:c95b8a25 61 61 61 61 61 61 61 61 [...32 bytes]\"}"
        );

        // JSON values with very long arrays should be truncated
        let long_array_json =
            serde_json::json!((1..=(MAX_JSON_VALUE_DISPLAY_LENGTH + 4)).collect::<Vec<_>>());
        assert_eq!(
            format!("{}", JsonFmt(&long_array_json)),
            "\"[1,2,3,4[...28 chars] CRC:e5c881af 5b 31 2c 32 2c 33 2c 34 [...20 bytes]\""
        );

        // Test for truncation within nested blocks
        let nested_json = serde_json::json!({
            "simple_number": 42,
            "simple_bool": true,
            "outer": {
                "long_string": "a".repeat(MAX_JSON_VALUE_DISPLAY_LENGTH + 10),
                "long_array": (1..=(MAX_JSON_VALUE_DISPLAY_LENGTH + 4)).collect::<Vec<_>>(),
                "inner": {
                    "simple_value": "short",
                }
            }
        });
        assert_eq!(
            format!("{}", JsonFmt(&nested_json)),
            "{\"outer\":{\"inner\":{\"simple_value\":\"short\"},\"long_array\":\"[1,2,3,4[...28 chars] CRC:e5c881af 5b 31 2c 32 2c 33 2c 34 [...20 bytes]\",\"long_string\":\"aaaaaaaa[...18 chars] CRC:b8ac0e31 61 61 61 61 61 61 61 61 [...10 bytes]\"},\"simple_bool\":true,\"simple_number\":42}",
        );
    }

    /// Truncation never splits a multi-byte UTF-8 character.
    #[test]
    fn test_json_fmt_utf8_truncation() {
        // Test that UTF-8 character boundaries are respected during truncation
        // Create a string with multi-byte characters that would be truncated

        // String with 7 ASCII chars + 4-byte emoji (total 11 bytes, truncates at 8)
        let utf8_json = serde_json::json!({
            "emoji": "1234567🦀"  // 7 + 4 = 11 bytes, MAX is 8
        });

        // Should truncate at byte 7 (before the emoji) to respect UTF-8 boundary
        let result = format!("{}", JsonFmt(&utf8_json));

        // Verify it doesn't panic and produces valid output
        assert!(result.contains("1234567"));
        assert!(!result.contains("🦀")); // Emoji should be truncated away

        // Test with all multi-byte characters
        let all_multibyte = serde_json::json!({
            "chinese": "你好世界"  // Each char is 3 bytes = 12 bytes total
        });
        let multibyte_result = format!("{}", JsonFmt(&all_multibyte));
        assert!(!multibyte_result.is_empty());
    }

    /// Every encoding round-trips a value, including one carrying a `Part`.
    #[test]
    fn test_encodings() {
        let value = TestDumpStruct {
            a: "hello, world".to_string(),
            b: 123,
            c: Some(321),
            d: Some(Part::from("hello, world, again")),
        };
        for enc in Encoding::iter() {
            let ser = Any::serialize_with_encoding(enc, &value).unwrap();
            assert_eq!(ser.encoding(), enc);
            assert_eq!(ser.deserialized::<TestDumpStruct>().unwrap(), value);
        }
    }

    /// Broken values are flagged as such and refuse to deserialize.
    #[test]
    fn test_broken_any() {
        let broken = Any::new_broken();
        assert!(broken.is_broken());
        assert_eq!(broken.typehash(), BROKEN_TYPEHASH);

        // Normal values are not broken
        let normal = Any::serialize(&"hello".to_string()).unwrap();
        assert!(!normal.is_broken());

        // deserialized() should fail for broken values
        let err = broken.deserialized::<String>().unwrap_err();
        assert!(err.to_string().contains("broken"));
    }
}