hyperactor/
data.rs

1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
9//! This module contains core traits and implementation to manage remote data
10//! types in Hyperactor.
11
12use std::any::TypeId;
13use std::collections::HashMap;
14use std::fmt;
15use std::io::Cursor;
16use std::sync::LazyLock;
17
18use enum_as_inner::EnumAsInner;
19use serde::Deserialize;
20use serde::Serialize;
21use serde::de::DeserializeOwned;
22
23use crate as hyperactor;
24use crate::config;
25
26/// A [`Named`] type is a type that has a globally unique name.
27pub trait Named: Sized + 'static {
28    /// The globally unique type name for the type.
29    /// This should typically be the fully qualified Rust name of the type.
30    fn typename() -> &'static str;
31
32    /// A globally unique hash for this type.
33    /// TODO: actually enforce perfect hashing
34    fn typehash() -> u64 {
35        // The `Named` macro overrides this implementation with one that
36        // memoizes the hash.
37        cityhasher::hash(Self::typename())
38    }
39
40    /// The TypeId for this type. TypeIds are unique only within a binary,
41    /// and should not be used for global identification.
42    fn typeid() -> TypeId {
43        TypeId::of::<Self>()
44    }
45
46    /// The globally unique port for this type. Typed ports are in the range
47    /// of 1<<63..1<<64-1.
48    fn port() -> u64 {
49        Self::typehash() | (1 << 63)
50    }
51
52    /// If the named type is an enum, this returns the name of the arm
53    /// of the value self.
54    fn arm(&self) -> Option<&'static str> {
55        None
56    }
57
58    /// An unsafe version of 'arm', accepting a pointer to the value,
59    /// for use in type-erased settings.
60    unsafe fn arm_unchecked(self_: *const ()) -> Option<&'static str> {
61        // SAFETY: This isn't safe. We're passing it on.
62        unsafe { &*(self_ as *const Self) }.arm()
63    }
64}
65
#[doc(hidden)]
/// Dump trait for Named types that are also serializable/deserializable.
/// This is a utility used by [`Serialized::dump`], and is not intended
/// for direct use.
///
/// A blanket implementation in this module covers every type that
/// satisfies the supertrait bounds.
pub trait NamedDumpable: Named + Serialize + for<'de> Deserialize<'de> {
    /// Dump the data in Serialized to a JSON value.
    ///
    /// Takes ownership of `data` and returns an error if it cannot be
    /// converted to JSON.
    fn dump(data: Serialized) -> Result<serde_json::Value, anyhow::Error>;
}
74
75impl<T: Named + Serialize + for<'de> Deserialize<'de>> NamedDumpable for T {
76    fn dump(data: Serialized) -> Result<serde_json::Value, anyhow::Error> {
77        let value = data.deserialized::<Self>()?;
78        Ok(serde_json::to_value(value)?)
79    }
80}
81
// Implements `Named` for a type, using the type's tokens exactly as written
// at the invocation site (via `stringify!`) as its typename.
macro_rules! impl_basic {
    ($basic:ty) => {
        impl Named for $basic {
            fn typename() -> &'static str {
                stringify!($basic)
            }
        }
    };
}
91
// `Named` implementations for primitive and common library types. The
// typename of each is the path exactly as spelled in the invocation
// (e.g. "u64", "std::net::IpAddr").
impl_basic!(());
impl_basic!(bool);
impl_basic!(i8);
impl_basic!(u8);
impl_basic!(i16);
impl_basic!(u16);
impl_basic!(i32);
impl_basic!(u32);
impl_basic!(i64);
impl_basic!(u64);
impl_basic!(i128);
impl_basic!(u128);
impl_basic!(isize);
impl_basic!(usize);
impl_basic!(f32);
impl_basic!(f64);
impl_basic!(String);
impl_basic!(std::net::IpAddr);
impl_basic!(std::net::Ipv4Addr);
impl_basic!(std::net::Ipv6Addr);
impl_basic!(std::time::Duration);
impl_basic!(std::time::SystemTime);
impl_basic!(bytes::Bytes);
// This is somewhat unfortunate. We should separate this module out into
// its own crate, and just derive(Named) in `ndslice`. As it is, this would
// create a circular (and heavy!) dependency for `ndslice`.
impl_basic!(ndslice::Point);
119
// Written by hand rather than through `impl_basic!` so that the typename
// is "&str" instead of the stringified "&'static str".
impl Named for &'static str {
    fn typename() -> &'static str {
        "&str"
    }
}
125
126// A macro that implements type-keyed interning of typenames. This is useful
127// for implementing [`Named`] for generic types.
128#[doc(hidden)] // not part of the public API
129#[macro_export]
130macro_rules! intern_typename {
131    ($key:ty, $format_string:expr, $($args:ty),+) => {
132        {
133            static CACHE: std::sync::LazyLock<$crate::dashmap::DashMap<std::any::TypeId, &'static str>> =
134              std::sync::LazyLock::new($crate::dashmap::DashMap::new);
135
136            match CACHE.entry(std::any::TypeId::of::<$key>()) {
137                $crate::dashmap::mapref::entry::Entry::Vacant(entry) => {
138                    let typename = format!($format_string, $(<$args>::typename()),+).leak();
139                    entry.insert(typename);
140                    typename
141                }
142                $crate::dashmap::mapref::entry::Entry::Occupied(entry) => *entry.get(),
143            }
144        }
145    };
146}
147pub use intern_typename;
148
// Builds, at compile time, a format string with one "{}" placeholder per
// identifier, separated by ", " (e.g. three identifiers -> "{}, {}, {}").
// The identifiers themselves are only counted, never used.
macro_rules! tuple_format_string {
    // Last element: a lone placeholder with no trailing separator.
    ($only:ident,) => {
        "{}"
    };
    // Emit one placeholder plus separator, then recurse on the remainder.
    ($first:ident, $($others:ident,)+) => {
        concat!("{}, ", tuple_format_string!($($others,)+))
    };
}
153
// Drops the first identifier and hands the remaining ones back to
// `impl_tuple!`, which is how the tuple impls recurse down in arity.
macro_rules! impl_tuple_peel {
    ($dropped:ident, $($remaining:ident,)*) => {
        impl_tuple! { $($remaining,)* }
    };
}
157
// Implements `Named` for the tuple made of the given type parameters, then
// recurses (through `impl_tuple_peel!`) with one fewer parameter until none
// are left. The typename is the parenthesized, comma-separated list of the
// element typenames.
macro_rules! impl_tuple {
    // Base case: nothing left to implement.
    () => {};
    ( $($elem:ident,)+ ) => {
        impl<$($elem:Named + 'static),+> Named for ($($elem,)+) {
            fn typename() -> &'static str {
                intern_typename!(Self, concat!("(", tuple_format_string!($($elem,)+), ")"), $($elem),+)
            }
        }
        impl_tuple_peel! { $($elem,)+ }
    };
}
169
// Twelve type parameters: generates `Named` for tuples of arity 12 down to 1.
impl_tuple! { E, D, C, B, A, Z, Y, X, W, V, U, T, }
171
172impl<T: Named + 'static> Named for Option<T> {
173    fn typename() -> &'static str {
174        intern_typename!(Self, "Option<{}>", T)
175    }
176}
177
178impl<T: Named + 'static> Named for Vec<T> {
179    fn typename() -> &'static str {
180        intern_typename!(Self, "Vec<{}>", T)
181    }
182}
183
184impl<K: Named + 'static, V: Named + 'static> Named for HashMap<K, V> {
185    fn typename() -> &'static str {
186        intern_typename!(Self, "HashMap<{}, {}>", K, V)
187    }
188}
189
190impl<T: Named + 'static, E: Named + 'static> Named for Result<T, E> {
191    fn typename() -> &'static str {
192        intern_typename!(Self, "Result<{}, {}>", T, E)
193    }
194}
195
196static SHAPE_CACHED_TYPEHASH: LazyLock<u64> =
197    LazyLock::new(|| cityhasher::hash(<ndslice::shape::Shape as Named>::typename()));
198
199impl Named for ndslice::shape::Shape {
200    fn typename() -> &'static str {
201        "ndslice::shape::Shape"
202    }
203
204    fn typehash() -> u64 {
205        *SHAPE_CACHED_TYPEHASH
206    }
207}
208
/// Really internal, but needs to be exposed for macro.
///
/// A table of function pointers describing one registered type. Instances
/// are built by the `register_type!` macro from the type's [`Named`]
/// (and `NamedDumpable`) implementations.
#[doc(hidden)]
#[derive(Debug)]
pub struct TypeInfo {
    /// Named::typename()
    pub typename: fn() -> &'static str,
    /// Named::typehash()
    pub typehash: fn() -> u64,
    /// Named::typeid()
    pub typeid: fn() -> TypeId,
    /// Named::port()
    pub port: fn() -> u64,
    /// A function that can transcode a serialized value to JSON.
    pub dump: Option<fn(Serialized) -> Result<serde_json::Value, anyhow::Error>>,
    /// Return the arm for this type, if available.
    /// Unsafe to call: the pointer must reference a value of the described type.
    pub arm_unchecked: unsafe fn(*const ()) -> Option<&'static str>,
}
226
227#[allow(dead_code)]
228impl TypeInfo {
229    /// Get the typeinfo for the provided type hash.
230    pub(crate) fn get(typehash: u64) -> Option<&'static TypeInfo> {
231        TYPE_INFO.get(&typehash).map(|v| &**v)
232    }
233
234    /// Get the typeinfo for the provided type id.
235    pub(crate) fn get_by_typeid(typeid: TypeId) -> Option<&'static TypeInfo> {
236        TYPE_INFO_BY_TYPE_ID.get(&typeid).map(|v| &**v)
237    }
238
239    /// Get the typeinfo for the provided type.
240    pub(crate) fn of<T: ?Sized + 'static>() -> Option<&'static TypeInfo> {
241        Self::get_by_typeid(TypeId::of::<T>())
242    }
243
244    pub(crate) fn typename(&self) -> &'static str {
245        (self.typename)()
246    }
247    pub(crate) fn typehash(&self) -> u64 {
248        (self.typehash)()
249    }
250    pub(crate) fn typeid(&self) -> TypeId {
251        (self.typeid)()
252    }
253    pub(crate) fn port(&self) -> u64 {
254        (self.port)()
255    }
256    pub(crate) fn dump(&self, data: Serialized) -> Result<serde_json::Value, anyhow::Error> {
257        if let Some(dump) = self.dump {
258            (dump)(data)
259        } else {
260            anyhow::bail!("binary does not have dumper for {}", self.typehash())
261        }
262    }
263    pub(crate) unsafe fn arm_unchecked(&self, value: *const ()) -> Option<&'static str> {
264        // SAFETY: This isn't safe, we're passing it on.
265        unsafe { (self.arm_unchecked)(value) }
266    }
267}
268
// Declares `TypeInfo` as an `inventory` collection; the statics below read
// it back with `inventory::iter`. Entries are presumably contributed by the
// `register_type!` macro via `hyperactor::submit!` (confirm that `submit!`
// re-exports `inventory::submit!`).
inventory::collect!(TypeInfo);
270
271/// Type infos for all types that have been linked into the binary, keyed by typehash.
272static TYPE_INFO: LazyLock<HashMap<u64, &'static TypeInfo>> = LazyLock::new(|| {
273    inventory::iter::<TypeInfo>()
274        .map(|entry| (entry.typehash(), entry))
275        .collect()
276});
277
278/// Type infos for all types that have been linked into the binary, keyed by typeid.
279static TYPE_INFO_BY_TYPE_ID: LazyLock<HashMap<std::any::TypeId, &'static TypeInfo>> =
280    LazyLock::new(|| {
281        TYPE_INFO
282            .values()
283            .map(|info| (info.typeid(), &**info))
284            .collect()
285    });
286
/// Register a (concrete) type so that it may be looked up by name or hash. Type registration
/// is required only to improve diagnostics, as it allows a binary to introspect serialized
/// payloads under type erasure.
///
/// The provided type must implement [`hyperactor::data::Named`], and must be concrete.
/// Because the `dump` entry is taken from `hyperactor::data::NamedDumpable`, the type
/// must also satisfy that trait's serde bounds.
///
/// The expansion refers to items through the `hyperactor::` path, so that name must
/// resolve at the call site.
#[macro_export]
macro_rules! register_type {
    ($registered:ty) => {
        hyperactor::submit! {
            hyperactor::data::TypeInfo {
                typename: <$registered as hyperactor::data::Named>::typename,
                typehash: <$registered as hyperactor::data::Named>::typehash,
                typeid: <$registered as hyperactor::data::Named>::typeid,
                port: <$registered as hyperactor::data::Named>::port,
                dump: Some(<$registered as hyperactor::data::NamedDumpable>::dump),
                arm_unchecked: <$registered as hyperactor::data::Named>::arm_unchecked,
            }
        }
    };
}
307
/// An enumeration containing the supported encodings of Serialized
/// values.
///
/// Each variant carries a `strum(to_string = ...)` attribute that fixes its
/// textual name for the derived `strum` string conversions.
#[derive(
    Debug,
    Clone,
    Copy,
    Serialize,
    Deserialize,
    PartialEq,
    Eq,
    crate::AttrValue,
    crate::Named,
    strum::EnumIter,
    strum::Display,
    strum::EnumString
)]
pub enum Encoding {
    /// Serde bincode encoding.
    #[strum(to_string = "bincode")]
    Bincode,
    /// Serde JSON encoding.
    #[strum(to_string = "serde_json")]
    Json,
    /// Serde multipart encoding.
    #[strum(to_string = "serde_multipart")]
    Multipart,
}
335
/// The encoding used for a serialized value, together with the encoded
/// payload itself. Each variant corresponds to the [`Encoding`] variant of
/// the same name.
#[derive(Clone, Serialize, Deserialize, PartialEq, EnumAsInner)]
enum Encoded {
    /// Bytes produced by `bincode::serialize`.
    Bincode(bytes::Bytes),
    /// Bytes produced by `serde_json::to_vec`.
    Json(bytes::Bytes),
    /// A message produced by `serde_multipart::serialize_bincode`.
    Multipart(serde_multipart::Message),
}
343
344impl Encoded {
345    /// The length of the underlying serialized message
346    pub fn len(&self) -> usize {
347        match &self {
348            Encoded::Bincode(data) => data.len(),
349            Encoded::Json(data) => data.len(),
350            Encoded::Multipart(message) => message.len(),
351        }
352    }
353
354    /// Is the message empty. This should always return false.
355    pub fn is_empty(&self) -> bool {
356        match &self {
357            Encoded::Bincode(data) => data.is_empty(),
358            Encoded::Json(data) => data.is_empty(),
359            Encoded::Multipart(message) => message.is_empty(),
360        }
361    }
362
363    /// Returns the encoding of this serialized value.
364    pub fn encoding(&self) -> Encoding {
365        match &self {
366            Encoded::Bincode(_) => Encoding::Bincode,
367            Encoded::Json(_) => Encoding::Json,
368            Encoded::Multipart(_) => Encoding::Multipart,
369        }
370    }
371
372    /// Computes the 32bit crc of the encoded data
373    pub fn crc(&self) -> u32 {
374        match &self {
375            Encoded::Bincode(data) => crc32fast::hash(data),
376            Encoded::Json(data) => crc32fast::hash(data),
377            Encoded::Multipart(message) => {
378                let mut hasher = crc32fast::Hasher::new();
379                hasher.update(message.body().as_ref());
380                for part in message.parts() {
381                    hasher.update(part.as_ref());
382                }
383                hasher.finalize()
384            }
385        }
386    }
387}
388
389impl std::fmt::Debug for Encoded {
390    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
391        match self {
392            Encoded::Bincode(data) => write!(f, "Encoded::Bincode({})", HexFmt(data)),
393            Encoded::Json(data) => write!(f, "Encoded::Json({})", HexFmt(data)),
394            Encoded::Multipart(message) => {
395                write!(
396                    f,
397                    "Encoded::Multipart(illegal?={} body={}",
398                    message.is_illegal(),
399                    HexFmt(message.body())
400                )?;
401                for (index, part) in message.parts().iter().enumerate() {
402                    write!(f, ", part[{}]={}", index, HexFmt(part))?;
403                }
404                write!(f, ")")
405            }
406        }
407    }
408}
409
/// The type of error returned by operations on [`Serialized`].
///
/// The bincode and JSON variants are transparent wrappers, so their
/// `Display` output is that of the wrapped error.
#[derive(Debug, thiserror::Error)]
pub enum Error {
    /// Errors returned from serde bincode.
    #[error(transparent)]
    Bincode(#[from] bincode::Error),

    /// Errors returned from serde JSON.
    #[error(transparent)]
    Json(#[from] serde_json::Error),

    /// The encoding was not recognized. Carries the unrecognized encoding
    /// name, which is included in the error message.
    #[error("unknown encoding: {0}")]
    InvalidEncoding(String),
}
425
/// Represents a serialized value, wrapping the underlying serialization
/// and deserialization details, while ensuring that we pass correctly-serialized
/// message throughout the system.
///
/// A value records which encoding produced it (bincode, JSON, or
/// multipart; see [`Encoding`]) alongside the encoded payload, plus the
/// typehash of the type it was serialized as.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct Serialized {
    /// The encoded data
    encoded: Encoded,
    /// The typehash of the serialized value. This is used to provide
    /// typed introspection of the value.
    typehash: u64,
}
440
441impl std::fmt::Display for Serialized {
442    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
443        match self.dump() {
444            Ok(value) => {
445                // unwrap okay, self.dump() would return Err otherwise.
446                let typename = self.typename().unwrap();
447                // take the basename of the type (e.g. "foo::bar::baz" -> "baz")
448                let basename = typename.split("::").last().unwrap_or(typename);
449                write!(f, "{}{}", basename, JsonFmt(&value))
450            }
451            Err(_) => write!(f, "{:?}", self.encoded),
452        }
453    }
454}
455
456impl Serialized {
457    /// Construct a new serialized value by serializing the provided T-typed value.
458    /// Serialize uses the default encoding defined by the configuration key
459    /// [`config::DEFAULT_ENCODING`] in the global configuration; use [`serialize_with_encoding`]
460    /// to serialize values with a specific encoding.
461    pub fn serialize<T: Serialize + Named>(value: &T) -> Result<Self, Error> {
462        Self::serialize_with_encoding(config::global::get(config::DEFAULT_ENCODING), value)
463    }
464
465    /// Serialize U-typed value as a T-typed value. This should be used with care
466    /// (typically only in testing), as the value's representation may be illegally
467    /// coerced.
468    pub fn serialize_as<T: Named, U: Serialize>(value: &U) -> Result<Self, Error> {
469        Self::serialize_with_encoding_as::<T, U>(
470            config::global::get(config::DEFAULT_ENCODING),
471            value,
472        )
473    }
474
475    /// Serialize the value with the using the provided encoding.
476    pub fn serialize_with_encoding<T: Serialize + Named>(
477        encoding: Encoding,
478        value: &T,
479    ) -> Result<Self, Error> {
480        Self::serialize_with_encoding_as::<T, T>(encoding, value)
481    }
482
483    /// Serialize U-typed value as a T-typed value. This should be used with care
484    /// (typically only in testing), as the value's representation may be illegally
485    /// coerced.
486    pub fn serialize_with_encoding_as<T: Named, U: Serialize>(
487        encoding: Encoding,
488        value: &U,
489    ) -> Result<Self, Error> {
490        Ok(Self {
491            encoded: match encoding {
492                Encoding::Bincode => Encoded::Bincode(bincode::serialize(value)?.into()),
493                Encoding::Json => Encoded::Json(serde_json::to_vec(value)?.into()),
494                Encoding::Multipart => {
495                    Encoded::Multipart(serde_multipart::serialize_bincode(value)?)
496                }
497            },
498            typehash: T::typehash(),
499        })
500    }
501
502    /// Deserialize a value to the provided type T.
503    pub fn deserialized<T: DeserializeOwned + Named>(&self) -> Result<T, anyhow::Error> {
504        anyhow::ensure!(
505            self.is::<T>(),
506            "attempted to serialize {}-typed serialized into type {}",
507            self.typename().unwrap_or("unknown"),
508            T::typename()
509        );
510        self.deserialized_unchecked()
511    }
512
513    /// Deserialize a value to the provided type T, without checking for type conformance.
514    /// This should be used carefully, only when you know that the dynamic type check is
515    /// not needed.
516    pub fn deserialized_unchecked<T: DeserializeOwned>(&self) -> Result<T, anyhow::Error> {
517        match &self.encoded {
518            Encoded::Bincode(data) => bincode::deserialize(data).map_err(anyhow::Error::from),
519            Encoded::Json(data) => serde_json::from_slice(data).map_err(anyhow::Error::from),
520            Encoded::Multipart(message) => {
521                serde_multipart::deserialize_bincode(message.clone()).map_err(anyhow::Error::from)
522            }
523        }
524    }
525
526    /// Transcode the serialized value to JSON. This operation will succeed if the type hash
527    /// is embedded in the value, and the corresponding type is available in this binary.
528    pub fn transcode_to_json(self) -> Result<Self, Self> {
529        match self.encoded {
530            Encoded::Bincode(_) | Encoded::Multipart(_) => {
531                let json_value = match self.dump() {
532                    Ok(json_value) => json_value,
533                    Err(_) => return Err(self),
534                };
535                let json_data = match serde_json::to_vec(&json_value) {
536                    Ok(json_data) => json_data,
537                    Err(_) => return Err(self),
538                };
539                Ok(Self {
540                    encoded: Encoded::Json(json_data.into()),
541                    typehash: self.typehash,
542                })
543            }
544            Encoded::Json(_) => Ok(self),
545        }
546    }
547
548    /// Dump the Serialized message into a JSON value. This will succeed if: 1) the typehash is embedded
549    /// in the serialized value; 2) the named type is linked into the binary.
550    pub fn dump(&self) -> Result<serde_json::Value, anyhow::Error> {
551        match &self.encoded {
552            Encoded::Bincode(_) | Encoded::Multipart(_) => {
553                let Some(typeinfo) = TYPE_INFO.get(&self.typehash) else {
554                    anyhow::bail!("binary does not have typeinfo for {}", self.typehash);
555                };
556                typeinfo.dump(self.clone())
557            }
558            Encoded::Json(data) => serde_json::from_slice(data).map_err(anyhow::Error::from),
559        }
560    }
561
562    /// The encoding used by this serialized value.
563    pub fn encoding(&self) -> Encoding {
564        self.encoded.encoding()
565    }
566
567    /// The typehash of the serialized value.
568    pub fn typehash(&self) -> u64 {
569        self.typehash
570    }
571
572    /// The typename of the serialized value, if available.
573    pub fn typename(&self) -> Option<&'static str> {
574        TYPE_INFO
575            .get(&self.typehash)
576            .map(|typeinfo| typeinfo.typename())
577    }
578
579    /// Deserialize a prefix of the value. This is currently only supported
580    /// for bincode-serialized values.
581    // TODO: we should support this by formalizing the notion of a 'prefix'
582    // serialization, and generalize it to other codecs as well.
583    pub fn prefix<T: DeserializeOwned>(&self) -> Result<T, anyhow::Error> {
584        match &self.encoded {
585            Encoded::Bincode(data) => bincode::deserialize(data).map_err(anyhow::Error::from),
586            _ => anyhow::bail!("only bincode supports prefix emplacement"),
587        }
588    }
589
590    /// Emplace a new prefix to this value. This is currently only supported
591    /// for bincode-serialized values.
592    pub fn emplace_prefix<T: Serialize + DeserializeOwned>(
593        &mut self,
594        prefix: T,
595    ) -> Result<(), anyhow::Error> {
596        let data = match &self.encoded {
597            Encoded::Bincode(data) => data,
598            _ => anyhow::bail!("only bincode supports prefix emplacement"),
599        };
600
601        // This is a bit ugly, but: we first deserialize out the old prefix,
602        // then serialize the new prefix, then splice the two together.
603        // This is safe because we know that the prefix is the first thing
604        // in the serialized value, and that the serialization format is stable.
605        let mut cursor = Cursor::new(data.clone());
606        let _prefix: T = bincode::deserialize_from(&mut cursor).unwrap();
607        let position = cursor.position() as usize;
608        let suffix = &cursor.into_inner()[position..];
609        let mut data = bincode::serialize(&prefix)?;
610        data.extend_from_slice(suffix);
611        self.encoded = Encoded::Bincode(data.into());
612
613        Ok(())
614    }
615
616    /// The length of the underlying serialized message
617    pub fn len(&self) -> usize {
618        self.encoded.len()
619    }
620
621    /// Is the message empty. This should always return false.
622    pub fn is_empty(&self) -> bool {
623        self.encoded.is_empty()
624    }
625
626    /// Returns the 32bit crc of the serialized data
627    pub fn crc(&self) -> u32 {
628        self.encoded.crc()
629    }
630
631    /// Returns whether this value contains a serialized M-typed value. Returns None
632    /// when type information is unavailable.
633    pub fn is<M: Named>(&self) -> bool {
634        self.typehash == M::typehash()
635    }
636}
637
638const MAX_BYTE_PREVIEW_LENGTH: usize = 8;
639
640fn display_bytes_as_hash(f: &mut impl std::fmt::Write, bytes: &[u8]) -> std::fmt::Result {
641    let hash = crc32fast::hash(bytes);
642    write!(f, "CRC:{:x}", hash)?;
643    // Implementing in this way lets us print without allocating a new intermediate string.
644    for &byte in bytes.iter().take(MAX_BYTE_PREVIEW_LENGTH) {
645        write!(f, " {:x}", byte)?;
646    }
647    if bytes.len() > MAX_BYTE_PREVIEW_LENGTH {
648        write!(f, " [...{} bytes]", bytes.len() - MAX_BYTE_PREVIEW_LENGTH)?;
649    }
650    Ok(())
651}
652
653/// Formats a binary slice as hex when its display function is called.
654pub struct HexFmt<'a>(pub &'a [u8]);
655
656impl<'a> std::fmt::Display for HexFmt<'a> {
657    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
658        // calculate a 2 byte checksum to prepend to the message
659        display_bytes_as_hash(f, self.0)
660    }
661}
662
663/// Formats a JSON value for display, printing all keys but
664/// truncating and displaying a hash if the content is too long.
665pub struct JsonFmt<'a>(pub &'a serde_json::Value);
666
667const MAX_JSON_VALUE_DISPLAY_LENGTH: usize = 8;
668
669impl<'a> std::fmt::Display for JsonFmt<'a> {
670    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
671        /// Truncate the input string to MAX_JSON_VALUE_DISPLAY_LENGTH and append
672        /// the truncated hash of the full value for easy comparison.
673        fn truncate_and_hash(value_str: &str) -> String {
674            let truncated_str = &value_str[..MAX_JSON_VALUE_DISPLAY_LENGTH];
675            let mut result = truncated_str.to_string();
676            result.push_str(&format!("[...{} chars] ", value_str.len()));
677            display_bytes_as_hash(&mut result, value_str.as_bytes()).unwrap();
678            result
679        }
680
681        /// Recursively truncate a serde_json::Value object.
682        fn truncate_json_values(value: &serde_json::Value) -> serde_json::Value {
683            match value {
684                serde_json::Value::String(s) => {
685                    if s.len() > MAX_JSON_VALUE_DISPLAY_LENGTH {
686                        serde_json::Value::String(truncate_and_hash(s))
687                    } else {
688                        value.clone()
689                    }
690                }
691                serde_json::Value::Array(arr) => {
692                    let array_str = serde_json::to_string(arr).unwrap();
693                    if array_str.len() > MAX_JSON_VALUE_DISPLAY_LENGTH {
694                        serde_json::Value::String(truncate_and_hash(&array_str))
695                    } else {
696                        value.clone()
697                    }
698                }
699                serde_json::Value::Object(obj) => {
700                    let truncated_obj: serde_json::Map<_, _> = obj
701                        .iter()
702                        .map(|(k, v)| (k.clone(), truncate_json_values(v)))
703                        .collect();
704                    serde_json::Value::Object(truncated_obj)
705                }
706                _ => value.clone(),
707            }
708        }
709
710        let truncated = truncate_json_values(self.0);
711        write!(f, "{}", truncated)
712    }
713}
714
#[cfg(test)]
mod tests {

    use serde::Deserialize;
    use serde::Serialize;
    use serde_multipart::Part;
    use strum::IntoEnumIterator;

    use super::*;
    use crate as hyperactor; // for macros
    use crate::Named;

    #[derive(Named, Serialize, Deserialize)]
    struct TestStruct;

    /// Typenames of basic, generic, tuple, and derived types.
    #[test]
    fn test_names() {
        assert_eq!(String::typename(), "String");
        assert_eq!(Option::<String>::typename(), "Option<String>");
        assert_eq!(Vec::<String>::typename(), "Vec<String>");
        assert_eq!(Vec::<Vec::<String>>::typename(), "Vec<Vec<String>>");
        assert_eq!(
            Vec::<Vec::<Vec::<String>>>::typename(),
            "Vec<Vec<Vec<String>>>"
        );
        assert_eq!(
            <(u64, String, Option::<isize>)>::typename(),
            "(u64, String, Option<isize>)"
        );
        assert_eq!(
            TestStruct::typename(),
            "hyperactor::data::tests::TestStruct"
        );
        assert_eq!(
            Vec::<TestStruct>::typename(),
            "Vec<hyperactor::data::tests::TestStruct>"
        );
    }

    /// Pins the hash and port of `String`, and checks that differently
    /// nested generic types hash differently.
    #[test]
    fn test_ports() {
        assert_eq!(String::typehash(), 3947244799002047352u64);
        // The port is the typehash with the top bit set.
        assert_eq!(String::port(), 13170616835856823160u64);
        assert_ne!(
            Vec::<Vec::<Vec::<String>>>::typehash(),
            Vec::<Vec::<Vec::<Vec::<String>>>>::typehash(),
        );
    }

    #[derive(Named, Serialize, Deserialize, PartialEq, Eq, Debug)]
    struct TestDumpStruct {
        a: String,
        b: u64,
        c: Option<i32>,
        d: Option<Part>,
    }
    // Registered so that `dump()` and `typename()` can resolve the typehash.
    crate::register_type!(TestDumpStruct);

    /// Dumping and displaying a registered type, both in the default
    /// encoding and after transcoding to JSON.
    #[test]
    fn test_dump_struct() {
        let data = TestDumpStruct {
            a: "hello".to_string(),
            b: 1234,
            c: Some(5678),
            d: None,
        };
        let serialized = Serialized::serialize(&data).unwrap();
        let serialized_json = serialized.clone().transcode_to_json().unwrap();

        // NOTE(review): this assumes the configured default encoding is
        // multipart; the test does not override it.
        assert!(serialized.encoded.is_multipart());
        assert!(serialized_json.encoded.is_json());

        let json_string =
            String::from_utf8(serialized_json.encoded.as_json().unwrap().to_vec().clone()).unwrap();
        // The serialized data for JSON is just the (compact) JSON string.
        assert_eq!(
            json_string,
            "{\"a\":\"hello\",\"b\":1234,\"c\":5678,\"d\":null}"
        );

        for serialized in [serialized, serialized_json] {
            // Note, at this point, serialized has no knowledge other than its embedded typehash.

            assert_eq!(
                serialized.typename(),
                Some("hyperactor::data::tests::TestDumpStruct")
            );

            let json = serialized.dump().unwrap();
            assert_eq!(
                json,
                serde_json::json!({
                    "a": "hello",
                    "b": 1234,
                    "c": 5678,
                    "d": null,
                })
            );

            // Display prints the basename of the type followed by the JSON.
            assert_eq!(
                format!("{}", serialized),
                "TestDumpStruct{\"a\":\"hello\",\"b\":1234,\"c\":5678,\"d\":null}",
            );
        }
    }

    /// Replacing the leading field of a bincode payload in place.
    #[test]
    fn test_emplace_prefix() {
        // Prefix operations are only supported for bincode, so force it.
        let config = config::global::lock();
        let _guard = config.override_key(config::DEFAULT_ENCODING, Encoding::Bincode);
        let data = TestDumpStruct {
            a: "hello".to_string(),
            b: 1234,
            c: Some(5678),
            d: None,
        };

        let mut ser = Serialized::serialize(&data).unwrap();
        // The first field of the struct is readable as a prefix.
        assert_eq!(ser.prefix::<String>().unwrap(), "hello".to_string());

        ser.emplace_prefix("hello, world, 123!".to_string())
            .unwrap();

        // Only the first field changed; the remaining fields are intact.
        assert_eq!(
            ser.deserialized::<TestDumpStruct>().unwrap(),
            TestDumpStruct {
                a: "hello, world, 123!".to_string(),
                b: 1234,
                c: Some(5678),
                d: None,
            }
        );
    }

    /// The derived `arm()` reports the variant name for every variant shape.
    #[test]
    fn test_arms() {
        #[derive(Named, Serialize, Deserialize)]
        enum TestArm {
            #[allow(dead_code)]
            A(u32),
            B,
            C(),
            D {
                #[allow(dead_code)]
                a: u32,
                #[allow(dead_code)]
                b: String,
            },
        }

        assert_eq!(TestArm::A(1234).arm(), Some("A"));
        assert_eq!(TestArm::B.arm(), Some("B"));
        assert_eq!(TestArm::C().arm(), Some("C"));
        assert_eq!(
            TestArm::D {
                a: 1234,
                b: "hello".to_string()
            }
            .arm(),
            Some("D")
        );
    }

    /// `HexFmt` output: CRC, up to eight preview bytes, omitted-byte count.
    #[test]
    fn display_hex() {
        assert_eq!(
            format!("{}", HexFmt("hello world".as_bytes())),
            "CRC:d4a1185 68 65 6c 6c 6f 20 77 6f [...3 bytes]"
        );
        // Empty input prints only the CRC.
        assert_eq!(format!("{}", HexFmt("".as_bytes())), "CRC:0");
        assert_eq!(
            format!("{}", HexFmt("a very long string that is long".as_bytes())),
            "CRC:c7e24f62 61 20 76 65 72 79 20 6c [...23 bytes]"
        );
    }

    /// `JsonFmt` leaves short values alone and truncates long strings and
    /// arrays, including inside nested objects.
    #[test]
    fn test_json_fmt() {
        let json_value = serde_json::json!({
            "name": "test",
            "number": 42,
            "nested": {
                "key": "value"
            }
        });
        // JSON values with short values should print normally
        assert_eq!(
            format!("{}", JsonFmt(&json_value)),
            "{\"name\":\"test\",\"nested\":{\"key\":\"value\"},\"number\":42}",
        );

        let empty_json = serde_json::json!({});
        assert_eq!(format!("{}", JsonFmt(&empty_json)), "{}");

        let simple_array = serde_json::json!([1, 2, 3]);
        assert_eq!(format!("{}", JsonFmt(&simple_array)), "[1,2,3]");

        // JSON values with very long strings should be truncated
        let long_string_json = serde_json::json!({
            "long_string": "a".repeat(MAX_JSON_VALUE_DISPLAY_LENGTH * 5)
        });
        assert_eq!(
            format!("{}", JsonFmt(&long_string_json)),
            "{\"long_string\":\"aaaaaaaa[...40 chars] CRC:c95b8a25 61 61 61 61 61 61 61 61 [...32 bytes]\"}"
        );

        // JSON values with very long arrays should be truncated
        let long_array_json =
            serde_json::json!((1..=(MAX_JSON_VALUE_DISPLAY_LENGTH + 4)).collect::<Vec<_>>());
        assert_eq!(
            format!("{}", JsonFmt(&long_array_json)),
            "\"[1,2,3,4[...28 chars] CRC:e5c881af 5b 31 2c 32 2c 33 2c 34 [...20 bytes]\""
        );

        // Test for truncation within nested blocks
        let nested_json = serde_json::json!({
            "simple_number": 42,
            "simple_bool": true,
            "outer": {
                "long_string": "a".repeat(MAX_JSON_VALUE_DISPLAY_LENGTH + 10),
                "long_array": (1..=(MAX_JSON_VALUE_DISPLAY_LENGTH + 4)).collect::<Vec<_>>(),
                "inner": {
                    "simple_value": "short",
                }
            }
        });
        println!("{}", JsonFmt(&nested_json));
        assert_eq!(
            format!("{}", JsonFmt(&nested_json)),
            "{\"outer\":{\"inner\":{\"simple_value\":\"short\"},\"long_array\":\"[1,2,3,4[...28 chars] CRC:e5c881af 5b 31 2c 32 2c 33 2c 34 [...20 bytes]\",\"long_string\":\"aaaaaaaa[...18 chars] CRC:b8ac0e31 61 61 61 61 61 61 61 61 [...10 bytes]\"},\"simple_bool\":true,\"simple_number\":42}",
        );
    }

    /// Round-trips a value through every supported encoding.
    #[test]
    fn test_encodings() {
        let value = TestDumpStruct {
            a: "hello, world".to_string(),
            b: 123,
            c: Some(321),
            d: Some(Part::from("hello, world, again")),
        };
        for enc in Encoding::iter() {
            let ser = Serialized::serialize_with_encoding(enc, &value).unwrap();
            assert_eq!(ser.encoding(), enc);
            assert_eq!(ser.deserialized::<TestDumpStruct>().unwrap(), value);
        }
    }
}