Skip to main content

wirevalue/
lib.rs

1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
9//! Wirevalue provides an erased serialization format. [`Any`] is a type-erased
10//! envelope containing a serialized value identified by a [`typeuri::Named`].
11//!
12//! Wirevalues also provide encoding polymorphism, allowing the same representation
13//! to carry multiple serialization formats, and to transcode between them for
14//! types that are registered through [`register_type!`].
15
16use std::any::TypeId;
17use std::collections::HashMap;
18use std::fmt;
19use std::io::Cursor;
20use std::sync::LazyLock;
21
22use enum_as_inner::EnumAsInner;
23use hyperactor_config::AttrValue;
24use serde::Deserialize;
25use serde::Serialize;
26use serde::de::DeserializeOwned;
27pub use typeuri::Named;
28pub use typeuri::intern_typename;
29
30pub mod config;
31
/// Typehash value indicating a broken (unknown type, no value) Any.
///
/// [`Any::new_broken`] tags the values it creates with this hash, and
/// [`Any::is_broken`] compares a value's typehash against it.
pub const BROKEN_TYPEHASH: u64 = 0;
34
#[doc(hidden)]
/// Dump trait for Named types that are also serializable/deserializable.
/// This is a utility used by [`Any::dump`], and is not intended
/// for direct use.
///
/// A blanket implementation in this file covers every type that satisfies the
/// supertrait bounds, and [`register_type!`] records `dump` as the dumper of
/// the registered type.
pub trait NamedDumpable: Named + Serialize + for<'de> Deserialize<'de> {
    /// Dump the data in Any to a JSON value.
    ///
    /// # Errors
    /// The blanket implementation fails when `data` cannot be deserialized as
    /// `Self`, or when the deserialized value cannot be converted to JSON.
    fn dump(data: Any) -> Result<serde_json::Value>;
}
43
44impl<T: Named + Serialize + for<'de> Deserialize<'de>> NamedDumpable for T {
45    fn dump(data: Any) -> Result<serde_json::Value> {
46        let value = data.deserialized::<Self>()?;
47        Ok(serde_json::to_value(value)?)
48    }
49}
50
#[doc(hidden)]
/// Registry entry describing one concrete [`Named`] type.
///
/// Entries are built by [`register_type!`] and gathered through `inventory`.
/// Every field is a plain function pointer, so an entry carries no data of
/// its own; use the accessor methods on this type rather than calling the
/// pointers directly.
#[derive(Debug)]
pub struct TypeInfo {
    /// Named::typename()
    pub typename: fn() -> &'static str,
    /// Named::typehash()
    pub typehash: fn() -> u64,
    /// Named::typeid()
    pub typeid: fn() -> TypeId,
    /// Named::port()
    pub port: fn() -> u64,
    /// A function that can transcode a serialized value to JSON.
    pub dump: Option<fn(Any) -> Result<serde_json::Value>>,
    /// Return the arm for this type, if available.
    ///
    /// Unsafe to call: the pointer must be valid for the registered type.
    pub arm_unchecked: unsafe fn(*const ()) -> Option<&'static str>,
    /// Return the endpoint name for this message, if available.
    /// Separate from `arm_unchecked` because struct-typed messages (e.g.,
    /// PythonMessage) have no enum arm but do carry an endpoint name inside
    /// their payload. Types that use `register_type!` get a default that
    /// delegates to `arm_unchecked`, which works for Rust enum handlers.
    ///
    /// Unsafe to call: the pointer must be valid for the registered type.
    pub endpoint_name: unsafe fn(*const ()) -> Option<String>,
}
73
74#[allow(dead_code)]
75impl TypeInfo {
76    /// Get the typeinfo for the provided type hash.
77    pub fn get(typehash: u64) -> Option<&'static TypeInfo> {
78        TYPE_INFO.get(&typehash).map(|v| &**v)
79    }
80
81    /// Get the typeinfo for the provided type id.
82    pub fn get_by_typeid(typeid: TypeId) -> Option<&'static TypeInfo> {
83        TYPE_INFO_BY_TYPE_ID.get(&typeid).map(|v| &**v)
84    }
85
86    /// Get the typeinfo for the provided type.
87    pub fn of<T: ?Sized + 'static>() -> Option<&'static TypeInfo> {
88        Self::get_by_typeid(TypeId::of::<T>())
89    }
90
91    /// Get the typename for this type.
92    pub fn typename(&self) -> &'static str {
93        (self.typename)()
94    }
95
96    /// Get the typehash for this type.
97    pub fn typehash(&self) -> u64 {
98        (self.typehash)()
99    }
100
101    /// Get the typeid for this type.
102    pub fn typeid(&self) -> TypeId {
103        (self.typeid)()
104    }
105
106    /// Get the port for this type.
107    pub fn port(&self) -> u64 {
108        (self.port)()
109    }
110
111    /// Dump the serialized data to a JSON value.
112    pub fn dump(&self, data: Any) -> Result<serde_json::Value> {
113        if let Some(dump) = self.dump {
114            (dump)(data)
115        } else {
116            Err(Error::MissingDumper(self.typehash()))
117        }
118    }
119
120    /// Get the arm name for an enum value.
121    ///
122    /// # Safety
123    /// The caller must ensure the value pointer is valid for this type.
124    pub unsafe fn arm_unchecked(&self, value: *const ()) -> Option<&'static str> {
125        // SAFETY: This isn't safe, we're passing it on.
126        unsafe { (self.arm_unchecked)(value) }
127    }
128
129    /// Get the endpoint name for a message value.
130    ///
131    /// # Safety
132    /// The caller must ensure the value pointer is valid for this type.
133    pub unsafe fn endpoint_name(&self, value: *const ()) -> Option<String> {
134        // SAFETY: This isn't safe, we're passing it on.
135        unsafe { (self.endpoint_name)(value) }
136    }
137}
138
139inventory::collect!(TypeInfo);
140
141/// Type infos for all types that have been linked into the binary, keyed by typehash.
142static TYPE_INFO: LazyLock<HashMap<u64, &'static TypeInfo>> = LazyLock::new(|| {
143    inventory::iter::<TypeInfo>()
144        .map(|entry| (entry.typehash(), entry))
145        .collect()
146});
147
148/// Type infos for all types that have been linked into the binary, keyed by typeid.
149static TYPE_INFO_BY_TYPE_ID: LazyLock<HashMap<std::any::TypeId, &'static TypeInfo>> =
150    LazyLock::new(|| {
151        TYPE_INFO
152            .values()
153            .map(|info| (info.typeid(), &**info))
154            .collect()
155    });
156
/// Register a (concrete) type so that it may be looked up by name or hash. Type registration
/// is required only to improve diagnostics, as it allows a binary to introspect serialized
/// payloads under type erasure.
///
/// The provided type must implement [`typeuri::Named`], and must be concrete.
/// Because the macro also records `NamedDumpable::dump` as the type's dumper,
/// the type must additionally satisfy the bounds of `NamedDumpable`
/// (serializable and deserializable).
///
/// The macro expands to a `submit!` of one [`TypeInfo`] value whose fields
/// point at the type's `Named` functions.
#[macro_export]
macro_rules! register_type {
    ($type:ty) => {
        $crate::submit! {
            $crate::TypeInfo {
                typename: <$type as $crate::Named>::typename,
                typehash: <$type as $crate::Named>::typehash,
                typeid: <$type as $crate::Named>::typeid,
                port: <$type as $crate::Named>::port,
                dump: Some(<$type as $crate::NamedDumpable>::dump),
                arm_unchecked: <$type as $crate::Named>::arm_unchecked,
                // Default endpoint name: the arm name, converted to an owned string.
                endpoint_name: |ptr| {
                    // SAFETY: ptr points to a value of type $type, as guaranteed by the caller.
                    unsafe { <$type as $crate::Named>::arm_unchecked(ptr).map(|s| s.to_string()) }
                },
            }
        }
    };
}
181
182// Re-export inventory::submit for the register_type! macro
183#[doc(hidden)]
184pub use inventory::submit;
185
/// An enumeration containing the supported encodings of serialized values.
///
/// Each variant carries a `strum` string form (given in its `to_string`
/// attribute), which is what the derived `Display` prints.
#[derive(
    Debug,
    Clone,
    Copy,
    Serialize,
    Deserialize,
    PartialEq,
    Eq,
    AttrValue,
    typeuri::Named,
    strum::EnumIter,
    strum::Display,
    strum::EnumString
)]
pub enum Encoding {
    /// Serde bincode encoding. String form: `bincode`.
    #[strum(to_string = "bincode")]
    Bincode,
    /// Serde JSON encoding. String form: `serde_json`.
    #[strum(to_string = "serde_json")]
    Json,
    /// Serde multipart encoding. String form: `serde_multipart`.
    #[strum(to_string = "serde_multipart")]
    Multipart,
}
212
/// The encoding used for a serialized value.
///
/// Each variant holds the payload produced by the matching [`Encoding`].
#[derive(Clone, Serialize, Deserialize, PartialEq, EnumAsInner)]
enum Encoded {
    /// Bytes produced by `bincode::serde::encode_to_vec` with the legacy configuration.
    Bincode(bytes::Bytes),
    /// Bytes of a JSON document, as produced by `serde_json::to_vec`.
    Json(bytes::Bytes),
    /// Message produced by `serde_multipart::serialize_bincode`.
    Multipart(serde_multipart::Message),
}
220
221impl Encoded {
222    /// The length of the underlying serialized message
223    pub fn len(&self) -> usize {
224        match &self {
225            Encoded::Bincode(data) => data.len(),
226            Encoded::Json(data) => data.len(),
227            Encoded::Multipart(message) => message.len(),
228        }
229    }
230
231    /// Is the message empty. This should always return false.
232    pub fn is_empty(&self) -> bool {
233        match &self {
234            Encoded::Bincode(data) => data.is_empty(),
235            Encoded::Json(data) => data.is_empty(),
236            Encoded::Multipart(message) => message.is_empty(),
237        }
238    }
239
240    /// Returns the encoding of this serialized value.
241    pub fn encoding(&self) -> Encoding {
242        match &self {
243            Encoded::Bincode(_) => Encoding::Bincode,
244            Encoded::Json(_) => Encoding::Json,
245            Encoded::Multipart(_) => Encoding::Multipart,
246        }
247    }
248
249    /// Computes the 32bit crc of the encoded data
250    pub fn crc(&self) -> u32 {
251        match &self {
252            Encoded::Bincode(data) => crc32fast::hash(data),
253            Encoded::Json(data) => crc32fast::hash(data),
254            Encoded::Multipart(message) => {
255                let mut hasher = crc32fast::Hasher::new();
256                for fragment in message.body().iter() {
257                    hasher.update(fragment);
258                }
259                for part in message.parts() {
260                    for fragment in part.iter() {
261                        hasher.update(fragment);
262                    }
263                }
264                hasher.finalize()
265            }
266        }
267    }
268}
269
270impl std::fmt::Debug for Encoded {
271    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
272        match self {
273            Encoded::Bincode(data) => write!(f, "Encoded::Bincode({})", HexFmt(data)),
274            Encoded::Json(data) => write!(f, "Encoded::Json({})", HexFmt(data)),
275            Encoded::Multipart(message) => {
276                write!(
277                    f,
278                    "Encoded::Multipart(body={}",
279                    HexFmt(&message.body().to_bytes())
280                )?;
281                for (index, part) in message.parts().iter().enumerate() {
282                    write!(f, ", part[{}]={}", index, HexFmt(&part.to_bytes()))?;
283                }
284                write!(f, ")")
285            }
286        }
287    }
288}
289
/// The type of error returned by operations on [`Any`].
#[derive(Debug, thiserror::Error)]
pub enum Error {
    /// Errors returned from serde bincode encoding.
    #[error(transparent)]
    BincodeEncode(#[from] bincode::error::EncodeError),

    /// Errors returned from serde bincode decoding.
    #[error(transparent)]
    BincodeDecode(#[from] bincode::error::DecodeError),

    /// Errors returned from serde JSON.
    #[error(transparent)]
    Json(#[from] serde_json::Error),

    /// The encoding was not recognized.
    ///
    /// Within this file the variant is also used to carry the stringified
    /// errors of `serde_multipart` serialization and deserialization.
    #[error("unknown encoding: {0}")]
    InvalidEncoding(String),

    /// Attempted to deserialize a broken Any value.
    #[error("attempted to deserialize a broken Any value")]
    BrokenAny,

    /// Type mismatch during deserialization.
    #[error("type mismatch: expected {expected}, found {actual}")]
    TypeMismatch {
        /// Typename of the type the caller asked to deserialize.
        expected: &'static str,
        /// Typename registered for the value's typehash, or `"unknown"` when
        /// the typehash is not registered in this binary.
        actual: String,
    },

    /// Type info not available for the given typehash.
    #[error("binary does not have typeinfo for typehash {0}")]
    MissingTypeInfo(u64),

    /// Dumper not available for the given typehash.
    #[error("binary does not have dumper for typehash {0}")]
    MissingDumper(u64),

    /// Operation requires bincode encoding.
    ///
    /// Returned by [`Any::prefix`] and [`Any::emplace_prefix`] for values that
    /// are not bincode-encoded.
    #[error("only bincode encoding supports prefix operations")]
    PrefixNotSupported,
}
332
/// A specialized Result type for wirevalue operations: `std::result::Result`
/// with the error type fixed to [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
335
/// Represents a serialized value, wrapping the underlying serialization
/// and deserialization details, while ensuring that we pass correctly-serialized
/// message throughout the system.
///
/// An `Any` stores its payload together with the encoding that produced it
/// (see [`Any::encoding`] and [`Encoding`]), so the same type can carry
/// bincode, JSON, or multipart data.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct Any {
    /// The encoded data
    encoded: Encoded,
    /// The typehash of the serialized value. This is used to provide
    /// typed introspection of the value. [`BROKEN_TYPEHASH`] marks a broken
    /// value.
    typehash: u64,
}
350
351impl std::fmt::Display for Any {
352    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
353        match self.dump() {
354            Ok(value) => {
355                // unwrap okay, self.dump() would return Err otherwise.
356                let typename = self.typename().unwrap();
357                // take the basename of the type (e.g. "foo::bar::baz" -> "baz")
358                let basename = typename.split("::").last().unwrap_or(typename);
359                write!(f, "{}{}", basename, JsonFmt(&value))
360            }
361            Err(_) => write!(f, "{:?}", self.encoded),
362        }
363    }
364}
365
366impl Any {
367    /// Construct a new serialized value by serializing the provided T-typed value.
368    /// Serialize uses the default encoding defined by the configuration key
369    /// [`config::DEFAULT_ENCODING`] in the global configuration; use [`serialize_with_encoding`]
370    /// to serialize values with a specific encoding.
371    pub fn serialize<T: Serialize + Named>(value: &T) -> Result<Self> {
372        Self::serialize_with_encoding(
373            hyperactor_config::global::get(config::DEFAULT_ENCODING),
374            value,
375        )
376    }
377
378    /// Serialize U-typed value as a T-typed value. This should be used with care
379    /// (typically only in testing), as the value's representation may be illegally
380    /// coerced.
381    pub fn serialize_as<T: Named, U: Serialize>(value: &U) -> Result<Self> {
382        Self::serialize_with_encoding_as::<T, U>(
383            hyperactor_config::global::get(config::DEFAULT_ENCODING),
384            value,
385        )
386    }
387
388    /// Serialize the value with the using the provided encoding.
389    pub fn serialize_with_encoding<T: Serialize + Named>(
390        encoding: Encoding,
391        value: &T,
392    ) -> Result<Self> {
393        Self::serialize_with_encoding_as::<T, T>(encoding, value)
394    }
395
396    /// Serialize U-typed value as a T-typed value. This should be used with care
397    /// (typically only in testing), as the value's representation may be illegally
398    /// coerced.
399    pub fn serialize_with_encoding_as<T: Named, U: Serialize>(
400        encoding: Encoding,
401        value: &U,
402    ) -> Result<Self> {
403        Ok(Self {
404            encoded: match encoding {
405                Encoding::Bincode => Encoded::Bincode(
406                    bincode::serde::encode_to_vec(value, bincode::config::legacy())?.into(),
407                ),
408                Encoding::Json => Encoded::Json(serde_json::to_vec(value)?.into()),
409                Encoding::Multipart => Encoded::Multipart(
410                    serde_multipart::serialize_bincode(value)
411                        .map_err(|e| Error::InvalidEncoding(e.to_string()))?,
412                ),
413            },
414            typehash: T::typehash(),
415        })
416    }
417
418    /// Create a new broken Any value. A broken value has unknown type and
419    /// no valid data. Attempting to deserialize a broken value will fail.
420    pub fn new_broken() -> Self {
421        Self {
422            encoded: Encoded::Bincode(bytes::Bytes::new()),
423            typehash: BROKEN_TYPEHASH,
424        }
425    }
426
427    /// Returns true if this Any is broken (unknown type, no value).
428    pub fn is_broken(&self) -> bool {
429        self.typehash == BROKEN_TYPEHASH
430    }
431
432    /// Deserialize a value to the provided type T.
433    pub fn deserialized<T: DeserializeOwned + Named>(&self) -> Result<T> {
434        if self.is_broken() {
435            return Err(Error::BrokenAny);
436        }
437        if !self.is::<T>() {
438            return Err(Error::TypeMismatch {
439                expected: T::typename(),
440                actual: self.typename().unwrap_or("unknown").to_string(),
441            });
442        }
443        self.deserialized_unchecked()
444    }
445
446    /// Deserialize a value to the provided type T, without checking for type conformance.
447    /// This should be used carefully, only when you know that the dynamic type check is
448    /// not needed.
449    pub fn deserialized_unchecked<T: DeserializeOwned>(&self) -> Result<T> {
450        match &self.encoded {
451            Encoded::Bincode(data) => Ok(bincode::serde::decode_from_slice(
452                data,
453                bincode::config::legacy(),
454            )
455            .map(|(v, _)| v)?),
456            Encoded::Json(data) => Ok(serde_json::from_slice(data)?),
457            Encoded::Multipart(message) => {
458                Ok(serde_multipart::deserialize_bincode(message.clone())
459                    .map_err(|e| Error::InvalidEncoding(e.to_string()))?)
460            }
461        }
462    }
463
464    /// Transcode the serialized value to JSON. This operation will succeed if the type hash
465    /// is embedded in the value, and the corresponding type is available in this binary.
466    pub fn transcode_to_json(self) -> std::result::Result<Self, Self> {
467        match self.encoded {
468            Encoded::Bincode(_) | Encoded::Multipart(_) => {
469                let json_value = match self.dump() {
470                    Ok(json_value) => json_value,
471                    Err(_) => return Err(self),
472                };
473                let json_data = match serde_json::to_vec(&json_value) {
474                    Ok(json_data) => json_data,
475                    Err(_) => return Err(self),
476                };
477                Ok(Self {
478                    encoded: Encoded::Json(json_data.into()),
479                    typehash: self.typehash,
480                })
481            }
482            Encoded::Json(_) => Ok(self),
483        }
484    }
485
486    /// Dump the Any message into a JSON value. This will succeed if: 1) the typehash is embedded
487    /// in the serialized value; 2) the named type is linked into the binary.
488    pub fn dump(&self) -> Result<serde_json::Value> {
489        match &self.encoded {
490            Encoded::Bincode(_) | Encoded::Multipart(_) => {
491                let Some(typeinfo) = TYPE_INFO.get(&self.typehash) else {
492                    return Err(Error::MissingTypeInfo(self.typehash));
493                };
494                typeinfo.dump(self.clone())
495            }
496            Encoded::Json(data) => Ok(serde_json::from_slice(data)?),
497        }
498    }
499
500    /// The encoding used by this serialized value.
501    pub fn encoding(&self) -> Encoding {
502        self.encoded.encoding()
503    }
504
505    /// The typehash of the serialized value.
506    pub fn typehash(&self) -> u64 {
507        self.typehash
508    }
509
510    /// The typename of the serialized value, if available.
511    pub fn typename(&self) -> Option<&'static str> {
512        TYPE_INFO
513            .get(&self.typehash)
514            .map(|typeinfo| typeinfo.typename())
515    }
516
517    /// Deserialize a prefix of the value. This is currently only supported
518    /// for bincode-serialized values.
519    // TODO: we should support this by formalizing the notion of a 'prefix'
520    // serialization, and generalize it to other codecs as well.
521    pub fn prefix<T: DeserializeOwned>(&self) -> Result<T> {
522        match &self.encoded {
523            Encoded::Bincode(data) => Ok(bincode::serde::decode_from_slice(
524                data,
525                bincode::config::legacy(),
526            )
527            .map(|(v, _)| v)?),
528            _ => Err(Error::PrefixNotSupported),
529        }
530    }
531
532    /// Emplace a new prefix to this value. This is currently only supported
533    /// for bincode-serialized values.
534    pub fn emplace_prefix<T: Serialize + DeserializeOwned>(&mut self, prefix: T) -> Result<()> {
535        let data = match &self.encoded {
536            Encoded::Bincode(data) => data,
537            _ => return Err(Error::PrefixNotSupported),
538        };
539
540        // This is a bit ugly, but: we first deserialize out the old prefix,
541        // then serialize the new prefix, then splice the two together.
542        // This is safe because we know that the prefix is the first thing
543        // in the serialized value, and that the serialization format is stable.
544        let mut cursor = Cursor::new(data.clone());
545        let _prefix: T =
546            bincode::serde::decode_from_std_read(&mut cursor, bincode::config::legacy()).unwrap();
547        let position = cursor.position() as usize;
548        let suffix = &cursor.into_inner()[position..];
549        let mut data = bincode::serde::encode_to_vec(&prefix, bincode::config::legacy())?;
550        data.extend_from_slice(suffix);
551        self.encoded = Encoded::Bincode(data.into());
552
553        Ok(())
554    }
555
556    /// The length of the underlying serialized message
557    pub fn len(&self) -> usize {
558        self.encoded.len()
559    }
560
561    /// Is the message empty. This should always return false.
562    pub fn is_empty(&self) -> bool {
563        self.encoded.is_empty()
564    }
565
566    /// Returns the 32bit crc of the serialized data
567    pub fn crc(&self) -> u32 {
568        self.encoded.crc()
569    }
570
571    /// Returns whether this value contains a serialized M-typed value. Returns None
572    /// when type information is unavailable.
573    pub fn is<M: Named>(&self) -> bool {
574        self.typehash == M::typehash()
575    }
576}
577
/// Maximum number of leading bytes that `display_bytes_as_hash` prints
/// individually before summarizing the rest as a byte count.
const MAX_BYTE_PREVIEW_LENGTH: usize = 8;
579
580fn display_bytes_as_hash(f: &mut impl std::fmt::Write, bytes: &[u8]) -> std::fmt::Result {
581    let hash = crc32fast::hash(bytes);
582    write!(f, "CRC:{:x}", hash)?;
583    // Implementing in this way lets us print without allocating a new intermediate string.
584    for &byte in bytes.iter().take(MAX_BYTE_PREVIEW_LENGTH) {
585        write!(f, " {:x}", byte)?;
586    }
587    if bytes.len() > MAX_BYTE_PREVIEW_LENGTH {
588        write!(f, " [...{} bytes]", bytes.len() - MAX_BYTE_PREVIEW_LENGTH)?;
589    }
590    Ok(())
591}
592
/// Formats a binary slice as hex when its display function is called.
///
/// The output is a summary rather than a full dump: a CRC-32 of the slice, a
/// short hex preview of its leading bytes, and a count of the remaining bytes.
pub struct HexFmt<'a>(pub &'a [u8]);
595
596impl std::fmt::Display for HexFmt<'_> {
597    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
598        // calculate a 2 byte checksum to prepend to the message
599        display_bytes_as_hash(f, self.0)
600    }
601}
602
/// Formats a JSON value for display, printing all keys but
/// truncating and displaying a hash if the content is too long.
///
/// Strings, and arrays (measured by their JSON text), longer than
/// `MAX_JSON_VALUE_DISPLAY_LENGTH` are replaced by a truncated string with a
/// CRC summary; objects are processed recursively.
pub struct JsonFmt<'a>(pub &'a serde_json::Value);
606
/// Length threshold, in bytes, above which [`JsonFmt`] truncates a string or
/// the JSON text of an array; also the number of leading bytes it keeps.
const MAX_JSON_VALUE_DISPLAY_LENGTH: usize = 8;
608
609impl std::fmt::Display for JsonFmt<'_> {
610    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
611        /// Truncate the input string to MAX_JSON_VALUE_DISPLAY_LENGTH and append
612        /// the truncated hash of the full value for easy comparison.
613        fn truncate_and_hash(value_str: &str) -> String {
614            let truncate_at = MAX_JSON_VALUE_DISPLAY_LENGTH.min(value_str.len());
615
616            // Respect UTF-8 boundaries (multi-byte chars like emojis can be up to 4 bytes)
617            let mut safe_truncate_at = truncate_at;
618            while safe_truncate_at > 0 && !value_str.is_char_boundary(safe_truncate_at) {
619                safe_truncate_at -= 1;
620            }
621
622            let truncated_str = &value_str[..safe_truncate_at];
623            let mut result = truncated_str.to_string();
624            result.push_str(&format!("[...{} chars] ", value_str.len()));
625            display_bytes_as_hash(&mut result, value_str.as_bytes()).unwrap();
626            result
627        }
628
629        /// Recursively truncate a serde_json::Value object.
630        fn truncate_json_values(value: &serde_json::Value) -> serde_json::Value {
631            match value {
632                serde_json::Value::String(s) => {
633                    if s.len() > MAX_JSON_VALUE_DISPLAY_LENGTH {
634                        serde_json::Value::String(truncate_and_hash(s))
635                    } else {
636                        value.clone()
637                    }
638                }
639                serde_json::Value::Array(arr) => {
640                    let array_str = serde_json::to_string(arr).unwrap();
641                    if array_str.len() > MAX_JSON_VALUE_DISPLAY_LENGTH {
642                        serde_json::Value::String(truncate_and_hash(&array_str))
643                    } else {
644                        value.clone()
645                    }
646                }
647                serde_json::Value::Object(obj) => {
648                    let truncated_obj: serde_json::Map<_, _> = obj
649                        .iter()
650                        .map(|(k, v)| (k.clone(), truncate_json_values(v)))
651                        .collect();
652                    serde_json::Value::Object(truncated_obj)
653                }
654                _ => value.clone(),
655            }
656        }
657
658        let truncated = truncate_json_values(self.0);
659        write!(f, "{}", truncated)
660    }
661}
662
#[cfg(test)]
mod tests {
    use serde::Deserialize;
    use serde::Serialize;
    use serde_multipart::Part;
    use strum::IntoEnumIterator;
    use typeuri::Named;

    use super::*;

    /// Unit struct that is named but deliberately not registered.
    #[derive(typeuri::Named, Serialize, Deserialize)]
    struct TestStruct;

    /// Typenames of builtin, generic, tuple and locally derived types.
    #[test]
    fn test_names() {
        assert_eq!(String::typename(), "String");
        assert_eq!(Option::<String>::typename(), "Option<String>");
        assert_eq!(Vec::<String>::typename(), "Vec<String>");
        assert_eq!(Vec::<Vec::<String>>::typename(), "Vec<Vec<String>>");
        assert_eq!(
            Vec::<Vec::<Vec::<String>>>::typename(),
            "Vec<Vec<Vec<String>>>"
        );
        assert_eq!(
            <(u64, String, Option::<isize>)>::typename(),
            "(u64, String, Option<isize>)"
        );
        assert_eq!(TestStruct::typename(), "wirevalue::tests::TestStruct");
        assert_eq!(
            Vec::<TestStruct>::typename(),
            "Vec<wirevalue::tests::TestStruct>"
        );
    }

    /// Typehash and port values are pinned, and differ between nesting depths.
    #[test]
    fn test_ports() {
        assert_eq!(String::typehash(), 3947244799002047352u64);
        assert_eq!(String::port(), 13170616835856823160u64);
        assert_ne!(
            Vec::<Vec::<Vec::<String>>>::typehash(),
            Vec::<Vec::<Vec::<Vec::<String>>>>::typehash(),
        );
    }

    /// Registered struct used by the dump, prefix and encoding tests.
    #[derive(typeuri::Named, Serialize, Deserialize, PartialEq, Eq, Debug)]
    struct TestDumpStruct {
        a: String,
        b: u64,
        c: Option<i32>,
        d: Option<Part>,
    }
    crate::register_type!(TestDumpStruct);

    /// A registered value dumps to the same JSON before and after transcoding.
    #[test]
    fn test_dump_struct() {
        // This test asserts on the *default* encoding. Hold the configuration
        // lock so that a concurrently running test that overrides
        // `DEFAULT_ENCODING` cannot change it underneath us.
        let _config_lock = hyperactor_config::global::lock();

        let data = TestDumpStruct {
            a: "hello".to_string(),
            b: 1234,
            c: Some(5678),
            d: None,
        };
        let serialized = Any::serialize(&data).unwrap();
        let serialized_json = serialized.clone().transcode_to_json().unwrap();

        assert!(serialized.encoded.is_multipart());
        assert!(serialized_json.encoded.is_json());

        let json_string =
            String::from_utf8(serialized_json.encoded.as_json().unwrap().to_vec()).unwrap();
        // The serialized data for JSON is just the (compact) JSON string.
        assert_eq!(
            json_string,
            "{\"a\":\"hello\",\"b\":1234,\"c\":5678,\"d\":null}"
        );

        for serialized in [serialized, serialized_json] {
            // Note, at this point, serialized has no knowledge other than its embedded typehash.

            assert_eq!(
                serialized.typename(),
                Some("wirevalue::tests::TestDumpStruct")
            );

            let json = serialized.dump().unwrap();
            assert_eq!(
                json,
                serde_json::json!({
                    "a": "hello",
                    "b": 1234,
                    "c": 5678,
                    "d": null,
                })
            );

            assert_eq!(
                format!("{}", serialized),
                "TestDumpStruct{\"a\":\"hello\",\"b\":1234,\"c\":5678,\"d\":null}",
            );
        }
    }

    /// Replacing the bincode prefix keeps the remaining fields intact.
    #[test]
    fn test_emplace_prefix() {
        let config = hyperactor_config::global::lock();
        let _guard = config.override_key(config::DEFAULT_ENCODING, Encoding::Bincode);
        let data = TestDumpStruct {
            a: "hello".to_string(),
            b: 1234,
            c: Some(5678),
            d: None,
        };

        let mut ser = Any::serialize(&data).unwrap();
        assert_eq!(ser.prefix::<String>().unwrap(), "hello".to_string());

        ser.emplace_prefix("hello, world, 123!".to_string())
            .unwrap();

        assert_eq!(
            ser.deserialized::<TestDumpStruct>().unwrap(),
            TestDumpStruct {
                a: "hello, world, 123!".to_string(),
                b: 1234,
                c: Some(5678),
                d: None,
            }
        );
    }

    /// `arm()` reports the variant name for every kind of enum variant.
    #[test]
    fn test_arms() {
        #[derive(typeuri::Named, Serialize, Deserialize)]
        enum TestArm {
            #[allow(dead_code)]
            A(u32),
            B,
            C(),
            D {
                #[allow(dead_code)]
                a: u32,
                #[allow(dead_code)]
                b: String,
            },
        }

        assert_eq!(TestArm::A(1234).arm(), Some("A"));
        assert_eq!(TestArm::B.arm(), Some("B"));
        assert_eq!(TestArm::C().arm(), Some("C"));
        assert_eq!(
            TestArm::D {
                a: 1234,
                b: "hello".to_string()
            }
            .arm(),
            Some("D")
        );
    }

    /// `HexFmt` prints the CRC, a byte preview and the omitted byte count.
    #[test]
    fn display_hex() {
        assert_eq!(
            format!("{}", HexFmt("hello world".as_bytes())),
            "CRC:d4a1185 68 65 6c 6c 6f 20 77 6f [...3 bytes]"
        );
        assert_eq!(format!("{}", HexFmt("".as_bytes())), "CRC:0");
        assert_eq!(
            format!("{}", HexFmt("a very long string that is long".as_bytes())),
            "CRC:c7e24f62 61 20 76 65 72 79 20 6c [...23 bytes]"
        );
    }

    /// `JsonFmt` leaves short values alone and truncates long strings/arrays.
    #[test]
    fn test_json_fmt() {
        let json_value = serde_json::json!({
            "name": "test",
            "number": 42,
            "nested": {
                "key": "value"
            }
        });
        // JSON values with short values should print normally
        assert_eq!(
            format!("{}", JsonFmt(&json_value)),
            "{\"name\":\"test\",\"nested\":{\"key\":\"value\"},\"number\":42}",
        );

        let empty_json = serde_json::json!({});
        assert_eq!(format!("{}", JsonFmt(&empty_json)), "{}");

        let simple_array = serde_json::json!([1, 2, 3]);
        assert_eq!(format!("{}", JsonFmt(&simple_array)), "[1,2,3]");

        // JSON values with very long strings should be truncated
        let long_string_json = serde_json::json!({
            "long_string": "a".repeat(MAX_JSON_VALUE_DISPLAY_LENGTH * 5)
        });
        assert_eq!(
            format!("{}", JsonFmt(&long_string_json)),
            "{\"long_string\":\"aaaaaaaa[...40 chars] CRC:c95b8a25 61 61 61 61 61 61 61 61 [...32 bytes]\"}"
        );

        // JSON values with very long arrays should be truncated
        let long_array_json =
            serde_json::json!((1..=(MAX_JSON_VALUE_DISPLAY_LENGTH + 4)).collect::<Vec<_>>());
        assert_eq!(
            format!("{}", JsonFmt(&long_array_json)),
            "\"[1,2,3,4[...28 chars] CRC:e5c881af 5b 31 2c 32 2c 33 2c 34 [...20 bytes]\""
        );

        // Test for truncation within nested blocks
        let nested_json = serde_json::json!({
            "simple_number": 42,
            "simple_bool": true,
            "outer": {
                "long_string": "a".repeat(MAX_JSON_VALUE_DISPLAY_LENGTH + 10),
                "long_array": (1..=(MAX_JSON_VALUE_DISPLAY_LENGTH + 4)).collect::<Vec<_>>(),
                "inner": {
                    "simple_value": "short",
                }
            }
        });
        assert_eq!(
            format!("{}", JsonFmt(&nested_json)),
            "{\"outer\":{\"inner\":{\"simple_value\":\"short\"},\"long_array\":\"[1,2,3,4[...28 chars] CRC:e5c881af 5b 31 2c 32 2c 33 2c 34 [...20 bytes]\",\"long_string\":\"aaaaaaaa[...18 chars] CRC:b8ac0e31 61 61 61 61 61 61 61 61 [...10 bytes]\"},\"simple_bool\":true,\"simple_number\":42}",
        );
    }

    /// Truncation never splits a multi-byte UTF-8 character.
    #[test]
    fn test_json_fmt_utf8_truncation() {
        // String with 7 ASCII chars + 4-byte emoji: 11 bytes in total. The
        // 8-byte limit falls inside the emoji, so truncation backs off to 7.
        let utf8_json = serde_json::json!({
            "emoji": "1234567🦀"  // 7 + 4 = 11 bytes, MAX is 8
        });
        let rendered = format!("{}", JsonFmt(&utf8_json));
        assert!(rendered.contains("1234567[...11 chars]"));
        assert!(!rendered.contains("🦀")); // Emoji should be truncated away

        // All multi-byte characters: each char is 3 bytes, 12 bytes in total.
        // Char boundaries sit at 0, 3, 6, 9 and 12, so the 8-byte limit backs
        // off to 6 and exactly two characters are kept.
        let all_multibyte = serde_json::json!({
            "chinese": "你好世界"
        });
        let rendered = format!("{}", JsonFmt(&all_multibyte));
        assert!(rendered.contains("你好[...12 chars]"));
        assert!(!rendered.contains('世'));
    }

    /// Every encoding round-trips a value and reports itself correctly.
    #[test]
    fn test_encodings() {
        let value = TestDumpStruct {
            a: "hello, world".to_string(),
            b: 123,
            c: Some(321),
            d: Some(Part::from("hello, world, again")),
        };
        for enc in Encoding::iter() {
            let ser = Any::serialize_with_encoding(enc, &value).unwrap();
            assert_eq!(ser.encoding(), enc);
            assert_eq!(ser.deserialized::<TestDumpStruct>().unwrap(), value);
        }
    }

    /// Broken values are flagged as such and refuse to deserialize.
    #[test]
    fn test_broken_any() {
        let broken = Any::new_broken();
        assert!(broken.is_broken());
        assert_eq!(broken.typehash(), BROKEN_TYPEHASH);

        // Normal values are not broken
        let normal = Any::serialize(&"hello".to_string()).unwrap();
        assert!(!normal.is_broken());

        // deserialized() should fail for broken values
        let err = broken.deserialized::<String>().unwrap_err();
        assert!(err.to_string().contains("broken"));
    }
}