Skip to main content

reth_db_api/models/
integer_list.rs

1//! Implements [`Compress`] and [`Decompress`] for [`IntegerList`]
2
3use crate::table::{Compress, Decompress};
4use bytes::BufMut;
5use core::fmt;
6use derive_more::Deref;
7use reth_codecs::DecompressError;
8use roaring::RoaringTreemap;
9
10/// A data structure that uses Roaring Bitmaps to efficiently store a list of integers.
11///
12/// This structure provides excellent compression while allowing direct access to individual
13/// elements without the need for full decompression.
14///
15/// Key features:
16/// - Efficient compression: the underlying Roaring Bitmaps significantly reduce memory usage.
17/// - Direct access: elements can be accessed or queried without needing to decode the entire list.
18/// - [`RoaringTreemap`] backing: internally backed by [`RoaringTreemap`], which supports 64-bit
19///   integers.
20#[derive(Clone, PartialEq, Default, Deref)]
21pub struct IntegerList(pub RoaringTreemap);
22
23impl fmt::Debug for IntegerList {
24    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
25        f.write_str("IntegerList")?;
26        f.debug_list().entries(self.0.iter()).finish()
27    }
28}
29
30impl IntegerList {
31    /// Creates a new empty [`IntegerList`].
32    pub fn empty() -> Self {
33        Self(RoaringTreemap::new())
34    }
35
36    /// Creates an [`IntegerList`] from a list of integers.
37    ///
38    /// Returns an error if the list is not pre-sorted.
39    pub fn new(list: impl IntoIterator<Item = u64>) -> Result<Self, IntegerListError> {
40        RoaringTreemap::from_sorted_iter(list)
41            .map(Self)
42            .map_err(|_| IntegerListError::UnsortedInput)
43    }
44
45    /// Creates an [`IntegerList`] from a pre-sorted list of integers.
46    ///
47    /// # Panics
48    ///
49    /// Panics if the list is not pre-sorted.
50    #[inline]
51    #[track_caller]
52    pub fn new_pre_sorted(list: impl IntoIterator<Item = u64>) -> Self {
53        Self::new(list).expect("IntegerList must be pre-sorted and non-empty")
54    }
55
56    /// Appends a list of integers to the current list.
57    pub fn append(&mut self, list: impl IntoIterator<Item = u64>) -> Result<u64, IntegerListError> {
58        self.0.append(list).map_err(|_| IntegerListError::UnsortedInput)
59    }
60
61    /// Pushes a new integer to the list.
62    pub fn push(&mut self, value: u64) -> Result<(), IntegerListError> {
63        self.0.try_push(value).map_err(|_| IntegerListError::UnsortedInput)
64    }
65
66    /// Clears the list.
67    pub fn clear(&mut self) {
68        self.0.clear();
69    }
70
71    /// Serializes an [`IntegerList`] into a sequence of bytes.
72    pub fn to_bytes(&self) -> Vec<u8> {
73        let mut vec = Vec::with_capacity(self.0.serialized_size());
74        self.0.serialize_into(&mut vec).expect("not able to encode IntegerList");
75        vec
76    }
77
78    /// Serializes an [`IntegerList`] into a sequence of bytes.
79    pub fn to_mut_bytes<B: bytes::BufMut>(&self, buf: &mut B) {
80        self.0.serialize_into(buf.writer()).unwrap();
81    }
82
83    /// Deserializes a sequence of bytes into a proper [`IntegerList`].
84    pub fn from_bytes(data: &[u8]) -> Result<Self, IntegerListError> {
85        RoaringTreemap::deserialize_from(data)
86            .map(Self)
87            .map_err(|_| IntegerListError::FailedToDeserialize)
88    }
89}
90
91impl serde::Serialize for IntegerList {
92    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
93    where
94        S: serde::Serializer,
95    {
96        use serde::ser::SerializeSeq;
97
98        let mut seq = serializer.serialize_seq(Some(self.len() as usize))?;
99        for e in &self.0 {
100            seq.serialize_element(&e)?;
101        }
102        seq.end()
103    }
104}
105
106struct IntegerListVisitor;
107
108impl<'de> serde::de::Visitor<'de> for IntegerListVisitor {
109    type Value = IntegerList;
110
111    fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
112        f.write_str("a usize array")
113    }
114
115    fn visit_seq<E>(self, mut seq: E) -> Result<Self::Value, E::Error>
116    where
117        E: serde::de::SeqAccess<'de>,
118    {
119        let mut list = IntegerList::empty();
120        while let Some(item) = seq.next_element()? {
121            list.push(item).map_err(serde::de::Error::custom)?;
122        }
123        Ok(list)
124    }
125}
126
127impl<'de> serde::Deserialize<'de> for IntegerList {
128    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
129    where
130        D: serde::Deserializer<'de>,
131    {
132        deserializer.deserialize_byte_buf(IntegerListVisitor)
133    }
134}
135
136#[cfg(any(test, feature = "arbitrary"))]
137use arbitrary::{Arbitrary, Unstructured};
138
139#[cfg(any(test, feature = "arbitrary"))]
140impl<'a> Arbitrary<'a> for IntegerList {
141    fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self, arbitrary::Error> {
142        let mut nums: Vec<u64> = Vec::arbitrary(u)?;
143        nums.sort_unstable();
144        Self::new(nums).map_err(|_| arbitrary::Error::IncorrectFormat)
145    }
146}
147
148/// Primitives error type.
149#[derive(Debug, derive_more::Display, derive_more::Error)]
150pub enum IntegerListError {
151    /// The provided input is unsorted.
152    #[display("the provided input is unsorted")]
153    UnsortedInput,
154    /// Failed to deserialize data into type.
155    #[display("failed to deserialize data into type")]
156    FailedToDeserialize,
157}
158
159impl Compress for IntegerList {
160    type Compressed = Vec<u8>;
161
162    fn compress(self) -> Self::Compressed {
163        self.to_bytes()
164    }
165
166    fn compress_to_buf<B: bytes::BufMut + AsMut<[u8]>>(&self, buf: &mut B) {
167        self.to_mut_bytes(buf)
168    }
169}
170
171impl Decompress for IntegerList {
172    fn decompress(value: &[u8]) -> Result<Self, DecompressError> {
173        Self::from_bytes(value).map_err(DecompressError::new)
174    }
175}
176
177#[cfg(test)]
178mod tests {
179    use super::*;
180
181    #[test]
182    fn empty_list() {
183        assert_eq!(IntegerList::empty().len(), 0);
184        assert_eq!(IntegerList::new_pre_sorted(std::iter::empty()).len(), 0);
185    }
186
187    #[test]
188    fn test_integer_list() {
189        let original_list = [1, 2, 3];
190        let ef_list = IntegerList::new(original_list).unwrap();
191        assert_eq!(ef_list.iter().collect::<Vec<_>>(), original_list);
192    }
193
194    #[test]
195    fn test_integer_list_serialization() {
196        let original_list = [1, 2, 3];
197        let ef_list = IntegerList::new(original_list).unwrap();
198
199        let blist = ef_list.to_bytes();
200        assert_eq!(IntegerList::from_bytes(&blist).unwrap(), ef_list)
201    }
202}