Skip to main content

reth_trie_sparse/
lfu.rs

1use core::{fmt, hash};
2
3use alloc::vec::Vec;
4use alloy_primitives::map::HashMap;
5
6/// Maximum frequency value. Frequencies are capped here to bound bucket storage.
7const LFU_MAX_FREQ: u16 = 255;
8
9/// Per-entry metadata stored in the LFU lookup map.
10#[derive(Debug, Clone, Copy)]
11struct LfuEntryMeta {
12    /// Current frequency (1..=`LFU_MAX_FREQ`).
13    freq: u16,
14    /// Index of this key within `buckets[freq]`.
15    pos: usize,
16}
17
18/// Bucketed LFU cache with O(1) eviction and O(1) touch operations.
19///
20/// Generic over the key type `K`.
21#[derive(Debug)]
22pub(crate) struct BucketedLfu<K> {
23    capacity: usize,
24    /// Maps each key to its frequency and bucket position.
25    entries: HashMap<K, LfuEntryMeta>,
26    /// `buckets[f]` holds all keys currently at frequency `f`.
27    /// Index 0 is unused; valid frequencies are 1..=`LFU_MAX_FREQ`.
28    buckets: Vec<Vec<K>>,
29    /// Smallest non-empty frequency bucket.
30    min_freq: u16,
31}
32
33impl<K> Default for BucketedLfu<K> {
34    fn default() -> Self {
35        Self::new(0)
36    }
37}
38
39impl<K> BucketedLfu<K> {
40    pub(crate) fn new(capacity: usize) -> Self {
41        Self {
42            capacity,
43            entries: HashMap::default(),
44            buckets: (0..=LFU_MAX_FREQ).map(|_| Vec::new()).collect(),
45            min_freq: 1,
46        }
47    }
48}
49
50impl<K: fmt::Debug + Copy + Eq + hash::Hash> BucketedLfu<K> {
51    /// Updates the capacity and evicts the least-frequently-used entries that exceed it.
52    ///
53    /// Entries accumulate via [`Self::touch`] over time. This method trims the cache
54    /// so that only the `capacity` hottest entries are retained.
55    pub(crate) fn decay_and_evict(&mut self, capacity: usize) {
56        self.capacity = capacity;
57
58        if self.capacity == 0 {
59            self.entries.clear();
60            for b in &mut self.buckets {
61                b.clear();
62            }
63            self.min_freq = 1;
64            return;
65        }
66
67        while self.entries.len() > self.capacity {
68            self.evict_one();
69        }
70    }
71
72    /// Evicts a single entry from the lowest-frequency bucket.
73    fn evict_one(&mut self) {
74        if self.entries.is_empty() {
75            self.min_freq = 1;
76            return;
77        }
78
79        // Advance min_freq to the next non-empty bucket.
80        while (self.min_freq as usize) < self.buckets.len() &&
81            self.buckets[self.min_freq as usize].is_empty()
82        {
83            self.min_freq += 1;
84        }
85
86        if (self.min_freq as usize) >= self.buckets.len() {
87            self.min_freq = 1;
88            return;
89        }
90
91        if let Some(key) = self.buckets[self.min_freq as usize].pop() {
92            self.entries.remove(&key);
93
94            while (self.min_freq as usize) < self.buckets.len() &&
95                self.buckets[self.min_freq as usize].is_empty()
96            {
97                self.min_freq += 1;
98            }
99
100            if (self.min_freq as usize) >= self.buckets.len() {
101                self.min_freq = 1;
102            }
103        }
104    }
105
106    /// Records a key touch. O(1) amortized.
107    pub(crate) fn touch(&mut self, key: K) {
108        if self.capacity == 0 {
109            return;
110        }
111
112        if let Some(meta) = self.entries.get(&key).copied() {
113            debug_assert_eq!(self.buckets[meta.freq as usize][meta.pos], key);
114
115            let old_freq = meta.freq as usize;
116            let new_freq = meta.freq.saturating_add(1).min(LFU_MAX_FREQ);
117
118            if new_freq as usize != old_freq {
119                // Remove from old bucket via swap_remove and fix the swapped element's pos.
120                self.buckets[old_freq].swap_remove(meta.pos);
121                if let Some(&moved_key) = self.buckets[old_freq].get(meta.pos) {
122                    self.entries.get_mut(&moved_key).expect("moved key must exist").pos = meta.pos;
123                }
124
125                // Insert into new bucket.
126                let new_pos = self.buckets[new_freq as usize].len();
127                self.buckets[new_freq as usize].push(key);
128
129                let entry = self.entries.get_mut(&key).expect("key must exist");
130                entry.freq = new_freq;
131                entry.pos = new_pos;
132
133                // Update min_freq if old bucket is now empty.
134                if self.buckets[old_freq].is_empty() && old_freq == self.min_freq as usize {
135                    self.min_freq = new_freq;
136                }
137            }
138        } else {
139            // Evict if at capacity before inserting.
140            if self.entries.len() >= self.capacity {
141                self.evict_one();
142            }
143
144            // New entry at frequency 1.
145            let pos = self.buckets[1].len();
146            self.buckets[1].push(key);
147            self.entries.insert(key, LfuEntryMeta { freq: 1, pos });
148            self.min_freq = 1;
149        }
150    }
151
152    /// Returns an iterator over all retained keys.
153    #[cfg(any(test, feature = "std"))]
154    pub(crate) fn keys(&self) -> impl Iterator<Item = &K> {
155        self.entries.keys()
156    }
157
158    /// Returns the number of retained keys.
159    #[cfg(any(test, feature = "std"))]
160    pub(crate) fn len(&self) -> usize {
161        self.entries.len()
162    }
163}
164
165#[cfg(test)]
166mod tests {
167    use super::*;
168    use alloc::collections::{BTreeMap, BTreeSet};
169    use proptest::prelude::*;
170
171    #[derive(Clone, Copy, Debug)]
172    enum Op {
173        SetCapacity(usize),
174        Touch(u8),
175    }
176
177    #[derive(Debug, Default)]
178    struct ModelLfu {
179        capacity: usize,
180        entries: BTreeMap<u8, u16>,
181    }
182
183    impl ModelLfu {
184        fn apply(&mut self, op: Op) {
185            match op {
186                Op::SetCapacity(capacity) => self.set_capacity(capacity),
187                Op::Touch(key) => self.touch(key),
188            }
189        }
190
191        fn set_capacity(&mut self, capacity: usize) {
192            self.capacity = capacity;
193
194            if capacity == 0 {
195                self.entries.clear();
196                return;
197            }
198
199            while self.entries.len() > self.capacity {
200                self.evict_one();
201            }
202        }
203
204        fn touch(&mut self, key: u8) {
205            if self.capacity == 0 {
206                return;
207            }
208
209            if let Some(freq) = self.entries.get_mut(&key) {
210                *freq = freq.saturating_add(1).min(LFU_MAX_FREQ);
211                return;
212            }
213
214            if self.entries.len() >= self.capacity {
215                self.evict_one();
216            }
217
218            self.entries.insert(key, 1);
219        }
220
221        fn evict_one(&mut self) {
222            let victim = self
223                .entries
224                .iter()
225                .min_by_key(|(key, freq)| (**freq, **key))
226                .map(|(key, _)| *key)
227                .expect("model eviction requires a live entry");
228            self.entries.remove(&victim);
229        }
230
231        fn has_unique_frequencies(&self) -> bool {
232            let mut freqs = BTreeSet::new();
233            self.entries.values().all(|freq| freqs.insert(*freq))
234        }
235
236        fn snapshot(&self) -> BTreeMap<u8, u16> {
237            self.entries.clone()
238        }
239    }
240
241    fn apply_real(lfu: &mut BucketedLfu<u8>, op: Op) {
242        match op {
243            Op::SetCapacity(capacity) => lfu.decay_and_evict(capacity),
244            Op::Touch(key) => lfu.touch(key),
245        }
246    }
247
248    fn snapshot(lfu: &BucketedLfu<u8>) -> BTreeMap<u8, u16> {
249        lfu.entries.iter().map(|(key, meta)| (*key, meta.freq)).collect()
250    }
251
252    fn assert_valid(lfu: &BucketedLfu<u8>) {
253        let mut seen = BTreeSet::new();
254        let mut actual_min_freq = None;
255
256        for freq in 1..lfu.buckets.len() {
257            for (pos, key) in lfu.buckets[freq].iter().copied().enumerate() {
258                assert!(seen.insert(key), "duplicate key {key} in buckets");
259
260                let meta =
261                    lfu.entries.get(&key).unwrap_or_else(|| panic!("missing entry for {key}"));
262                assert_eq!(meta.freq as usize, freq, "wrong frequency for key {key}");
263                assert_eq!(meta.pos, pos, "wrong position for key {key}");
264
265                actual_min_freq.get_or_insert(freq as u16);
266            }
267        }
268
269        assert_eq!(seen.len(), lfu.entries.len(), "bucket/entry count mismatch");
270
271        for (key, meta) in &lfu.entries {
272            assert_ne!(meta.freq, 0, "zero frequency for key {key}");
273            assert!(
274                meta.pos < lfu.buckets[meta.freq as usize].len(),
275                "position out of bounds for key {key}"
276            );
277            assert_eq!(
278                lfu.buckets[meta.freq as usize][meta.pos], *key,
279                "bucket position mismatch for key {key}"
280            );
281        }
282
283        assert_eq!(lfu.min_freq, actual_min_freq.unwrap_or(1), "min_freq mismatch");
284    }
285
286    fn is_safe_op(model: &ModelLfu, op: Op) -> bool {
287        match op {
288            Op::SetCapacity(capacity) => {
289                capacity >= model.entries.len() || model.has_unique_frequencies()
290            }
291            Op::Touch(key) => {
292                if model.capacity == 0 {
293                    return true;
294                }
295
296                let is_new_key = !model.entries.contains_key(&key);
297                let would_evict = is_new_key && model.entries.len() >= model.capacity;
298                !would_evict || model.has_unique_frequencies()
299            }
300        }
301    }
302
303    fn sanitize_trace(raw_ops: Vec<Op>) -> Vec<Op> {
304        let mut model = ModelLfu::default();
305        let mut safe_ops = Vec::with_capacity(raw_ops.len());
306
307        for op in raw_ops {
308            if is_safe_op(&model, op) {
309                model.apply(op);
310                safe_ops.push(op);
311            }
312        }
313
314        safe_ops
315    }
316
317    fn raw_trace_strategy() -> impl Strategy<Value = Vec<Op>> {
318        prop::collection::vec(
319            prop_oneof![(0usize..=4).prop_map(Op::SetCapacity), (0u8..=5).prop_map(Op::Touch),],
320            0..256,
321        )
322        .prop_map(sanitize_trace)
323    }
324
325    fn check_trace(ops: &[Op]) {
326        let mut real = BucketedLfu::new(0);
327        let mut model = ModelLfu::default();
328
329        for (step, &op) in ops.iter().enumerate() {
330            apply_real(&mut real, op);
331            model.apply(op);
332
333            assert_valid(&real);
334            assert_eq!(snapshot(&real), model.snapshot(), "mismatch at step {step}: {op:?}");
335        }
336    }
337
338    #[test]
339    fn bucketed_lfu_zero_capacity_touches_are_ignored() {
340        let mut lfu = BucketedLfu::default();
341
342        lfu.touch(1);
343        lfu.touch(2);
344
345        assert_valid(&lfu);
346        assert!(lfu.entries.is_empty());
347        assert_eq!(lfu.min_freq, 1);
348    }
349
350    #[test]
351    fn bucketed_lfu_retouch_does_not_duplicate_keys() {
352        check_trace(&[Op::SetCapacity(2), Op::Touch(1), Op::Touch(1), Op::Touch(1)]);
353    }
354
355    #[test]
356    fn bucketed_lfu_shrink_keeps_hottest_keys() {
357        check_trace(&[
358            Op::SetCapacity(4),
359            Op::Touch(1),
360            Op::Touch(2),
361            Op::Touch(2),
362            Op::Touch(3),
363            Op::Touch(3),
364            Op::Touch(3),
365            Op::Touch(4),
366            Op::Touch(4),
367            Op::Touch(4),
368            Op::Touch(4),
369            Op::SetCapacity(2),
370        ]);
371    }
372
373    #[test]
374    fn bucketed_lfu_touch_saturates_frequency() {
375        let mut lfu = BucketedLfu::new(1);
376
377        lfu.touch(1);
378        for _ in 0..(LFU_MAX_FREQ as usize + 16) {
379            lfu.touch(1);
380        }
381
382        assert_valid(&lfu);
383        assert_eq!(lfu.entries[&1].freq, LFU_MAX_FREQ);
384        assert_eq!(lfu.buckets[LFU_MAX_FREQ as usize], vec![1]);
385    }
386
387    #[test]
388    fn bucketed_lfu_tie_eviction_keeps_new_key_and_one_old_key_is_dropped() {
389        let mut lfu = BucketedLfu::new(2);
390
391        lfu.touch(1);
392        lfu.touch(2);
393        lfu.touch(3);
394
395        assert_valid(&lfu);
396        assert_eq!(lfu.entries.len(), 2);
397        assert!(lfu.entries.contains_key(&3));
398        assert_eq!(
399            [lfu.entries.contains_key(&1), lfu.entries.contains_key(&2)]
400                .into_iter()
401                .filter(|present| *present)
402                .count(),
403            1
404        );
405    }
406
407    proptest! {
408        #![proptest_config(ProptestConfig::with_cases(128))]
409
410        #[test]
411        fn bucketed_lfu_model_safe_proptest_traces(ops in raw_trace_strategy()) {
412            check_trace(&ops);
413        }
414    }
415}