rotating_bloom_filter/
lib.rs

1// SPDX-FileCopyrightText: 2025 eaon <eaon@posteo.net>
2// SPDX-License-Identifier: EUPL-1.2
3
4#![cfg_attr(docsrs, feature(doc_auto_cfg))]
5#![doc = include_str!("../README.md")]
6#![warn(missing_docs)]
7
8use fastbloom::BloomFilter;
9use rand_core::RngCore;
10
11/// A probabilistic data structure that rotates out old items to maintain recent membership.
12///
13/// `RotatingBloomFilter` provides membership testing for recently inserted items, with older items
14/// automatically rotating out. Items are guaranteed to be retained for at least the minimum
15/// retention period, and typically remain for up to twice that duration.
16///
17/// Uses a dual-buffer rotation mechanism where items are added to both buffers, and periodically
18/// the older buffer is discarded, causing old items to rotate out.
19///
20/// # Examples
21///
22/// ```rust
23/// # use rotating_bloom_filter::*;
24/// # use rand_core::OsRng;
25/// let mut filter = RotatingBloomFilter::new(0.001, 1000, &mut OsRng);
26/// filter.insert("hello");
27/// assert!(filter.contains("hello"));
28/// // After 2000 more insertions, "hello" will rotate out
29/// ```
30pub struct RotatingBloomFilter {
31    current: BloomFilter,
32    next: BloomFilter,
33    hashes: usize,
34    min_retention: usize,
35}
36
37impl RotatingBloomFilter {
38    /// Creates a new `RotatingBloomFilter` with a given false positive rate and minimum retention
39    /// period.
40    pub fn new(false_pos: f64, min_retention: usize, csprng: &mut impl RngCore) -> Self {
41        let mut seed_bytes = [0u8; 16];
42        csprng.fill_bytes(&mut seed_bytes);
43        let seed = u128::from_le_bytes(seed_bytes);
44
45        Self {
46            // The initial size of current only needs to be equal to min_retention, because at
47            // rotation it will be replaced with the settings of next, which has settings to support
48            // twice as many items.
49            current: BloomFilter::with_false_pos(false_pos)
50                .seed(&seed)
51                .expected_items(min_retention),
52            next: BloomFilter::with_false_pos(false_pos)
53                .seed(&seed)
54                .expected_items(min_retention * 2),
55            hashes: 0,
56            min_retention,
57        }
58    }
59
60    /// Inserts an item into the rotating filter.
61    ///
62    /// Items are guaranteed to be retained for at least the minimum retention period, typically
63    /// remaining for up to twice that duration before rotating out.
64    ///
65    /// Returns a tuple: `(was_in_current, was_in_next)`
66    #[inline]
67    pub fn insert(&mut self, value: &(impl std::hash::Hash + ?Sized)) -> (bool, bool) {
68        if self.hashes >= self.min_retention {
69            self.current =
70                BloomFilter::from_vec(self.next.as_slice().to_vec()).hashes(self.next.num_hashes());
71            self.next.clear();
72            self.hashes = 0;
73        };
74        let hashed_value = self.current.source_hash(value);
75        let presence = (
76            self.current.insert_hash(hashed_value),
77            self.next.insert_hash(hashed_value),
78        );
79        self.hashes += 1;
80
81        presence
82    }
83
84    /// Tests whether an item might be present in the current rotation.
85    ///
86    /// May return false positives but never false negatives.
87    #[inline]
88    pub fn contains(&self, value: &(impl std::hash::Hash + ?Sized)) -> bool {
89        let hashed_value = self.current.source_hash(value);
90        self.current.contains_hash(hashed_value) || self.next.contains_hash(hashed_value)
91    }
92}