rotating_bloom_filter/lib.rs
1// SPDX-FileCopyrightText: 2025 eaon <eaon@posteo.net>
2// SPDX-License-Identifier: EUPL-1.2
3
4#![cfg_attr(docsrs, feature(doc_auto_cfg))]
5#![doc = include_str!("../README.md")]
6#![warn(missing_docs)]
7
8use fastbloom::BloomFilter;
9use rand_core::RngCore;
10
11/// A probabilistic data structure that rotates out old items to maintain recent membership.
12///
13/// `RotatingBloomFilter` provides membership testing for recently inserted items, with older items
14/// automatically rotating out. Items are guaranteed to be retained for at least the minimum
15/// retention period, and typically remain for up to twice that duration.
16///
17/// Uses a dual-buffer rotation mechanism where items are added to both buffers, and periodically
18/// the older buffer is discarded, causing old items to rotate out.
19///
20/// # Examples
21///
22/// ```rust
23/// # use rotating_bloom_filter::*;
24/// # use rand_core::OsRng;
25/// let mut filter = RotatingBloomFilter::new(0.001, 1000, &mut OsRng);
26/// filter.insert("hello");
27/// assert!(filter.contains("hello"));
28/// // After 2000 more insertions, "hello" will rotate out
29/// ```
30pub struct RotatingBloomFilter {
31 current: BloomFilter,
32 next: BloomFilter,
33 hashes: usize,
34 min_retention: usize,
35}
36
37impl RotatingBloomFilter {
38 /// Creates a new `RotatingBloomFilter` with a given false positive rate and minimum retention
39 /// period.
40 pub fn new(false_pos: f64, min_retention: usize, csprng: &mut impl RngCore) -> Self {
41 let mut seed_bytes = [0u8; 16];
42 csprng.fill_bytes(&mut seed_bytes);
43 let seed = u128::from_le_bytes(seed_bytes);
44
45 Self {
46 // The initial size of current only needs to be equal to min_retention, because at
47 // rotation it will be replaced with the settings of next, which has settings to support
48 // twice as many items.
49 current: BloomFilter::with_false_pos(false_pos)
50 .seed(&seed)
51 .expected_items(min_retention),
52 next: BloomFilter::with_false_pos(false_pos)
53 .seed(&seed)
54 .expected_items(min_retention * 2),
55 hashes: 0,
56 min_retention,
57 }
58 }
59
60 /// Inserts an item into the rotating filter.
61 ///
62 /// Items are guaranteed to be retained for at least the minimum retention period, typically
63 /// remaining for up to twice that duration before rotating out.
64 ///
65 /// Returns a tuple: `(was_in_current, was_in_next)`
66 #[inline]
67 pub fn insert(&mut self, value: &(impl std::hash::Hash + ?Sized)) -> (bool, bool) {
68 if self.hashes >= self.min_retention {
69 self.current =
70 BloomFilter::from_vec(self.next.as_slice().to_vec()).hashes(self.next.num_hashes());
71 self.next.clear();
72 self.hashes = 0;
73 };
74 let hashed_value = self.current.source_hash(value);
75 let presence = (
76 self.current.insert_hash(hashed_value),
77 self.next.insert_hash(hashed_value),
78 );
79 self.hashes += 1;
80
81 presence
82 }
83
84 /// Tests whether an item might be present in the current rotation.
85 ///
86 /// May return false positives but never false negatives.
87 #[inline]
88 pub fn contains(&self, value: &(impl std::hash::Hash + ?Sized)) -> bool {
89 let hashed_value = self.current.source_hash(value);
90 self.current.contains_hash(hashed_value) || self.next.contains_hash(hashed_value)
91 }
92}