automerge/rust/automerge/src/sync/bloom.rs
Alex Good dd3c6d1303
Move rust workspace into ./rust
After some discussion with PVH I realise that the repo structure in the
last reorg was very rust-centric. In an attempt to put each language on
a level footing move the rust code and project files into ./rust
2022-10-16 19:55:51 +01:00

160 lines
4.7 KiB
Rust

use std::borrow::Borrow;
use crate::storage::parse;
use crate::ChangeHash;
// These constants correspond to a 1% false positive rate. The values can be changed without
// breaking compatibility of the network protocol, since the parameters used for a particular
// Bloom filter are encoded in the wire format.
const BITS_PER_ENTRY: u32 = 10;
const NUM_PROBES: u32 = 7;
#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize)]
pub struct BloomFilter {
num_entries: u32,
num_bits_per_entry: u32,
num_probes: u32,
bits: Vec<u8>,
}
impl Default for BloomFilter {
fn default() -> Self {
BloomFilter {
num_entries: 0,
num_bits_per_entry: BITS_PER_ENTRY,
num_probes: NUM_PROBES,
bits: Vec::new(),
}
}
}
#[derive(Debug, thiserror::Error)]
pub(crate) enum ParseError {
#[error(transparent)]
Leb128(#[from] parse::leb128::Error),
}
impl BloomFilter {
pub fn to_bytes(&self) -> Vec<u8> {
let mut buf = Vec::new();
if self.num_entries != 0 {
leb128::write::unsigned(&mut buf, self.num_entries as u64).unwrap();
leb128::write::unsigned(&mut buf, self.num_bits_per_entry as u64).unwrap();
leb128::write::unsigned(&mut buf, self.num_probes as u64).unwrap();
buf.extend(&self.bits);
}
buf
}
pub(crate) fn parse(input: parse::Input<'_>) -> parse::ParseResult<'_, Self, ParseError> {
if input.is_empty() {
Ok((input, Self::default()))
} else {
let (i, num_entries) = parse::leb128_u32(input)?;
let (i, num_bits_per_entry) = parse::leb128_u32(i)?;
let (i, num_probes) = parse::leb128_u32(i)?;
let (i, bits) = parse::take_n(bits_capacity(num_entries, num_bits_per_entry), i)?;
Ok((
i,
Self {
num_entries,
num_bits_per_entry,
num_probes,
bits: bits.to_vec(),
},
))
}
}
fn get_probes(&self, hash: &ChangeHash) -> Vec<u32> {
let hash_bytes = hash.0;
let modulo = 8 * self.bits.len() as u32;
let mut x =
u32::from_le_bytes([hash_bytes[0], hash_bytes[1], hash_bytes[2], hash_bytes[3]])
% modulo;
let mut y =
u32::from_le_bytes([hash_bytes[4], hash_bytes[5], hash_bytes[6], hash_bytes[7]])
% modulo;
let z = u32::from_le_bytes([hash_bytes[8], hash_bytes[9], hash_bytes[10], hash_bytes[11]])
% modulo;
let mut probes = Vec::with_capacity(self.num_probes as usize);
probes.push(x);
for _ in 1..self.num_probes {
x = (x + y) % modulo;
y = (y + z) % modulo;
probes.push(x);
}
probes
}
fn add_hash(&mut self, hash: &ChangeHash) {
for probe in self.get_probes(hash) {
self.set_bit(probe as usize);
}
}
fn set_bit(&mut self, probe: usize) {
if let Some(byte) = self.bits.get_mut(probe >> 3) {
*byte |= 1 << (probe & 7);
}
}
fn get_bit(&self, probe: usize) -> Option<u8> {
self.bits
.get(probe >> 3)
.map(|byte| byte & (1 << (probe & 7)))
}
pub fn contains_hash(&self, hash: &ChangeHash) -> bool {
if self.num_entries == 0 {
false
} else {
for probe in self.get_probes(hash) {
if let Some(bit) = self.get_bit(probe as usize) {
if bit == 0 {
return false;
}
}
}
true
}
}
pub fn from_hashes<H: Borrow<ChangeHash>>(hashes: impl ExactSizeIterator<Item = H>) -> Self {
let num_entries = hashes.len() as u32;
let num_bits_per_entry = BITS_PER_ENTRY;
let num_probes = NUM_PROBES;
let bits = vec![0; bits_capacity(num_entries, num_bits_per_entry) as usize];
let mut filter = Self {
num_entries,
num_bits_per_entry,
num_probes,
bits,
};
for hash in hashes {
filter.add_hash(hash.borrow());
}
filter
}
}
fn bits_capacity(num_entries: u32, num_bits_per_entry: u32) -> usize {
let f = ((f64::from(num_entries) * f64::from(num_bits_per_entry)) / 8_f64).ceil();
f as usize
}
#[derive(thiserror::Error, Debug)]
#[error("{0}")]
pub struct DecodeError(String);
impl TryFrom<&[u8]> for BloomFilter {
type Error = DecodeError;
fn try_from(bytes: &[u8]) -> Result<Self, Self::Error> {
Self::parse(parse::Input::new(bytes))
.map(|(_, b)| b)
.map_err(|e| DecodeError(e.to_string()))
}
}