Mercurial > lbo > hg > leveldb-rs
changeset 254:ec870dbc65f7
filter/filter_block: Update FilterBlockBuilder to not depend on inputs' lifetimes.
author | Lewin Bormann <lbo@spheniscida.de> |
---|---|
date | Tue, 19 Sep 2017 16:01:17 +0200 |
parents | 90c0529076ab |
children | 07acc60da6ee |
files | src/filter.rs src/filter_block.rs |
diffstat | 2 files changed, 71 insertions(+), 42 deletions(-) [+] |
line wrap: on
line diff
--- a/src/filter.rs Tue Sep 19 14:54:38 2017 +0200 +++ b/src/filter.rs Tue Sep 19 16:01:17 2017 +0200 @@ -9,8 +9,9 @@ pub trait FilterPolicy { /// Returns a string identifying this policy. fn name(&self) -> &'static str; - /// Create a filter matching the given keys. - fn create_filter(&self, keys: &Vec<&[u8]>) -> Vec<u8>; + /// Create a filter matching the given keys. Keys are given as a long byte array that is + /// indexed by the offsets contained in key_offsets. + fn create_filter(&self, keys: &[u8], key_offsets: &[usize]) -> Vec<u8>; /// Check whether the given key may match the filter. fn key_may_match(&self, key: &[u8], filter: &[u8]) -> bool; } @@ -23,8 +24,8 @@ fn name(&self) -> &'static str { (**self).name() } - fn create_filter(&self, keys: &Vec<&[u8]>) -> Vec<u8> { - (**self).create_filter(keys) + fn create_filter(&self, keys: &[u8], key_offsets: &[usize]) -> Vec<u8> { + (**self).create_filter(keys, key_offsets) } fn key_may_match(&self, key: &[u8], filter: &[u8]) -> bool { (**self).key_may_match(key, filter) @@ -45,7 +46,7 @@ fn name(&self) -> &'static str { "_" } - fn create_filter(&self, _: &Vec<&[u8]>) -> Vec<u8> { + fn create_filter(&self, _: &[u8], _: &[usize]) -> Vec<u8> { vec![] } fn key_may_match(&self, _: &[u8], _: &[u8]) -> bool { @@ -125,8 +126,8 @@ fn name(&self) -> &'static str { "leveldb.BuiltinBloomFilter2" } - fn create_filter(&self, keys: &Vec<&[u8]>) -> Vec<u8> { - let filter_bits = keys.len() * self.bits_per_key as usize; + fn create_filter(&self, keys: &[u8], key_offsets: &[usize]) -> Vec<u8> { + let filter_bits = key_offsets.len() * self.bits_per_key as usize; let mut filter: Vec<u8>; if filter_bits < 64 { @@ -142,16 +143,16 @@ // Encode k at the end of the filter. filter.push(self.k as u8); - for key in keys { + // Add all keys to the filter. + offset_data_iterate(keys, key_offsets, |key| { let mut h = self.bloom_hash(key); let delta = (h >> 17) | (h << 15); - for _ in 0..self.k { let bitpos = (h % adj_filter_bits) as usize; filter[bitpos / 8] |= 1 << (bitpos % 8); h = (h as u64 + delta as u64) as u32; } - } + }); filter } @@ -201,15 +202,16 @@ self.internal.name() } - fn create_filter(&self, keys: &Vec<&[u8]>) -> Vec<u8> { - let mut mod_keys = keys.clone(); + fn create_filter(&self, keys: &[u8], key_offsets: &[usize]) -> Vec<u8> { + let mut mod_keys = Vec::with_capacity(keys.len() - key_offsets.len() * 8); + let mut mod_key_offsets = Vec::with_capacity(key_offsets.len()); let mut i = 0; - for key in keys { - mod_keys[i] = &key[0..key.len() - 8]; - i += 1; - } - self.internal.create_filter(&mod_keys) + offset_data_iterate(keys, key_offsets, |key| { + mod_key_offsets.push(mod_keys.len()); + mod_keys.extend_from_slice(&key[0..key.len() - 8]); + }); + self.internal.create_filter(&mod_keys, &mod_key_offsets) } fn key_may_match(&self, key: &[u8], filter: &[u8]) -> bool { @@ -217,6 +219,19 @@ } } +/// offset_data_iterate iterates over the entries in data that are indexed by the offsets given in +/// offsets. This is e.g. the internal format of a FilterBlock. +fn offset_data_iterate<F: FnMut(&[u8])>(data: &[u8], offsets: &[usize], mut f: F) { + for offix in 0..offsets.len() { + let upper = if offix == offsets.len() - 1 { + data.len() + } else { + offsets[offix + 1] + }; + let piece = &data[offsets[offix]..upper]; + f(piece); + } +} #[cfg(test)] mod tests { @@ -225,18 +240,26 @@ const _BITS_PER_KEY: u32 = 12; - fn input_data() -> Vec<&'static [u8]> { - let data = vec!["abc123def456".as_bytes(), - "xxx111xxx222".as_bytes(), - "ab00cd00ab".as_bytes(), - "908070605040302010".as_bytes()]; - data + fn input_data() -> (Vec<u8>, Vec<usize>) { + let mut concat = vec![]; + let mut offs = vec![]; + + for d in ["abc123def456".as_bytes(), + "xxx111xxx222".as_bytes(), + "ab00cd00ab".as_bytes(), + "908070605040302010".as_bytes()] + .iter() { + offs.push(concat.len()); + concat.extend_from_slice(d); + } + (concat, offs) } /// Creates a filter using the keys from input_data(). fn create_filter() -> Vec<u8> { let fpol = BloomPolicy::new(_BITS_PER_KEY); - let filter = fpol.create_filter(&input_data()); + let (data, offs) = input_data(); + let filter = fpol.create_filter(&data, &offs); assert_eq!(filter, vec![194, 148, 129, 140, 192, 196, 132, 164, 8]); filter @@ -245,13 +268,15 @@ /// Creates a filter using the keys from input_data() but converted to InternalKey format. fn create_internalkey_filter() -> Vec<u8> { let fpol = Rc::new(Box::new(InternalFilterPolicy::new(BloomPolicy::new(_BITS_PER_KEY)))); - let input: Vec<Vec<u8>> = input_data() - .into_iter() - .map(|k| LookupKey::new(k, 123).internal_key().to_vec()) - .collect(); - let input_ = input.iter().map(|k| k.as_slice()).collect(); + let (data, offs) = input_data(); + let (mut intdata, mut intoffs) = (vec![], vec![]); - let filter = fpol.create_filter(&input_); + offset_data_iterate(&data, &offs, |key| { + let ikey = LookupKey::new(key, 123); + intoffs.push(intdata.len()); + intdata.extend_from_slice(ikey.internal_key()); + }); + let filter = fpol.create_filter(&intdata, &intoffs); filter } @@ -260,10 +285,11 @@ fn test_filter_bloom() { let f = create_filter(); let fp = BloomPolicy::new(_BITS_PER_KEY); + let (data, offs) = input_data(); - for k in input_data().iter() { - assert!(fp.key_may_match(k, &f)); - } + offset_data_iterate(&data, &offs, |key| { + assert!(fp.key_may_match(key, &f)); + }); } /// This test verifies that InternalFilterPolicy works correctly.
--- a/src/filter_block.rs Tue Sep 19 14:54:38 2017 +0200 +++ b/src/filter_block.rs Tue Sep 19 16:01:17 2017 +0200 @@ -24,23 +24,25 @@ /// Two consecutive filter offsets may be the same. /// /// TODO: See if we can remove the lifetime parameter. -pub struct FilterBlockBuilder<'a> { +pub struct FilterBlockBuilder { policy: BoxedFilterPolicy, // filters, concatenated filters: Vec<u8>, filter_offsets: Vec<usize>, // Reset on every start_block() - keys: Vec<&'a [u8]>, + key_offsets: Vec<usize>, + keys: Vec<u8>, } -impl<'a> FilterBlockBuilder<'a> { - pub fn new(fp: BoxedFilterPolicy) -> FilterBlockBuilder<'a> { +impl FilterBlockBuilder { + pub fn new(fp: BoxedFilterPolicy) -> FilterBlockBuilder { FilterBlockBuilder { policy: fp, // some pre-allocation - filters: Vec::with_capacity(1024 * 40), + filters: Vec::with_capacity(1024), filter_offsets: Vec::with_capacity(1024), + key_offsets: Vec::with_capacity(1024), keys: Vec::with_capacity(1024), } } @@ -49,8 +51,9 @@ self.policy.name() } - pub fn add_key(&mut self, key: &'a [u8]) { - self.keys.push(key); + pub fn add_key(&mut self, key: &[u8]) { + self.key_offsets.push(self.keys.len()); + self.keys.extend_from_slice(key); } pub fn start_block(&mut self, offset: usize) { @@ -64,15 +67,15 @@ fn generate_filter(&mut self) { self.filter_offsets.push(self.filters.len()); - if self.keys.is_empty() { return; } - let filter = self.policy.create_filter(&self.keys); + let filter = self.policy.create_filter(&self.keys, &self.key_offsets); self.filters.extend_from_slice(&filter); self.keys.clear(); + self.key_offsets.clear(); } pub fn finish(mut self) -> Vec<u8> {