Mercurial > lbo > hg > leveldb-rs
changeset 140:3ae8a50f1b7d
Remove FilterPolicy type parameter; use boxed filter policies instead
author | Lewin Bormann <lbo@spheniscida.de> |
---|---|
date | Mon, 02 Jan 2017 15:30:40 +0100 |
parents | 794926329f9f |
children | ad36c0aba2f2 |
files | src/filter.rs src/filter_block.rs src/table_builder.rs src/table_reader.rs |
diffstat | 4 files changed, 67 insertions(+), 49 deletions(-) [+] |
line wrap: on
line diff
--- a/src/filter.rs Mon Jan 02 12:06:08 2017 +0100 +++ b/src/filter.rs Mon Jan 02 15:30:40 2017 +0100 @@ -1,7 +1,11 @@ +use std::sync::Arc; + use integer_encoding::FixedInt; /// Encapsulates a filter algorithm allowing to search for keys more efficiently. -pub trait FilterPolicy: Clone { +/// Usually, policies are used as a BoxedFilterPolicy (see below), so they +/// can be easily cloned and nested. +pub trait FilterPolicy { /// Returns a string identifying this policy. fn name(&self) -> &'static str; /// Create a filter matching the given keys. @@ -10,10 +14,20 @@ fn key_may_match(&self, key: &[u8], filter: &[u8]) -> bool; } +/// A boxed and refcounted filter policy (reference-counted because a Box with unsized content +/// couldn't be cloned otherwise) +pub type BoxedFilterPolicy = Arc<Box<FilterPolicy>>; + /// Used for tables that don't have filter blocks but need a type parameter. #[derive(Clone)] pub struct NoFilterPolicy; +impl NoFilterPolicy { + pub fn new() -> BoxedFilterPolicy { + Arc::new(Box::new(NoFilterPolicy)) + } +} + impl FilterPolicy for NoFilterPolicy { fn name(&self) -> &'static str { "_" @@ -37,7 +51,13 @@ /// Beware the magic numbers... impl BloomPolicy { - pub fn new(bits_per_key: u32) -> BloomPolicy { + /// Returns a new boxed BloomPolicy. + pub fn new(bits_per_key: u32) -> BoxedFilterPolicy { + Arc::new(Box::new(BloomPolicy::new_unwrapped(bits_per_key))) + } + + /// Returns a new BloomPolicy with the given parameter. + fn new_unwrapped(bits_per_key: u32) -> BloomPolicy { let mut k = (bits_per_key as f32 * 0.69) as u32; if k < 1 { @@ -155,17 +175,17 @@ /// A User Key is u8*. /// An Internal Key is u8* u8{8} (where the second part encodes a tag and a sequence number). #[derive(Clone)] -pub struct InternalFilterPolicy<FP: FilterPolicy> { - internal: FP, +pub struct InternalFilterPolicy { + internal: BoxedFilterPolicy, } -impl<FP: FilterPolicy> InternalFilterPolicy<FP> { - pub fn new(inner: FP) -> InternalFilterPolicy<FP> { - InternalFilterPolicy { internal: inner } +impl InternalFilterPolicy { + pub fn new(inner: BoxedFilterPolicy) -> BoxedFilterPolicy { + Arc::new(Box::new(InternalFilterPolicy { internal: inner })) } } -impl<FP: FilterPolicy> FilterPolicy for InternalFilterPolicy<FP> { +impl FilterPolicy for InternalFilterPolicy { fn name(&self) -> &'static str { self.internal.name() } @@ -202,6 +222,7 @@ data } + /// Creates a filter using the keys from input_data(). fn create_filter() -> Vec<u8> { let fpol = BloomPolicy::new(_BITS_PER_KEY); let filter = fpol.create_filter(&input_data()); @@ -210,7 +231,7 @@ filter } - + /// Creates a filter using the keys from input_data() but converted to InternalKey format. fn create_internalkey_filter() -> Vec<u8> { let fpol = InternalFilterPolicy::new(BloomPolicy::new(_BITS_PER_KEY)); let input: Vec<Vec<u8>> = input_data() @@ -225,7 +246,7 @@ } #[test] - fn test_filter() { + fn test_filter_bloom() { let f = create_filter(); let fp = BloomPolicy::new(_BITS_PER_KEY); @@ -234,20 +255,20 @@ } } - // This test verifies that InternalFilterPolicy works correctly. + /// This test verifies that InternalFilterPolicy works correctly. #[test] fn test_filter_internal_keys_identical() { assert_eq!(create_filter(), create_internalkey_filter()); } #[test] - fn hash_test() { + fn test_filter_bloom_hash() { let d1 = vec![0x62]; let d2 = vec![0xc3, 0x97]; let d3 = vec![0xe2, 0x99, 0xa5]; let d4 = vec![0xe1, 0x80, 0xb9, 0x32]; - let fp = BloomPolicy::new(_BITS_PER_KEY); + let fp = BloomPolicy::new_unwrapped(_BITS_PER_KEY); assert_eq!(fp.bloom_hash(&d1), 0xef1345c4); assert_eq!(fp.bloom_hash(&d2), 0x5b663814);
--- a/src/filter_block.rs Mon Jan 02 12:06:08 2017 +0100 +++ b/src/filter_block.rs Mon Jan 02 15:30:40 2017 +0100 @@ -1,4 +1,5 @@ -use filter::FilterPolicy; +use block::BlockContents; +use filter::BoxedFilterPolicy; use std::rc::Rc; @@ -21,8 +22,8 @@ /// /// where offsets are 4 bytes, offset of offsets is 4 bytes, and log2 of FILTER_BASE is 1 byte. /// Two consecutive filter offsets may be the same. -pub struct FilterBlockBuilder<'a, FP: FilterPolicy> { - policy: FP, +pub struct FilterBlockBuilder<'a> { + policy: BoxedFilterPolicy, // filters, concatenated filters: Vec<u8>, filter_offsets: Vec<usize>, @@ -31,8 +32,8 @@ keys: Vec<&'a [u8]>, } -impl<'a, FP: FilterPolicy> FilterBlockBuilder<'a, FP> { - pub fn new(fp: FP) -> FilterBlockBuilder<'a, FP> { +impl<'a> FilterBlockBuilder<'a> { + pub fn new(fp: BoxedFilterPolicy) -> FilterBlockBuilder<'a> { FilterBlockBuilder { policy: fp, // some pre-allocation @@ -99,20 +100,20 @@ } #[derive(Clone)] -pub struct FilterBlockReader<FP: FilterPolicy> { - policy: FP, - block: Rc<Vec<u8>>, +pub struct FilterBlockReader { + policy: BoxedFilterPolicy, + block: Rc<BlockContents>, offsets_offset: usize, filter_base_lg2: u32, } -impl<FP: FilterPolicy> FilterBlockReader<FP> { - pub fn new_owned(pol: FP, data: Vec<u8>) -> FilterBlockReader<FP> { +impl FilterBlockReader { + pub fn new_owned(pol: BoxedFilterPolicy, data: Vec<u8>) -> FilterBlockReader { FilterBlockReader::new(pol, Rc::new(data)) } - pub fn new(pol: FP, data: Rc<Vec<u8>>) -> FilterBlockReader<FP> { + pub fn new(pol: BoxedFilterPolicy, data: Rc<Vec<u8>>) -> FilterBlockReader { assert!(data.len() >= 5); let fbase = data[data.len() - 1] as u32;
--- a/src/table_builder.rs Mon Jan 02 12:06:08 2017 +0100 +++ b/src/table_builder.rs Mon Jan 02 15:30:40 2017 +0100 @@ -1,7 +1,7 @@ use block::{BlockBuilder, BlockContents}; use blockhandle::BlockHandle; use cmp::InternalKeyCmp; -use filter::{FilterPolicy, NoFilterPolicy}; +use filter::{BoxedFilterPolicy, NoFilterPolicy}; use filter_block::FilterBlockBuilder; use key_types::InternalKey; use options::{CompressionType, Options}; @@ -73,7 +73,7 @@ /// the index block, padding to fill up to 40 B and at the end the 8B magic number /// 0xdb4775248b80fb57. -pub struct TableBuilder<'a, Dst: Write, FilterPol: FilterPolicy> { +pub struct TableBuilder<'a, Dst: Write> { opt: Options, dst: Dst, @@ -83,12 +83,12 @@ data_block: Option<BlockBuilder>, index_block: Option<BlockBuilder>, - filter_block: Option<FilterBlockBuilder<'a, FilterPol>>, + filter_block: Option<FilterBlockBuilder<'a>>, } -impl<'a, Dst: Write> TableBuilder<'a, Dst, NoFilterPolicy> { - pub fn new_no_filter(opt: Options, dst: Dst) -> TableBuilder<'a, Dst, NoFilterPolicy> { - TableBuilder::new(opt, dst, NoFilterPolicy) +impl<'a, Dst: Write> TableBuilder<'a, Dst> { + pub fn new_no_filter(opt: Options, dst: Dst) -> TableBuilder<'a, Dst> { + TableBuilder::new(opt, dst, NoFilterPolicy::new()) } } @@ -96,16 +96,16 @@ /// calculating checksums and bloom filters. /// It's recommended that you use InternalFilterPolicy as FilterPol, as that policy extracts the /// underlying user keys from the InternalKeys used as keys in the table. -impl<'a, Dst: Write, FilterPol: FilterPolicy> TableBuilder<'a, Dst, FilterPol> { +impl<'a, Dst: Write> TableBuilder<'a, Dst> { /// Create a new table builder. /// The comparator in opt will be wrapped in a InternalKeyCmp. - pub fn new(mut opt: Options, dst: Dst, fpol: FilterPol) -> TableBuilder<'a, Dst, FilterPol> { + pub fn new(mut opt: Options, dst: Dst, fpol: BoxedFilterPolicy) -> TableBuilder<'a, Dst> { opt.cmp = Arc::new(Box::new(InternalKeyCmp(opt.cmp.clone()))); TableBuilder::new_raw(opt, dst, fpol) } /// Like new(), but doesn't wrap the comparator in an InternalKeyCmp (for testing) - pub fn new_raw(opt: Options, dst: Dst, fpol: FilterPol) -> TableBuilder<'a, Dst, FilterPol> { + pub fn new_raw(opt: Options, dst: Dst, fpol: BoxedFilterPolicy) -> TableBuilder<'a, Dst> { TableBuilder { opt: opt.clone(), dst: dst,
--- a/src/table_reader.rs Mon Jan 02 12:06:08 2017 +0100 +++ b/src/table_reader.rs Mon Jan 02 15:30:40 2017 +0100 @@ -1,7 +1,7 @@ use block::{Block, BlockIter}; use blockhandle::BlockHandle; use cache::CacheID; -use filter::{InternalFilterPolicy, FilterPolicy}; +use filter::{BoxedFilterPolicy, InternalFilterPolicy}; use filter_block::FilterBlockReader; use key_types::InternalKey; use cmp::InternalKeyCmp; @@ -75,7 +75,7 @@ } } -pub struct Table<R: Read + Seek, FP: FilterPolicy> { +pub struct Table<R: Read + Seek> { file: R, file_size: usize, cache_id: CacheID, @@ -84,12 +84,12 @@ footer: Footer, indexblock: Block, - filters: Option<FilterBlockReader<FP>>, + filters: Option<FilterBlockReader>, } -impl<R: Read + Seek, FP: FilterPolicy> Table<R, FP> { +impl<R: Read + Seek> Table<R> { /// Creates a new table reader operating on unformatted keys (i.e., UserKey). - fn new_raw(opt: Options, mut file: R, size: usize, fp: FP) -> Result<Table<R, FP>> { + fn new_raw(opt: Options, mut file: R, size: usize, fp: BoxedFilterPolicy) -> Result<Table<R>> { let footer = try!(read_footer(&mut file, size)); let indexblock = try!(TableBlock::read_block(opt.clone(), &mut file, &footer.index)); @@ -136,11 +136,7 @@ /// Creates a new table reader operating on internal keys (i.e., InternalKey). This means that /// a different comparator (internal_key_cmp) and a different filter policy /// (InternalFilterPolicy) are used. - pub fn new(mut opt: Options, - file: R, - size: usize, - fp: FP) - -> Result<Table<R, InternalFilterPolicy<FP>>> { + pub fn new(mut opt: Options, file: R, size: usize, fp: BoxedFilterPolicy) -> Result<Table<R>> { opt.cmp = Arc::new(Box::new(InternalKeyCmp(opt.cmp.clone()))); let t = try!(Table::new_raw(opt, file, size, InternalFilterPolicy::new(fp))); Ok(t) @@ -171,7 +167,7 @@ } // Iterators read from the file; thus only one iterator can be borrowed (mutably) per scope - fn iter<'a>(&'a mut self) -> TableIterator<'a, R, FP> { + fn iter<'a>(&'a mut self) -> TableIterator<'a, R> { let iter = TableIterator { current_block: self.indexblock.iter(), init: false, @@ -224,8 +220,8 @@ /// This iterator is a "TwoLevelIterator"; it uses an index block in order to get an offset hint /// into the data blocks. -pub struct TableIterator<'a, R: 'a + Read + Seek, FP: 'a + FilterPolicy> { - table: &'a mut Table<R, FP>, +pub struct TableIterator<'a, R: 'a + Read + Seek> { + table: &'a mut Table<R>, opt: Options, // We're not using Option<BlockIter>, but instead a separate `init` field. That makes it easier // working with the current block in the iterator methods (no borrowing annoyance as with @@ -236,7 +232,7 @@ index_block: BlockIter, } -impl<'a, R: Read + Seek, FP: FilterPolicy> TableIterator<'a, R, FP> { +impl<'a, R: Read + Seek> TableIterator<'a, R> { // Skips to the entry referenced by the next entry in the index block. // This is called once a block has run out of entries. // Err means corruption or I/O error; Ok(true) means a new block was loaded; Ok(false) means @@ -262,7 +258,7 @@ } } -impl<'a, R: Read + Seek, FP: FilterPolicy> Iterator for TableIterator<'a, R, FP> { +impl<'a, R: Read + Seek> Iterator for TableIterator<'a, R> { type Item = (Vec<u8>, Vec<u8>); fn next(&mut self) -> Option<Self::Item> { @@ -294,7 +290,7 @@ } } -impl<'a, R: Read + Seek, FP: FilterPolicy> LdbIterator for TableIterator<'a, R, FP> { +impl<'a, R: Read + Seek> LdbIterator for TableIterator<'a, R> { // A call to valid() after seeking is necessary to ensure that the seek worked (e.g., no error // while reading from disk) fn seek(&mut self, to: &[u8]) {