changeset 140:3ae8a50f1b7d

Remove FilterPolicy type parameter; use boxed filter policies instead
author Lewin Bormann <lbo@spheniscida.de>
date Mon, 02 Jan 2017 15:30:40 +0100
parents 794926329f9f
children ad36c0aba2f2
files src/filter.rs src/filter_block.rs src/table_builder.rs src/table_reader.rs
diffstat 4 files changed, 67 insertions(+), 49 deletions(-) [+]
line wrap: on
line diff
--- a/src/filter.rs	Mon Jan 02 12:06:08 2017 +0100
+++ b/src/filter.rs	Mon Jan 02 15:30:40 2017 +0100
@@ -1,7 +1,11 @@
+use std::sync::Arc;
+
 use integer_encoding::FixedInt;
 
 /// Encapsulates a filter algorithm allowing to search for keys more efficiently.
-pub trait FilterPolicy: Clone {
+/// Usually, policies are used as a BoxedFilterPolicy (see below), so they
+/// can be easily cloned and nested.
+pub trait FilterPolicy {
     /// Returns a string identifying this policy.
     fn name(&self) -> &'static str;
     /// Create a filter matching the given keys.
@@ -10,10 +14,20 @@
     fn key_may_match(&self, key: &[u8], filter: &[u8]) -> bool;
 }
 
+/// A boxed and refcounted filter policy (reference-counted because a Box with unsized content
+/// couldn't be cloned otherwise)
+pub type BoxedFilterPolicy = Arc<Box<FilterPolicy>>;
+
 /// Used for tables that don't have filter blocks but need a type parameter.
 #[derive(Clone)]
 pub struct NoFilterPolicy;
 
+impl NoFilterPolicy {
+    pub fn new() -> BoxedFilterPolicy {
+        Arc::new(Box::new(NoFilterPolicy))
+    }
+}
+
 impl FilterPolicy for NoFilterPolicy {
     fn name(&self) -> &'static str {
         "_"
@@ -37,7 +51,13 @@
 
 /// Beware the magic numbers...
 impl BloomPolicy {
-    pub fn new(bits_per_key: u32) -> BloomPolicy {
+    /// Returns a new boxed BloomPolicy.
+    pub fn new(bits_per_key: u32) -> BoxedFilterPolicy {
+        Arc::new(Box::new(BloomPolicy::new_unwrapped(bits_per_key)))
+    }
+
+    /// Returns a new BloomPolicy with the given parameter.
+    fn new_unwrapped(bits_per_key: u32) -> BloomPolicy {
         let mut k = (bits_per_key as f32 * 0.69) as u32;
 
         if k < 1 {
@@ -155,17 +175,17 @@
 /// A User Key is u8*.
 /// An Internal Key is u8* u8{8} (where the second part encodes a tag and a sequence number).
 #[derive(Clone)]
-pub struct InternalFilterPolicy<FP: FilterPolicy> {
-    internal: FP,
+pub struct InternalFilterPolicy {
+    internal: BoxedFilterPolicy,
 }
 
-impl<FP: FilterPolicy> InternalFilterPolicy<FP> {
-    pub fn new(inner: FP) -> InternalFilterPolicy<FP> {
-        InternalFilterPolicy { internal: inner }
+impl InternalFilterPolicy {
+    pub fn new(inner: BoxedFilterPolicy) -> BoxedFilterPolicy {
+        Arc::new(Box::new(InternalFilterPolicy { internal: inner }))
     }
 }
 
-impl<FP: FilterPolicy> FilterPolicy for InternalFilterPolicy<FP> {
+impl FilterPolicy for InternalFilterPolicy {
     fn name(&self) -> &'static str {
         self.internal.name()
     }
@@ -202,6 +222,7 @@
         data
     }
 
+    /// Creates a filter using the keys from input_data().
     fn create_filter() -> Vec<u8> {
         let fpol = BloomPolicy::new(_BITS_PER_KEY);
         let filter = fpol.create_filter(&input_data());
@@ -210,7 +231,7 @@
         filter
     }
 
-
+    /// Creates a filter using the keys from input_data() but converted to InternalKey format.
     fn create_internalkey_filter() -> Vec<u8> {
         let fpol = InternalFilterPolicy::new(BloomPolicy::new(_BITS_PER_KEY));
         let input: Vec<Vec<u8>> = input_data()
@@ -225,7 +246,7 @@
     }
 
     #[test]
-    fn test_filter() {
+    fn test_filter_bloom() {
         let f = create_filter();
         let fp = BloomPolicy::new(_BITS_PER_KEY);
 
@@ -234,20 +255,20 @@
         }
     }
 
-    // This test verifies that InternalFilterPolicy works correctly.
+    /// This test verifies that InternalFilterPolicy works correctly.
     #[test]
     fn test_filter_internal_keys_identical() {
         assert_eq!(create_filter(), create_internalkey_filter());
     }
 
     #[test]
-    fn hash_test() {
+    fn test_filter_bloom_hash() {
         let d1 = vec![0x62];
         let d2 = vec![0xc3, 0x97];
         let d3 = vec![0xe2, 0x99, 0xa5];
         let d4 = vec![0xe1, 0x80, 0xb9, 0x32];
 
-        let fp = BloomPolicy::new(_BITS_PER_KEY);
+        let fp = BloomPolicy::new_unwrapped(_BITS_PER_KEY);
 
         assert_eq!(fp.bloom_hash(&d1), 0xef1345c4);
         assert_eq!(fp.bloom_hash(&d2), 0x5b663814);
--- a/src/filter_block.rs	Mon Jan 02 12:06:08 2017 +0100
+++ b/src/filter_block.rs	Mon Jan 02 15:30:40 2017 +0100
@@ -1,4 +1,5 @@
-use filter::FilterPolicy;
+use block::BlockContents;
+use filter::BoxedFilterPolicy;
 
 use std::rc::Rc;
 
@@ -21,8 +22,8 @@
 ///
 /// where offsets are 4 bytes, offset of offsets is 4 bytes, and log2 of FILTER_BASE is 1 byte.
 /// Two consecutive filter offsets may be the same.
-pub struct FilterBlockBuilder<'a, FP: FilterPolicy> {
-    policy: FP,
+pub struct FilterBlockBuilder<'a> {
+    policy: BoxedFilterPolicy,
     // filters, concatenated
     filters: Vec<u8>,
     filter_offsets: Vec<usize>,
@@ -31,8 +32,8 @@
     keys: Vec<&'a [u8]>,
 }
 
-impl<'a, FP: FilterPolicy> FilterBlockBuilder<'a, FP> {
-    pub fn new(fp: FP) -> FilterBlockBuilder<'a, FP> {
+impl<'a> FilterBlockBuilder<'a> {
+    pub fn new(fp: BoxedFilterPolicy) -> FilterBlockBuilder<'a> {
         FilterBlockBuilder {
             policy: fp,
             // some pre-allocation
@@ -99,20 +100,20 @@
 }
 
 #[derive(Clone)]
-pub struct FilterBlockReader<FP: FilterPolicy> {
-    policy: FP,
-    block: Rc<Vec<u8>>,
+pub struct FilterBlockReader {
+    policy: BoxedFilterPolicy,
+    block: Rc<BlockContents>,
 
     offsets_offset: usize,
     filter_base_lg2: u32,
 }
 
-impl<FP: FilterPolicy> FilterBlockReader<FP> {
-    pub fn new_owned(pol: FP, data: Vec<u8>) -> FilterBlockReader<FP> {
+impl FilterBlockReader {
+    pub fn new_owned(pol: BoxedFilterPolicy, data: Vec<u8>) -> FilterBlockReader {
         FilterBlockReader::new(pol, Rc::new(data))
     }
 
-    pub fn new(pol: FP, data: Rc<Vec<u8>>) -> FilterBlockReader<FP> {
+    pub fn new(pol: BoxedFilterPolicy, data: Rc<Vec<u8>>) -> FilterBlockReader {
         assert!(data.len() >= 5);
 
         let fbase = data[data.len() - 1] as u32;
--- a/src/table_builder.rs	Mon Jan 02 12:06:08 2017 +0100
+++ b/src/table_builder.rs	Mon Jan 02 15:30:40 2017 +0100
@@ -1,7 +1,7 @@
 use block::{BlockBuilder, BlockContents};
 use blockhandle::BlockHandle;
 use cmp::InternalKeyCmp;
-use filter::{FilterPolicy, NoFilterPolicy};
+use filter::{BoxedFilterPolicy, NoFilterPolicy};
 use filter_block::FilterBlockBuilder;
 use key_types::InternalKey;
 use options::{CompressionType, Options};
@@ -73,7 +73,7 @@
 /// the index block, padding to fill up to 40 B and at the end the 8B magic number
 /// 0xdb4775248b80fb57.
 
-pub struct TableBuilder<'a, Dst: Write, FilterPol: FilterPolicy> {
+pub struct TableBuilder<'a, Dst: Write> {
     opt: Options,
     dst: Dst,
 
@@ -83,12 +83,12 @@
 
     data_block: Option<BlockBuilder>,
     index_block: Option<BlockBuilder>,
-    filter_block: Option<FilterBlockBuilder<'a, FilterPol>>,
+    filter_block: Option<FilterBlockBuilder<'a>>,
 }
 
-impl<'a, Dst: Write> TableBuilder<'a, Dst, NoFilterPolicy> {
-    pub fn new_no_filter(opt: Options, dst: Dst) -> TableBuilder<'a, Dst, NoFilterPolicy> {
-        TableBuilder::new(opt, dst, NoFilterPolicy)
+impl<'a, Dst: Write> TableBuilder<'a, Dst> {
+    pub fn new_no_filter(opt: Options, dst: Dst) -> TableBuilder<'a, Dst> {
+        TableBuilder::new(opt, dst, NoFilterPolicy::new())
     }
 }
 
@@ -96,16 +96,16 @@
 /// calculating checksums and bloom filters.
 /// It's recommended that you use InternalFilterPolicy as FilterPol, as that policy extracts the
 /// underlying user keys from the InternalKeys used as keys in the table.
-impl<'a, Dst: Write, FilterPol: FilterPolicy> TableBuilder<'a, Dst, FilterPol> {
+impl<'a, Dst: Write> TableBuilder<'a, Dst> {
     /// Create a new table builder.
     /// The comparator in opt will be wrapped in a InternalKeyCmp.
-    pub fn new(mut opt: Options, dst: Dst, fpol: FilterPol) -> TableBuilder<'a, Dst, FilterPol> {
+    pub fn new(mut opt: Options, dst: Dst, fpol: BoxedFilterPolicy) -> TableBuilder<'a, Dst> {
         opt.cmp = Arc::new(Box::new(InternalKeyCmp(opt.cmp.clone())));
         TableBuilder::new_raw(opt, dst, fpol)
     }
 
     /// Like new(), but doesn't wrap the comparator in an InternalKeyCmp (for testing)
-    pub fn new_raw(opt: Options, dst: Dst, fpol: FilterPol) -> TableBuilder<'a, Dst, FilterPol> {
+    pub fn new_raw(opt: Options, dst: Dst, fpol: BoxedFilterPolicy) -> TableBuilder<'a, Dst> {
         TableBuilder {
             opt: opt.clone(),
             dst: dst,
--- a/src/table_reader.rs	Mon Jan 02 12:06:08 2017 +0100
+++ b/src/table_reader.rs	Mon Jan 02 15:30:40 2017 +0100
@@ -1,7 +1,7 @@
 use block::{Block, BlockIter};
 use blockhandle::BlockHandle;
 use cache::CacheID;
-use filter::{InternalFilterPolicy, FilterPolicy};
+use filter::{BoxedFilterPolicy, InternalFilterPolicy};
 use filter_block::FilterBlockReader;
 use key_types::InternalKey;
 use cmp::InternalKeyCmp;
@@ -75,7 +75,7 @@
     }
 }
 
-pub struct Table<R: Read + Seek, FP: FilterPolicy> {
+pub struct Table<R: Read + Seek> {
     file: R,
     file_size: usize,
     cache_id: CacheID,
@@ -84,12 +84,12 @@
 
     footer: Footer,
     indexblock: Block,
-    filters: Option<FilterBlockReader<FP>>,
+    filters: Option<FilterBlockReader>,
 }
 
-impl<R: Read + Seek, FP: FilterPolicy> Table<R, FP> {
+impl<R: Read + Seek> Table<R> {
     /// Creates a new table reader operating on unformatted keys (i.e., UserKey).
-    fn new_raw(opt: Options, mut file: R, size: usize, fp: FP) -> Result<Table<R, FP>> {
+    fn new_raw(opt: Options, mut file: R, size: usize, fp: BoxedFilterPolicy) -> Result<Table<R>> {
         let footer = try!(read_footer(&mut file, size));
 
         let indexblock = try!(TableBlock::read_block(opt.clone(), &mut file, &footer.index));
@@ -136,11 +136,7 @@
     /// Creates a new table reader operating on internal keys (i.e., InternalKey). This means that
     /// a different comparator (internal_key_cmp) and a different filter policy
     /// (InternalFilterPolicy) are used.
-    pub fn new(mut opt: Options,
-               file: R,
-               size: usize,
-               fp: FP)
-               -> Result<Table<R, InternalFilterPolicy<FP>>> {
+    pub fn new(mut opt: Options, file: R, size: usize, fp: BoxedFilterPolicy) -> Result<Table<R>> {
         opt.cmp = Arc::new(Box::new(InternalKeyCmp(opt.cmp.clone())));
         let t = try!(Table::new_raw(opt, file, size, InternalFilterPolicy::new(fp)));
         Ok(t)
@@ -171,7 +167,7 @@
     }
 
     // Iterators read from the file; thus only one iterator can be borrowed (mutably) per scope
-    fn iter<'a>(&'a mut self) -> TableIterator<'a, R, FP> {
+    fn iter<'a>(&'a mut self) -> TableIterator<'a, R> {
         let iter = TableIterator {
             current_block: self.indexblock.iter(),
             init: false,
@@ -224,8 +220,8 @@
 
 /// This iterator is a "TwoLevelIterator"; it uses an index block in order to get an offset hint
 /// into the data blocks.
-pub struct TableIterator<'a, R: 'a + Read + Seek, FP: 'a + FilterPolicy> {
-    table: &'a mut Table<R, FP>,
+pub struct TableIterator<'a, R: 'a + Read + Seek> {
+    table: &'a mut Table<R>,
     opt: Options,
     // We're not using Option<BlockIter>, but instead a separate `init` field. That makes it easier
     // working with the current block in the iterator methods (no borrowing annoyance as with
@@ -236,7 +232,7 @@
     index_block: BlockIter,
 }
 
-impl<'a, R: Read + Seek, FP: FilterPolicy> TableIterator<'a, R, FP> {
+impl<'a, R: Read + Seek> TableIterator<'a, R> {
     // Skips to the entry referenced by the next entry in the index block.
     // This is called once a block has run out of entries.
     // Err means corruption or I/O error; Ok(true) means a new block was loaded; Ok(false) means
@@ -262,7 +258,7 @@
     }
 }
 
-impl<'a, R: Read + Seek, FP: FilterPolicy> Iterator for TableIterator<'a, R, FP> {
+impl<'a, R: Read + Seek> Iterator for TableIterator<'a, R> {
     type Item = (Vec<u8>, Vec<u8>);
 
     fn next(&mut self) -> Option<Self::Item> {
@@ -294,7 +290,7 @@
     }
 }
 
-impl<'a, R: Read + Seek, FP: FilterPolicy> LdbIterator for TableIterator<'a, R, FP> {
+impl<'a, R: Read + Seek> LdbIterator for TableIterator<'a, R> {
     // A call to valid() after seeking is necessary to ensure that the seek worked (e.g., no error
     // while reading from disk)
     fn seek(&mut self, to: &[u8]) {