Mercurial > lbo > hg > sstable
changeset 28:8fe35754a39d
Add missing files (forgot to track them before)
author | Lewin Bormann <lbo@spheniscida.de> |
---|---|
date | Tue, 03 Jan 2017 19:45:33 +0100 |
parents | d380295dba2f |
children | cf6c50c9a5e7 |
files | src/cmp.rs src/filter.rs src/filter_block.rs src/key_types.rs src/types.rs |
diffstat | 5 files changed, 852 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cmp.rs Tue Jan 03 19:45:33 2017 +0100 @@ -0,0 +1,161 @@ +use key_types::{self, LookupKey}; +use types; + +use std::cmp::Ordering; +use std::sync::Arc; + +/// Comparator trait, supporting types that can be nested (i.e., add additional functionality on +/// top of an inner comparator) +pub trait Cmp { + fn cmp(&self, &[u8], &[u8]) -> Ordering; + fn find_shortest_sep(&self, &[u8], &[u8]) -> Vec<u8>; + fn find_short_succ(&self, &[u8]) -> Vec<u8>; +} + +/// Lexical comparator. +#[derive(Clone)] +pub struct DefaultCmp; + +impl Cmp for DefaultCmp { + fn cmp(&self, a: &[u8], b: &[u8]) -> Ordering { + a.cmp(b) + } + + fn find_shortest_sep(&self, a: &[u8], b: &[u8]) -> Vec<u8> { + if a == b { + return a.to_vec(); + } + + let min = if a.len() < b.len() { a.len() } else { b.len() }; + let mut diff_at = 0; + + while diff_at < min && a[diff_at] == b[diff_at] { + diff_at += 1; + } + + while diff_at < min { + let diff = a[diff_at]; + if diff < 0xff && diff + 1 < b[diff_at] { + let mut sep = Vec::from(&a[0..diff_at + 1]); + sep[diff_at] += 1; + assert!(self.cmp(&sep, b) == Ordering::Less); + return sep; + } + + diff_at += 1; + } + return a.to_vec(); + } + + fn find_short_succ(&self, a: &[u8]) -> Vec<u8> { + let mut result = a.to_vec(); + for i in 0..a.len() { + if a[i] != 0xff { + result[i] += 1; + result.resize(i + 1, 0); + return result; + } + } + // Rare path + result.push(255); + return result; + } +} + +/// Same as memtable_key_cmp, but for InternalKeys. +#[derive(Clone)] +pub struct InternalKeyCmp(pub Arc<Box<Cmp>>); + +impl Cmp for InternalKeyCmp { + fn cmp(&self, a: &[u8], b: &[u8]) -> Ordering { + let (_, seqa, keya) = key_types::parse_internal_key(a); + let (_, seqb, keyb) = key_types::parse_internal_key(b); + + match self.0.cmp(keya, keyb) { + Ordering::Less => Ordering::Less, + Ordering::Greater => Ordering::Greater, + // reverse comparison! 
+ Ordering::Equal => seqb.cmp(&seqa), + } + } + + fn find_shortest_sep(&self, a: &[u8], b: &[u8]) -> Vec<u8> { + let (_, seqa, keya) = key_types::parse_internal_key(a); + let (_, _, keyb) = key_types::parse_internal_key(b); + + let sep: Vec<u8> = self.0.find_shortest_sep(keya, keyb); + + if sep.len() < keya.len() && self.0.cmp(keya, &sep) == Ordering::Less { + return LookupKey::new(&sep, types::MAX_SEQUENCE_NUMBER).internal_key().to_vec(); + } + + return LookupKey::new(&sep, seqa).internal_key().to_vec(); + } + + fn find_short_succ(&self, a: &[u8]) -> Vec<u8> { + let (_, seq, key) = key_types::parse_internal_key(a); + let succ: Vec<u8> = self.0.find_short_succ(key); + return LookupKey::new(&succ, seq).internal_key().to_vec(); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use key_types::LookupKey; + use types; + + use std::sync::Arc; + + #[test] + fn test_cmp_defaultcmp_shortest_sep() { + assert_eq!(DefaultCmp.find_shortest_sep("abcd".as_bytes(), "abcf".as_bytes()), + "abce".as_bytes()); + assert_eq!(DefaultCmp.find_shortest_sep("abc".as_bytes(), "acd".as_bytes()), + "abc".as_bytes()); + assert_eq!(DefaultCmp.find_shortest_sep("abcdefghi".as_bytes(), "abcffghi".as_bytes()), + "abce".as_bytes()); + assert_eq!(DefaultCmp.find_shortest_sep("a".as_bytes(), "a".as_bytes()), + "a".as_bytes()); + assert_eq!(DefaultCmp.find_shortest_sep("a".as_bytes(), "b".as_bytes()), + "a".as_bytes()); + assert_eq!(DefaultCmp.find_shortest_sep("abc".as_bytes(), "zzz".as_bytes()), + "b".as_bytes()); + assert_eq!(DefaultCmp.find_shortest_sep("".as_bytes(), "".as_bytes()), + "".as_bytes()); + } + + #[test] + fn test_cmp_defaultcmp_short_succ() { + assert_eq!(DefaultCmp.find_short_succ("abcd".as_bytes()), + "b".as_bytes()); + assert_eq!(DefaultCmp.find_short_succ("zzzz".as_bytes()), + "{".as_bytes()); + assert_eq!(DefaultCmp.find_short_succ(&[]), &[0xff]); + assert_eq!(DefaultCmp.find_short_succ(&[0xff, 0xff, 0xff]), + &[0xff, 0xff, 0xff, 0xff]); + } + + #[test] + fn 
test_cmp_internalkeycmp_shortest_sep() { + let cmp = InternalKeyCmp(Arc::new(Box::new(DefaultCmp))); + assert_eq!(cmp.find_shortest_sep(LookupKey::new("abcd".as_bytes(), 1).internal_key(), + LookupKey::new("abcf".as_bytes(), 2).internal_key()), + LookupKey::new("abce".as_bytes(), 1).internal_key()); + assert_eq!(cmp.find_shortest_sep(LookupKey::new("abc".as_bytes(), 1).internal_key(), + LookupKey::new("zzz".as_bytes(), 2).internal_key()), + LookupKey::new("b".as_bytes(), types::MAX_SEQUENCE_NUMBER).internal_key()); + assert_eq!(cmp.find_shortest_sep(LookupKey::new("abc".as_bytes(), 1).internal_key(), + LookupKey::new("acd".as_bytes(), 2).internal_key()), + LookupKey::new("abc".as_bytes(), 1).internal_key()); + assert_eq!(cmp.find_shortest_sep(LookupKey::new("abc".as_bytes(), 1).internal_key(), + LookupKey::new("abe".as_bytes(), 2).internal_key()), + LookupKey::new("abd".as_bytes(), 1).internal_key()); + assert_eq!(cmp.find_shortest_sep(LookupKey::new("".as_bytes(), 1).internal_key(), + LookupKey::new("".as_bytes(), 2).internal_key()), + LookupKey::new("".as_bytes(), 1).internal_key()); + assert_eq!(cmp.find_shortest_sep(LookupKey::new("abc".as_bytes(), 2).internal_key(), + LookupKey::new("abc".as_bytes(), 2).internal_key()), + LookupKey::new("abc".as_bytes(), 2).internal_key()); + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/filter.rs Tue Jan 03 19:45:33 2017 +0100 @@ -0,0 +1,283 @@ +#![allow(dead_code)] + +//! This module is yet to be used, but will speed up table lookups. +//! + +use std::sync::Arc; + +use integer_encoding::FixedInt; + +/// Encapsulates a filter algorithm allowing to search for keys more efficiently. +/// Usually, policies are used as a BoxedFilterPolicy (see below), so they +/// can be easily cloned and nested. +pub trait FilterPolicy { + /// Returns a string identifying this policy. + fn name(&self) -> &'static str; + /// Create a filter matching the given keys. + fn create_filter(&self, keys: &Vec<&[u8]>) -> Vec<u8>; + /// Check whether the given key may match the filter. + fn key_may_match(&self, key: &[u8], filter: &[u8]) -> bool; +} + +/// A boxed and refcounted filter policy (reference-counted because a Box with unsized content +/// couldn't be cloned otherwise) +pub type BoxedFilterPolicy = Arc<Box<FilterPolicy>>; + +/// Used for tables that don't have filter blocks but need a type parameter. +#[derive(Clone)] +pub struct NoFilterPolicy; + +impl NoFilterPolicy { + pub fn new() -> BoxedFilterPolicy { + Arc::new(Box::new(NoFilterPolicy)) + } +} + +impl FilterPolicy for NoFilterPolicy { + fn name(&self) -> &'static str { + "_" + } + fn create_filter(&self, _: &Vec<&[u8]>) -> Vec<u8> { + vec![] + } + fn key_may_match(&self, _: &[u8], _: &[u8]) -> bool { + true + } +} + +const BLOOM_SEED: u32 = 0xbc9f1d34; + +/// A filter policy using a bloom filter internally. +#[derive(Clone)] +pub struct BloomPolicy { + bits_per_key: u32, + k: u32, +} + +/// Beware the magic numbers... +impl BloomPolicy { + /// Returns a new boxed BloomPolicy. + pub fn new(bits_per_key: u32) -> BoxedFilterPolicy { + Arc::new(Box::new(BloomPolicy::new_unwrapped(bits_per_key))) + } + + /// Returns a new BloomPolicy with the given parameter. 
+ fn new_unwrapped(bits_per_key: u32) -> BloomPolicy { + let mut k = (bits_per_key as f32 * 0.69) as u32; + + if k < 1 { + k = 1; + } else if k > 30 { + k = 30; + } + + BloomPolicy { + bits_per_key: bits_per_key, + k: k, + } + } + + fn bloom_hash(&self, data: &[u8]) -> u32 { + let m: u32 = 0xc6a4a793; + let r: u32 = 24; + + let mut ix = 0; + let limit = data.len(); + + let mut h: u32 = BLOOM_SEED ^ (limit as u64 * m as u64) as u32; + + while ix + 4 <= limit { + let w = u32::decode_fixed(&data[ix..ix + 4]); + ix += 4; + + h = (h as u64 + w as u64) as u32; + h = (h as u64 * m as u64) as u32; + h ^= h >> 16; + } + + // Process left-over bytes + assert!(limit - ix < 4); + + if limit - ix > 0 { + let mut i = 0; + + for b in data[ix..].iter() { + h += (*b as u32) << (8 * i); + i += 1; + } + + h = (h as u64 * m as u64) as u32; + h ^= h >> r; + } + h + } +} + +impl FilterPolicy for BloomPolicy { + fn name(&self) -> &'static str { + "leveldb.BuiltinBloomFilter2" + } + fn create_filter(&self, keys: &Vec<&[u8]>) -> Vec<u8> { + let filter_bits = keys.len() * self.bits_per_key as usize; + let mut filter = Vec::new(); + + if filter_bits < 64 { + // Preallocate, then resize + filter.reserve(8 + 1); + filter.resize(8, 0 as u8); + } else { + // Preallocate, then resize + filter.reserve(1 + ((filter_bits + 7) / 8)); + filter.resize((filter_bits + 7) / 8, 0); + } + + let adj_filter_bits = (filter.len() * 8) as u32; + + // Encode k at the end of the filter. 
+ filter.push(self.k as u8); + + for key in keys { + let mut h = self.bloom_hash(key); + let delta = (h >> 17) | (h << 15); + + for _ in 0..self.k { + let bitpos = (h % adj_filter_bits) as usize; + filter[bitpos / 8] |= 1 << (bitpos % 8); + h = (h as u64 + delta as u64) as u32; + } + } + + filter + } + fn key_may_match(&self, key: &[u8], filter: &[u8]) -> bool { + if filter.len() == 0 { + return true; + } + + let bits = (filter.len() - 1) as u32 * 8; + let k = filter[filter.len() - 1]; + let filter_adj = &filter[0..filter.len() - 1]; + + if k > 30 { + return true; + } + + let mut h = self.bloom_hash(key); + let delta = (h >> 17) | (h << 15); + for _ in 0..k { + let bitpos = (h % bits) as usize; + if (filter_adj[bitpos / 8] & (1 << (bitpos % 8))) == 0 { + return false; + } + h = (h as u64 + delta as u64) as u32; + } + true + } +} + +/// A filter policy wrapping another policy; extracting the user key from internal keys for all +/// operations. +/// A User Key is u8*. +/// An Internal Key is u8* u8{8} (where the second part encodes a tag and a sequence number). 
+#[derive(Clone)] +pub struct InternalFilterPolicy { + internal: BoxedFilterPolicy, +} + +impl InternalFilterPolicy { + pub fn new(inner: BoxedFilterPolicy) -> BoxedFilterPolicy { + Arc::new(Box::new(InternalFilterPolicy { internal: inner })) + } +} + +impl FilterPolicy for InternalFilterPolicy { + fn name(&self) -> &'static str { + self.internal.name() + } + + fn create_filter(&self, keys: &Vec<&[u8]>) -> Vec<u8> { + let mut mod_keys = keys.clone(); + let mut i = 0; + + for key in keys { + mod_keys[i] = &key[0..key.len() - 8]; + i += 1; + } + self.internal.create_filter(&mod_keys) + } + + fn key_may_match(&self, key: &[u8], filter: &[u8]) -> bool { + self.internal.key_may_match(&key[0..key.len() - 8], filter) + } +} + + +#[cfg(test)] +mod tests { + use super::*; + use key_types::LookupKey; + + const _BITS_PER_KEY: u32 = 12; + + fn input_data() -> Vec<&'static [u8]> { + let data = vec!["abc123def456".as_bytes(), + "xxx111xxx222".as_bytes(), + "ab00cd00ab".as_bytes(), + "908070605040302010".as_bytes()]; + data + } + + /// Creates a filter using the keys from input_data(). + fn create_filter() -> Vec<u8> { + let fpol = BloomPolicy::new(_BITS_PER_KEY); + let filter = fpol.create_filter(&input_data()); + + assert_eq!(filter, vec![194, 148, 129, 140, 192, 196, 132, 164, 8]); + filter + } + + /// Creates a filter using the keys from input_data() but converted to InternalKey format. 
+ fn create_internalkey_filter() -> Vec<u8> { + let fpol = InternalFilterPolicy::new(BloomPolicy::new(_BITS_PER_KEY)); + let input: Vec<Vec<u8>> = input_data() + .into_iter() + .map(|k| LookupKey::new(k, 123).internal_key().to_vec()) + .collect(); + let input_ = input.iter().map(|k| k.as_slice()).collect(); + + let filter = fpol.create_filter(&input_); + + filter + } + + #[test] + fn test_filter_bloom() { + let f = create_filter(); + let fp = BloomPolicy::new(_BITS_PER_KEY); + + for k in input_data().iter() { + assert!(fp.key_may_match(k, &f)); + } + } + + /// This test verifies that InternalFilterPolicy works correctly. + #[test] + fn test_filter_internal_keys_identical() { + assert_eq!(create_filter(), create_internalkey_filter()); + } + + #[test] + fn test_filter_bloom_hash() { + let d1 = vec![0x62]; + let d2 = vec![0xc3, 0x97]; + let d3 = vec![0xe2, 0x99, 0xa5]; + let d4 = vec![0xe1, 0x80, 0xb9, 0x32]; + + let fp = BloomPolicy::new_unwrapped(_BITS_PER_KEY); + + assert_eq!(fp.bloom_hash(&d1), 0xef1345c4); + assert_eq!(fp.bloom_hash(&d2), 0x5b663814); + assert_eq!(fp.bloom_hash(&d3), 0x323c078f); + assert_eq!(fp.bloom_hash(&d4), 0xed21633a); + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/filter_block.rs Tue Jan 03 19:45:33 2017 +0100 @@ -0,0 +1,233 @@ +#![allow(dead_code)] + +//! parts of this module are not yet used (the reading part). + +use block::BlockContents; +use filter::BoxedFilterPolicy; + +use std::rc::Rc; + +use integer_encoding::FixedInt; + +const FILTER_BASE_LOG2: u32 = 11; + +#[allow(dead_code)] +const FILTER_BASE: u32 = 1 << FILTER_BASE_LOG2; // 2 KiB + +/// For a given byte offset, returns the index of the filter that includes the key at that offset. +#[inline] +fn get_filter_index(offset: usize, base_lg2: u32) -> u32 { + // divide by 2048 + (offset >> base_lg2 as usize) as u32 +} + +/// A Filter Block is built like this: +/// +/// [filter0, filter1, filter2, ..., offset of filter0, offset of filter1, ..., offset of offsets +/// array, log2 of FILTER_BASE] +/// +/// where offsets are 4 bytes, offset of offsets is 4 bytes, and log2 of FILTER_BASE is 1 byte. +/// Two consecutive filter offsets may be the same. 
+pub struct FilterBlockBuilder<'a> { + policy: BoxedFilterPolicy, + // filters, concatenated + filters: Vec<u8>, + filter_offsets: Vec<usize>, + + // Reset on every start_block() + keys: Vec<&'a [u8]>, +} + +impl<'a> FilterBlockBuilder<'a> { + pub fn new(fp: BoxedFilterPolicy) -> FilterBlockBuilder<'a> { + FilterBlockBuilder { + policy: fp, + // some pre-allocation + filters: Vec::with_capacity(1024 * 40), + filter_offsets: Vec::with_capacity(1024), + keys: Vec::with_capacity(1024), + } + } + + pub fn filter_name(&self) -> &'static str { + self.policy.name() + } + + pub fn add_key(&mut self, key: &'a [u8]) { + self.keys.push(key); + } + + pub fn start_block(&mut self, offset: usize) { + let filter_ix = get_filter_index(offset, FILTER_BASE_LOG2); + assert!(filter_ix >= self.filter_offsets.len() as u32); + + while filter_ix > self.filter_offsets.len() as u32 { + self.generate_filter(); + } + } + + fn generate_filter(&mut self) { + self.filter_offsets.push(self.filters.len()); + + if self.keys.is_empty() { + return; + } + + let filter = self.policy.create_filter(&self.keys); + self.filters.extend_from_slice(&filter); + + self.keys.clear(); + } + + pub fn finish(mut self) -> Vec<u8> { + if !self.keys.is_empty() { + self.generate_filter(); + } + + let mut result = self.filters; + let offsets_offset = result.len(); + let mut ix = result.len(); + + result.resize(ix + 4 * self.filter_offsets.len() + 5, 0); + + // Put filter offsets at the end + for offset in self.filter_offsets.into_iter() { + (offset as u32).encode_fixed(&mut result[ix..ix + 4]); + ix += 4; + } + + (offsets_offset as u32).encode_fixed(&mut result[ix..ix + 4]); + ix += 4; + + result[ix] = FILTER_BASE_LOG2 as u8; + + result + } +} + +#[derive(Clone)] +pub struct FilterBlockReader { + policy: BoxedFilterPolicy, + block: Rc<BlockContents>, + + offsets_offset: usize, + filter_base_lg2: u32, +} + +impl FilterBlockReader { + pub fn new_owned(pol: BoxedFilterPolicy, data: Vec<u8>) -> FilterBlockReader { + 
FilterBlockReader::new(pol, Rc::new(data)) + } + + pub fn new(pol: BoxedFilterPolicy, data: Rc<Vec<u8>>) -> FilterBlockReader { + assert!(data.len() >= 5); + + let fbase = data[data.len() - 1] as u32; + let offset = u32::decode_fixed(&data[data.len() - 5..data.len() - 1]) as usize; + + FilterBlockReader { + policy: pol, + block: data, + filter_base_lg2: fbase, + offsets_offset: offset, + } + } + + /// Returns number of filters + pub fn num(&self) -> u32 { + ((self.block.len() - self.offsets_offset - 5) / 4) as u32 + } + + /// Returns the offset of the offset with index i. + fn offset_of(&self, i: u32) -> usize { + let offset_offset = self.offsets_offset + 4 * i as usize; + u32::decode_fixed(&self.block[offset_offset..offset_offset + 4]) as usize + } + + /// blk_offset is the offset of the block containing key. Returns whether the key matches the + /// filter for the block at blk_offset. + pub fn key_may_match(&self, blk_offset: usize, key: &[u8]) -> bool { + if get_filter_index(blk_offset, self.filter_base_lg2) > self.num() { + return true; + } + + let filter_begin = self.offset_of(get_filter_index(blk_offset, self.filter_base_lg2)); + let filter_end = self.offset_of(get_filter_index(blk_offset, self.filter_base_lg2) + 1); + + assert!(filter_begin < filter_end); + assert!(filter_end <= self.offsets_offset); + + self.policy.key_may_match(key, &self.block[filter_begin..filter_end]) + } +} + +#[cfg(test)] +mod tests { + use filter::BloomPolicy; + use super::*; + use super::FILTER_BASE_LOG2; + use super::get_filter_index; + + #[test] + fn test_filter_index() { + assert_eq!(get_filter_index(3777, FILTER_BASE_LOG2), 1); + assert_eq!(get_filter_index(10000, FILTER_BASE_LOG2), 4); + } + + fn get_keys() -> Vec<&'static [u8]> { + vec!["abcd".as_bytes(), "efgh".as_bytes(), "ijkl".as_bytes(), "mnopqrstuvwxyz".as_bytes()] + } + + fn produce_filter_block() -> Vec<u8> { + let keys = get_keys(); + let mut bld = FilterBlockBuilder::new(BloomPolicy::new(32)); + + bld.start_block(0); 
+ + for k in keys.iter() { + bld.add_key(k); + } + + // second block + bld.start_block(5000); + + for k in keys.iter() { + bld.add_key(k); + } + + bld.finish() + } + + #[test] + fn test_filter_block_builder() { + let result = produce_filter_block(); + // 2 blocks of 4 filters of 4 bytes plus 1B for `k`; plus three filter offsets (because of + // the block offsets of 0 and 5000); plus footer + assert_eq!(result.len(), 2 * (get_keys().len() * 4 + 1) + (3 * 4) + 5); + assert_eq!(result, + vec![234, 195, 25, 155, 61, 141, 173, 140, 221, 28, 222, 92, 220, 112, 234, + 227, 22, 234, 195, 25, 155, 61, 141, 173, 140, 221, 28, 222, 92, 220, + 112, 234, 227, 22, 0, 0, 0, 0, 17, 0, 0, 0, 17, 0, 0, 0, 34, 0, 0, 0, 11]); + } + + #[test] + fn test_filter_block_build_read() { + let result = produce_filter_block(); + let reader = FilterBlockReader::new_owned(BloomPolicy::new(32), result); + + assert_eq!(reader.offset_of(get_filter_index(5121, FILTER_BASE_LOG2)), + 17); // third block in third filter + + let unknown_keys = vec!["xsb".as_bytes(), "9sad".as_bytes(), "assssaaaass".as_bytes()]; + + for block_offset in vec![0, 1024, 5000, 6025].into_iter() { + for key in get_keys().iter() { + assert!(reader.key_may_match(block_offset, key), + format!("{} {:?} ", block_offset, key)); + } + for key in unknown_keys.iter() { + assert!(!reader.key_may_match(block_offset, key)); + } + } + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/key_types.rs Tue Jan 03 19:45:33 2017 +0100 @@ -0,0 +1,89 @@ +use options::{CompressionType, int_to_compressiontype}; +use types::{ValueType, SequenceNumber}; + +use integer_encoding::{FixedInt, VarInt}; + +// The following typedefs are used to distinguish between the different key formats used internally +// by different modules. + +// TODO: At some point, convert those into actual types with conversions between them. That's a lot +// of boilerplate, but increases type safety. + +/// A UserKey is the actual key supplied by the calling application, without any internal +/// decorations. +pub type UserKey<'a> = &'a [u8]; + +/// An InternalKey consists of [key, tag], and is used as item type for Table iterators. +pub type InternalKey<'a> = &'a [u8]; + +/// A LookupKey is the first part of a memtable key, consisting of [keylen: varint32, key: *u8, +/// tag: u64] +/// keylen is the length of key plus 8 (for the tag; this for LevelDB compatibility) +#[derive(Clone, Debug)] +pub struct LookupKey { + key: Vec<u8>, + key_offset: usize, +} + +impl LookupKey { + #[allow(unused_assignments)] + pub fn new(k: &[u8], s: SequenceNumber) -> LookupKey { + let mut key = Vec::with_capacity(k.len() + k.len().required_space() + + <u64 as FixedInt>::required_space()); + let internal_keylen = k.len() + 8; + let mut i = 0; + + key.reserve(internal_keylen.required_space() + internal_keylen); + + key.resize(internal_keylen.required_space(), 0); + i += internal_keylen.encode_var(&mut key[i..]); + + key.extend_from_slice(k); + i += k.len(); + + key.resize(i + <u64 as FixedInt>::required_space(), 0); + (s << 8 | ValueType::TypeValue as u64).encode_fixed(&mut key[i..]); + i += <u64 as FixedInt>::required_space(); + + LookupKey { + key: key, + key_offset: k.len().required_space(), + } + } + + // Returns only key + #[allow(dead_code)] + pub fn user_key<'a>(&'a self) -> UserKey<'a> { + &self.key[self.key_offset..self.key.len() - 8] + } + + // 
Returns key+tag + pub fn internal_key<'a>(&'a self) -> InternalKey<'a> { + &self.key[self.key_offset..] + } +} + +/// Parses a tag into (type, sequence number) +pub fn parse_tag(tag: u64) -> (ValueType, u64) { + let seq = tag >> 8; + let typ = tag & 0xff; + + match typ { + 0 => (ValueType::TypeDeletion, seq), + 1 => (ValueType::TypeValue, seq), + _ => (ValueType::TypeValue, seq), + } +} + +/// Parse a key in InternalKey format. +pub fn parse_internal_key<'a>(ikey: InternalKey<'a>) -> (CompressionType, u64, UserKey<'a>) { + assert!(ikey.len() >= 8); + + let (ctype, seq) = parse_tag(FixedInt::decode_fixed(&ikey[ikey.len() - 8..])); + let ctype = int_to_compressiontype(ctype as u32).unwrap_or(CompressionType::CompressionNone); + + return (ctype, seq, &ikey[0..ikey.len() - 8]); +} + +#[cfg(test)] +mod tests {}
// src/types.rs
#![allow(dead_code)]

//! A collection of fundamental and/or simple types used by other modules

use std::error::Error;
use std::fmt::{self, Display, Formatter};
use std::io;
use std::result;

/// Distinguishes live entries (values) from tombstones (deletions) in a key tag.
#[derive(Debug, PartialOrd, PartialEq)]
pub enum ValueType {
    TypeDeletion = 0,
    TypeValue = 1,
}

/// Represents a sequence number of a single entry.
pub type SequenceNumber = u64;

/// The largest representable sequence number (sequence numbers occupy 56 bits of a tag).
pub const MAX_SEQUENCE_NUMBER: SequenceNumber = (1 << 56) - 1;

/// Operation status, modeled after LevelDB's Status type.
#[derive(Clone, Debug)]
#[allow(dead_code)]
pub enum Status {
    OK,
    NotFound(String),
    Corruption(String),
    NotSupported(String),
    InvalidArgument(String),
    PermissionDenied(String),
    IOError(String),
    Unknown(String),
}

impl Display for Status {
    fn fmt(&self, f: &mut Formatter) -> result::Result<(), fmt::Error> {
        write!(f, "{}", self.description())
    }
}

impl Error for Status {
    fn description(&self) -> &str {
        // OK carries no message; every other variant carries its own description.
        match *self {
            Status::OK => "ok",
            Status::NotFound(ref msg) |
            Status::Corruption(ref msg) |
            Status::NotSupported(ref msg) |
            Status::InvalidArgument(ref msg) |
            Status::PermissionDenied(ref msg) |
            Status::IOError(ref msg) |
            Status::Unknown(ref msg) => msg,
        }
    }
}

/// LevelDB's result type
pub type Result<T> = result::Result<T, Status>;

/// Converts an `io::Result` into our `Result`, mapping well-known I/O error kinds
/// onto the corresponding Status variants.
pub fn from_io_result<T>(res: io::Result<T>) -> Result<T> {
    match res {
        Ok(v) => Ok(v),
        Err(ioerr) => {
            let msg = ioerr.description().to_string();

            let status = match ioerr.kind() {
                io::ErrorKind::NotFound => Status::NotFound(msg),
                io::ErrorKind::InvalidData => Status::Corruption(msg),
                io::ErrorKind::InvalidInput => Status::InvalidArgument(msg),
                io::ErrorKind::PermissionDenied => Status::PermissionDenied(msg),
                // Everything else is reported as a generic I/O error.
                _ => Status::IOError(msg),
            };

            Err(status)
        }
    }
}

/// Describes a file on disk.
#[derive(Clone, Debug, PartialEq)]
pub struct FileMetaData {
    pub allowed_seeks: isize,
    pub num: u64,
    pub size: u64,
    // these are in InternalKey format:
    pub smallest: Vec<u8>,
    pub largest: Vec<u8>,
}