Mercurial > lbo > hg > sstable
changeset 28:8fe35754a39d
Add missing files (forgot to track them before)
author | Lewin Bormann <lbo@spheniscida.de> |
---|---|
date | Tue, 03 Jan 2017 19:45:33 +0100 |
parents | d380295dba2f |
children | cf6c50c9a5e7 |
files | src/cmp.rs src/filter.rs src/filter_block.rs src/key_types.rs src/types.rs |
diffstat | 5 files changed, 852 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cmp.rs Tue Jan 03 19:45:33 2017 +0100 @@ -0,0 +1,161 @@ +use key_types::{self, LookupKey}; +use types; + +use std::cmp::Ordering; +use std::sync::Arc; + +/// Comparator trait, supporting types that can be nested (i.e., add additional functionality on +/// top of an inner comparator) +pub trait Cmp { + fn cmp(&self, &[u8], &[u8]) -> Ordering; + fn find_shortest_sep(&self, &[u8], &[u8]) -> Vec<u8>; + fn find_short_succ(&self, &[u8]) -> Vec<u8>; +} + +/// Lexical comparator. +#[derive(Clone)] +pub struct DefaultCmp; + +impl Cmp for DefaultCmp { + fn cmp(&self, a: &[u8], b: &[u8]) -> Ordering { + a.cmp(b) + } + + fn find_shortest_sep(&self, a: &[u8], b: &[u8]) -> Vec<u8> { + if a == b { + return a.to_vec(); + } + + let min = if a.len() < b.len() { a.len() } else { b.len() }; + let mut diff_at = 0; + + while diff_at < min && a[diff_at] == b[diff_at] { + diff_at += 1; + } + + while diff_at < min { + let diff = a[diff_at]; + if diff < 0xff && diff + 1 < b[diff_at] { + let mut sep = Vec::from(&a[0..diff_at + 1]); + sep[diff_at] += 1; + assert!(self.cmp(&sep, b) == Ordering::Less); + return sep; + } + + diff_at += 1; + } + return a.to_vec(); + } + + fn find_short_succ(&self, a: &[u8]) -> Vec<u8> { + let mut result = a.to_vec(); + for i in 0..a.len() { + if a[i] != 0xff { + result[i] += 1; + result.resize(i + 1, 0); + return result; + } + } + // Rare path + result.push(255); + return result; + } +} + +/// Same as memtable_key_cmp, but for InternalKeys. +#[derive(Clone)] +pub struct InternalKeyCmp(pub Arc<Box<Cmp>>); + +impl Cmp for InternalKeyCmp { + fn cmp(&self, a: &[u8], b: &[u8]) -> Ordering { + let (_, seqa, keya) = key_types::parse_internal_key(a); + let (_, seqb, keyb) = key_types::parse_internal_key(b); + + match self.0.cmp(keya, keyb) { + Ordering::Less => Ordering::Less, + Ordering::Greater => Ordering::Greater, + // reverse comparison! 
+ Ordering::Equal => seqb.cmp(&seqa), + } + } + + fn find_shortest_sep(&self, a: &[u8], b: &[u8]) -> Vec<u8> { + let (_, seqa, keya) = key_types::parse_internal_key(a); + let (_, _, keyb) = key_types::parse_internal_key(b); + + let sep: Vec<u8> = self.0.find_shortest_sep(keya, keyb); + + if sep.len() < keya.len() && self.0.cmp(keya, &sep) == Ordering::Less { + return LookupKey::new(&sep, types::MAX_SEQUENCE_NUMBER).internal_key().to_vec(); + } + + return LookupKey::new(&sep, seqa).internal_key().to_vec(); + } + + fn find_short_succ(&self, a: &[u8]) -> Vec<u8> { + let (_, seq, key) = key_types::parse_internal_key(a); + let succ: Vec<u8> = self.0.find_short_succ(key); + return LookupKey::new(&succ, seq).internal_key().to_vec(); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use key_types::LookupKey; + use types; + + use std::sync::Arc; + + #[test] + fn test_cmp_defaultcmp_shortest_sep() { + assert_eq!(DefaultCmp.find_shortest_sep("abcd".as_bytes(), "abcf".as_bytes()), + "abce".as_bytes()); + assert_eq!(DefaultCmp.find_shortest_sep("abc".as_bytes(), "acd".as_bytes()), + "abc".as_bytes()); + assert_eq!(DefaultCmp.find_shortest_sep("abcdefghi".as_bytes(), "abcffghi".as_bytes()), + "abce".as_bytes()); + assert_eq!(DefaultCmp.find_shortest_sep("a".as_bytes(), "a".as_bytes()), + "a".as_bytes()); + assert_eq!(DefaultCmp.find_shortest_sep("a".as_bytes(), "b".as_bytes()), + "a".as_bytes()); + assert_eq!(DefaultCmp.find_shortest_sep("abc".as_bytes(), "zzz".as_bytes()), + "b".as_bytes()); + assert_eq!(DefaultCmp.find_shortest_sep("".as_bytes(), "".as_bytes()), + "".as_bytes()); + } + + #[test] + fn test_cmp_defaultcmp_short_succ() { + assert_eq!(DefaultCmp.find_short_succ("abcd".as_bytes()), + "b".as_bytes()); + assert_eq!(DefaultCmp.find_short_succ("zzzz".as_bytes()), + "{".as_bytes()); + assert_eq!(DefaultCmp.find_short_succ(&[]), &[0xff]); + assert_eq!(DefaultCmp.find_short_succ(&[0xff, 0xff, 0xff]), + &[0xff, 0xff, 0xff, 0xff]); + } + + #[test] + fn 
test_cmp_internalkeycmp_shortest_sep() { + let cmp = InternalKeyCmp(Arc::new(Box::new(DefaultCmp))); + assert_eq!(cmp.find_shortest_sep(LookupKey::new("abcd".as_bytes(), 1).internal_key(), + LookupKey::new("abcf".as_bytes(), 2).internal_key()), + LookupKey::new("abce".as_bytes(), 1).internal_key()); + assert_eq!(cmp.find_shortest_sep(LookupKey::new("abc".as_bytes(), 1).internal_key(), + LookupKey::new("zzz".as_bytes(), 2).internal_key()), + LookupKey::new("b".as_bytes(), types::MAX_SEQUENCE_NUMBER).internal_key()); + assert_eq!(cmp.find_shortest_sep(LookupKey::new("abc".as_bytes(), 1).internal_key(), + LookupKey::new("acd".as_bytes(), 2).internal_key()), + LookupKey::new("abc".as_bytes(), 1).internal_key()); + assert_eq!(cmp.find_shortest_sep(LookupKey::new("abc".as_bytes(), 1).internal_key(), + LookupKey::new("abe".as_bytes(), 2).internal_key()), + LookupKey::new("abd".as_bytes(), 1).internal_key()); + assert_eq!(cmp.find_shortest_sep(LookupKey::new("".as_bytes(), 1).internal_key(), + LookupKey::new("".as_bytes(), 2).internal_key()), + LookupKey::new("".as_bytes(), 1).internal_key()); + assert_eq!(cmp.find_shortest_sep(LookupKey::new("abc".as_bytes(), 2).internal_key(), + LookupKey::new("abc".as_bytes(), 2).internal_key()), + LookupKey::new("abc".as_bytes(), 2).internal_key()); + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/filter.rs Tue Jan 03 19:45:33 2017 +0100 @@ -0,0 +1,283 @@ +#![allow(dead_code)] + +//! This module is yet to be used, but will speed up table lookups. +//! + +use std::sync::Arc; + +use integer_encoding::FixedInt; + +/// Encapsulates a filter algorithm allowing to search for keys more efficiently. +/// Usually, policies are used as a BoxedFilterPolicy (see below), so they +/// can be easily cloned and nested. +pub trait FilterPolicy { + /// Returns a string identifying this policy. + fn name(&self) -> &'static str; + /// Create a filter matching the given keys. + fn create_filter(&self, keys: &Vec<&[u8]>) -> Vec<u8>; + /// Check whether the given key may match the filter. + fn key_may_match(&self, key: &[u8], filter: &[u8]) -> bool; +} + +/// A boxed and refcounted filter policy (reference-counted because a Box with unsized content +/// couldn't be cloned otherwise) +pub type BoxedFilterPolicy = Arc<Box<FilterPolicy>>; + +/// Used for tables that don't have filter blocks but need a type parameter. +#[derive(Clone)] +pub struct NoFilterPolicy; + +impl NoFilterPolicy { + pub fn new() -> BoxedFilterPolicy { + Arc::new(Box::new(NoFilterPolicy)) + } +} + +impl FilterPolicy for NoFilterPolicy { + fn name(&self) -> &'static str { + "_" + } + fn create_filter(&self, _: &Vec<&[u8]>) -> Vec<u8> { + vec![] + } + fn key_may_match(&self, _: &[u8], _: &[u8]) -> bool { + true + } +} + +const BLOOM_SEED: u32 = 0xbc9f1d34; + +/// A filter policy using a bloom filter internally. +#[derive(Clone)] +pub struct BloomPolicy { + bits_per_key: u32, + k: u32, +} + +/// Beware the magic numbers... +impl BloomPolicy { + /// Returns a new boxed BloomPolicy. + pub fn new(bits_per_key: u32) -> BoxedFilterPolicy { + Arc::new(Box::new(BloomPolicy::new_unwrapped(bits_per_key))) + } + + /// Returns a new BloomPolicy with the given parameter. 
+ fn new_unwrapped(bits_per_key: u32) -> BloomPolicy { + let mut k = (bits_per_key as f32 * 0.69) as u32; + + if k < 1 { + k = 1; + } else if k > 30 { + k = 30; + } + + BloomPolicy { + bits_per_key: bits_per_key, + k: k, + } + } + + fn bloom_hash(&self, data: &[u8]) -> u32 { + let m: u32 = 0xc6a4a793; + let r: u32 = 24; + + let mut ix = 0; + let limit = data.len(); + + let mut h: u32 = BLOOM_SEED ^ (limit as u64 * m as u64) as u32; + + while ix + 4 <= limit { + let w = u32::decode_fixed(&data[ix..ix + 4]); + ix += 4; + + h = (h as u64 + w as u64) as u32; + h = (h as u64 * m as u64) as u32; + h ^= h >> 16; + } + + // Process left-over bytes + assert!(limit - ix < 4); + + if limit - ix > 0 { + let mut i = 0; + + for b in data[ix..].iter() { + h += (*b as u32) << (8 * i); + i += 1; + } + + h = (h as u64 * m as u64) as u32; + h ^= h >> r; + } + h + } +} + +impl FilterPolicy for BloomPolicy { + fn name(&self) -> &'static str { + "leveldb.BuiltinBloomFilter2" + } + fn create_filter(&self, keys: &Vec<&[u8]>) -> Vec<u8> { + let filter_bits = keys.len() * self.bits_per_key as usize; + let mut filter = Vec::new(); + + if filter_bits < 64 { + // Preallocate, then resize + filter.reserve(8 + 1); + filter.resize(8, 0 as u8); + } else { + // Preallocate, then resize + filter.reserve(1 + ((filter_bits + 7) / 8)); + filter.resize((filter_bits + 7) / 8, 0); + } + + let adj_filter_bits = (filter.len() * 8) as u32; + + // Encode k at the end of the filter. 
+ filter.push(self.k as u8); + + for key in keys { + let mut h = self.bloom_hash(key); + let delta = (h >> 17) | (h << 15); + + for _ in 0..self.k { + let bitpos = (h % adj_filter_bits) as usize; + filter[bitpos / 8] |= 1 << (bitpos % 8); + h = (h as u64 + delta as u64) as u32; + } + } + + filter + } + fn key_may_match(&self, key: &[u8], filter: &[u8]) -> bool { + if filter.len() == 0 { + return true; + } + + let bits = (filter.len() - 1) as u32 * 8; + let k = filter[filter.len() - 1]; + let filter_adj = &filter[0..filter.len() - 1]; + + if k > 30 { + return true; + } + + let mut h = self.bloom_hash(key); + let delta = (h >> 17) | (h << 15); + for _ in 0..k { + let bitpos = (h % bits) as usize; + if (filter_adj[bitpos / 8] & (1 << (bitpos % 8))) == 0 { + return false; + } + h = (h as u64 + delta as u64) as u32; + } + true + } +} + +/// A filter policy wrapping another policy; extracting the user key from internal keys for all +/// operations. +/// A User Key is u8*. +/// An Internal Key is u8* u8{8} (where the second part encodes a tag and a sequence number). 
+#[derive(Clone)] +pub struct InternalFilterPolicy { + internal: BoxedFilterPolicy, +} + +impl InternalFilterPolicy { + pub fn new(inner: BoxedFilterPolicy) -> BoxedFilterPolicy { + Arc::new(Box::new(InternalFilterPolicy { internal: inner })) + } +} + +impl FilterPolicy for InternalFilterPolicy { + fn name(&self) -> &'static str { + self.internal.name() + } + + fn create_filter(&self, keys: &Vec<&[u8]>) -> Vec<u8> { + let mut mod_keys = keys.clone(); + let mut i = 0; + + for key in keys { + mod_keys[i] = &key[0..key.len() - 8]; + i += 1; + } + self.internal.create_filter(&mod_keys) + } + + fn key_may_match(&self, key: &[u8], filter: &[u8]) -> bool { + self.internal.key_may_match(&key[0..key.len() - 8], filter) + } +} + + +#[cfg(test)] +mod tests { + use super::*; + use key_types::LookupKey; + + const _BITS_PER_KEY: u32 = 12; + + fn input_data() -> Vec<&'static [u8]> { + let data = vec!["abc123def456".as_bytes(), + "xxx111xxx222".as_bytes(), + "ab00cd00ab".as_bytes(), + "908070605040302010".as_bytes()]; + data + } + + /// Creates a filter using the keys from input_data(). + fn create_filter() -> Vec<u8> { + let fpol = BloomPolicy::new(_BITS_PER_KEY); + let filter = fpol.create_filter(&input_data()); + + assert_eq!(filter, vec![194, 148, 129, 140, 192, 196, 132, 164, 8]); + filter + } + + /// Creates a filter using the keys from input_data() but converted to InternalKey format. 
+ fn create_internalkey_filter() -> Vec<u8> { + let fpol = InternalFilterPolicy::new(BloomPolicy::new(_BITS_PER_KEY)); + let input: Vec<Vec<u8>> = input_data() + .into_iter() + .map(|k| LookupKey::new(k, 123).internal_key().to_vec()) + .collect(); + let input_ = input.iter().map(|k| k.as_slice()).collect(); + + let filter = fpol.create_filter(&input_); + + filter + } + + #[test] + fn test_filter_bloom() { + let f = create_filter(); + let fp = BloomPolicy::new(_BITS_PER_KEY); + + for k in input_data().iter() { + assert!(fp.key_may_match(k, &f)); + } + } + + /// This test verifies that InternalFilterPolicy works correctly. + #[test] + fn test_filter_internal_keys_identical() { + assert_eq!(create_filter(), create_internalkey_filter()); + } + + #[test] + fn test_filter_bloom_hash() { + let d1 = vec![0x62]; + let d2 = vec![0xc3, 0x97]; + let d3 = vec![0xe2, 0x99, 0xa5]; + let d4 = vec![0xe1, 0x80, 0xb9, 0x32]; + + let fp = BloomPolicy::new_unwrapped(_BITS_PER_KEY); + + assert_eq!(fp.bloom_hash(&d1), 0xef1345c4); + assert_eq!(fp.bloom_hash(&d2), 0x5b663814); + assert_eq!(fp.bloom_hash(&d3), 0x323c078f); + assert_eq!(fp.bloom_hash(&d4), 0xed21633a); + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/filter_block.rs Tue Jan 03 19:45:33 2017 +0100 @@ -0,0 +1,233 @@ +#![allow(dead_code)] + +//! parts of this module are not yet used (the reading part). + +use block::BlockContents; +use filter::BoxedFilterPolicy; + +use std::rc::Rc; + +use integer_encoding::FixedInt; + +const FILTER_BASE_LOG2: u32 = 11; + +#[allow(dead_code)] +const FILTER_BASE: u32 = 1 << FILTER_BASE_LOG2; // 2 KiB + +/// For a given byte offset, returns the index of the filter that includes the key at that offset. +#[inline] +fn get_filter_index(offset: usize, base_lg2: u32) -> u32 { + // divide by 2048 + (offset >> base_lg2 as usize) as u32 +} + +/// A Filter Block is built like this: +/// +/// [filter0, filter1, filter2, ..., offset of filter0, offset of filter1, ..., offset of offsets +/// array, log2 of FILTER_BASE] +/// +/// where offsets are 4 bytes, offset of offsets is 4 bytes, and log2 of FILTER_BASE is 1 byte. +/// Two consecutive filter offsets may be the same. 
+pub struct FilterBlockBuilder<'a> { + policy: BoxedFilterPolicy, + // filters, concatenated + filters: Vec<u8>, + filter_offsets: Vec<usize>, + + // Reset on every start_block() + keys: Vec<&'a [u8]>, +} + +impl<'a> FilterBlockBuilder<'a> { + pub fn new(fp: BoxedFilterPolicy) -> FilterBlockBuilder<'a> { + FilterBlockBuilder { + policy: fp, + // some pre-allocation + filters: Vec::with_capacity(1024 * 40), + filter_offsets: Vec::with_capacity(1024), + keys: Vec::with_capacity(1024), + } + } + + pub fn filter_name(&self) -> &'static str { + self.policy.name() + } + + pub fn add_key(&mut self, key: &'a [u8]) { + self.keys.push(key); + } + + pub fn start_block(&mut self, offset: usize) { + let filter_ix = get_filter_index(offset, FILTER_BASE_LOG2); + assert!(filter_ix >= self.filter_offsets.len() as u32); + + while filter_ix > self.filter_offsets.len() as u32 { + self.generate_filter(); + } + } + + fn generate_filter(&mut self) { + self.filter_offsets.push(self.filters.len()); + + if self.keys.is_empty() { + return; + } + + let filter = self.policy.create_filter(&self.keys); + self.filters.extend_from_slice(&filter); + + self.keys.clear(); + } + + pub fn finish(mut self) -> Vec<u8> { + if !self.keys.is_empty() { + self.generate_filter(); + } + + let mut result = self.filters; + let offsets_offset = result.len(); + let mut ix = result.len(); + + result.resize(ix + 4 * self.filter_offsets.len() + 5, 0); + + // Put filter offsets at the end + for offset in self.filter_offsets.into_iter() { + (offset as u32).encode_fixed(&mut result[ix..ix + 4]); + ix += 4; + } + + (offsets_offset as u32).encode_fixed(&mut result[ix..ix + 4]); + ix += 4; + + result[ix] = FILTER_BASE_LOG2 as u8; + + result + } +} + +#[derive(Clone)] +pub struct FilterBlockReader { + policy: BoxedFilterPolicy, + block: Rc<BlockContents>, + + offsets_offset: usize, + filter_base_lg2: u32, +} + +impl FilterBlockReader { + pub fn new_owned(pol: BoxedFilterPolicy, data: Vec<u8>) -> FilterBlockReader { + 
FilterBlockReader::new(pol, Rc::new(data)) + } + + pub fn new(pol: BoxedFilterPolicy, data: Rc<Vec<u8>>) -> FilterBlockReader { + assert!(data.len() >= 5); + + let fbase = data[data.len() - 1] as u32; + let offset = u32::decode_fixed(&data[data.len() - 5..data.len() - 1]) as usize; + + FilterBlockReader { + policy: pol, + block: data, + filter_base_lg2: fbase, + offsets_offset: offset, + } + } + + /// Returns number of filters + pub fn num(&self) -> u32 { + ((self.block.len() - self.offsets_offset - 5) / 4) as u32 + } + + /// Returns the offset of the offset with index i. + fn offset_of(&self, i: u32) -> usize { + let offset_offset = self.offsets_offset + 4 * i as usize; + u32::decode_fixed(&self.block[offset_offset..offset_offset + 4]) as usize + } + + /// blk_offset is the offset of the block containing key. Returns whether the key matches the + /// filter for the block at blk_offset. + pub fn key_may_match(&self, blk_offset: usize, key: &[u8]) -> bool { + if get_filter_index(blk_offset, self.filter_base_lg2) > self.num() { + return true; + } + + let filter_begin = self.offset_of(get_filter_index(blk_offset, self.filter_base_lg2)); + let filter_end = self.offset_of(get_filter_index(blk_offset, self.filter_base_lg2) + 1); + + assert!(filter_begin < filter_end); + assert!(filter_end <= self.offsets_offset); + + self.policy.key_may_match(key, &self.block[filter_begin..filter_end]) + } +} + +#[cfg(test)] +mod tests { + use filter::BloomPolicy; + use super::*; + use super::FILTER_BASE_LOG2; + use super::get_filter_index; + + #[test] + fn test_filter_index() { + assert_eq!(get_filter_index(3777, FILTER_BASE_LOG2), 1); + assert_eq!(get_filter_index(10000, FILTER_BASE_LOG2), 4); + } + + fn get_keys() -> Vec<&'static [u8]> { + vec!["abcd".as_bytes(), "efgh".as_bytes(), "ijkl".as_bytes(), "mnopqrstuvwxyz".as_bytes()] + } + + fn produce_filter_block() -> Vec<u8> { + let keys = get_keys(); + let mut bld = FilterBlockBuilder::new(BloomPolicy::new(32)); + + bld.start_block(0); 
+ + for k in keys.iter() { + bld.add_key(k); + } + + // second block + bld.start_block(5000); + + for k in keys.iter() { + bld.add_key(k); + } + + bld.finish() + } + + #[test] + fn test_filter_block_builder() { + let result = produce_filter_block(); + // 2 blocks of 4 filters of 4 bytes plus 1B for `k`; plus three filter offsets (because of + // the block offsets of 0 and 5000); plus footer + assert_eq!(result.len(), 2 * (get_keys().len() * 4 + 1) + (3 * 4) + 5); + assert_eq!(result, + vec![234, 195, 25, 155, 61, 141, 173, 140, 221, 28, 222, 92, 220, 112, 234, + 227, 22, 234, 195, 25, 155, 61, 141, 173, 140, 221, 28, 222, 92, 220, + 112, 234, 227, 22, 0, 0, 0, 0, 17, 0, 0, 0, 17, 0, 0, 0, 34, 0, 0, 0, 11]); + } + + #[test] + fn test_filter_block_build_read() { + let result = produce_filter_block(); + let reader = FilterBlockReader::new_owned(BloomPolicy::new(32), result); + + assert_eq!(reader.offset_of(get_filter_index(5121, FILTER_BASE_LOG2)), + 17); // third block in third filter + + let unknown_keys = vec!["xsb".as_bytes(), "9sad".as_bytes(), "assssaaaass".as_bytes()]; + + for block_offset in vec![0, 1024, 5000, 6025].into_iter() { + for key in get_keys().iter() { + assert!(reader.key_may_match(block_offset, key), + format!("{} {:?} ", block_offset, key)); + } + for key in unknown_keys.iter() { + assert!(!reader.key_may_match(block_offset, key)); + } + } + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/key_types.rs Tue Jan 03 19:45:33 2017 +0100 @@ -0,0 +1,89 @@ +use options::{CompressionType, int_to_compressiontype}; +use types::{ValueType, SequenceNumber}; + +use integer_encoding::{FixedInt, VarInt}; + +// The following typedefs are used to distinguish between the different key formats used internally +// by different modules. + +// TODO: At some point, convert those into actual types with conversions between them. That's a lot +// of boilerplate, but increases type safety. + +/// A UserKey is the actual key supplied by the calling application, without any internal +/// decorations. +pub type UserKey<'a> = &'a [u8]; + +/// An InternalKey consists of [key, tag], and is used as item type for Table iterators. +pub type InternalKey<'a> = &'a [u8]; + +/// A LookupKey is the first part of a memtable key, consisting of [keylen: varint32, key: *u8, +/// tag: u64] +/// keylen is the length of key plus 8 (for the tag; this for LevelDB compatibility) +#[derive(Clone, Debug)] +pub struct LookupKey { + key: Vec<u8>, + key_offset: usize, +} + +impl LookupKey { + #[allow(unused_assignments)] + pub fn new(k: &[u8], s: SequenceNumber) -> LookupKey { + let mut key = Vec::with_capacity(k.len() + k.len().required_space() + + <u64 as FixedInt>::required_space()); + let internal_keylen = k.len() + 8; + let mut i = 0; + + key.reserve(internal_keylen.required_space() + internal_keylen); + + key.resize(internal_keylen.required_space(), 0); + i += internal_keylen.encode_var(&mut key[i..]); + + key.extend_from_slice(k); + i += k.len(); + + key.resize(i + <u64 as FixedInt>::required_space(), 0); + (s << 8 | ValueType::TypeValue as u64).encode_fixed(&mut key[i..]); + i += <u64 as FixedInt>::required_space(); + + LookupKey { + key: key, + key_offset: k.len().required_space(), + } + } + + // Returns only key + #[allow(dead_code)] + pub fn user_key<'a>(&'a self) -> UserKey<'a> { + &self.key[self.key_offset..self.key.len() - 8] + } + + // 
Returns key+tag + pub fn internal_key<'a>(&'a self) -> InternalKey<'a> { + &self.key[self.key_offset..] + } +} + +/// Parses a tag into (type, sequence number) +pub fn parse_tag(tag: u64) -> (ValueType, u64) { + let seq = tag >> 8; + let typ = tag & 0xff; + + match typ { + 0 => (ValueType::TypeDeletion, seq), + 1 => (ValueType::TypeValue, seq), + _ => (ValueType::TypeValue, seq), + } +} + +/// Parse a key in InternalKey format. +pub fn parse_internal_key<'a>(ikey: InternalKey<'a>) -> (CompressionType, u64, UserKey<'a>) { + assert!(ikey.len() >= 8); + + let (ctype, seq) = parse_tag(FixedInt::decode_fixed(&ikey[ikey.len() - 8..])); + let ctype = int_to_compressiontype(ctype as u32).unwrap_or(CompressionType::CompressionNone); + + return (ctype, seq, &ikey[0..ikey.len() - 8]); +} + +#[cfg(test)] +mod tests {}
// src/types.rs
#![allow(dead_code)]

//! A collection of fundamental and/or simple types used by other modules

use std::error::Error;
use std::fmt::{self, Display, Formatter};
use std::io;
use std::result;

/// Distinguishes live entries (values) from tombstones (deletions) in a key tag.
#[derive(Debug, PartialOrd, PartialEq)]
pub enum ValueType {
    TypeDeletion = 0,
    TypeValue = 1,
}

/// Represents a sequence number of a single entry.
pub type SequenceNumber = u64;

/// The largest representable sequence number (sequence numbers occupy 56 bits of a tag).
pub const MAX_SEQUENCE_NUMBER: SequenceNumber = (1 << 56) - 1;

/// Operation status, modeled after LevelDB's Status type.
#[derive(Clone, Debug)]
#[allow(dead_code)]
pub enum Status {
    OK,
    NotFound(String),
    Corruption(String),
    NotSupported(String),
    InvalidArgument(String),
    PermissionDenied(String),
    IOError(String),
    Unknown(String),
}

impl Display for Status {
    fn fmt(&self, f: &mut Formatter) -> result::Result<(), fmt::Error> {
        write!(f, "{}", self.description())
    }
}

impl Error for Status {
    fn description(&self) -> &str {
        // OK carries no message; every other variant carries its own description.
        match *self {
            Status::OK => "ok",
            Status::NotFound(ref msg) |
            Status::Corruption(ref msg) |
            Status::NotSupported(ref msg) |
            Status::InvalidArgument(ref msg) |
            Status::PermissionDenied(ref msg) |
            Status::IOError(ref msg) |
            Status::Unknown(ref msg) => msg,
        }
    }
}

/// LevelDB's result type
pub type Result<T> = result::Result<T, Status>;

/// Converts an `io::Result` into our `Result`, mapping well-known I/O error kinds
/// onto the corresponding Status variants.
pub fn from_io_result<T>(res: io::Result<T>) -> Result<T> {
    match res {
        Ok(v) => Ok(v),
        Err(ioerr) => {
            let msg = ioerr.description().to_string();

            let status = match ioerr.kind() {
                io::ErrorKind::NotFound => Status::NotFound(msg),
                io::ErrorKind::InvalidData => Status::Corruption(msg),
                io::ErrorKind::InvalidInput => Status::InvalidArgument(msg),
                io::ErrorKind::PermissionDenied => Status::PermissionDenied(msg),
                // Everything else is reported as a generic I/O error.
                _ => Status::IOError(msg),
            };

            Err(status)
        }
    }
}

/// Describes a file on disk.
#[derive(Clone, Debug, PartialEq)]
pub struct FileMetaData {
    pub allowed_seeks: isize,
    pub num: u64,
    pub size: u64,
    // these are in InternalKey format:
    pub smallest: Vec<u8>,
    pub largest: Vec<u8>,
}