Mercurial > lbo > hg > leveldb-rs
changeset 93:db84cb244062
Implement "strongly" typed keys.
LevelDB uses different key formats in different modules; by introducing
typedefs, we achieve at least better documentation about which key types are
used where.
author | Lewin Bormann <lbo@spheniscida.de> |
---|---|
date | Sun, 11 Sep 2016 10:37:19 +0200 |
parents | 05da3cfbbcbc |
children | b09acb0e365e 16043131cbd5 |
files | src/key_types.rs src/lib.rs src/memtable.rs src/table_builder.rs |
diffstat | 4 files changed, 175 insertions(+), 148 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/key_types.rs Sun Sep 11 10:37:19 2016 +0200 @@ -0,0 +1,164 @@ + +use types::{ValueType, SequenceNumber}; + +use integer_encoding::{FixedInt, VarInt}; + +// The following typedefs are used to distinguish between the different key formats used internally +// by different modules. + +/// A MemtableKey consists of the following elements: [keylen, key, tag, (vallen, value)] where keylen is a varint32 +/// encoding the length of key+tag. tag is a fixed 8 bytes segment encoding the entry type and the +/// sequence number. vallen and value are optional components at the end. +pub type MemtableKey<'a> = &'a [u8]; + +/// A UserKey is the actual key supplied by the calling application, without any internal +/// decorations. +pub type UserKey<'a> = &'a [u8]; + +/// An InternalKey consists of [key, tag], so it's basically a MemtableKey without the initial +/// length specification. This type is used as item type of MemtableIterator, and as the key +/// type of tables. +pub type InternalKey<'a> = &'a [u8]; + +/// A LookupKey is the first part of a memtable key, consisting of [keylen: varint32, key: *u8, +/// tag: u64] +/// keylen is the length of key plus 8 (for the tag; this for LevelDB compatibility) +pub struct LookupKey { + key: Vec<u8>, + key_offset: usize, +} + +impl LookupKey { + #[allow(unused_assignments)] + pub fn new(k: &[u8], s: SequenceNumber) -> LookupKey { + let mut key = Vec::with_capacity(k.len() + k.len().required_space() + + <u64 as FixedInt>::required_space()); + let internal_keylen = k.len() + 8; + let mut i = 0; + + key.reserve(internal_keylen.required_space() + internal_keylen); + + key.resize(internal_keylen.required_space(), 0); + i += internal_keylen.encode_var(&mut key[i..]); + + key.extend_from_slice(k); + i += k.len(); + + key.resize(i + <u64 as FixedInt>::required_space(), 0); + (s << 8 | ValueType::TypeValue as u64).encode_fixed(&mut key[i..]); + i += <u64 as FixedInt>::required_space(); + + LookupKey { + key: key, + key_offset: k.len().required_space(), + } + } + + // Returns full key + pub fn memtable_key<'a>(&'a self) -> MemtableKey<'a> { + self.key.as_slice() + } + + // Returns only key + pub fn user_key<'a>(&'a self) -> UserKey<'a> { + &self.key[self.key_offset..self.key.len() - 8] + } + + // Returns key+tag + pub fn internal_key<'a>(&'a self) -> InternalKey<'a> { + &self.key[self.key_offset..] + } +} + +/// Parses a tag into (type, sequence number) +pub fn parse_tag(tag: u64) -> (u8, u64) { + let seq = tag >> 8; + let typ = tag & 0xff; + (typ as u8, seq) +} + +/// A memtable key is a bytestring containing (keylen, key, tag, vallen, val). This function +/// builds such a key. It's called key because the underlying Map implementation will only be +/// concerned with keys; the value field is not used (instead, the value is encoded in the key, +/// and for lookups we just search for the next bigger entry). +/// keylen is the length of key + 8 (to account for the tag) +pub fn build_memtable_key(key: &[u8], value: &[u8], t: ValueType, seq: SequenceNumber) -> Vec<u8> { + // We are using the original LevelDB approach here -- encoding key and value into the + // key that is used for insertion into the SkipMap. + // The format is: [key_size: varint32, key_data: [u8], flags: u64, value_size: varint32, + // value_data: [u8]] + + let mut i = 0; + let keysize = key.len() + 8; + let valsize = value.len(); + + let mut buf = Vec::with_capacity(keysize + valsize + keysize.required_space() + + valsize.required_space() + + <u64 as FixedInt>::required_space()); + buf.resize(keysize.required_space(), 0); + i += keysize.encode_var(&mut buf[i..]); + + buf.extend(key.iter()); + i += key.len(); + + let flag = (t as u64) | (seq << 8); + buf.resize(i + <u64 as FixedInt>::required_space(), 0); + flag.encode_fixed(&mut buf[i..]); + i += <u64 as FixedInt>::required_space(); + + buf.resize(i + valsize.required_space(), 0); + i += valsize.encode_var(&mut buf[i..]); + + buf.extend(value.iter()); + i += value.len(); + + assert_eq!(i, buf.len()); + buf +} + +/// Parses a memtable key and returns (keylen, key offset, tag, vallen, val offset). +/// If the key only contains (keylen, key, tag), the vallen and val offset return values will be +/// meaningless. +pub fn parse_memtable_key<'a>(mkey: MemtableKey<'a>) -> (usize, usize, u64, usize, usize) { + let (keylen, mut i): (usize, usize) = VarInt::decode_var(&mkey); + + let keyoff = i; + i += keylen - 8; + + if mkey.len() > i { + let tag = FixedInt::decode_fixed(&mkey[i..i + 8]); + i += 8; + + let (vallen, j): (usize, usize) = VarInt::decode_var(&mkey[i..]); + + i += j; + + let valoff = i; + + return (keylen - 8, keyoff, tag, vallen, valoff); + } else { + return (keylen - 8, keyoff, 0, 0, 0); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_memtable_lookupkey() { + use integer_encoding::VarInt; + + let lk1 = LookupKey::new("abcde".as_bytes(), 123); + let lk2 = LookupKey::new("xyabxy".as_bytes(), 97); + + // Assert correct allocation strategy + assert_eq!(lk1.key.len(), 14); + assert_eq!(lk1.key.capacity(), 14); + + assert_eq!(lk1.user_key(), "abcde".as_bytes()); + assert_eq!(u32::decode_var(lk1.memtable_key()), (13, 1)); + assert_eq!(lk2.internal_key(), + vec![120, 121, 97, 98, 120, 121, 1, 97, 0, 0, 0, 0, 0, 0].as_slice()); + } +}
--- a/src/lib.rs Sat Sep 03 21:26:35 2016 +0200 +++ b/src/lib.rs Sun Sep 11 10:37:19 2016 +0200 @@ -12,6 +12,7 @@ mod env; mod filter; mod filter_block; +mod key_types; mod log; mod memtable; mod merging_iter;
--- a/src/memtable.rs Sat Sep 03 21:26:35 2016 +0200 +++ b/src/memtable.rs Sun Sep 11 10:37:19 2016 +0200 @@ -1,10 +1,10 @@ use std::cmp::Ordering; + +use key_types::{LookupKey, UserKey, InternalKey, parse_memtable_key, build_memtable_key, parse_tag}; use types::{Comparator, StandardComparator}; use types::{ValueType, SequenceNumber, Status, LdbIterator}; use skipmap::{SkipMap, SkipMapIter}; -use integer_encoding::{FixedInt, VarInt}; - /// An internal comparator wrapping a user-supplied comparator. This comparator is used to compare /// memtable keys, which contain length prefixes and a sequence number. /// The ordering is determined by asking the wrapped comparator; ties are broken by *reverse* @@ -39,128 +39,6 @@ } } -/// A LookupKey is the first part of a memtable key, consisting of [keylen: varint32, key: *u8, -/// tag: u64] -/// keylen is the length of key plus 8 (for the tag; this for LevelDB compatibility) -pub struct LookupKey { - key: Vec<u8>, - key_offset: usize, -} - -impl LookupKey { - #[allow(unused_assignments)] - fn new(k: &[u8], s: SequenceNumber) -> LookupKey { - let mut key = Vec::with_capacity(k.len() + k.len().required_space() + - <u64 as FixedInt>::required_space()); - let internal_keylen = k.len() + 8; - let mut i = 0; - - key.reserve(internal_keylen.required_space() + internal_keylen); - - key.resize(internal_keylen.required_space(), 0); - i += internal_keylen.encode_var(&mut key[i..]); - - key.extend_from_slice(k); - i += k.len(); - - key.resize(i + <u64 as FixedInt>::required_space(), 0); - (s << 8 | ValueType::TypeValue as u64).encode_fixed(&mut key[i..]); - i += <u64 as FixedInt>::required_space(); - - LookupKey { - key: key, - key_offset: k.len().required_space(), - } - } - - // Returns full key - fn memtable_key<'a>(&'a self) -> &'a [u8] { - self.key.as_slice() - } - - // Returns only key - fn user_key<'a>(&'a self) -> &'a [u8] { - &self.key[self.key_offset..self.key.len() - 8] - } - - // Returns key+tag - fn internal_key<'a>(&'a self) -> &'a [u8] { - &self.key[self.key_offset..] - } -} - -/// Parses a tag into (type, sequence number) -fn parse_tag(tag: u64) -> (u8, u64) { - let seq = tag >> 8; - let typ = tag & 0xff; - (typ as u8, seq) -} - -/// A memtable key is a bytestring containing (keylen, key, tag, vallen, val). This function -/// builds such a key. It's called key because the underlying Map implementation will only be -/// concerned with keys; the value field is not used (instead, the value is encoded in the key, -/// and for lookups we just search for the next bigger entry). -/// keylen is the length of key + 8 (to account for the tag) -fn build_memtable_key(key: &[u8], value: &[u8], t: ValueType, seq: SequenceNumber) -> Vec<u8> { - // We are using the original LevelDB approach here -- encoding key and value into the - // key that is used for insertion into the SkipMap. - // The format is: [key_size: varint32, key_data: [u8], flags: u64, value_size: varint32, - // value_data: [u8]] - - let mut i = 0; - let keysize = key.len() + 8; - let valsize = value.len(); - - let mut buf = Vec::with_capacity(keysize + valsize + keysize.required_space() + - valsize.required_space() + - <u64 as FixedInt>::required_space()); - buf.resize(keysize.required_space(), 0); - i += keysize.encode_var(&mut buf[i..]); - - buf.extend(key.iter()); - i += key.len(); - - let flag = (t as u64) | (seq << 8); - buf.resize(i + <u64 as FixedInt>::required_space(), 0); - flag.encode_fixed(&mut buf[i..]); - i += <u64 as FixedInt>::required_space(); - - buf.resize(i + valsize.required_space(), 0); - i += valsize.encode_var(&mut buf[i..]); - - buf.extend(value.iter()); - i += value.len(); - - assert_eq!(i, buf.len()); - buf -} - -/// Parses a memtable key and returns (keylen, key offset, tag, vallen, val offset). -/// If the key only contains (keylen, key, tag), the vallen and val offset return values will be -/// meaningless. -fn parse_memtable_key(mkey: &[u8]) -> (usize, usize, u64, usize, usize) { - let (keylen, mut i): (usize, usize) = VarInt::decode_var(&mkey); - - let keyoff = i; - i += keylen - 8; - - if mkey.len() > i { - let tag = FixedInt::decode_fixed(&mkey[i..i + 8]); - i += 8; - - let (vallen, j): (usize, usize) = VarInt::decode_var(&mkey[i..]); - - i += j; - - let valoff = i; - - return (keylen - 8, keyoff, tag, vallen, valoff); - } else { - return (keylen - 8, keyoff, 0, 0, 0); - } -} - - /// Provides Insert/Get/Iterate, based on the SkipMap implementation. pub struct MemTable<C: Comparator> { map: SkipMap<MemtableKeyComparator<C>>, @@ -184,7 +62,7 @@ self.map.approx_memory() } - pub fn add(&mut self, seq: SequenceNumber, t: ValueType, key: &[u8], value: &[u8]) { + pub fn add<'a>(&mut self, seq: SequenceNumber, t: ValueType, key: UserKey<'a>, value: &[u8]) { self.map.insert(build_memtable_key(key, value, t, seq), Vec::new()) } @@ -226,7 +104,7 @@ } impl<'a, C: 'a + Comparator> Iterator for MemtableIterator<'a, C> { - type Item = (&'a [u8], &'a [u8]); + type Item = (InternalKey<'a>, &'a [u8]); fn next(&mut self) -> Option<Self::Item> { loop { @@ -296,7 +174,7 @@ #[allow(unused_variables)] mod tests { use super::*; - use super::{parse_memtable_key, parse_tag}; + use key_types::*; use types::*; fn get_memtable() -> MemTable<StandardComparator> { @@ -320,23 +198,6 @@ } #[test] - fn test_memtable_lookupkey() { - use integer_encoding::VarInt; - - let lk1 = LookupKey::new("abcde".as_bytes(), 123); - let lk2 = LookupKey::new("xyabxy".as_bytes(), 97); - - // Assert correct allocation strategy - assert_eq!(lk1.key.len(), 14); - assert_eq!(lk1.key.capacity(), 14); - - assert_eq!(lk1.user_key(), "abcde".as_bytes()); - assert_eq!(u32::decode_var(lk1.memtable_key()), (13, 1)); - assert_eq!(lk2.internal_key(), - vec![120, 121, 97, 98, 120, 121, 1, 97, 0, 0, 0, 0, 0, 0].as_slice()); - } - - #[test] fn test_memtable_add() { let mut mt = MemTable::new(); mt.add(123,
--- a/src/table_builder.rs Sat Sep 03 21:26:35 2016 +0200 +++ b/src/table_builder.rs Sun Sep 11 10:37:19 2016 +0200 @@ -1,9 +1,10 @@ use block::{BlockBuilder, BlockContents}; +use blockhandle::BlockHandle; use filter::{FilterPolicy, NoFilterPolicy}; use filter_block::FilterBlockBuilder; +use key_types::InternalKey; use options::{CompressionType, Options}; use types::Comparator; -use blockhandle::BlockHandle; use std::io::Write; use std::cmp::Ordering; @@ -17,7 +18,7 @@ pub const MAGIC_FOOTER_NUMBER: u64 = 0xdb4775248b80fb57; pub const MAGIC_FOOTER_ENCODED: [u8; 8] = [0x57, 0xfb, 0x80, 0x8b, 0x24, 0x75, 0x47, 0xdb]; -fn find_shortest_sep<C: Comparator>(c: &C, lo: &[u8], hi: &[u8]) -> Vec<u8> { +fn find_shortest_sep<'a, C: Comparator>(c: &C, lo: InternalKey<'a>, hi: InternalKey<'a>) -> Vec<u8> { let min; if lo.len() < hi.len() { @@ -151,7 +152,7 @@ self.num_entries } - pub fn add(&mut self, key: &'a [u8], val: &[u8]) { + pub fn add(&mut self, key: InternalKey<'a>, val: &[u8]) { assert!(self.data_block.is_some()); assert!(self.num_entries == 0 || self.cmp.cmp(&self.prev_block_last_key, key) == Ordering::Less); @@ -173,7 +174,7 @@ /// Writes an index entry for the current data_block where `next_key` is the first key of the /// next block. - fn write_data_block(&mut self, next_key: &[u8]) { + fn write_data_block<'b>(&mut self, next_key: InternalKey<'b>) { assert!(self.data_block.is_some()); let block = self.data_block.take().unwrap();