Mercurial > lbo > hg > sstable
changeset 33:4e7b9fcdbc88
Use a more efficient TableReader::get() implementation.
The old implementation was simply based on iterators and seeking; the new one
is "native" in that it implements the details itself, using the index blocks
for a rough seek, the filter blocks for skipping irrelevant blocks, and linear
search within a block for the key itself.
This is a code rebase from leveldb.
author | Lewin Bormann <lbo@spheniscida.de> |
---|---|
date | Sun, 26 Feb 2017 09:28:00 +0100 |
parents | f4caa5af5b93 |
children | 5449b2eb7265 |
files | src/cmp.rs src/table_reader.rs |
diffstat | 2 files changed, 60 insertions(+), 9 deletions(-) [+] |
line wrap: on
line diff
--- a/src/cmp.rs Tue Jan 03 20:05:45 2017 +0100 +++ b/src/cmp.rs Sun Feb 26 09:28:00 2017 +0100 @@ -3,9 +3,18 @@ /// Comparator trait, supporting types that can be nested (i.e., add additional functionality on /// top of an inner comparator) pub trait Cmp { + /// Compare to byte strings, bytewise. fn cmp(&self, &[u8], &[u8]) -> Ordering; + + /// Return the shortest byte string that compares "Greater" to the first argument and "Less" to + /// the second one. fn find_shortest_sep(&self, &[u8], &[u8]) -> Vec<u8>; + /// Return the shortest byte string that compares "Greater" to the argument. fn find_short_succ(&self, &[u8]) -> Vec<u8>; + + /// A unique identifier for a comparator. A comparator wrapper (like InternalKeyCmp) may + /// return the id of its inner comparator. + fn id(&self) -> &'static str; } /// Lexical comparator. @@ -17,6 +26,10 @@ a.cmp(b) } + fn id(&self) -> &'static str { + "leveldb.BytewiseComparator" + } + fn find_shortest_sep(&self, a: &[u8], b: &[u8]) -> Vec<u8> { if a == b { return a.to_vec(); @@ -40,7 +53,12 @@ diff_at += 1; } - return a.to_vec(); + // Backup case: either `a` is full of 0xff, or all different places are less than 2 + // characters apart. + // The result is not necessarily short, but a good separator. + let mut sep = a.to_vec(); + sep[a.len() - 1] += 1; + return sep; } fn find_short_succ(&self, a: &[u8]) -> Vec<u8> { @@ -67,15 +85,17 @@ assert_eq!(DefaultCmp.find_shortest_sep("abcd".as_bytes(), "abcf".as_bytes()), "abce".as_bytes()); assert_eq!(DefaultCmp.find_shortest_sep("abc".as_bytes(), "acd".as_bytes()), - "abc".as_bytes()); + "abd".as_bytes()); assert_eq!(DefaultCmp.find_shortest_sep("abcdefghi".as_bytes(), "abcffghi".as_bytes()), "abce".as_bytes()); assert_eq!(DefaultCmp.find_shortest_sep("a".as_bytes(), "a".as_bytes()), "a".as_bytes()); assert_eq!(DefaultCmp.find_shortest_sep("a".as_bytes(), "b".as_bytes()), - "a".as_bytes()); + "b".as_bytes()); assert_eq!(DefaultCmp.find_shortest_sep("abc".as_bytes(), "zzz".as_bytes()), "b".as_bytes()); + assert_eq!(DefaultCmp.find_shortest_sep("yyy".as_bytes(), "z".as_bytes()), + "yyz".as_bytes()); assert_eq!(DefaultCmp.find_shortest_sep("".as_bytes(), "".as_bytes()), "".as_bytes()); }
--- a/src/table_reader.rs Tue Jan 03 20:05:45 2017 +0100 +++ b/src/table_reader.rs Sun Feb 26 09:28:00 2017 +0100 @@ -1,3 +1,5 @@ +#![allow(dead_code)] + use block::{Block, BlockIter}; use blockhandle::BlockHandle; use filter::BoxedFilterPolicy; @@ -73,7 +75,6 @@ pub struct Table<R: Read + Seek> { file: R, - opt: Options, footer: Footer, @@ -165,17 +166,47 @@ /// you frequently look for non-existing values (as it will detect the non-existence of an /// entry in a block without having to load the block). pub fn get(&mut self, key: &[u8]) -> Option<Vec<u8>> { - let mut iter = self.iter(); + let mut index_iter = self.indexblock.iter(); + index_iter.seek(key); + + let handle; + if let Some((last_in_block, h)) = index_iter.current() { + if self.opt.cmp.cmp(key, &last_in_block) == Ordering::Less { + handle = BlockHandle::decode(&h).0; + } else { + return None; + } + } else { + return None; + } + // found correct block. + // Check bloom filter + if let Some(ref filters) = self.filters { + if !filters.key_may_match(handle.offset(), key) { + return None; + } + } + + // Read block (potentially from cache) + let mut iter; + if let Ok(tb) = self.read_block(&handle) { + iter = tb.block.iter(); + } else { + return None; + } + + // Go to entry and check if it's the wanted entry. iter.seek(key); - if let Some((k, v)) = iter.current() { - if k == key { Some(v) } else { None } + if self.opt.cmp.cmp(key, &k) == Ordering::Equal { + Some(v) + } else { + None + } } else { None } - - // TODO: replace this with a more efficient method using filters } }