changeset 33:4e7b9fcdbc88

Use a more efficient TableReader::get() implementation. The old implementation was simply based on iterators and seeking; the new one is "native" in that it implements the details itself, using the index blocks for a rough seek, the filter blocks for skipping irrelevant blocks, and linear search within a block for the key itself. This is a code rebase from leveldb.
author Lewin Bormann <lbo@spheniscida.de>
date Sun, 26 Feb 2017 09:28:00 +0100
parents f4caa5af5b93
children 5449b2eb7265
files src/cmp.rs src/table_reader.rs
diffstat 2 files changed, 60 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
--- a/src/cmp.rs	Tue Jan 03 20:05:45 2017 +0100
+++ b/src/cmp.rs	Sun Feb 26 09:28:00 2017 +0100
@@ -3,9 +3,18 @@
 /// Comparator trait, supporting types that can be nested (i.e., add additional functionality on
 /// top of an inner comparator)
 pub trait Cmp {
+    /// Compare to byte strings, bytewise.
     fn cmp(&self, &[u8], &[u8]) -> Ordering;
+
+    /// Return the shortest byte string that compares "Greater" to the first argument and "Less" to
+    /// the second one.
     fn find_shortest_sep(&self, &[u8], &[u8]) -> Vec<u8>;
+    /// Return the shortest byte string that compares "Greater" to the argument.
     fn find_short_succ(&self, &[u8]) -> Vec<u8>;
+
+    /// A unique identifier for a comparator. A comparator wrapper (like InternalKeyCmp) may
+    /// return the id of its inner comparator.
+    fn id(&self) -> &'static str;
 }
 
 /// Lexical comparator.
@@ -17,6 +26,10 @@
         a.cmp(b)
     }
 
+    fn id(&self) -> &'static str {
+        "leveldb.BytewiseComparator"
+    }
+
     fn find_shortest_sep(&self, a: &[u8], b: &[u8]) -> Vec<u8> {
         if a == b {
             return a.to_vec();
@@ -40,7 +53,12 @@
 
             diff_at += 1;
         }
-        return a.to_vec();
+        // Backup case: either `a` is full of 0xff, or all different places are less than 2
+        // characters apart.
+        // The result is not necessarily short, but a good separator.
+        let mut sep = a.to_vec();
+        sep[a.len() - 1] += 1;
+        return sep;
     }
 
     fn find_short_succ(&self, a: &[u8]) -> Vec<u8> {
@@ -67,15 +85,17 @@
         assert_eq!(DefaultCmp.find_shortest_sep("abcd".as_bytes(), "abcf".as_bytes()),
                    "abce".as_bytes());
         assert_eq!(DefaultCmp.find_shortest_sep("abc".as_bytes(), "acd".as_bytes()),
-                   "abc".as_bytes());
+                   "abd".as_bytes());
         assert_eq!(DefaultCmp.find_shortest_sep("abcdefghi".as_bytes(), "abcffghi".as_bytes()),
                    "abce".as_bytes());
         assert_eq!(DefaultCmp.find_shortest_sep("a".as_bytes(), "a".as_bytes()),
                    "a".as_bytes());
         assert_eq!(DefaultCmp.find_shortest_sep("a".as_bytes(), "b".as_bytes()),
-                   "a".as_bytes());
+                   "b".as_bytes());
         assert_eq!(DefaultCmp.find_shortest_sep("abc".as_bytes(), "zzz".as_bytes()),
                    "b".as_bytes());
+        assert_eq!(DefaultCmp.find_shortest_sep("yyy".as_bytes(), "z".as_bytes()),
+                   "yyz".as_bytes());
         assert_eq!(DefaultCmp.find_shortest_sep("".as_bytes(), "".as_bytes()),
                    "".as_bytes());
     }
--- a/src/table_reader.rs	Tue Jan 03 20:05:45 2017 +0100
+++ b/src/table_reader.rs	Sun Feb 26 09:28:00 2017 +0100
@@ -1,3 +1,5 @@
+#![allow(dead_code)]
+
 use block::{Block, BlockIter};
 use blockhandle::BlockHandle;
 use filter::BoxedFilterPolicy;
@@ -73,7 +75,6 @@
 
 pub struct Table<R: Read + Seek> {
     file: R,
-
     opt: Options,
 
     footer: Footer,
@@ -165,17 +166,47 @@
     /// you frequently look for non-existing values (as it will detect the non-existence of an
     /// entry in a block without having to load the block).
     pub fn get(&mut self, key: &[u8]) -> Option<Vec<u8>> {
-        let mut iter = self.iter();
+        let mut index_iter = self.indexblock.iter();
+        index_iter.seek(key);
+
+        let handle;
+        if let Some((last_in_block, h)) = index_iter.current() {
+            if self.opt.cmp.cmp(key, &last_in_block) == Ordering::Less {
+                handle = BlockHandle::decode(&h).0;
+            } else {
+                return None;
+            }
+        } else {
+            return None;
+        }
 
+        // found correct block.
+        // Check bloom filter
+        if let Some(ref filters) = self.filters {
+            if !filters.key_may_match(handle.offset(), key) {
+                return None;
+            }
+        }
+
+        // Read block (potentially from cache)
+        let mut iter;
+        if let Ok(tb) = self.read_block(&handle) {
+            iter = tb.block.iter();
+        } else {
+            return None;
+        }
+
+        // Go to entry and check if it's the wanted entry.
         iter.seek(key);
-
         if let Some((k, v)) = iter.current() {
-            if k == key { Some(v) } else { None }
+            if self.opt.cmp.cmp(key, &k) == Ordering::Equal {
+                Some(v)
+            } else {
+                None
+            }
         } else {
             None
         }
-
-        // TODO: replace this with a more efficient method using filters
     }
 }