changeset 133:d0c7a2c7c798

Fix prev() behavior in block iterator Essentially by not setting current_entry_offset if we encounter the end of the block.
author Lewin Bormann <lbo@spheniscida.de>
date Mon, 02 Jan 2017 11:15:42 +0100
parents e025f865ea6f
children 2788dee1b457
files src/block.rs src/table_reader.rs
diffstat 2 files changed, 119 insertions(+), 56 deletions(-) [+]
line wrap: on
line diff
--- a/src/block.rs	Sat Dec 31 18:33:59 2016 +0100
+++ b/src/block.rs	Mon Jan 02 11:15:42 2017 +0100
@@ -66,16 +66,19 @@
 pub struct BlockIter {
     block: Rc<BlockContents>,
     opt: Options,
-    // start of next entry
+    // offset of restarts area within the block.
+    restarts_off: usize,
+
+    // start of next entry to be parsed.
     offset: usize,
-    // offset of restarts area
-    restarts_off: usize,
+    // offset of the current entry.
     current_entry_offset: usize,
-    // tracks the last restart we encountered
+    // index of the most recent restart.
     current_restart_ix: usize,
 
     // We assemble the key from two parts usually, so we keep the current full key here.
     key: Vec<u8>,
+    // Offset of the current value within the block.
     val_offset: usize,
 }
 
@@ -128,7 +131,6 @@
         while let Some((_, _)) = self.next() {
         }
 
-        self.prev();
     }
 }
 
@@ -136,11 +138,15 @@
     type Item = (Vec<u8>, Vec<u8>);
 
     fn next(&mut self) -> Option<Self::Item> {
-        self.current_entry_offset = self.offset;
+        println!("next {:?}", (self.offset, self.restarts_off));
+        if self.offset >= self.restarts_off {
+            self.offset = self.restarts_off;
+            // current_entry_offset is left at the offset of the last entry
+            return None;
+        } else {
+            self.current_entry_offset = self.offset;
+        }
 
-        if self.current_entry_offset >= self.restarts_off {
-            return None;
-        }
         let (shared, non_shared, valsize) = self.parse_entry();
         self.assemble_key(shared, non_shared);
 
@@ -164,28 +170,36 @@
         self.val_offset = 0;
     }
     fn prev(&mut self) -> Option<Self::Item> {
+        println!("prev {:?}",
+                 (self.offset, self.restarts_off, self.block.len(), self.current_entry_offset));
         // as in the original implementation -- seek to last restart point, then look for key
-        let current_offset = self.current_entry_offset;
+        let orig_offset = self.current_entry_offset;
 
         // At the beginning, can't go further back
-        if current_offset == 0 {
+        if orig_offset == 0 {
             self.reset();
             return None;
         }
 
-        while self.get_restart_point(self.current_restart_ix) >= current_offset {
+        while self.get_restart_point(self.current_restart_ix) >= orig_offset {
+            // todo: double check this
+            if self.current_restart_ix == 0 {
+                self.offset = self.restarts_off;
+                self.current_restart_ix = self.number_restarts();
+                break;
+            }
             self.current_restart_ix -= 1;
         }
 
         self.offset = self.get_restart_point(self.current_restart_ix);
-        assert!(self.offset < current_offset);
+        assert!(self.offset < orig_offset);
 
         let mut result;
 
         loop {
             result = self.next();
 
-            if self.offset >= current_offset {
+            if self.offset >= orig_offset {
                 break;
             }
         }
@@ -329,8 +343,6 @@
         self.last_key.resize(shared, 0);
         self.last_key.extend_from_slice(&key[shared..]);
 
-        // assert_eq!(&self.last_key[..], key);
-
         self.counter += 1;
     }
 
@@ -439,18 +451,28 @@
         let block_contents = builder.finish();
         let mut block = Block::new(o.clone(), block_contents).iter();
 
-        assert!(!block.valid());
-        assert_eq!(block.next(),
-                   Some(("key1".as_bytes().to_vec(), "value1".as_bytes().to_vec())));
-        assert!(block.valid());
-        block.next();
-        assert!(block.valid());
+        // assert!(!block.valid());
+        // assert_eq!(block.next(),
+        // Some(("key1".as_bytes().to_vec(), "value1".as_bytes().to_vec())));
+        // assert!(block.valid());
+        // block.next();
+        // assert!(block.valid());
+        // block.prev();
+        // assert!(block.valid());
+        // assert_eq!(block.current(),
+        // Some(("key1".as_bytes().to_vec(), "value1".as_bytes().to_vec())));
+        // block.prev();
+        // assert!(!block.valid());
+        //
+        // Verify that prev() from the last entry goes to the prev-to-last entry
+        // (essentially, that next() returning None doesn't advance anything)
+        while let Some(_) = block.next() {
+        }
+
         block.prev();
         assert!(block.valid());
         assert_eq!(block.current(),
-                   Some(("key1".as_bytes().to_vec(), "value1".as_bytes().to_vec())));
-        block.prev();
-        assert!(!block.valid());
+                   Some(("prefix_key2".as_bytes().to_vec(), "value".as_bytes().to_vec())));
     }
 
     #[test]
@@ -483,6 +505,16 @@
         assert!(block.valid());
         assert_eq!(block.current(),
                    Some(("key1".as_bytes().to_vec(), "value1".as_bytes().to_vec())));
+
+        block.seek(&"prefix_key3".as_bytes());
+        assert!(block.valid());
+        assert_eq!(block.current(),
+                   Some(("prefix_key3".as_bytes().to_vec(), "value".as_bytes().to_vec())));
+
+        block.seek(&"prefix_key8".as_bytes());
+        assert!(block.valid());
+        assert_eq!(block.current(),
+                   Some(("prefix_key3".as_bytes().to_vec(), "value".as_bytes().to_vec())));
     }
 
     #[test]
--- a/src/table_reader.rs	Sat Dec 31 18:33:59 2016 +0100
+++ b/src/table_reader.rs	Mon Jan 02 11:15:42 2017 +0100
@@ -173,12 +173,12 @@
     // Iterators read from the file; thus only one iterator can be borrowed (mutably) per scope
     fn iter<'a>(&'a mut self) -> TableIterator<'a, R, FP> {
         let iter = TableIterator {
-            current_block: self.indexblock.iter(), // just for filling in here
+            current_block: self.indexblock.iter(),
+            init: false,
             current_block_off: 0,
             index_block: self.indexblock.iter(),
             opt: self.opt.clone(),
             table: self,
-            init: false,
         };
         iter
     }
@@ -227,16 +227,20 @@
 pub struct TableIterator<'a, R: 'a + Read + Seek, FP: 'a + FilterPolicy> {
     table: &'a mut Table<R, FP>,
     opt: Options,
+    // We're not using Option<BlockIter>, but instead a separate `init` field. That makes it easier
+    // working with the current block in the iterator methods (no borrowing annoyance as with
+    // Option<>)
     current_block: BlockIter,
     current_block_off: usize,
+    init: bool,
     index_block: BlockIter,
-
-    init: bool,
 }
 
 impl<'a, R: Read + Seek, FP: FilterPolicy> TableIterator<'a, R, FP> {
     // Skips to the entry referenced by the next entry in the index block.
     // This is called once a block has run out of entries.
+    // Err means corruption or I/O error; Ok(true) means a new block was loaded; Ok(false) means
+    // tht there's no more entries.
     fn skip_to_next_entry(&mut self) -> Result<bool> {
         if let Some((_key, val)) = self.index_block.next() {
             self.load_block(&val).map(|_| true)
@@ -250,6 +254,7 @@
         let (new_block_handle, _) = BlockHandle::decode(handle);
 
         let block = try!(self.table.read_block(&new_block_handle));
+
         self.current_block = block.block.iter();
         self.current_block_off = new_block_handle.offset();
 
@@ -261,20 +266,29 @@
     type Item = (Vec<u8>, Vec<u8>);
 
     fn next(&mut self) -> Option<Self::Item> {
-        if !self.init {
-            self.init = true;
-            if self.skip_to_next_entry().is_err() {
-                return None;
+        // init essentially means that `current_block` is a data block (it's initially filled with
+        // an index block as filler).
+        if self.init {
+            if let Some((key, val)) = self.current_block.next() {
+                Some((key, val))
+            } else {
+                match self.skip_to_next_entry() {
+                    Ok(true) => self.next(),
+                    Ok(false) => None,
+                    // try next block, this might be corruption
+                    Err(_) => self.next(),
+                }
             }
-        }
-
-        if let Some((key, val)) = self.current_block.next() {
-            Some((key, val))
         } else {
-            if self.skip_to_next_entry().unwrap_or(false) {
-                self.next()
-            } else {
-                None
+            match self.skip_to_next_entry() {
+                Ok(true) => {
+                    // Only initialize if we got an entry
+                    self.init = true;
+                    self.next()
+                }
+                Ok(false) => None,
+                // try next block from index, this might be corruption
+                Err(_) => self.next(),
             }
         }
     }
@@ -362,12 +376,15 @@
     use super::*;
 
     fn build_data() -> Vec<(&'static str, &'static str)> {
-        vec![("abc", "def"),
+        vec![// block 1
+             ("abc", "def"),
              ("abd", "dee"),
              ("bcd", "asa"),
+             // block 2
              ("bsr", "a00"),
              ("xyz", "xxx"),
              ("xzz", "yyy"),
+             // block 3
              ("zzz", "111")]
     }
 
@@ -400,7 +417,7 @@
     fn build_internal_table() -> (Vec<u8>, usize) {
         let mut d = Vec::with_capacity(512);
         let mut opt = Options::default();
-        opt.block_restart_interval = 2;
+        opt.block_restart_interval = 1;
         opt.block_size = 32;
 
         let mut i = 0 as u64;
@@ -435,7 +452,7 @@
         let (mut src, size) = build_table();
         println!("{}", size);
 
-        src[45] = 0;
+        src[10] += 1;
 
         let mut table = Table::new_raw(Options::default(),
                                        Cursor::new(&src as &[u8]),
@@ -448,26 +465,25 @@
 
         {
             let iter = table.iter();
-            // Last block is skipped
-            assert_eq!(iter.count(), 3);
-
+            // first block is skipped
+            assert_eq!(iter.count(), 4);
         }
 
         {
             let iter = table.iter();
 
             for (k, _) in iter {
-                if k == build_data()[2].0.as_bytes() {
+                if k == build_data()[5].0.as_bytes() {
                     return;
                 }
             }
 
-            panic!("Should have hit 3rd record in table!");
+            panic!("Should have hit 5th record in table!");
         }
     }
 
     #[test]
-    fn test_table_iterator_fwd() {
+    fn test_table_iterator_fwd_bwd() {
         let (src, size) = build_table();
         let data = build_data();
 
@@ -476,16 +492,28 @@
                                        size,
                                        BloomPolicy::new(4))
             .unwrap();
-        let iter = table.iter();
+        let mut iter = table.iter();
         let mut i = 0;
 
-        for (k, v) in iter {
+        while let Some((k, v)) = iter.next() {
             assert_eq!((data[i].0.as_bytes(), data[i].1.as_bytes()),
                        (k.as_ref(), v.as_ref()));
             i += 1;
         }
 
         assert_eq!(i, data.len());
+        assert!(iter.next().is_none());
+
+        // backwards count
+        let mut j = 0;
+
+        while let Some(_) = iter.prev() {
+            j += 1;
+        }
+
+        // expecting 7 - 1, because the last entry that the iterator stopped on is the last entry
+        // in the table; that is, it needs to go back over 6 entries.
+        assert_eq!(j, 6);
     }
 
     #[test]
@@ -527,6 +555,7 @@
         // See comment on valid()
         assert!(!iter.valid());
         assert!(iter.current().is_none());
+        assert!(iter.prev().is_none());
 
         assert!(iter.next().is_some());
         let first = iter.current();
@@ -561,7 +590,7 @@
 
         // Go back to previous entry, check, go forward two entries, repeat
         // Verifies that prev/next works well.
-        while iter.valid() && i < data.len() {
+        loop {
             iter.prev();
 
             if let Some((k, v)) = iter.current() {
@@ -572,11 +601,13 @@
             }
 
             i += 1;
-            iter.next();
-            iter.next();
+            if iter.next().is_none() || iter.next().is_none() {
+                break;
+            }
         }
 
-        assert_eq!(i, 7);
+        // Skipping the last value because the second next() above will break the loop
+        assert_eq!(i, 6);
     }
 
     #[test]