Mercurial > lbo > hg > leveldb-rs
changeset 133:d0c7a2c7c798
Fix prev() behavior in block iterator
Essentially by not setting current_entry_offset if we encounter the end of the block.
author | Lewin Bormann <lbo@spheniscida.de> |
---|---|
date | Mon, 02 Jan 2017 11:15:42 +0100 |
parents | e025f865ea6f |
children | 2788dee1b457 |
files | src/block.rs src/table_reader.rs |
diffstat | 2 files changed, 119 insertions(+), 56 deletions(-) [+] |
line wrap: on
line diff
--- a/src/block.rs Sat Dec 31 18:33:59 2016 +0100 +++ b/src/block.rs Mon Jan 02 11:15:42 2017 +0100 @@ -66,16 +66,19 @@ pub struct BlockIter { block: Rc<BlockContents>, opt: Options, - // start of next entry + // offset of restarts area within the block. + restarts_off: usize, + + // start of next entry to be parsed. offset: usize, - // offset of restarts area - restarts_off: usize, + // offset of the current entry. current_entry_offset: usize, - // tracks the last restart we encountered + // index of the most recent restart. current_restart_ix: usize, // We assemble the key from two parts usually, so we keep the current full key here. key: Vec<u8>, + // Offset of the current value within the block. val_offset: usize, } @@ -128,7 +131,6 @@ while let Some((_, _)) = self.next() { } - self.prev(); } } @@ -136,11 +138,15 @@ type Item = (Vec<u8>, Vec<u8>); fn next(&mut self) -> Option<Self::Item> { - self.current_entry_offset = self.offset; + println!("next {:?}", (self.offset, self.restarts_off)); + if self.offset >= self.restarts_off { + self.offset = self.restarts_off; + // current_entry_offset is left at the offset of the last entry + return None; + } else { + self.current_entry_offset = self.offset; + } - if self.current_entry_offset >= self.restarts_off { - return None; - } let (shared, non_shared, valsize) = self.parse_entry(); self.assemble_key(shared, non_shared); @@ -164,28 +170,36 @@ self.val_offset = 0; } fn prev(&mut self) -> Option<Self::Item> { + println!("prev {:?}", + (self.offset, self.restarts_off, self.block.len(), self.current_entry_offset)); // as in the original implementation -- seek to last restart point, then look for key - let current_offset = self.current_entry_offset; + let orig_offset = self.current_entry_offset; // At the beginning, can't go further back - if current_offset == 0 { + if orig_offset == 0 { self.reset(); return None; } - while self.get_restart_point(self.current_restart_ix) >= current_offset { + while self.get_restart_point(self.current_restart_ix) >= orig_offset { + // todo: double check this + if self.current_restart_ix == 0 { + self.offset = self.restarts_off; + self.current_restart_ix = self.number_restarts(); + break; + } self.current_restart_ix -= 1; } self.offset = self.get_restart_point(self.current_restart_ix); - assert!(self.offset < current_offset); + assert!(self.offset < orig_offset); let mut result; loop { result = self.next(); - if self.offset >= current_offset { + if self.offset >= orig_offset { break; } } @@ -329,8 +343,6 @@ self.last_key.resize(shared, 0); self.last_key.extend_from_slice(&key[shared..]); - // assert_eq!(&self.last_key[..], key); - self.counter += 1; } @@ -439,18 +451,28 @@ let block_contents = builder.finish(); let mut block = Block::new(o.clone(), block_contents).iter(); - assert!(!block.valid()); - assert_eq!(block.next(), - Some(("key1".as_bytes().to_vec(), "value1".as_bytes().to_vec()))); - assert!(block.valid()); - block.next(); - assert!(block.valid()); + // assert!(!block.valid()); + // assert_eq!(block.next(), + // Some(("key1".as_bytes().to_vec(), "value1".as_bytes().to_vec()))); + // assert!(block.valid()); + // block.next(); + // assert!(block.valid()); + // block.prev(); + // assert!(block.valid()); + // assert_eq!(block.current(), + // Some(("key1".as_bytes().to_vec(), "value1".as_bytes().to_vec()))); + // block.prev(); + // assert!(!block.valid()); + // + // Verify that prev() from the last entry goes to the prev-to-last entry + // (essentially, that next() returning None doesn't advance anything) + while let Some(_) = block.next() { + } + block.prev(); assert!(block.valid()); assert_eq!(block.current(), - Some(("key1".as_bytes().to_vec(), "value1".as_bytes().to_vec()))); - block.prev(); - assert!(!block.valid()); + Some(("prefix_key2".as_bytes().to_vec(), "value".as_bytes().to_vec()))); } #[test] @@ -483,6 +505,16 @@ assert!(block.valid()); assert_eq!(block.current(), Some(("key1".as_bytes().to_vec(), "value1".as_bytes().to_vec()))); + + block.seek(&"prefix_key3".as_bytes()); + assert!(block.valid()); + assert_eq!(block.current(), + Some(("prefix_key3".as_bytes().to_vec(), "value".as_bytes().to_vec()))); + + block.seek(&"prefix_key8".as_bytes()); + assert!(block.valid()); + assert_eq!(block.current(), + Some(("prefix_key3".as_bytes().to_vec(), "value".as_bytes().to_vec()))); } #[test]
--- a/src/table_reader.rs Sat Dec 31 18:33:59 2016 +0100 +++ b/src/table_reader.rs Mon Jan 02 11:15:42 2017 +0100 @@ -173,12 +173,12 @@ // Iterators read from the file; thus only one iterator can be borrowed (mutably) per scope fn iter<'a>(&'a mut self) -> TableIterator<'a, R, FP> { let iter = TableIterator { - current_block: self.indexblock.iter(), // just for filling in here + current_block: self.indexblock.iter(), + init: false, current_block_off: 0, index_block: self.indexblock.iter(), opt: self.opt.clone(), table: self, - init: false, }; iter } @@ -227,16 +227,20 @@ pub struct TableIterator<'a, R: 'a + Read + Seek, FP: 'a + FilterPolicy> { table: &'a mut Table<R, FP>, opt: Options, + // We're not using Option<BlockIter>, but instead a separate `init` field. That makes it easier + // working with the current block in the iterator methods (no borrowing annoyance as with + // Option<>) current_block: BlockIter, current_block_off: usize, + init: bool, index_block: BlockIter, - - init: bool, } impl<'a, R: Read + Seek, FP: FilterPolicy> TableIterator<'a, R, FP> { // Skips to the entry referenced by the next entry in the index block. // This is called once a block has run out of entries. + // Err means corruption or I/O error; Ok(true) means a new block was loaded; Ok(false) means + // tht there's no more entries. fn skip_to_next_entry(&mut self) -> Result<bool> { if let Some((_key, val)) = self.index_block.next() { self.load_block(&val).map(|_| true) @@ -250,6 +254,7 @@ let (new_block_handle, _) = BlockHandle::decode(handle); let block = try!(self.table.read_block(&new_block_handle)); + self.current_block = block.block.iter(); self.current_block_off = new_block_handle.offset(); @@ -261,20 +266,29 @@ type Item = (Vec<u8>, Vec<u8>); fn next(&mut self) -> Option<Self::Item> { - if !self.init { - self.init = true; - if self.skip_to_next_entry().is_err() { - return None; + // init essentially means that `current_block` is a data block (it's initially filled with + // an index block as filler). + if self.init { + if let Some((key, val)) = self.current_block.next() { + Some((key, val)) + } else { + match self.skip_to_next_entry() { + Ok(true) => self.next(), + Ok(false) => None, + // try next block, this might be corruption + Err(_) => self.next(), + } } - } - - if let Some((key, val)) = self.current_block.next() { - Some((key, val)) } else { - if self.skip_to_next_entry().unwrap_or(false) { - self.next() - } else { - None + match self.skip_to_next_entry() { + Ok(true) => { + // Only initialize if we got an entry + self.init = true; + self.next() + } + Ok(false) => None, + // try next block from index, this might be corruption + Err(_) => self.next(), } } } @@ -362,12 +376,15 @@ use super::*; fn build_data() -> Vec<(&'static str, &'static str)> { - vec![("abc", "def"), + vec![// block 1 + ("abc", "def"), ("abd", "dee"), ("bcd", "asa"), + // block 2 ("bsr", "a00"), ("xyz", "xxx"), ("xzz", "yyy"), + // block 3 ("zzz", "111")] } @@ -400,7 +417,7 @@ fn build_internal_table() -> (Vec<u8>, usize) { let mut d = Vec::with_capacity(512); let mut opt = Options::default(); - opt.block_restart_interval = 2; + opt.block_restart_interval = 1; opt.block_size = 32; let mut i = 0 as u64; @@ -435,7 +452,7 @@ let (mut src, size) = build_table(); println!("{}", size); - src[45] = 0; + src[10] += 1; let mut table = Table::new_raw(Options::default(), Cursor::new(&src as &[u8]), @@ -448,26 +465,25 @@ { let iter = table.iter(); - // Last block is skipped - assert_eq!(iter.count(), 3); - + // first block is skipped + assert_eq!(iter.count(), 4); } { let iter = table.iter(); for (k, _) in iter { - if k == build_data()[2].0.as_bytes() { + if k == build_data()[5].0.as_bytes() { return; } } - panic!("Should have hit 3rd record in table!"); + panic!("Should have hit 5th record in table!"); } } #[test] - fn test_table_iterator_fwd() { + fn test_table_iterator_fwd_bwd() { let (src, size) = build_table(); let data = build_data(); @@ -476,16 +492,28 @@ size, BloomPolicy::new(4)) .unwrap(); - let iter = table.iter(); + let mut iter = table.iter(); let mut i = 0; - for (k, v) in iter { + while let Some((k, v)) = iter.next() { assert_eq!((data[i].0.as_bytes(), data[i].1.as_bytes()), (k.as_ref(), v.as_ref())); i += 1; } assert_eq!(i, data.len()); + assert!(iter.next().is_none()); + + // backwards count + let mut j = 0; + + while let Some(_) = iter.prev() { + j += 1; + } + + // expecting 7 - 1, because the last entry that the iterator stopped on is the last entry + // in the table; that is, it needs to go back over 6 entries. + assert_eq!(j, 6); } #[test] @@ -527,6 +555,7 @@ // See comment on valid() assert!(!iter.valid()); assert!(iter.current().is_none()); + assert!(iter.prev().is_none()); assert!(iter.next().is_some()); let first = iter.current(); @@ -561,7 +590,7 @@ // Go back to previous entry, check, go forward two entries, repeat // Verifies that prev/next works well. - while iter.valid() && i < data.len() { + loop { iter.prev(); if let Some((k, v)) = iter.current() { @@ -572,11 +601,13 @@ } i += 1; - iter.next(); - iter.next(); + if iter.next().is_none() || iter.next().is_none() { + break; + } } - assert_eq!(i, 7); + // Skipping the last value because the second next() above will break the loop + assert_eq!(i, 6); } #[test]