changeset 97:4754dd356f5e

Fix several bugs in the Table*/Block implementations 1. Calculate correct number of restarts, even if it is 0 2. Use correct slice length when encoding the value length 3. Remove a weird duplicate implementation of Footer that I wrote for no reason 4. Move non-method functions out of Table 5. Don't try to read a filter block as ordinary block 6. Add tests for TableIterator
author Lewin Bormann <lbo@spheniscida.de>
date Sun, 25 Sep 2016 12:32:45 +0200
parents 16043131cbd5
children 5528147076d0
files src/block.rs src/blockhandle.rs src/table_builder.rs src/table_reader.rs
diffstat 4 files changed, 122 insertions(+), 51 deletions(-) [+]
line wrap: on
line diff
--- a/src/block.rs	Sun Sep 25 09:59:31 2016 +0200
+++ b/src/block.rs	Sun Sep 25 12:32:45 2016 +0200
@@ -27,6 +27,7 @@
 /// A RESTART is a fixed u32 pointing to the beginning of an ENTRY.
 ///
 /// N_RESTARTS contains the number of restarts.
+#[derive(Debug)]
 pub struct BlockIter<C: Comparator> {
     block: Rc<BlockContents>,
     cmp: C,
@@ -83,7 +84,7 @@
     }
 
     fn number_restarts(&self) -> usize {
-        ((self.block.len() - self.restarts_off) / 4) - 1
+        u32::decode_fixed(&self.block[self.block.len() - 4..]) as usize
     }
 
     fn get_restart_point(&self, ix: usize) -> usize {
@@ -182,7 +183,11 @@
         self.reset();
 
         let mut left = 0;
-        let mut right = self.number_restarts() - 1;
+        let mut right = if self.number_restarts() == 0 {
+            0
+        } else {
+            self.number_restarts() - 1
+        };
 
         // Do a binary search over the restart points.
         while left < right {
@@ -303,7 +308,7 @@
         self.buffer.extend_from_slice(&buf[0..sz]);
         sz = non_shared.encode_var(&mut buf[..]);
         self.buffer.extend_from_slice(&buf[0..sz]);
-        sz = val.len().encode_var(&mut buf[0..sz]);
+        sz = val.len().encode_var(&mut buf[..]);
         self.buffer.extend_from_slice(&buf[0..sz]);
 
         self.buffer.extend_from_slice(&key[shared..]);
--- a/src/blockhandle.rs	Sun Sep 25 09:59:31 2016 +0200
+++ b/src/blockhandle.rs	Sun Sep 25 12:32:45 2016 +0200
@@ -4,6 +4,7 @@
 /// used typically as file-internal pointer in table (SSTable) files. For example, the index block
 /// in an SSTable is a block of (key = largest key in block) -> (value = encoded blockhandle of
 /// block).
+#[derive(Debug)]
 pub struct BlockHandle {
     offset: usize,
     size: usize,
--- a/src/table_builder.rs	Sun Sep 25 09:59:31 2016 +0200
+++ b/src/table_builder.rs	Sun Sep 25 12:32:45 2016 +0200
@@ -50,6 +50,7 @@
 }
 
 /// Footer is a helper for encoding/decoding a table footer.
+#[derive(Debug)]
 pub struct Footer {
     pub meta_index: BlockHandle,
     pub index: BlockHandle,
@@ -216,9 +217,11 @@
         let _ = self.dst.write(&[t as u8; 1]);
         let _ = self.dst.write(&buf);
 
+        let handle = BlockHandle::new(self.offset, c.len());
+
         self.offset += c.len() + 1 + buf.len();
 
-        BlockHandle::new(self.offset, c.len())
+        handle
     }
 
     pub fn finish(mut self) {
@@ -248,7 +251,8 @@
         }
 
         // write metaindex block
-        let meta_ix_handle = self.write_block(meta_ix_block.finish(), ctype);
+        let meta_ix = meta_ix_block.finish();
+        let meta_ix_handle = self.write_block(meta_ix, ctype);
 
         // write index block
         let index_cont = self.index_block.take().unwrap().finish();
--- a/src/table_reader.rs	Sun Sep 25 09:59:31 2016 +0200
+++ b/src/table_reader.rs	Sun Sep 25 12:32:45 2016 +0200
@@ -3,30 +3,37 @@
 use filter::FilterPolicy;
 use filter_block::FilterBlockReader;
 use options::Options;
-use table_builder;
+use table_builder::{self, Footer};
 use types::{Comparator, LdbIterator};
 
 use std::io::{Read, Seek, SeekFrom, Result};
 
-struct TableFooter {
-    pub metaindex: BlockHandle,
-    pub index: BlockHandle,
+/// Reads the table footer.
+fn read_footer<R: Read + Seek>(f: &mut R, size: usize) -> Result<Footer> {
+    try!(f.seek(SeekFrom::Start((size - table_builder::FULL_FOOTER_LENGTH) as u64)));
+    let mut buf = [0; table_builder::FULL_FOOTER_LENGTH];
+    try!(f.read_exact(&mut buf));
+    Ok(Footer::decode(&buf))
 }
 
-impl TableFooter {
-    fn parse(footer: &[u8]) -> TableFooter {
-        assert_eq!(footer.len(), table_builder::FULL_FOOTER_LENGTH);
-        assert_eq!(&footer[footer.len() - 8..],
-                   table_builder::MAGIC_FOOTER_ENCODED);
+fn read_bytes<R: Read + Seek>(f: &mut R, location: &BlockHandle) -> Result<Vec<u8>> {
+    try!(f.seek(SeekFrom::Start(location.offset() as u64)));
+
+    let mut buf = Vec::new();
+    buf.resize(location.size(), 0);
+
+    try!(f.read_exact(&mut buf[0..location.size()]));
 
-        let (mi, n1) = BlockHandle::decode(footer);
-        let (ix, _) = BlockHandle::decode(&footer[n1..]);
+    Ok(buf)
+}
 
-        TableFooter {
-            metaindex: mi,
-            index: ix,
-        }
-    }
+/// Reads a block at location.
+fn read_block<R: Read + Seek, C: Comparator>(cmp: &C,
+                                             f: &mut R,
+                                             location: &BlockHandle)
+                                             -> Result<BlockIter<C>> {
+    let buf = try!(read_bytes(f, location));
+    Ok(BlockIter::new(buf, *cmp))
 }
 
 pub struct Table<R: Read + Seek, C: Comparator, FP: FilterPolicy> {
@@ -36,18 +43,17 @@
     opt: Options,
     cmp: C,
 
-    footer: TableFooter,
+    footer: Footer,
     indexblock: BlockIter<C>,
     filters: Option<FilterBlockReader<FP>>,
 }
 
 impl<R: Read + Seek, C: Comparator, FP: FilterPolicy> Table<R, C, FP> {
     pub fn new(mut file: R, size: usize, cmp: C, fp: FP, opt: Options) -> Result<Table<R, C, FP>> {
-        let footer = try!(Table::<R, C, FP>::read_footer(&mut file, size));
+        let footer = try!(read_footer(&mut file, size));
 
-        let indexblock = try!(Table::<R, C, FP>::read_block(&cmp, &mut file, &footer.index));
-        let mut metaindexblock =
-            try!(Table::<R, C, FP>::read_block(&cmp, &mut file, &footer.metaindex));
+        let indexblock = try!(read_block(&cmp, &mut file, &footer.index));
+        let mut metaindexblock = try!(read_block(&cmp, &mut file, &footer.meta_index));
 
         let mut filter_block_reader = None;
         let mut filter_name = "filter.".as_bytes().to_vec();
@@ -58,9 +64,8 @@
             let filter_block_location = BlockHandle::decode(&val).0;
 
             if filter_block_location.size() > 0 {
-                let filter_block =
-                    try!(Table::<R, C, FP>::read_block(&cmp, &mut file, &filter_block_location));
-                filter_block_reader = Some(FilterBlockReader::new(fp, filter_block.obtain()));
+                let buf = try!(read_bytes(&mut file, &filter_block_location));
+                filter_block_reader = Some(FilterBlockReader::new_owned(fp, buf));
             }
         }
         metaindexblock.reset();
@@ -76,28 +81,8 @@
         })
     }
 
-    /// Reads the table footer.
-    fn read_footer(f: &mut R, size: usize) -> Result<TableFooter> {
-        try!(f.seek(SeekFrom::Start((size - table_builder::FULL_FOOTER_LENGTH) as u64)));
-        let mut buf = [0; table_builder::FULL_FOOTER_LENGTH];
-        try!(f.read_exact(&mut buf));
-        Ok(TableFooter::parse(&buf))
-    }
-
-    /// Reads a block at location.
-    fn read_block(cmp: &C, f: &mut R, location: &BlockHandle) -> Result<BlockIter<C>> {
-        try!(f.seek(SeekFrom::Start(location.offset() as u64)));
-
-        let mut buf = Vec::new();
-        buf.resize(location.size(), 0);
-
-        try!(f.read_exact(&mut buf[0..location.size()]));
-
-        Ok(BlockIter::new(buf, *cmp))
-    }
-
     fn read_block_(&mut self, location: &BlockHandle) -> Result<BlockIter<C>> {
-        Table::<R, C, FP>::read_block(&self.cmp, &mut self.file, location)
+        read_block(&self.cmp, &mut self.file, location)
     }
 
     /// Returns the offset of the block that contains `key`.
@@ -112,7 +97,18 @@
             return location.offset();
         }
 
-        return self.footer.metaindex.offset();
+        return self.footer.meta_index.offset();
+    }
+
+    // Iterators read from the file; thus only one iterator can be borrowed (mutably) per scope
+    fn iter<'a>(&'a mut self) -> TableIterator<'a, R, C, FP> {
+        let mut iter = TableIterator {
+            current_block: self.indexblock.clone(), // just for filling in here
+            index_block: self.indexblock.clone(),
+            table: self,
+        };
+        iter.skip_to_next_entry(); // initialize current_block
+        iter
     }
 }
 
@@ -156,3 +152,68 @@
         }
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use filter::BloomPolicy;
+    use options::Options;
+    use table_builder::TableBuilder;
+    use types::StandardComparator;
+
+    use std::io::Cursor;
+
+    use super::*;
+
+    fn build_data() -> Vec<(&'static str, &'static str)> {
+        vec![("abc", "def"),
+             ("abd", "dee"),
+             ("bcd", "asa"),
+             ("bsr", "a00"),
+             ("xyz", "xxx"),
+             ("xzz", "yyy"),
+             ("zzz", "111")]
+    }
+
+
+    fn build_table() -> (Vec<u8>, usize) {
+        let mut d = Vec::with_capacity(512);
+        let mut opt = Options::default();
+        opt.block_restart_interval = 2;
+
+        {
+            let mut b = TableBuilder::new(opt, StandardComparator, &mut d, BloomPolicy::new(4));
+            let data = build_data();
+
+            for &(k, v) in data.iter() {
+                b.add(k.as_bytes(), v.as_bytes());
+            }
+
+            b.finish();
+        }
+
+        let size = d.len();
+
+        (d, size)
+    }
+
+    #[test]
+    fn test_table_iterator() {
+        let (src, size) = build_table();
+        let data = build_data();
+
+        let mut table = Table::new(Cursor::new(&src as &[u8]),
+                                   size,
+                                   StandardComparator,
+                                   BloomPolicy::new(4),
+                                   Options::default())
+            .unwrap();
+        let iter = table.iter();
+        let mut i = 0;
+
+        for (k, v) in iter {
+            assert_eq!((data[i].0.as_bytes(), data[i].1.as_bytes()),
+                       (k.as_ref(), v.as_ref()));
+            i += 1;
+        }
+    }
+}