Mercurial > lbo > hg > btreex
view src/lib.rs @ 4:48536f1240fc
More tests, check coverage
author | Lewin Bormann <lbo@spheniscida.de> |
---|---|
date | Tue, 05 Jul 2022 09:40:05 -0700 |
parents | a94eac64aee8 |
children | b4656ce03bda |
line wrap: on
line source
use ptree::TreeBuilder; use std::cmp::Ordering; // Min. order: 3 const ORDER: usize = 5; #[derive(Debug)] struct Node<T> { entries: [Option<T>; ORDER - 1], children: [Option<usize>; ORDER], } impl<T: Ord> Node<T> { fn new() -> Node<T> { Node { entries: Default::default(), children: Default::default(), } } } trait Compare<T> { fn cmp(v1: &T, v2: &T) -> Ordering; } struct StandardCompare; impl<T: Ord> Compare<T> for StandardCompare { fn cmp(v1: &T, v2: &T) -> Ordering { v1.cmp(v2) } } #[derive(Debug)] struct BTree<T, Cmp: Compare<T>> { v: Vec<Node<T>>, root: usize, cmp: std::marker::PhantomData<Cmp>, } impl<T: std::fmt::Debug + Ord> BTree<T, StandardCompare> { fn new(initial_capacity: usize) -> BTree<T, StandardCompare> { BTree { v: Vec::with_capacity(initial_capacity), root: 0, cmp: Default::default(), } } } impl<T: std::fmt::Debug + Ord, Cmp: Compare<T>> BTree<T, Cmp> { fn free(&self, node: usize) -> usize { self.v[node] .entries .iter() .map(|e| if e.is_some() { 0 } else { 1 }) .sum() } fn occupied(&self, node: usize) -> usize { self.v[node] .entries .iter() .map(|e| if e.is_some() { 1 } else { 0 }) .sum() } fn get(&self, node: usize, elem: usize) -> &Option<T> { &self.v[node].entries[elem] } fn get_mut(&mut self, node: usize, elem: usize) -> &mut Option<T> { &mut self.v[node].entries[elem] } fn get_child(&self, node: usize, child: usize) -> &Option<usize> { &self.v[node].children[child] } fn get_child_mut(&mut self, node: usize, child: usize) -> &mut Option<usize> { &mut self.v[node].children[child] } fn count(&self) -> usize { self.count_from(self.root) } fn count_from(&self, node: usize) -> usize { let mut s = self.occupied(node); for i in 0..ORDER { if let Some(ch) = self.v[node].children[i] { s += self.count_from(ch); } } s } pub fn find<'a>(&'a self, value: &T) -> Option<&'a T> { self.find_from(self.root, value) } fn find_from<'a>(&'a self, node: usize, value: &T) -> Option<&'a T> { let mut j = 0; // Break once we arrived at the entry right of the correct child, if we don't return // earlier. for i in 0..ORDER { if i < ORDER - 1 { if let Some(el) = self.get(node, i) { match Cmp::cmp(value, el) { Ordering::Less => { j = i; break; } Ordering::Equal => return Some(el), Ordering::Greater => continue, } } else { j = i; break; } } else { j = i; break; } } if let Some(ch) = self.get_child(node, j) { return self.find_from(*ch, value); } None } /// Insert a value into an existing node, preserving the order. /// Requires that at least one slot be free. fn insert_inline( &mut self, node: usize, value: T, left: Option<usize>, right: Option<usize>, ) -> usize { assert!(self.free(node) >= 1); let mut loc = ORDER - 2; let node = &mut self.v[node]; for i in 0..ORDER - 1 { if let Some(ref el) = node.entries[i] { if Cmp::cmp(&value, el) == Ordering::Less { // Found right location. Shift remaining elements. loc = i; break; } } else { loc = i; break; } } for i in 0..(ORDER - 2 - loc) { node.entries[ORDER - 2 - i] = node.entries[ORDER - 3 - i].take(); node.children[ORDER - 1 - i] = node.children[ORDER - 2 - i].take(); } node.entries[loc] = Some(value); node.children[loc] = left; node.children[loc + 1] = right; loc } /// Split a node, returning the IDs of the two new nodes and the pivot value. fn split(&mut self, node: usize) -> (usize, T, usize) { assert!(self.free(node) <= 1); // `node` will become left node. let leftix = node; let rightix = self.v.len(); let mut right = Node::<T>::new(); let pivotix = (ORDER - 1) / 2; let pivot = self.get_mut(node, pivotix).take(); // Pivot-left child remains in left node. for i in pivotix + 1..ORDER { right.children[i - pivotix - 1] = self.get_child_mut(leftix, i).take(); } // Make sure to move all children pointers! //right.children[ORDER - pivotix] = self.get_child_mut(leftix, ORDER - 1).take(); for i in pivotix + 1..ORDER - 1 { right.entries[i - pivotix - 1] = self.get_mut(leftix, i).take(); } self.v.push(right); (leftix, pivot.unwrap(), rightix) } /// Returns true if the given node is a leaf (doesn't have children). fn is_leaf(&self, node: usize) -> bool { self.v[node].children.iter().all(|c| c.is_none()) } /// Insert a value into the BTree. pub fn insert(&mut self, value: T) -> bool { if let Some((l, pivot, r)) = self.insert_into(self.root, value) { // Split root let mut newroot = Node::new(); let rootix = self.v.len(); newroot.children[0] = Some(l); newroot.children[1] = Some(r); newroot.entries[0] = Some(pivot); self.root = rootix; self.v.push(newroot); true } else { true } } /// Insert value recursively. If a split occurs, returns (left, pivot, right) tuple. fn insert_into(&mut self, node: usize, value: T) -> Option<(usize, T, usize)> { // BTree initialized? if self.v.len() == 0 { self.v.push(Node::new()); self.insert_inline(self.root, value, None, None); return None; } // Leaf node with free space? We are finished. if self.is_leaf(node) && self.free(node) >= 1 { self.insert_inline(node, value, None, None); return None; } else if self.is_leaf(node) && self.free(node) < 1 { // Leaf node but full - needs to be split. let (l, pivot, r) = self.split(node); // Insert value into left or right subtree. match Cmp::cmp(&value, &pivot) { Ordering::Less => assert!(self.insert_into(l, value).is_none()), Ordering::Greater => assert!(self.insert_into(r, value).is_none()), _ => panic!("Attempting to insert duplicate element"), }; // Return fragments upwards to be inserted. return Some((l, pivot, r)); } else { // Descend into node. let mut split = None; // Find correct child to descend into. for i in 0..ORDER { if i < ORDER - 1 { if let Some(el) = self.get(node, i) { match Cmp::cmp(&value, el) { Ordering::Less => { split = self.insert_into(self.get_child(node, i).unwrap(), value); break; } Ordering::Equal => { panic!("Attempting to insert duplicate element {:?}", value) } Ordering::Greater => continue, } } else { if let Some(ch) = self.get_child(node, i) { split = self.insert_into(*ch, value); break; } panic!( "Internal node missing child? {} => {:?}", node, self.v[node] ); } } else { if let Some(ch) = self.get_child(node, i) { split = self.insert_into(*ch, value); break; } else { let (l, pivot, r) = self.split(node); // Insert value into left or right subtree. match Cmp::cmp(&value, &pivot) { Ordering::Less => assert!(self.insert_into(l, value).is_none()), Ordering::Greater => assert!(self.insert_into(r, value).is_none()), _ => panic!("Attempting to insert duplicate element"), }; return Some((l, pivot, r)); } } } // Merge split if one happened below us. if let Some((l, pivot, r)) = split { if self.free(node) >= 1 { // We can insert the split here. self.insert_inline(node, pivot, Some(l), Some(r)); return None; } else { // Split this node too. let (nl, npivot, nr) = self.split(node); match pivot.cmp(&npivot) { Ordering::Less => self.insert_inline(nl, pivot, Some(l), Some(r)), Ordering::Greater => self.insert_inline(nr, pivot, Some(l), Some(r)), _ => panic!("Attempting to insert duplicate element"), }; return Some((nl, npivot, nr)); } } else { return None; } } } } impl<T: std::fmt::Debug, Cmp: Compare<T>> BTree<T, Cmp> { fn debug_print(&self) -> String { let mut t = TreeBuilder::new(format!("<{}>", self.root)); self.print_subtree(&mut t, self.root); let mut o: Vec<u8> = Vec::new(); ptree::write_tree(&t.build(), &mut o); String::from_utf8(o).unwrap() } fn print_subtree(&self, tb: &mut TreeBuilder, node: usize) { let node = &self.v[node]; for i in 0..ORDER { if let Some(ch) = node.children[i] { self.print_subtree(tb.begin_child(format!("<{}>", ch)), ch); tb.end_child(); } if i < ORDER - 1 { tb.add_empty_child(format!("{:?}", node.entries[i])); } } } } #[cfg(test)] mod tests { use super::BTree; use rand::{rngs::StdRng, RngCore, SeedableRng}; #[test] fn basic_test() { let mut bt = BTree::new(16); let ns = &[ 15, 2, 21, 35, 4, 62, 31, 27, 88, 1, 12, 45, 11, 16, 23, 20, 22, 24, 25, 26, 28, 29, 30, 31, 32, ]; for n in ns { bt.insert(*n); } for (i, e) in bt.v.iter().enumerate() { println!("{} {:?}", i, e); } assert_eq!(bt.count(), ns.len()); } #[test] fn randomized_all_reachable() { time_test::time_test!("10 x 100 inserts"); for seed in &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] { let mut rng = StdRng::seed_from_u64(*seed); let N = 1000; let mut bt = BTree::new(N / 2); for i in 0..N { let rn = rng.next_u32(); bt.insert(rn); if i + 1 != bt.count() { println!("inserting {}", rn); println!("seed = {}\n{}", seed, bt.debug_print()); } assert_eq!(i + 1, bt.count()); } println!("{}", bt.debug_print()); } } #[test] fn test_find() { time_test::time_test!("1000 x insert/find"); let mut rng = StdRng::seed_from_u64(1); let N = 1000; let mut items = Vec::with_capacity(N); let mut bt = BTree::new(N / 3); for i in 0..N { let rn = rng.next_u32(); bt.insert(rn); items.push(rn); for rn in items.iter() { assert_eq!(bt.find(rn), Some(rn)); } } for (i, rn) in items.iter().enumerate() { assert_eq!(bt.find(rn), Some(rn)); } } #[test] fn test_find_sequential() { time_test::time_test!("1000 x insert/find sequential"); let N = 1000; let mut bt = BTree::new(N / 3); for i in 0..N { bt.insert(i); } for i in 0..N { assert_eq!(bt.find(&i), Some(&i)); } } #[test] fn randomized_stdbtree_test() { time_test::time_test!("10 x 100 inserts"); for seed in &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] { let mut rng = StdRng::seed_from_u64(*seed); let N = 1000; let mut bt = std::collections::BTreeSet::new(); for i in 0..N { let rn = rng.next_u32(); bt.insert(rn); assert_eq!(i + 1, bt.len()); } } } }