view src/lib.rs @ 4:48536f1240fc

More tests, check coverage
author Lewin Bormann <lbo@spheniscida.de>
date Tue, 05 Jul 2022 09:40:05 -0700
parents a94eac64aee8
children b4656ce03bda
line wrap: on
line source

use ptree::TreeBuilder;
use std::cmp::Ordering;

// Min. order: 3
const ORDER: usize = 5;

#[derive(Debug)]
struct Node<T> {
    entries: [Option<T>; ORDER - 1],
    children: [Option<usize>; ORDER],
}

impl<T: Ord> Node<T> {
    fn new() -> Node<T> {
        Node {
            entries: Default::default(),
            children: Default::default(),
        }
    }
}

trait Compare<T> {
    fn cmp(v1: &T, v2: &T) -> Ordering;
}

struct StandardCompare;

impl<T: Ord> Compare<T> for StandardCompare {
    fn cmp(v1: &T, v2: &T) -> Ordering {
        v1.cmp(v2)
    }
}

#[derive(Debug)]
struct BTree<T, Cmp: Compare<T>> {
    v: Vec<Node<T>>,
    root: usize,
    cmp: std::marker::PhantomData<Cmp>,
}

impl<T: std::fmt::Debug + Ord> BTree<T, StandardCompare> {
    fn new(initial_capacity: usize) -> BTree<T, StandardCompare> {
        BTree {
            v: Vec::with_capacity(initial_capacity),
            root: 0,
            cmp: Default::default(),
        }
    }
}

impl<T: std::fmt::Debug + Ord, Cmp: Compare<T>> BTree<T, Cmp> {
    fn free(&self, node: usize) -> usize {
        self.v[node]
            .entries
            .iter()
            .map(|e| if e.is_some() { 0 } else { 1 })
            .sum()
    }
    fn occupied(&self, node: usize) -> usize {
        self.v[node]
            .entries
            .iter()
            .map(|e| if e.is_some() { 1 } else { 0 })
            .sum()
    }
    fn get(&self, node: usize, elem: usize) -> &Option<T> {
        &self.v[node].entries[elem]
    }
    fn get_mut(&mut self, node: usize, elem: usize) -> &mut Option<T> {
        &mut self.v[node].entries[elem]
    }
    fn get_child(&self, node: usize, child: usize) -> &Option<usize> {
        &self.v[node].children[child]
    }
    fn get_child_mut(&mut self, node: usize, child: usize) -> &mut Option<usize> {
        &mut self.v[node].children[child]
    }

    fn count(&self) -> usize {
        self.count_from(self.root)
    }
    fn count_from(&self, node: usize) -> usize {
        let mut s = self.occupied(node);
        for i in 0..ORDER {
            if let Some(ch) = self.v[node].children[i] {
                s += self.count_from(ch);
            }
        }
        s
    }

    pub fn find<'a>(&'a self, value: &T) -> Option<&'a T> {
        self.find_from(self.root, value)
    }
    fn find_from<'a>(&'a self, node: usize, value: &T) -> Option<&'a T> {
        let mut j = 0;
        // Break once we arrived at the entry right of the correct child, if we don't return
        // earlier.
        for i in 0..ORDER {
            if i < ORDER - 1 {
                if let Some(el) = self.get(node, i) {
                    match Cmp::cmp(value, el) {
                        Ordering::Less => {
                            j = i;
                            break;
                        }
                        Ordering::Equal => return Some(el),
                        Ordering::Greater => continue,
                    }
                } else {
                    j = i;
                    break;
                }
            } else {
                j = i;
                break;
            }
        }
        if let Some(ch) = self.get_child(node, j) {
            return self.find_from(*ch, value);
        }
        None
    }

    /// Insert a value into an existing node, preserving the order.
    /// Requires that at least one slot be free.
    fn insert_inline(
        &mut self,
        node: usize,
        value: T,
        left: Option<usize>,
        right: Option<usize>,
    ) -> usize {
        assert!(self.free(node) >= 1);
        let mut loc = ORDER - 2;

        let node = &mut self.v[node];
        for i in 0..ORDER - 1 {
            if let Some(ref el) = node.entries[i] {
                if Cmp::cmp(&value, el) == Ordering::Less {
                    // Found right location. Shift remaining elements.
                    loc = i;
                    break;
                }
            } else {
                loc = i;
                break;
            }
        }

        for i in 0..(ORDER - 2 - loc) {
            node.entries[ORDER - 2 - i] = node.entries[ORDER - 3 - i].take();
            node.children[ORDER - 1 - i] = node.children[ORDER - 2 - i].take();
        }
        node.entries[loc] = Some(value);
        node.children[loc] = left;
        node.children[loc + 1] = right;

        loc
    }

    /// Split a node, returning the IDs of the two new nodes and the pivot value.
    fn split(&mut self, node: usize) -> (usize, T, usize) {
        assert!(self.free(node) <= 1);
        // `node` will become left node.
        let leftix = node;
        let rightix = self.v.len();

        let mut right = Node::<T>::new();

        let pivotix = (ORDER - 1) / 2;
        let pivot = self.get_mut(node, pivotix).take();

        // Pivot-left child remains in left node.
        for i in pivotix + 1..ORDER {
            right.children[i - pivotix - 1] = self.get_child_mut(leftix, i).take();
        }
        // Make sure to move all children pointers!
        //right.children[ORDER - pivotix] = self.get_child_mut(leftix, ORDER - 1).take();
        for i in pivotix + 1..ORDER - 1 {
            right.entries[i - pivotix - 1] = self.get_mut(leftix, i).take();
        }

        self.v.push(right);
        (leftix, pivot.unwrap(), rightix)
    }

    /// Returns true if the given node is a leaf (doesn't have children).
    fn is_leaf(&self, node: usize) -> bool {
        self.v[node].children.iter().all(|c| c.is_none())
    }

    /// Insert a value into the BTree.
    pub fn insert(&mut self, value: T) -> bool {
        if let Some((l, pivot, r)) = self.insert_into(self.root, value) {
            // Split root
            let mut newroot = Node::new();
            let rootix = self.v.len();
            newroot.children[0] = Some(l);
            newroot.children[1] = Some(r);
            newroot.entries[0] = Some(pivot);

            self.root = rootix;

            self.v.push(newroot);
            true
        } else {
            true
        }
    }

    /// Insert value recursively. If a split occurs, returns (left, pivot, right) tuple.
    fn insert_into(&mut self, node: usize, value: T) -> Option<(usize, T, usize)> {
        // BTree initialized?
        if self.v.len() == 0 {
            self.v.push(Node::new());
            self.insert_inline(self.root, value, None, None);
            return None;
        }
        // Leaf node with free space? We are finished.
        if self.is_leaf(node) && self.free(node) >= 1 {
            self.insert_inline(node, value, None, None);
            return None;
        } else if self.is_leaf(node) && self.free(node) < 1 {
            // Leaf node but full - needs to be split.
            let (l, pivot, r) = self.split(node);

            // Insert value into left or right subtree.
            match Cmp::cmp(&value, &pivot) {
                Ordering::Less => assert!(self.insert_into(l, value).is_none()),
                Ordering::Greater => assert!(self.insert_into(r, value).is_none()),
                _ => panic!("Attempting to insert duplicate element"),
            };
            // Return fragments upwards to be inserted.
            return Some((l, pivot, r));
        } else {
            // Descend into node.

            let mut split = None;
            // Find correct child to descend into.
            for i in 0..ORDER {
                if i < ORDER - 1 {
                    if let Some(el) = self.get(node, i) {
                        match Cmp::cmp(&value, el) {
                            Ordering::Less => {
                                split = self.insert_into(self.get_child(node, i).unwrap(), value);
                                break;
                            }
                            Ordering::Equal => {
                                panic!("Attempting to insert duplicate element {:?}", value)
                            }
                            Ordering::Greater => continue,
                        }
                    } else {
                        if let Some(ch) = self.get_child(node, i) {
                            split = self.insert_into(*ch, value);
                            break;
                        }
                        panic!(
                            "Internal node missing child? {} => {:?}",
                            node, self.v[node]
                        );
                    }
                } else {
                    if let Some(ch) = self.get_child(node, i) {
                        split = self.insert_into(*ch, value);
                        break;
                    } else {
                        let (l, pivot, r) = self.split(node);

                        // Insert value into left or right subtree.
                        match Cmp::cmp(&value, &pivot) {
                            Ordering::Less => assert!(self.insert_into(l, value).is_none()),
                            Ordering::Greater => assert!(self.insert_into(r, value).is_none()),
                            _ => panic!("Attempting to insert duplicate element"),
                        };

                        return Some((l, pivot, r));
                    }
                }
            }

            // Merge split if one happened below us.
            if let Some((l, pivot, r)) = split {
                if self.free(node) >= 1 {
                    // We can insert the split here.
                    self.insert_inline(node, pivot, Some(l), Some(r));
                    return None;
                } else {
                    // Split this node too.
                    let (nl, npivot, nr) = self.split(node);
                    match pivot.cmp(&npivot) {
                        Ordering::Less => self.insert_inline(nl, pivot, Some(l), Some(r)),
                        Ordering::Greater => self.insert_inline(nr, pivot, Some(l), Some(r)),
                        _ => panic!("Attempting to insert duplicate element"),
                    };
                    return Some((nl, npivot, nr));
                }
            } else {
                return None;
            }
        }
    }
}
impl<T: std::fmt::Debug, Cmp: Compare<T>> BTree<T, Cmp> {
    fn debug_print(&self) -> String {
        let mut t = TreeBuilder::new(format!("<{}>", self.root));
        self.print_subtree(&mut t, self.root);
        let mut o: Vec<u8> = Vec::new();
        ptree::write_tree(&t.build(), &mut o);
        String::from_utf8(o).unwrap()
    }
    fn print_subtree(&self, tb: &mut TreeBuilder, node: usize) {
        let node = &self.v[node];
        for i in 0..ORDER {
            if let Some(ch) = node.children[i] {
                self.print_subtree(tb.begin_child(format!("<{}>", ch)), ch);
                tb.end_child();
            }
            if i < ORDER - 1 {
                tb.add_empty_child(format!("{:?}", node.entries[i]));
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::BTree;
    use rand::{rngs::StdRng, RngCore, SeedableRng};
    #[test]
    fn basic_test() {
        let mut bt = BTree::new(16);

        let ns = &[
            15, 2, 21, 35, 4, 62, 31, 27, 88, 1, 12, 45, 11, 16, 23, 20, 22, 24, 25, 26, 28, 29,
            30, 31, 32,
        ];

        for n in ns {
            bt.insert(*n);
        }
        for (i, e) in bt.v.iter().enumerate() {
            println!("{} {:?}", i, e);
        }
        assert_eq!(bt.count(), ns.len());
    }

    #[test]
    fn randomized_all_reachable() {
        time_test::time_test!("10 x 100 inserts");
        for seed in &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] {
            let mut rng = StdRng::seed_from_u64(*seed);
            let N = 1000;
            let mut bt = BTree::new(N / 2);
            for i in 0..N {
                let rn = rng.next_u32();
                bt.insert(rn);
                if i + 1 != bt.count() {
                    println!("inserting {}", rn);
                    println!("seed = {}\n{}", seed, bt.debug_print());
                }
                assert_eq!(i + 1, bt.count());
            }
            println!("{}", bt.debug_print());
        }
    }

    #[test]
    fn test_find() {
        time_test::time_test!("1000 x insert/find");
        let mut rng = StdRng::seed_from_u64(1);
        let N = 1000;
        let mut items = Vec::with_capacity(N);
        let mut bt = BTree::new(N / 3);
        for i in 0..N {
            let rn = rng.next_u32();
            bt.insert(rn);
            items.push(rn);

            for rn in items.iter() {
                assert_eq!(bt.find(rn), Some(rn));
            }
        }

        for (i, rn) in items.iter().enumerate() {
            assert_eq!(bt.find(rn), Some(rn));
        }
    }

    #[test]
    fn test_find_sequential() {
        time_test::time_test!("1000 x insert/find sequential");
        let N = 1000;
        let mut bt = BTree::new(N / 3);
        for i in 0..N {
            bt.insert(i);
        }

        for i in 0..N {
            assert_eq!(bt.find(&i), Some(&i));
        }
    }

    #[test]
    fn randomized_stdbtree_test() {
        time_test::time_test!("10 x 100 inserts");
        for seed in &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] {
            let mut rng = StdRng::seed_from_u64(*seed);
            let N = 1000;
            let mut bt = std::collections::BTreeSet::new();
            for i in 0..N {
                let rn = rng.next_u32();
                bt.insert(rn);
                assert_eq!(i + 1, bt.len());
            }
        }
    }
}