Mercurial > lbo > hg > localmr
changeset 112:690a9fb49ba1
Remove unnecessary sort methods; unify on dictionary sort
author | Lewin Bormann <lbo@spheniscida.de> |
---|---|
date | Wed, 15 Jun 2016 22:04:18 +0200 |
parents | 7924ba557e6c |
children | 02ee73a6152f |
files | src/formats/lines.rs src/phases/output.rs src/shard_merge.rs src/sort.rs |
diffstat | 4 files changed, 13 insertions(+), 132 deletions(-) [+] |
line wrap: on
line diff
--- a/src/formats/lines.rs Wed Jun 15 21:38:02 2016 +0200 +++ b/src/formats/lines.rs Wed Jun 15 22:04:18 2016 +0200 @@ -110,7 +110,7 @@ /// Use either a path like `/a/b/c/` to generate files in a directory /// or `/a/b/c/file_prefix_` to create files with that prefix. pub fn new_to_files() -> LinesSinkGenerator { - LinesSinkGenerator { } + LinesSinkGenerator {} } }
--- a/src/phases/output.rs Wed Jun 15 21:38:02 2016 +0200 +++ b/src/phases/output.rs Wed Jun 15 22:04:18 2016 +0200 @@ -27,9 +27,9 @@ } pub fn open_reduce_inputs(location: &String, - partitions: usize, - shard: usize) - -> Vec<RecordReadIterator<WriteLogReader>> { + partitions: usize, + shard: usize) + -> Vec<RecordReadIterator<WriteLogReader>> { let mut inputs = Vec::new(); for part in 0..partitions {
--- a/src/shard_merge.rs Wed Jun 15 21:38:02 2016 +0200 +++ b/src/shard_merge.rs Wed Jun 15 22:04:18 2016 +0200 @@ -7,8 +7,6 @@ use std::cmp::{Ord, Ordering}; use std::iter; -use sort; - /// See module description. /// This type uses dynamic instead of static dispatch because it realizes an arbitrary structure /// and can therefore not work with a single type signature. @@ -18,7 +16,6 @@ left_peeked: Option<T>, right_peeked: Option<T>, - comparer: sort::Comparer<T>, } impl<'a, T: Ord + Clone> Iterator for ShardMergeIterator<'a, T> { @@ -47,7 +44,7 @@ return r; } (Some(l), Some(r)) => { - let cmp = (self.comparer)(&l, &r); + let cmp = l.cmp(&r); if cmp == Ordering::Less || cmp == Ordering::Equal { self.left_peeked = None; return Some(l); @@ -69,7 +66,6 @@ right: Box::new(iter::empty()), left_peeked: None, right_peeked: None, - comparer: sort::default_generic_compare, } } @@ -78,29 +74,17 @@ where T: 'a, It: 'a { - ShardMergeIterator::_build(sources, None) - } - - pub fn build_with_cmp<It: Iterator<Item = T>, ItIt: Iterator<Item = It>> - (sources: &mut ItIt, - cmp: sort::Comparer<T>) - -> ShardMergeIterator<'a, T> - where T: 'a, - It: 'a - { - ShardMergeIterator::_build(sources, Some(cmp)) + ShardMergeIterator::_build(sources) } /// Takes multiple iterators of type It and generates one ShardedMergeIterator.. /// (yes, iterator over a collection of iterators). - fn _build<It: Iterator<Item = T>, ItIt: Iterator<Item = It>>(sources: &mut ItIt, - cmp_o: Option<sort::Comparer<T>>) + fn _build<It: Iterator<Item = T>, ItIt: Iterator<Item = It>>(sources: &mut ItIt) -> ShardMergeIterator<'a, T> where T: 'a, It: 'a { let mut merged: Vec<ShardMergeIterator<T>> = Vec::new(); - let cmp_fn = cmp_o.unwrap_or(sort::default_generic_compare); // Initial merging: Merge pairs of input iterators together. loop { @@ -114,7 +98,6 @@ merged.push(ShardMergeIterator { left: Box::new(src1), right: Box::new(iter::empty()), - comparer: cmp_fn, ..ShardMergeIterator::default() }) } @@ -122,7 +105,6 @@ merged.push(ShardMergeIterator { left: Box::new(src1), right: Box::new(src), - comparer: cmp_fn, ..ShardMergeIterator::default() }) } @@ -130,31 +112,24 @@ } // Recursively build the merge tree from the leaves. - ShardMergeIterator::merge(merged, cmp_fn) + ShardMergeIterator::merge(merged) } /// Merge multiple ShardMergeIterators, recursively (meaning it will result in a more or less /// balanced merge sort tree). - fn merge(mut its: Vec<ShardMergeIterator<'a, T>>, - cmp: sort::Comparer<T>) - -> ShardMergeIterator<'a, T> + fn merge(mut its: Vec<ShardMergeIterator<'a, T>>) -> ShardMergeIterator<'a, T> where T: 'a { if its.len() == 0 { ShardMergeIterator::default() } else if its.len() == 1 { - ShardMergeIterator { - left: Box::new(its.remove(0)), - comparer: cmp, - ..ShardMergeIterator::default() - } + ShardMergeIterator { left: Box::new(its.remove(0)), ..ShardMergeIterator::default() } } else if its.len() == 2 { let it1 = its.remove(0); let it2 = its.remove(0); ShardMergeIterator { left: Box::new(it1), right: Box::new(it2), - comparer: cmp, ..ShardMergeIterator::default() } } else { @@ -162,9 +137,8 @@ let split_at = its.len() / 2; let right = its.split_off(split_at); ShardMergeIterator { - left: Box::new(ShardMergeIterator::merge(its, cmp)), - right: Box::new(ShardMergeIterator::merge(right, cmp)), - comparer: cmp, + left: Box::new(ShardMergeIterator::merge(its)), + right: Box::new(ShardMergeIterator::merge(right)), ..ShardMergeIterator::default() } } @@ -220,7 +194,6 @@ } use formats::lines; - use sort; use std::fmt; use std::io::Write; @@ -234,8 +207,7 @@ files.push(lines::new_from_file(&name).unwrap()); } - let merge_it = ShardMergeIterator::build_with_cmp(&mut files.into_iter(), - sort::dict_string_compare); + let merge_it = ShardMergeIterator::build(&mut files.into_iter()); let mut outfile = lines::LinesWriter::new_to_file(&String::from("testdata/all_sorted.txt")) .unwrap();
--- a/src/sort.rs Wed Jun 15 21:38:02 2016 +0200 +++ b/src/sort.rs Wed Jun 15 22:04:18 2016 +0200 @@ -36,46 +36,6 @@ } } -/// Compares a with b in dictionary order (case insensitive) -#[inline] -pub fn sane_string_compare(a: &String, b: &String) -> Ordering { - let (mut charsa, mut charsb) = (a.chars(), b.chars()); - loop { - match (charsa.next(), charsb.next()) { - (None, None) => return Ordering::Equal, - (_, None) => return Ordering::Greater, - (None, _) => return Ordering::Less, - (Some(ca), Some(cb)) => { - let cmp = sane_char_compare(ca, cb); - if cmp != Ordering::Equal { - return cmp; - } else { - continue; - } - } - } - } -} - -#[inline] -fn sane_char_compare(a: char, b: char) -> Ordering { - use std::ascii::AsciiExt; - // denormalize case to lower case - let cmp = a.to_ascii_lowercase().cmp(&b.to_ascii_lowercase()); - - // Handle the case of a and b being the same letter but with different case. - // Native: 'B' < 'a'; we want: 'B' > 'a' - if cmp == Ordering::Equal { - match a.cmp(&b) { - Ordering::Equal => Ordering::Equal, // actually same character - Ordering::Less => Ordering::Greater, // 'B' > 'a'! - Ordering::Greater => Ordering::Less, // 'a' < 'B'! - } - } else { - cmp - } -} - #[inline] fn dict_char_compare(a: char, b: char) -> Ordering { use std::ascii::AsciiExt; @@ -116,54 +76,3 @@ dict_string_compare(a, b) } } - -#[cfg(test)] -mod tests { - use super::*; - use std::cmp::Ordering; - - #[test] - fn test_sane_string_compare() { - let cnv = String::from; - let s1 = &cnv(""); - let s2 = &cnv("0abc"); - let s3 = &cnv("123"); - let s4 = &cnv("abc"); - let s5 = &cnv("Abc"); - let s6 = &cnv("ABC"); - let s7 = &cnv("aBC"); - - assert_eq!(sane_string_compare(s1, s2), Ordering::Less); - assert_eq!(sane_string_compare(s2, s3), Ordering::Less); - assert_eq!(sane_string_compare(s3, s2), Ordering::Greater); - assert_eq!(sane_string_compare(s2, s2), Ordering::Equal); - assert_eq!(sane_string_compare(s4, s5), Ordering::Less); - assert_eq!(sane_string_compare(s5, s6), Ordering::Less); - assert_eq!(sane_string_compare(s6, s7), Ordering::Greater); - } - - #[inline] - fn bogus_fn(o: Ordering) -> bool { - if o == Ordering::Equal { - panic!("bogus panic") - } - true - } - - // Slow test! - // #[test] - fn bench_sane_string_compare() { - let cnv = String::from; - let s1 = &cnv(""); - let s2 = &cnv("0abc"); - let s3 = &cnv("123"); - let s4 = &cnv("abc"); - let s5 = &cnv("Abc"); - - for _ in 0..50000000 { - bogus_fn(sane_string_compare(s1, s2)); - bogus_fn(sane_string_compare(s4, s3)); - bogus_fn(sane_string_compare(s4, s5)); - } - } -}