changeset 112:690a9fb49ba1

Remove unnecessary sort methods; unify on dictionary sort
author Lewin Bormann <lbo@spheniscida.de>
date Wed, 15 Jun 2016 22:04:18 +0200
parents 7924ba557e6c
children 02ee73a6152f
files src/formats/lines.rs src/phases/output.rs src/shard_merge.rs src/sort.rs
diffstat 4 files changed, 13 insertions(+), 132 deletions(-) [+]
line wrap: on
line diff
--- a/src/formats/lines.rs	Wed Jun 15 21:38:02 2016 +0200
+++ b/src/formats/lines.rs	Wed Jun 15 22:04:18 2016 +0200
@@ -110,7 +110,7 @@
     /// Use either a path like `/a/b/c/` to generate files in a directory
     /// or `/a/b/c/file_prefix_` to create files with that prefix.
     pub fn new_to_files() -> LinesSinkGenerator {
-        LinesSinkGenerator { }
+        LinesSinkGenerator {}
     }
 }
 
--- a/src/phases/output.rs	Wed Jun 15 21:38:02 2016 +0200
+++ b/src/phases/output.rs	Wed Jun 15 22:04:18 2016 +0200
@@ -27,9 +27,9 @@
 }
 
 pub fn open_reduce_inputs(location: &String,
-                      partitions: usize,
-                      shard: usize)
-                      -> Vec<RecordReadIterator<WriteLogReader>> {
+                          partitions: usize,
+                          shard: usize)
+                          -> Vec<RecordReadIterator<WriteLogReader>> {
     let mut inputs = Vec::new();
 
     for part in 0..partitions {
--- a/src/shard_merge.rs	Wed Jun 15 21:38:02 2016 +0200
+++ b/src/shard_merge.rs	Wed Jun 15 22:04:18 2016 +0200
@@ -7,8 +7,6 @@
 use std::cmp::{Ord, Ordering};
 use std::iter;
 
-use sort;
-
 /// See module description.
 /// This type uses dynamic instead of static dispatch because it realizes an arbitrary structure
 /// and can therefore not work with a single type signature.
@@ -18,7 +16,6 @@
 
     left_peeked: Option<T>,
     right_peeked: Option<T>,
-    comparer: sort::Comparer<T>,
 }
 
 impl<'a, T: Ord + Clone> Iterator for ShardMergeIterator<'a, T> {
@@ -47,7 +44,7 @@
                 return r;
             }
             (Some(l), Some(r)) => {
-                let cmp = (self.comparer)(&l, &r);
+                let cmp = l.cmp(&r);
                 if cmp == Ordering::Less || cmp == Ordering::Equal {
                     self.left_peeked = None;
                     return Some(l);
@@ -69,7 +66,6 @@
             right: Box::new(iter::empty()),
             left_peeked: None,
             right_peeked: None,
-            comparer: sort::default_generic_compare,
         }
     }
 
@@ -78,29 +74,17 @@
         where T: 'a,
               It: 'a
     {
-        ShardMergeIterator::_build(sources, None)
-    }
-
-    pub fn build_with_cmp<It: Iterator<Item = T>, ItIt: Iterator<Item = It>>
-        (sources: &mut ItIt,
-         cmp: sort::Comparer<T>)
-         -> ShardMergeIterator<'a, T>
-        where T: 'a,
-              It: 'a
-    {
-        ShardMergeIterator::_build(sources, Some(cmp))
+        ShardMergeIterator::_build(sources)
     }
 
     /// Takes multiple iterators of type It and generates one ShardedMergeIterator..
     /// (yes, iterator over a collection of iterators).
-    fn _build<It: Iterator<Item = T>, ItIt: Iterator<Item = It>>(sources: &mut ItIt,
-                                                                 cmp_o: Option<sort::Comparer<T>>)
+    fn _build<It: Iterator<Item = T>, ItIt: Iterator<Item = It>>(sources: &mut ItIt)
                                                                  -> ShardMergeIterator<'a, T>
         where T: 'a,
               It: 'a
     {
         let mut merged: Vec<ShardMergeIterator<T>> = Vec::new();
-        let cmp_fn = cmp_o.unwrap_or(sort::default_generic_compare);
 
         // Initial merging: Merge pairs of input iterators together.
         loop {
@@ -114,7 +98,6 @@
                     merged.push(ShardMergeIterator {
                         left: Box::new(src1),
                         right: Box::new(iter::empty()),
-                        comparer: cmp_fn,
                         ..ShardMergeIterator::default()
                     })
                 }
@@ -122,7 +105,6 @@
                     merged.push(ShardMergeIterator {
                         left: Box::new(src1),
                         right: Box::new(src),
-                        comparer: cmp_fn,
                         ..ShardMergeIterator::default()
                     })
                 }
@@ -130,31 +112,24 @@
         }
 
         // Recursively build the merge tree from the leaves.
-        ShardMergeIterator::merge(merged, cmp_fn)
+        ShardMergeIterator::merge(merged)
     }
 
     /// Merge multiple ShardMergeIterators, recursively (meaning it will result in a more or less
     /// balanced merge sort tree).
-    fn merge(mut its: Vec<ShardMergeIterator<'a, T>>,
-             cmp: sort::Comparer<T>)
-             -> ShardMergeIterator<'a, T>
+    fn merge(mut its: Vec<ShardMergeIterator<'a, T>>) -> ShardMergeIterator<'a, T>
         where T: 'a
     {
         if its.len() == 0 {
             ShardMergeIterator::default()
         } else if its.len() == 1 {
-            ShardMergeIterator {
-                left: Box::new(its.remove(0)),
-                comparer: cmp,
-                ..ShardMergeIterator::default()
-            }
+            ShardMergeIterator { left: Box::new(its.remove(0)), ..ShardMergeIterator::default() }
         } else if its.len() == 2 {
             let it1 = its.remove(0);
             let it2 = its.remove(0);
             ShardMergeIterator {
                 left: Box::new(it1),
                 right: Box::new(it2),
-                comparer: cmp,
                 ..ShardMergeIterator::default()
             }
         } else {
@@ -162,9 +137,8 @@
             let split_at = its.len() / 2;
             let right = its.split_off(split_at);
             ShardMergeIterator {
-                left: Box::new(ShardMergeIterator::merge(its, cmp)),
-                right: Box::new(ShardMergeIterator::merge(right, cmp)),
-                comparer: cmp,
+                left: Box::new(ShardMergeIterator::merge(its)),
+                right: Box::new(ShardMergeIterator::merge(right)),
                 ..ShardMergeIterator::default()
             }
         }
@@ -220,7 +194,6 @@
     }
 
     use formats::lines;
-    use sort;
     use std::fmt;
     use std::io::Write;
 
@@ -234,8 +207,7 @@
             files.push(lines::new_from_file(&name).unwrap());
         }
 
-        let merge_it = ShardMergeIterator::build_with_cmp(&mut files.into_iter(),
-                                                          sort::dict_string_compare);
+        let merge_it = ShardMergeIterator::build(&mut files.into_iter());
         let mut outfile = lines::LinesWriter::new_to_file(&String::from("testdata/all_sorted.txt"))
             .unwrap();
 
--- a/src/sort.rs	Wed Jun 15 21:38:02 2016 +0200
+++ b/src/sort.rs	Wed Jun 15 22:04:18 2016 +0200
@@ -36,46 +36,6 @@
     }
 }
 
-/// Compares a with b in dictionary order (case insensitive)
-#[inline]
-pub fn sane_string_compare(a: &String, b: &String) -> Ordering {
-    let (mut charsa, mut charsb) = (a.chars(), b.chars());
-    loop {
-        match (charsa.next(), charsb.next()) {
-            (None, None) => return Ordering::Equal,
-            (_, None) => return Ordering::Greater,
-            (None, _) => return Ordering::Less,
-            (Some(ca), Some(cb)) => {
-                let cmp = sane_char_compare(ca, cb);
-                if cmp != Ordering::Equal {
-                    return cmp;
-                } else {
-                    continue;
-                }
-            }
-        }
-    }
-}
-
-#[inline]
-fn sane_char_compare(a: char, b: char) -> Ordering {
-    use std::ascii::AsciiExt;
-    // denormalize case to lower case
-    let cmp = a.to_ascii_lowercase().cmp(&b.to_ascii_lowercase());
-
-    // Handle the case of a and b being the same letter but with different case.
-    // Native: 'B' < 'a'; we want: 'B' > 'a'
-    if cmp == Ordering::Equal {
-        match a.cmp(&b) {
-            Ordering::Equal => Ordering::Equal,  // actually same character
-            Ordering::Less => Ordering::Greater, // 'B' > 'a'!
-            Ordering::Greater => Ordering::Less, // 'a' < 'B'!
-        }
-    } else {
-        cmp
-    }
-}
-
 #[inline]
 fn dict_char_compare(a: char, b: char) -> Ordering {
     use std::ascii::AsciiExt;
@@ -116,54 +76,3 @@
         dict_string_compare(a, b)
     }
 }
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use std::cmp::Ordering;
-
-    #[test]
-    fn test_sane_string_compare() {
-        let cnv = String::from;
-        let s1 = &cnv("");
-        let s2 = &cnv("0abc");
-        let s3 = &cnv("123");
-        let s4 = &cnv("abc");
-        let s5 = &cnv("Abc");
-        let s6 = &cnv("ABC");
-        let s7 = &cnv("aBC");
-
-        assert_eq!(sane_string_compare(s1, s2), Ordering::Less);
-        assert_eq!(sane_string_compare(s2, s3), Ordering::Less);
-        assert_eq!(sane_string_compare(s3, s2), Ordering::Greater);
-        assert_eq!(sane_string_compare(s2, s2), Ordering::Equal);
-        assert_eq!(sane_string_compare(s4, s5), Ordering::Less);
-        assert_eq!(sane_string_compare(s5, s6), Ordering::Less);
-        assert_eq!(sane_string_compare(s6, s7), Ordering::Greater);
-    }
-
-    #[inline]
-    fn bogus_fn(o: Ordering) -> bool {
-        if o == Ordering::Equal {
-            panic!("bogus panic")
-        }
-        true
-    }
-
-    // Slow test!
-    // #[test]
-    fn bench_sane_string_compare() {
-        let cnv = String::from;
-        let s1 = &cnv("");
-        let s2 = &cnv("0abc");
-        let s3 = &cnv("123");
-        let s4 = &cnv("abc");
-        let s5 = &cnv("Abc");
-
-        for _ in 0..50000000 {
-            bogus_fn(sane_string_compare(s1, s2));
-            bogus_fn(sane_string_compare(s4, s3));
-            bogus_fn(sane_string_compare(s4, s5));
-        }
-    }
-}