changeset 54:27f4cbaa0045

Merge outstanding head
author Lewin Bormann <lbo@spheniscida.de>
date Fri, 05 Feb 2016 21:24:38 +0000
parents 90b955067a22 (diff) a0647ae635e2 (current diff)
children 2e7fba6d7cad
files
diffstat 3 files changed, 128 insertions(+), 105 deletions(-) [+]
line wrap: on
line diff
--- a/src/lib.rs	Wed Feb 03 21:31:12 2016 +0000
+++ b/src/lib.rs	Fri Feb 05 21:24:38 2016 +0000
@@ -9,7 +9,7 @@
 pub mod parameters;
 pub mod record_types;
 pub mod shard_merge;
-
+pub mod sort;
 
 #[test]
 fn it_works() {}
--- a/src/shard_merge.rs	Wed Feb 03 21:31:12 2016 +0000
+++ b/src/shard_merge.rs	Fri Feb 05 21:24:38 2016 +0000
@@ -4,14 +4,11 @@
 
 #![allow(dead_code)]
 
+use sort;
 
 use std::cmp::{Ord, Ordering};
 use std::iter;
 
-/// Function type to be used as custom compare function
-/// (rust's standard String comparison is based on ASCII values, not dictionary order)
-pub type Comparer<T> = fn(a: &T, b: &T) -> Ordering;
-
 /// See module description.
 /// This type uses dynamic instead of static dispatch because it realizes an arbitrary structure
 /// and can therefore not work with a single type signature.
@@ -21,7 +18,7 @@
 
     left_peeked: Option<T>,
     right_peeked: Option<T>,
-    comparer: Option<Comparer<T>>,
+    comparer: Option<sort::Comparer<T>>,
 }
 
 impl<'a, T: Ord + Clone> Iterator for ShardMergeIterator<'a, T> {
@@ -76,20 +73,29 @@
         }
     }
 
-    pub fn build<It: Iterator<Item = T>, ItIt: Iterator<Item = It>>(sources: &mut ItIt) -> ShardMergeIterator<'a, T>
-        where T: 'a, It: 'a {
-            ShardMergeIterator::_build(sources, None)
+    pub fn build<It: Iterator<Item = T>, ItIt: Iterator<Item = It>>(sources: &mut ItIt)
+                                                                    -> ShardMergeIterator<'a, T>
+        where T: 'a,
+              It: 'a
+    {
+        ShardMergeIterator::_build(sources, None)
     }
-    
-    pub fn build_with_cmp<It: Iterator<Item = T>, ItIt: Iterator<Item = It>>(sources: &mut ItIt, cmp: Comparer<T>) -> ShardMergeIterator<'a, T>
-        where T: 'a, It: 'a {
-            ShardMergeIterator::_build(sources, Some(cmp))
+
+    pub fn build_with_cmp<It: Iterator<Item = T>, ItIt: Iterator<Item = It>>
+        (sources: &mut ItIt,
+         cmp: sort::Comparer<T>)
+         -> ShardMergeIterator<'a, T>
+        where T: 'a,
+              It: 'a
+    {
+        ShardMergeIterator::_build(sources, Some(cmp))
     }
 
     /// Takes multiple iterators of type It and generates one ShardedMergeIterator..
     /// (yes, iterator over a collection of iterators).
-    fn _build<It: Iterator<Item = T>, ItIt: Iterator<Item = It>>(sources: &mut ItIt, cmp: Option<Comparer<T>>)
-                                                                    -> ShardMergeIterator<'a, T>
+    fn _build<It: Iterator<Item = T>, ItIt: Iterator<Item = It>>(sources: &mut ItIt,
+                                                                 cmp: Option<sort::Comparer<T>>)
+                                                                 -> ShardMergeIterator<'a, T>
         where T: 'a,
               It: 'a
     {
@@ -129,19 +135,25 @@
     fn compare(&self, a: &T, b: &T) -> Ordering {
         match self.comparer {
             None => a.cmp(b),
-            Some(f) => f(a, b)
+            Some(f) => f(a, b),
         }
     }
 
     /// Merge multiple ShardMergeIterators, recursively (meaning it will result in a more or less
     /// balanced merge sort tree).
-    fn merge(mut its: Vec<ShardMergeIterator<'a, T>>, cmp: Option<Comparer<T>>) -> ShardMergeIterator<'a, T>
+    fn merge(mut its: Vec<ShardMergeIterator<'a, T>>,
+             cmp: Option<sort::Comparer<T>>)
+             -> ShardMergeIterator<'a, T>
         where T: 'a
     {
         if its.len() == 0 {
             ShardMergeIterator::default()
         } else if its.len() == 1 {
-            ShardMergeIterator { left: Box::new(its.remove(0)), comparer: cmp, ..ShardMergeIterator::default() }
+            ShardMergeIterator {
+                left: Box::new(its.remove(0)),
+                comparer: cmp,
+                ..ShardMergeIterator::default()
+            }
         } else if its.len() == 2 {
             let it1 = its.remove(0);
             let it2 = its.remove(0);
@@ -165,75 +177,6 @@
     }
 }
 
-#[inline]
-fn sane_char_compare(a: char, b: char) -> Ordering {
-    use std::ascii::AsciiExt;
-    // denormalize case to lower case
-    let cmp = a.to_ascii_lowercase().cmp(&b.to_ascii_lowercase());
-
-    // Handle the case of a and b being the same letter but with different case.
-    // Native: 'B' < 'a'; we want: 'B' > 'a'
-    if cmp == Ordering::Equal {
-        match a.cmp(&b) {
-            Ordering::Equal => Ordering::Equal,  // actually same character
-            Ordering::Less => Ordering::Greater, // 'B' > 'a'!
-            Ordering::Greater => Ordering::Less, // 'a' < 'B'!
-        }
-    } else {
-        cmp
-    }
-}
-
-#[inline]
-fn dict_char_compare(a: char, b: char) -> Ordering {
-    use std::ascii::AsciiExt;
-    // denormalize case to lower case
-    a.to_ascii_lowercase().cmp(&b.to_ascii_lowercase())
-}
-
-/// Compares a with b in a totally case insensitive manner
-/// (like coreutil sort)
-#[inline]
-fn dict_string_compare(a: &String, b: &String) -> Ordering {
-    let (mut charsa, mut charsb) = (a.chars(), b.chars());
-    loop {
-        match (charsa.next(), charsb.next()) {
-            (None, None) => return Ordering::Equal,
-            (_, None) => return Ordering::Greater,
-            (None, _) => return Ordering::Less,
-            (Some(ca), Some(cb)) => {
-                let cmp = dict_char_compare(ca, cb);
-                if cmp != Ordering::Equal {
-                    return cmp;
-                } else {
-                    continue
-                }
-            }
-        }
-    }
-}
-
-/// Compares a with b in dictionary order (case insensitive)
-#[inline]
-fn sane_string_compare(a: &String, b: &String) -> Ordering {
-    let (mut charsa, mut charsb) = (a.chars(), b.chars());
-    loop {
-        match (charsa.next(), charsb.next()) {
-            (None, None) => return Ordering::Equal,
-            (_, None) => return Ordering::Greater,
-            (None, _) => return Ordering::Less,
-            (Some(ca), Some(cb)) => {
-                let cmp = sane_char_compare(ca, cb);
-                if cmp != Ordering::Equal {
-                    return cmp;
-                } else {
-                    continue
-                }
-            }
-        }
-    }
-}
-
 #[cfg(test)]
 mod tests {
     use std::vec;
@@ -296,8 +239,10 @@
             files.push(lines::new_from_file(&name).unwrap());
         }
 
-        let merge_it = ShardMergeIterator::build_with_cmp(&mut files.into_iter(), dict_string_compare);
-        let mut outfile = lines::LinesWriter::new_to_file(&String::from("testdata/all_sorted.txt")).unwrap();
+        let merge_it = ShardMergeIterator::build_with_cmp(&mut files.into_iter(),
+                                                          sort::dict_string_compare);
+        let mut outfile = lines::LinesWriter::new_to_file(&String::from("testdata/all_sorted.txt"))
+                              .unwrap();
 
         for line in merge_it {
             let _ = outfile.write(line.as_bytes());
@@ -305,14 +250,14 @@
     }
 
     use std::cmp::Ordering;
-    use super::{sane_char_compare, sane_string_compare, dict_string_compare};
+    use sort;
 
     #[test]
     fn test_sane_char_compare() {
-        assert_eq!(sane_char_compare('a', 'B'), Ordering::Less);
-        assert_eq!(sane_char_compare('a', 'a'), Ordering::Equal);
-        assert_eq!(sane_char_compare('A', 'a'), Ordering::Greater);
-        assert_eq!(sane_char_compare('B', 'a'), Ordering::Greater);
+        assert_eq!(sort::sane_char_compare('a', 'B'), Ordering::Less);
+        assert_eq!(sort::sane_char_compare('a', 'a'), Ordering::Equal);
+        assert_eq!(sort::sane_char_compare('A', 'a'), Ordering::Greater);
+        assert_eq!(sort::sane_char_compare('B', 'a'), Ordering::Greater);
     }
 
     #[test]
@@ -326,13 +271,13 @@
         let s6 = &cnv("ABC");
         let s7 = &cnv("aBC");
 
-        assert_eq!(sane_string_compare(s1, s2), Ordering::Less);
-        assert_eq!(sane_string_compare(s2, s3), Ordering::Less);
-        assert_eq!(sane_string_compare(s3, s2), Ordering::Greater);
-        assert_eq!(sane_string_compare(s2, s2), Ordering::Equal);
-        assert_eq!(sane_string_compare(s4, s5), Ordering::Less);
-        assert_eq!(sane_string_compare(s5, s6), Ordering::Less);
-        assert_eq!(sane_string_compare(s6, s7), Ordering::Greater);
+        assert_eq!(sort::sane_string_compare(s1, s2), Ordering::Less);
+        assert_eq!(sort::sane_string_compare(s2, s3), Ordering::Less);
+        assert_eq!(sort::sane_string_compare(s3, s2), Ordering::Greater);
+        assert_eq!(sort::sane_string_compare(s2, s2), Ordering::Equal);
+        assert_eq!(sort::sane_string_compare(s4, s5), Ordering::Less);
+        assert_eq!(sort::sane_string_compare(s5, s6), Ordering::Less);
+        assert_eq!(sort::sane_string_compare(s6, s7), Ordering::Greater);
     }
 
     #[inline]
@@ -344,7 +289,7 @@
     }
 
     // Slow test!
-    //#[test]
+    // #[test]
     fn bench_sane_string_compare() {
         let cnv = String::from;
         let s1 = &cnv("");
@@ -354,9 +299,9 @@
         let s5 = &cnv("Abc");
 
         for _ in 0..50000000 {
-            bogus_fn(sane_string_compare(s1, s2));
-            bogus_fn(sane_string_compare(s4, s3));
-            bogus_fn(sane_string_compare(s4, s5));
+            bogus_fn(sort::sane_string_compare(s1, s2));
+            bogus_fn(sort::sane_string_compare(s4, s3));
+            bogus_fn(sort::sane_string_compare(s4, s5));
         }
     }
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/sort.rs	Fri Feb 05 21:24:38 2016 +0000
@@ -0,0 +1,78 @@
+//! Sorting/comparison functions of various sorts.
+
+#![allow(dead_code)]
+
+use std::cmp::Ordering;
+
+/// Function type to be used as custom compare function
+/// (rust's standard String comparison is based on ASCII values, not dictionary order)
+pub type Comparer<T> = fn(a: &T, b: &T) -> Ordering;
+
+#[inline]
+fn sane_char_compare(a: char, b: char) -> Ordering {
+    use std::ascii::AsciiExt;
+    // denormalize case to lower case
+    let cmp = a.to_ascii_lowercase().cmp(&b.to_ascii_lowercase());
+
+    // Handle the case of a and b being the same letter but with different case.
+    // Native: 'B' < 'a'; we want: 'B' > 'a'
+    if cmp == Ordering::Equal {
+        match a.cmp(&b) {
+            Ordering::Equal => Ordering::Equal,  // actually same character
+            Ordering::Less => Ordering::Greater, // 'B' > 'a'!
+            Ordering::Greater => Ordering::Less, // 'a' < 'B'!
+        }
+    } else {
+        cmp
+    }
+}
+
+#[inline]
+fn dict_char_compare(a: char, b: char) -> Ordering {
+    use std::ascii::AsciiExt;
+    // denormalize case to lower case
+    a.to_ascii_lowercase().cmp(&b.to_ascii_lowercase())
+}
+
+/// Compares a with b in a totally case insensitive manner
+/// (like coreutil sort)
+#[inline]
+fn dict_string_compare(a: &String, b: &String) -> Ordering {
+    let (mut charsa, mut charsb) = (a.chars(), b.chars());
+    loop {
+        match (charsa.next(), charsb.next()) {
+            (None, None) => return Ordering::Equal,
+            (_, None) => return Ordering::Greater,
+            (None, _) => return Ordering::Less,
+            (Some(ca), Some(cb)) => {
+                let cmp = dict_char_compare(ca, cb);
+                if cmp != Ordering::Equal {
+                    return cmp;
+                } else {
+                    continue;
+                }
+            }
+        }
+    }
+}
+
+/// Compares a with b in dictionary order (case insensitive)
+#[inline]
+fn sane_string_compare(a: &String, b: &String) -> Ordering {
+    let (mut charsa, mut charsb) = (a.chars(), b.chars());
+    loop {
+        match (charsa.next(), charsb.next()) {
+            (None, None) => return Ordering::Equal,
+            (_, None) => return Ordering::Greater,
+            (None, _) => return Ordering::Less,
+            (Some(ca), Some(cb)) => {
+                let cmp = sane_char_compare(ca, cb);
+                if cmp != Ordering::Equal {
+                    return cmp;
+                } else {
+                    continue;
+                }
+            }
+        }
+    }
+}