Mercurial > lbo > hg > localmr
changeset 52:6a51cc42d1a6
Move sort/comparison functions into own module
author | Lewin Bormann <lbo@spheniscida.de> |
---|---|
date | Fri, 05 Feb 2016 21:17:34 +0000 |
parents | 8a2538cbac8a |
children | 90b955067a22 |
files | src/lib.rs src/shard_merge.rs src/sort.rs |
diffstat | 3 files changed, 100 insertions(+), 94 deletions(-) [+] |
line wrap: on
line diff
--- a/src/lib.rs Wed Feb 03 20:50:34 2016 +0000 +++ b/src/lib.rs Fri Feb 05 21:17:34 2016 +0000 @@ -9,7 +9,7 @@ pub mod parameters; pub mod record_types; pub mod shard_merge; - +pub mod sort; #[test] fn it_works() {}
--- a/src/shard_merge.rs Wed Feb 03 20:50:34 2016 +0000 +++ b/src/shard_merge.rs Fri Feb 05 21:17:34 2016 +0000 @@ -4,14 +4,11 @@ #![allow(dead_code)] +use sort; use std::cmp::{Ord, Ordering}; use std::iter; -/// Function type to be used as custom compare function -/// (rust's standard String comparison is based on ASCII values, not dictionary order) -pub type Comparer<T> = fn(a: &T, b: &T) -> Ordering; - /// See module description. /// This type uses dynamic instead of static dispatch because it realizes an arbitrary structure /// and can therefore not work with a single type signature. @@ -21,7 +18,7 @@ left_peeked: Option<T>, right_peeked: Option<T>, - comparer: Option<Comparer<T>>, + comparer: Option<sort::Comparer<T>>, } impl<'a, T: Ord + Clone> Iterator for ShardMergeIterator<'a, T> { @@ -81,14 +78,14 @@ ShardMergeIterator::_build(sources, None) } - pub fn build_with_cmp<It: Iterator<Item = T>, ItIt: Iterator<Item = It>>(sources: &mut ItIt, cmp: Comparer<T>) -> ShardMergeIterator<'a, T> + pub fn build_with_cmp<It: Iterator<Item = T>, ItIt: Iterator<Item = It>>(sources: &mut ItIt, cmp: sort::Comparer<T>) -> ShardMergeIterator<'a, T> where T: 'a, It: 'a { ShardMergeIterator::_build(sources, Some(cmp)) } /// Takes multiple iterators of type It and generates one ShardedMergeIterator.. /// (yes, iterator over a collection of iterators). - fn _build<It: Iterator<Item = T>, ItIt: Iterator<Item = It>>(sources: &mut ItIt, cmp: Option<Comparer<T>>) + fn _build<It: Iterator<Item = T>, ItIt: Iterator<Item = It>>(sources: &mut ItIt, cmp: Option<sort::Comparer<T>>) -> ShardMergeIterator<'a, T> where T: 'a, It: 'a @@ -135,7 +132,7 @@ /// Merge multiple ShardMergeIterators, recursively (meaning it will result in a more or less /// balanced merge sort tree). - fn merge(mut its: Vec<ShardMergeIterator<'a, T>>, cmp: Option<Comparer<T>>) -> ShardMergeIterator<'a, T> + fn merge(mut its: Vec<ShardMergeIterator<'a, T>>, cmp: Option<sort::Comparer<T>>) -> ShardMergeIterator<'a, T> where T: 'a { if its.len() == 0 { @@ -165,75 +162,6 @@ } } -#[inline] -fn sane_char_compare(a: char, b: char) -> Ordering { - use std::ascii::AsciiExt; - // denormalize case to lower case - let cmp = a.to_ascii_lowercase().cmp(&b.to_ascii_lowercase()); - - // Handle the case of a and b being the same letter but with different case. - // Native: 'B' < 'a'; we want: 'B' > 'a' - if cmp == Ordering::Equal { - match a.cmp(&b) { - Ordering::Equal => Ordering::Equal, // actually same character - Ordering::Less => Ordering::Greater, // 'B' > 'a'! - Ordering::Greater => Ordering::Less, // 'a' < 'B'! - } - } else { - cmp - } -} - -#[inline] -fn dict_char_compare(a: char, b: char) -> Ordering { - use std::ascii::AsciiExt; - // denormalize case to lower case - a.to_ascii_lowercase().cmp(&b.to_ascii_lowercase()) -} - -/// Compares a with b in a totally case insensitive manner -/// (like coreutil sort) -#[inline] -fn dict_string_compare(a: &String, b: &String) -> Ordering { - let (mut charsa, mut charsb) = (a.chars(), b.chars()); - loop { - match (charsa.next(), charsb.next()) { - (None, None) => return Ordering::Equal, - (_, None) => return Ordering::Greater, - (None, _) => return Ordering::Less, - (Some(ca), Some(cb)) => { - let cmp = dict_char_compare(ca, cb); - if cmp != Ordering::Equal { - return cmp; - } else { - continue - } - } - } - } -} - -/// Compares a with b in dictionary order (case insensitive) -#[inline] -fn sane_string_compare(a: &String, b: &String) -> Ordering { - let (mut charsa, mut charsb) = (a.chars(), b.chars()); - loop { - match (charsa.next(), charsb.next()) { - (None, None) => return Ordering::Equal, - (_, None) => return Ordering::Greater, - (None, _) => return Ordering::Less, - (Some(ca), Some(cb)) => { - let cmp = sane_char_compare(ca, cb); - if cmp != Ordering::Equal { - return cmp; - } else { - continue - } - } - } - } -} - #[cfg(test)] mod tests { use std::vec; @@ -296,7 +224,7 @@ files.push(lines::new_from_file(&name).unwrap()); } - let merge_it = ShardMergeIterator::build_with_cmp(&mut files.into_iter(), dict_string_compare); + let merge_it = ShardMergeIterator::build_with_cmp(&mut files.into_iter(), sort::dict_string_compare); let mut outfile = lines::LinesWriter::new_to_file(&String::from("testdata/all_sorted.txt")).unwrap(); for line in merge_it { @@ -305,14 +233,14 @@ } use std::cmp::Ordering; - use super::{sane_char_compare, sane_string_compare, dict_string_compare}; + use sort; #[test] fn test_sane_char_compare() { - assert_eq!(sane_char_compare('a', 'B'), Ordering::Less); - assert_eq!(sane_char_compare('a', 'a'), Ordering::Equal); - assert_eq!(sane_char_compare('A', 'a'), Ordering::Greater); - assert_eq!(sane_char_compare('B', 'a'), Ordering::Greater); + assert_eq!(sort::sane_char_compare('a', 'B'), Ordering::Less); + assert_eq!(sort::sane_char_compare('a', 'a'), Ordering::Equal); + assert_eq!(sort::sane_char_compare('A', 'a'), Ordering::Greater); + assert_eq!(sort::sane_char_compare('B', 'a'), Ordering::Greater); } #[test] @@ -326,13 +254,13 @@ let s6 = &cnv("ABC"); let s7 = &cnv("aBC"); - assert_eq!(sane_string_compare(s1, s2), Ordering::Less); - assert_eq!(sane_string_compare(s2, s3), Ordering::Less); - assert_eq!(sane_string_compare(s3, s2), Ordering::Greater); - assert_eq!(sane_string_compare(s2, s2), Ordering::Equal); - assert_eq!(sane_string_compare(s4, s5), Ordering::Less); - assert_eq!(sane_string_compare(s5, s6), Ordering::Less); - assert_eq!(sane_string_compare(s6, s7), Ordering::Greater); + assert_eq!(sort::sane_string_compare(s1, s2), Ordering::Less); + assert_eq!(sort::sane_string_compare(s2, s3), Ordering::Less); + assert_eq!(sort::sane_string_compare(s3, s2), Ordering::Greater); + assert_eq!(sort::sane_string_compare(s2, s2), Ordering::Equal); + assert_eq!(sort::sane_string_compare(s4, s5), Ordering::Less); + assert_eq!(sort::sane_string_compare(s5, s6), Ordering::Less); + assert_eq!(sort::sane_string_compare(s6, s7), Ordering::Greater); } #[inline] @@ -354,9 +282,9 @@ let s5 = &cnv("Abc"); for _ in 0..50000000 { - bogus_fn(sane_string_compare(s1, s2)); - bogus_fn(sane_string_compare(s4, s3)); - bogus_fn(sane_string_compare(s4, s5)); + bogus_fn(sort::sane_string_compare(s1, s2)); + bogus_fn(sort::sane_string_compare(s4, s3)); + bogus_fn(sort::sane_string_compare(s4, s5)); } } }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/sort.rs Fri Feb 05 21:17:34 2016 +0000 @@ -0,0 +1,78 @@ +//! Sorting/comparison functions of various sorts. + +#![allow(dead_code)] + +use std::cmp::Ordering; + +/// Function type to be used as custom compare function +/// (rust's standard String comparison is based on ASCII values, not dictionary order) +pub type Comparer<T> = fn(a: &T, b: &T) -> Ordering; + +#[inline] +fn sane_char_compare(a: char, b: char) -> Ordering { + use std::ascii::AsciiExt; + // denormalize case to lower case + let cmp = a.to_ascii_lowercase().cmp(&b.to_ascii_lowercase()); + + // Handle the case of a and b being the same letter but with different case. + // Native: 'B' < 'a'; we want: 'B' > 'a' + if cmp == Ordering::Equal { + match a.cmp(&b) { + Ordering::Equal => Ordering::Equal, // actually same character + Ordering::Less => Ordering::Greater, // 'B' > 'a'! + Ordering::Greater => Ordering::Less, // 'a' < 'B'! + } + } else { + cmp + } +} + +#[inline] +fn dict_char_compare(a: char, b: char) -> Ordering { + use std::ascii::AsciiExt; + // denormalize case to lower case + a.to_ascii_lowercase().cmp(&b.to_ascii_lowercase()) +} + +/// Compares a with b in a totally case insensitive manner +/// (like coreutil sort) +#[inline] +fn dict_string_compare(a: &String, b: &String) -> Ordering { + let (mut charsa, mut charsb) = (a.chars(), b.chars()); + loop { + match (charsa.next(), charsb.next()) { + (None, None) => return Ordering::Equal, + (_, None) => return Ordering::Greater, + (None, _) => return Ordering::Less, + (Some(ca), Some(cb)) => { + let cmp = dict_char_compare(ca, cb); + if cmp != Ordering::Equal { + return cmp; + } else { + continue + } + } + } + } +} + +/// Compares a with b in dictionary order (case insensitive) +#[inline] +fn sane_string_compare(a: &String, b: &String) -> Ordering { + let (mut charsa, mut charsb) = (a.chars(), b.chars()); + loop { + match (charsa.next(), charsb.next()) { + (None, None) => return Ordering::Equal, + (_, None) => return Ordering::Greater, + (None, _) => return Ordering::Less, + (Some(ca), Some(cb)) => { + let cmp = sane_char_compare(ca, cb); + if cmp != Ordering::Equal { + return cmp; + } else { + continue + } + } + } + } +}