Mercurial > lbo > hg > localmr
changeset 67:5bfe21949e22
Add option to do case-insensitive grouping in reduce phase
author | Lewin Bormann <lbo@spheniscida.de> |
---|---|
date | Sat, 06 Feb 2016 17:07:36 +0000 |
parents | 9d7c1468f1b2 |
children | edbfc8c3ff56 |
files | src/parameters.rs |
diffstat | 1 files changed, 13 insertions(+), 6 deletions(-) [+] |
line wrap: on
line diff
--- a/src/parameters.rs Sat Feb 06 00:08:38 2016 +0000 +++ b/src/parameters.rs Sat Feb 06 17:07:36 2016 +0000 @@ -10,7 +10,8 @@ pub map_partition_size: usize, - pub reduce_group_size: usize, + pub reduce_group_prealloc_size: usize, + pub reduce_group_insensitive: bool, // Internal parameters pub shard_id: usize, @@ -23,7 +24,8 @@ mappers: 4, reducers: 4, map_partition_size: 100 * 1024 * 1024, - reduce_group_size: 1, + reduce_group_prealloc_size: 1, + reduce_group_insensitive: false, shard_id: 0, } } @@ -62,12 +64,17 @@ self } - /// How big are the groups of keys in the reduce phase expected to be? (used for pre-allocating + /// prealloc_size: How big are the groups of keys in the reduce phase expected to be? (used for pre-allocating /// buffers) + /// Default 1. /// - /// Default 1. - pub fn set_reduce_group_size(mut self, size: usize) -> MRParameters { - self.reduce_group_size = size; + /// insensitive: Whether to group strings together that differ in case. + /// BUG: This will not work correctly until the map phase delivers outputs in the correct order, i.e. + /// dictionary order. The default Ord implementation for String treats lower and upper case + /// very differently. Default: false. + pub fn set_reduce_group_opts(mut self, prealloc_size: usize, insensitive: bool) -> MRParameters { + self.reduce_group_prealloc_size = prealloc_size; + self.reduce_group_insensitive = insensitive; self }