changeset 67:5bfe21949e22

Add option to do case-insensitive grouping in reduce phase
author Lewin Bormann <lbo@spheniscida.de>
date Sat, 06 Feb 2016 17:07:36 +0000
parents 9d7c1468f1b2
children edbfc8c3ff56
files src/parameters.rs
diffstat 1 files changed, 13 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/src/parameters.rs	Sat Feb 06 00:08:38 2016 +0000
+++ b/src/parameters.rs	Sat Feb 06 17:07:36 2016 +0000
@@ -10,7 +10,8 @@
 
     pub map_partition_size: usize,
 
-    pub reduce_group_size: usize,
+    pub reduce_group_prealloc_size: usize,
+    pub reduce_group_insensitive: bool,
 
     // Internal parameters
     pub shard_id: usize,
@@ -23,7 +24,8 @@
             mappers: 4,
             reducers: 4,
             map_partition_size: 100 * 1024 * 1024,
-            reduce_group_size: 1,
+            reduce_group_prealloc_size: 1,
+            reduce_group_insensitive: false,
             shard_id: 0,
         }
     }
@@ -62,12 +64,17 @@
         self
     }
 
-    /// How big are the groups of keys in the reduce phase expected to be? (used for pre-allocating
+    /// prealloc_size: How big are the groups of keys in the reduce phase expected to be? (used for pre-allocating
     /// buffers)
+    /// Default 1.
     ///
-    /// Default 1.
-    pub fn set_reduce_group_size(mut self, size: usize) -> MRParameters {
-        self.reduce_group_size = size;
+    /// insensitive: Whether to group together strings that differ only in case.
+    /// BUG: This will not work correctly until the map phase delivers outputs in dictionary
+    /// order. The default Ord implementation for String compares byte-wise, so every uppercase
+    /// ASCII letter sorts before every lowercase one. Default: false.
+    pub fn set_reduce_group_opts(mut self, prealloc_size: usize, insensitive: bool) -> MRParameters {
+        self.reduce_group_prealloc_size = prealloc_size;
+        self.reduce_group_insensitive = insensitive;
         self
     }