changeset 91:f451bcd4cecd

Add option to keep temp files
author Lewin Bormann <lbo@spheniscida.de>
date Sun, 07 Feb 2016 16:12:59 +0000
parents 9853a14b08fb
children 0a6149c1c052
files src/controller.rs src/parameters.rs
diffstat 2 files changed, 26 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/src/controller.rs	Sun Feb 07 15:29:39 2016 +0000
+++ b/src/controller.rs	Sun Feb 07 16:12:59 2016 +0000
@@ -60,6 +60,7 @@
         };
         controller.run_map(inp);
         controller.run_reduce(out);
+        controller.clean_up();
     }
 
     fn map_runner(mr: MR, params: MRParameters, inp: LinkedList<Record>) {
@@ -141,4 +142,18 @@
             }
         });
     }
+
+    fn clean_up(&self) {
+        use std::fs;
+        use std::fmt;
+
+        if !self.params.keep_temp_files {
+            for mpart in 0..self.map_partitions_run {
+                for rshard in 0..self.params.reducers {
+                    let name = fmt::format(format_args!("{}{}.{}", self.params.map_output_location, mpart, rshard));
+                    let _ = fs::remove_file(name);
+                }
+            }
+        }
+    }
 }
--- a/src/parameters.rs	Sun Feb 07 15:29:39 2016 +0000
+++ b/src/parameters.rs	Sun Feb 07 16:12:59 2016 +0000
@@ -14,6 +14,7 @@
     pub reduce_group_insensitive: bool,
 
     pub map_output_location: String,
+    pub keep_temp_files: bool,
     pub reduce_output_shard_prefix: String,
 
     // Internal parameters
@@ -30,6 +31,7 @@
             reduce_group_prealloc_size: 1,
             reduce_group_insensitive: false,
             map_output_location: String::from("map_intermediate_"),
+            keep_temp_files: false,
             reduce_output_shard_prefix: String::from("output_"),
             shard_id: 0,
         }
@@ -104,6 +106,15 @@
         self
     }
 
+    /// If this is set to true, intermediate files, such as outputs from the map phase,
+    /// will be kept.
+    ///
+    /// Default: false
+    pub fn keep_temp_files(mut self, keep: bool) -> MRParameters {
+        self.keep_temp_files = keep;
+        self
+    }
+
     /// For internal use: Sets the ID of the executing data chunk (for file naming etc.)
     ///
     pub fn set_shard_id(mut self, n: usize) -> MRParameters {