Mercurial > lbo > hg > localmr
changeset 43:ab7d19e012e0
Move record types into separate module
author | Lewin Bormann <lbo@spheniscida.de> |
---|---|
date | Mon, 01 Feb 2016 21:02:34 +0000 |
parents | 85e3fb6bf1b8 |
children | bbb3c4e61068 |
files | src/closure_mr.rs src/formats/util.rs src/lib.rs src/map.rs src/mapreducer.rs src/record_types.rs |
diffstat | 6 files changed, 98 insertions(+), 83 deletions(-) |
line wrap: on
line diff
--- a/src/closure_mr.rs Mon Feb 01 20:54:42 2016 +0000 +++ b/src/closure_mr.rs Mon Feb 01 21:02:34 2016 +0000 @@ -1,7 +1,7 @@ //! A MapReducer that uses supplied map()/reduce() functions. -use mapreducer::{MEmitter, MapReducer, MapperF, MultiRecord, REmitter, Record, ReducerF, SharderF, - _std_shard}; +use mapreducer::{MapReducer, MapperF, ReducerF, SharderF, _std_shard}; +use record_types::{Record, MultiRecord, MEmitter, REmitter}; /// This type implements the MapReducer trait. You can use it to provide your own functions to a /// MapReduce process. If you need more flexibility, however, you may want to simply implement your
--- a/src/formats/util.rs Mon Feb 01 20:54:42 2016 +0000 +++ b/src/formats/util.rs Mon Feb 01 21:02:34 2016 +0000 @@ -1,7 +1,7 @@ //! Various iterators/adapters used for input/output formats. -use mapreducer::Record; +use record_types::Record; use std::fmt; use std::io;
--- a/src/lib.rs Mon Feb 01 20:54:42 2016 +0000 +++ b/src/lib.rs Mon Feb 01 21:02:34 2016 +0000 @@ -7,6 +7,7 @@ pub mod map; pub mod mapreducer; pub mod parameters; +pub mod record_types; pub mod shard_merge;
--- a/src/map.rs Mon Feb 01 20:54:42 2016 +0000 +++ b/src/map.rs Mon Feb 01 21:02:34 2016 +0000 @@ -6,7 +6,8 @@ use std::fmt; use std::io::Write; use std::collections::{LinkedList, BTreeMap}; -use mapreducer::{Record, MapReducer, MEmitter}; +use mapreducer::MapReducer; +use record_types::{Record, MEmitter}; use parameters::MRParameters; use formats::util::MRSinkGenerator; @@ -151,7 +152,7 @@ use formats::util::RecordIterator; use formats::lines::LinesSinkGenerator; use map::MapPartition; - use mapreducer::{MEmitter, REmitter, Record, MultiRecord}; + use record_types::{MEmitter, REmitter, Record, MultiRecord}; use parameters::MRParameters; use std::collections::LinkedList;
--- a/src/mapreducer.rs Mon Feb 01 20:54:42 2016 +0000 +++ b/src/mapreducer.rs Mon Feb 01 21:02:34 2016 +0000 @@ -1,86 +1,9 @@ //! The MapReducer trait and associated types. +use record_types::{REmitter, MEmitter, Record, MultiRecord}; use std::clone::Clone; -use std::cmp::{PartialOrd, Eq, Ordering}; -use std::collections::LinkedList; use std::hash::{Hasher, SipHasher}; -/// A (key,value) pair. -#[derive(Clone, PartialEq, Eq)] -pub struct Record { - pub key: String, - pub value: String, -} - -impl PartialOrd for Record { - fn partial_cmp(&self, other: &Record) -> Option<Ordering> { - match self.key.cmp(&other.key) { - Ordering::Equal => Some(self.value.cmp(&other.value)), - o => Some(o) - } - } -} - -/// A (key,[value]) pair; typicall used as input to a reducer function. -/// Can be easily iterated over, e.g. in a `for` loop. -pub struct MultiRecord { - key: String, - value: Box<Iterator<Item = String>>, -} - -impl MultiRecord { - /// Retrieves the key of the record. - pub fn key<'a>(&'a self) -> &'a String { - &self.key - } -} - -impl IntoIterator for MultiRecord { - type Item = String; - type IntoIter = Box<Iterator<Item=String>>; - /// Allows iterating over all the values. - fn into_iter(self) -> Self::IntoIter { - self.value - } -} - -/// Emitter type used in the mapper phase; used to emit (key,value) pairs. -pub struct MEmitter { - r: LinkedList<Record>, -} - -impl MEmitter { - pub fn new() -> MEmitter { - MEmitter { r: LinkedList::new() } - } - pub fn emit(&mut self, key: String, val: String) { - self.r.push_back(Record { - key: key, - value: val, - }) - } - pub fn _get(self) -> LinkedList<Record> { - self.r - } -} - -/// Emitter used in the reducer phase; used to emit values. 
-pub struct REmitter { - r: LinkedList<String>, -} - -impl REmitter { - pub fn new() -> REmitter { - REmitter { r: LinkedList::new() } - } - pub fn emit(&mut self, val: String) { - self.r.push_back(val) - } - pub fn _get(self) -> LinkedList<String> { - self.r - } -} - /// Default sharding function. pub fn _std_shard(n: usize, key: &String) -> usize { let mut h = SipHasher::new();
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/record_types.rs Mon Feb 01 21:02:34 2016 +0000 @@ -0,0 +1,90 @@ +use std::cmp::{Eq, PartialEq, Ordering, PartialOrd}; +use std::collections::LinkedList; + +/// A (key,value) pair. +#[derive(Clone, PartialEq, Eq)] +pub struct Record { + pub key: String, + pub value: String, +} + +impl PartialOrd for Record { + fn partial_cmp(&self, other: &Record) -> Option<Ordering> { + match self.key.cmp(&other.key) { + Ordering::Equal => Some(self.value.cmp(&other.value)), + o => Some(o) + } + } +} + +/// A (key,[value]) pair; typicall used as input to a reducer function. +/// Can be easily iterated over, e.g. in a `for` loop. +pub struct MultiRecord { + key: String, + value: Box<Iterator<Item = String>>, +} + +impl MultiRecord { + /// Retrieves the key of the record. + pub fn key<'a>(&'a self) -> &'a String { + &self.key + } +} + +impl PartialEq for MultiRecord { + fn eq(&self, other: &MultiRecord) -> bool { + self.key == other.key + } +} + +impl PartialOrd for MultiRecord { + fn partial_cmp(&self, other: &MultiRecord) -> Option<Ordering> { + Some(self.key.cmp(&other.key)) + } +} + +impl IntoIterator for MultiRecord { + type Item = String; + type IntoIter = Box<Iterator<Item=String>>; + /// Allows iterating over all the values. + fn into_iter(self) -> Self::IntoIter { + self.value + } +} + +/// Emitter type used in the mapper phase; used to emit (key,value) pairs. +pub struct MEmitter { + r: LinkedList<Record>, +} + +impl MEmitter { + pub fn new() -> MEmitter { + MEmitter { r: LinkedList::new() } + } + pub fn emit(&mut self, key: String, val: String) { + self.r.push_back(Record { + key: key, + value: val, + }) + } + pub fn _get(self) -> LinkedList<Record> { + self.r + } +} + +/// Emitter used in the reducer phase; used to emit values. 
+pub struct REmitter { + r: LinkedList<String>, +} + +impl REmitter { + pub fn new() -> REmitter { + REmitter { r: LinkedList::new() } + } + pub fn emit(&mut self, val: String) { + self.r.push_back(val) + } + pub fn _get(self) -> LinkedList<String> { + self.r + } +}