changeset 4:768efcbf56a3

Move error elsewhere and enhance Extract interface
author Lewin Bormann <lbo@spheniscida.de>
date Sat, 21 Mar 2020 17:20:50 +0100
parents 6f4e48cd69b4
children cc875ec12026
files src/err.rs src/extract.rs src/http.rs src/main.rs
diffstat 4 files changed, 72 insertions(+), 41 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/err.rs	Sat Mar 21 17:20:50 2020 +0100
@@ -0,0 +1,41 @@
+
+use std::fmt;
+use std::error::Error;
+
+/// Converts any error into an `HTTPError::LogicError` carrying its `Display` text.
+pub fn logic_err(e: &dyn Error) -> HTTPError {
+    HTTPError::LogicError(e.to_string())
+}
+
+/// Error type wrapping hyper errors, `http` crate errors, HTTP status codes and logic errors.
+#[derive(Debug)]
+pub enum HTTPError {
+    HyperError(hyper::Error),
+    LogicError(String),
+    StatusError(hyper::StatusCode),
+    HttpError(http::Error),
+}
+
+/// Formats as `HTTPError(<payload>)`, borrowing the payload instead of copying it to a `String`.
+impl fmt::Display for HTTPError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
+        let inner: &dyn fmt::Display = match self {
+            HTTPError::HyperError(he) => he,
+            HTTPError::LogicError(s) => s,
+            HTTPError::StatusError(sc) => sc,
+            HTTPError::HttpError(he) => he,
+        };
+        write!(f, "HTTPError({})", inner)
+    }
+}
+
+impl Error for HTTPError {
+    /// Returns the wrapped hyper or `http` error; the other variants carry no source.
+    fn source(&self) -> Option<&(dyn Error + 'static)> {
+        match self {
+            HTTPError::HyperError(e) => Some(e),
+            HTTPError::HttpError(e) => Some(e),
+            HTTPError::LogicError(_) | HTTPError::StatusError(_) => None,
+        }
+    }
+}
--- a/src/extract.rs	Sat Mar 21 17:00:04 2020 +0100
+++ b/src/extract.rs	Sat Mar 21 17:20:50 2020 +0100
@@ -1,3 +1,4 @@
+use crate::err::{logic_err, HTTPError};
 use crate::http;
 
 use log::info;
@@ -19,12 +20,25 @@
             html: Html::parse_document(content),
         }
     }
-    pub fn get_field(&self, selector: &str) {
-        let selector = scraper::Selector::parse(selector).unwrap();
-        let selected = self.html.select(&selector);
-        for e in selected {
-            println!("selected: {}", e.inner_html());
+    /// Returns, for each selector in order, the inner HTML of every matching element.
+    pub fn get_fields(&self, selectors: &[&str]) -> Result<Vec<Vec<String>>, HTTPError> {
+        let mut fields = Vec::with_capacity(selectors.len());
+        for sel in selectors {
+            let selector = scraper::Selector::parse(sel)
+                .map_err(|_| HTTPError::LogicError(format!("failed to parse selector {}", sel)))?;
+            let mut values = vec![];
+            for element in self.html.select(&selector) {
+                let html = element.inner_html(); // serialize once, reuse for print and result
+                println!("selected: {}", html);
+                values.push(html);
+            }
+            fields.push(values);
         }
+        Ok(fields)
+    }
+    /// Returns the inner HTML of every element matching the single `selector`.
+    pub fn get_field(&self, selector: &str) -> Result<Vec<String>, HTTPError> {
+        Ok(self.get_fields(&[selector])?.pop().unwrap_or_default())
     }
 }
 
@@ -32,11 +46,18 @@
 mod tests {
     use super::Extract;
 
+    use std::iter;
+
     #[test]
     fn test_extract() {
         let content = String::from_utf8(std::fs::read("audiophil_sony.html").unwrap()).unwrap();
         let ex = Extract::new(&content);
-        ex.get_field(".bez.neu");
-        ex.get_field(".preis strong");
+        // Results arrive in selector order: index 0 holds descriptions, index 1 prices.
+        let fields = ex.get_fields(&[".bez.neu", ".preis strong"]).unwrap();
+        let (descs, prices) = (&fields[0], &fields[1]);
+        let zipped: Vec<(String, String)> = descs.iter().zip(prices)
+            .map(|(desc, price)| (desc.trim().to_string(), price.trim().to_string()))
+            .collect();
+        println!("{:?}", zipped);
     }
 }
--- a/src/http.rs	Sat Mar 21 17:00:04 2020 +0100
+++ b/src/http.rs	Sat Mar 21 17:20:50 2020 +0100
@@ -1,7 +1,7 @@
+use crate::err::HTTPError;
+
 use std::collections::HashMap;
 use std::convert::{Into, TryFrom};
-use std::error::Error;
-use std::fmt;
 
 use http;
 use hyper;
@@ -23,38 +23,6 @@
     m.check_path(uri.path()) && m2.check_path(uri.path())
 }
 
-#[derive(Debug)]
-pub enum HTTPError {
-    HyperError(hyper::Error),
-    LogicError(String),
-    StatusError(hyper::StatusCode),
-    HttpError(http::Error),
-}
-
-impl fmt::Display for HTTPError {
-    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
-        let e;
-        match self {
-            HTTPError::HyperError(he) => e = format!("{}", he),
-            HTTPError::LogicError(s) => e = s.clone(),
-            HTTPError::StatusError(sc) => e = format!("{}", sc),
-            HTTPError::HttpError(he) => e = format!("{}", he),
-        }
-        write!(f, "HTTPError({})", e)?;
-        Ok(())
-    }
-}
-
-impl Error for HTTPError {
-    fn source(&self) -> Option<&(dyn Error + 'static)> {
-        match self {
-            &HTTPError::HyperError(ref e) => Some(e),
-            &HTTPError::HttpError(ref e) => Some(e),
-            _ => None,
-        }
-    }
-}
-
 pub struct HTTPS {
     client: HyperHTTPS,
     agent: String,
--- a/src/main.rs	Sat Mar 21 17:00:04 2020 +0100
+++ b/src/main.rs	Sat Mar 21 17:20:50 2020 +0100
@@ -1,3 +1,4 @@
+mod err;
 mod extract;
 mod http;