Mercurial > lbo > hg > scrapeprice
view src/extract.rs @ 4:768efcbf56a3
Move error elsewhere and enhance Extract interface
author | Lewin Bormann <lbo@spheniscida.de> |
---|---|
date | Sat, 21 Mar 2020 17:20:50 +0100 |
parents | 6f4e48cd69b4 |
children | cc875ec12026 |
line wrap: on
line source
use crate::err::{logic_err, HTTPError}; use crate::http; use log::info; use scraper::Html; pub struct Extract { html: Html, } pub fn parse_response(r: http::GetResponse) -> Extract { let content = http::bytes_to_str(r.body).unwrap(); let doc = Html::parse_document(content.as_str()); Extract { html: doc } } impl Extract { fn new(content: &str) -> Extract { Extract { html: Html::parse_document(content), } } pub fn get_fields(&self, selectors: &[&str]) -> Result<Vec<Vec<String>>, HTTPError> { let mut r = Vec::with_capacity(selectors.len()); for sel in selectors { let selector = scraper::Selector::parse(sel) .map_err(|_| HTTPError::LogicError(format!("failed to parse selector {}", sel)))?; let selected = self.html.select(&selector); let mut values = vec![]; for e in selected { println!("selected: {}", e.inner_html()); values.push(e.inner_html()); } r.push(values); } Ok(r) } pub fn get_field(&self, selector: &str) -> Result<Vec<String>, HTTPError> { let v = self.get_fields(&[selector])?; Ok(v[0].clone()) } } #[cfg(test)] mod tests { use super::Extract; use std::iter; #[test] fn test_extract() { let content = String::from_utf8(std::fs::read("audiophil_sony.html").unwrap()).unwrap(); let ex = Extract::new(&content); let mut data = ex.get_fields(&[".bez.neu", ".preis strong"]).unwrap(); let prices = data.pop().unwrap(); let descs = data.pop().unwrap(); let zipped: Vec<(String, String)> = descs.into_iter().zip(prices).map(|(desc, price)| { (desc.trim().to_string(), price.trim().to_string()) }).collect(); println!("{:?}", zipped); } }