Mercurial > lbo > hg > scrapeprice
view src/extract.rs @ 3:6f4e48cd69b4
Start work on extract module
author | Lewin Bormann <lbo@spheniscida.de> |
---|---|
date | Sat, 21 Mar 2020 17:00:04 +0100 |
parents | |
children | 768efcbf56a3 |
line wrap: on
line source
use crate::http; use log::info; use scraper::Html; pub struct Extract { html: Html, } pub fn parse_response(r: http::GetResponse) -> Extract { let content = http::bytes_to_str(r.body).unwrap(); let doc = Html::parse_document(content.as_str()); Extract { html: doc } } impl Extract { fn new(content: &str) -> Extract { Extract { html: Html::parse_document(content), } } pub fn get_field(&self, selector: &str) { let selector = scraper::Selector::parse(selector).unwrap(); let selected = self.html.select(&selector); for e in selected { println!("selected: {}", e.inner_html()); } } } #[cfg(test)] mod tests { use super::Extract; #[test] fn test_extract() { let content = String::from_utf8(std::fs::read("audiophil_sony.html").unwrap()).unwrap(); let ex = Extract::new(&content); ex.get_field(".bez.neu"); ex.get_field(".preis strong"); } }