Mercurial > lbo > hg > scrapeprice
changeset 18:233c28d6d968
Remove example_main and add some docs
author | Lewin Bormann <lbo@spheniscida.de> |
---|---|
date | Tue, 22 Sep 2020 13:29:39 +0200 |
parents | 10ef42e217cc |
children | b9af1d5065b4 |
files | src/example_main.rs src/extract.rs |
diffstat | 2 files changed, 4 insertions(+), 37 deletions(-) [+] |
line wrap: on
line diff
--- a/src/example_main.rs Mon Sep 21 17:27:15 2020 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,36 +0,0 @@
-mod driver;
-mod err;
-mod extract;
-mod http;
-mod implem;
-mod util;
-
-use implem::audiophil as audiophil;
-
-use log::{info, warn};
-use env_logger;
-use tokio;
-
-#[tokio::main]
-async fn main() {
-    env_logger::Builder::from_default_env().filter(None, log::LevelFilter::Info).init();
-
-    info!("scrapeprice: init");
-    //test_fetch_page().await.unwrap();
-
-    let logic = driver::DriverLogic {
-        explore: Box::new(audiophil::AudiophilExplorer::new()),
-        store: Box::new(util::DebuggingStorage {}),
-        extract: Box::new(audiophil::AudiophilItemPriceExtractor {}),
-    };
-    let mut driver = driver::Driver::new(logic, None);
-
-    let mut ival = tokio::time::interval(tokio::time::Duration::from_millis(2000));
-
-    loop {
-        ival.tick().await;
-        if let Err(e) = driver.drive().await {
-            warn!("Error from driver: {}", e);
-        }
-    }
-}
--- a/src/extract.rs Mon Sep 21 17:27:15 2020 +0200
+++ b/src/extract.rs Tue Sep 22 13:29:39 2020 +0200
@@ -9,7 +9,7 @@
 use scraper::Html;
 
 /// A fetched document is given to the Extractor which gets information from it and returns the
-/// storable data.
+/// storable data. The underlying logic is implemented by the `scraper` crate.
 pub struct Document {
     html: Html,
 }
@@ -26,6 +26,7 @@
             html: Html::parse_document(content),
         }
     }
+    /// For every CSS selector in `selectors`, return a vec of contents in that selector.
     pub fn get_contents(&self, selectors: &[&str]) -> Result<Vec<Vec<String>>, HTTPError> {
         let mut r = Vec::with_capacity(selectors.len());
         for sel in selectors {
@@ -40,10 +41,12 @@
         }
         Ok(r)
     }
+    /// For a selector, return a vec of contents for the selector.
     pub fn get_content(&self, selector: &str) -> Result<Vec<String>, HTTPError> {
         let v = self.get_contents(&[selector])?;
         Ok(v[0].clone())
     }
+    /// For the elements described by selector, return the attributes
     pub fn get_attr(&self, selector: &str, attr: &str) -> Result<Vec<String>, HTTPError> {
         let selector = parse_selector(selector)?;
         let sel = self.html.select(&selector);