changeset 19:b9af1d5065b4

Make driver traits async
author Lewin Bormann <lbo@spheniscida.de>
date Tue, 22 Sep 2020 19:01:57 +0200
parents 233c28d6d968
children b16039ffcb17
files src/driver.rs src/http.rs
diffstat 2 files changed, 7 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/src/driver.rs	Tue Sep 22 13:29:39 2020 +0200
+++ b/src/driver.rs	Tue Sep 22 19:01:57 2020 +0200
@@ -18,16 +18,17 @@
 }
 
 /// Return Uris to explore, both as initial set and for every fetched page.
+#[async_trait::async_trait]
 pub trait Explorer {
     /// Return pages to fetch in any case, e.g. time-based. Called on every iteration of the
-    /// driver.
-    fn idle(&mut self) -> Vec<Uri>;
+    /// driver. All returned Uris are appended to the queue.
+    async fn idle(&mut self) -> Vec<Uri>;
     /// Return pages to fetch based on a fetched document.
-    fn next(&mut self, uri: &Uri, doc: &extract::Document) -> Vec<Uri>;
+    async fn next(&mut self, uri: &Uri, doc: &extract::Document) -> Vec<Uri>;
 }
 
 /// An Extractor retrieves information from a Document.
-pub trait Extractor<T> {
+pub trait Extractor<T: Send> {
     fn extract(&mut self, uri: &Uri, doc: &extract::Document) -> Vec<T> {
         vec![]
     }
@@ -58,7 +59,7 @@
     /// Run Driver a single step, i.e. first explore, then process one page. Returns true if a page
     /// was processed.
     pub async fn drive(&mut self) -> Result<bool, err::HTTPError> {
-        let new = self.logic.explore.idle();
+        let new = self.logic.explore.idle().await;
         info!("Appended URIs to queue: {:?}", new);
         self.queue.extend(new.into_iter());
 
@@ -68,7 +69,7 @@
             let doc = extract::parse_response(resp)?;
             let extracted = self.logic.extract.extract(&uri, &doc);
             self.logic.store.store(Box::new(extracted.into_iter()));
-            let next = self.logic.explore.next(&uri, &doc);
+            let next = self.logic.explore.next(&uri, &doc).await;
             info!("Appended URIs after fetch: {:?}", next);
             self.queue.extend(next);
             return Ok(true);
--- a/src/http.rs	Tue Sep 22 13:29:39 2020 +0200
+++ b/src/http.rs	Tue Sep 22 19:01:57 2020 +0200
@@ -1,4 +1,3 @@
-
 #![allow(unused)]
 
 use crate::err::HTTPError;