changeset 6:173a94e88057

Handle videos correctly, and parse command line arguments.
author Lewin Bormann <lbo@spheniscida.de>
date Fri, 14 Jun 2019 21:30:58 +0200
parents 6fce3b59ae36
children 4ac3fcff5a81
files Pipfile Pipfile.lock photosync.py
diffstat 3 files changed, 169 insertions(+), 52 deletions(-) [+]
line wrap: on
line diff
--- a/Pipfile	Fri Jun 14 20:28:43 2019 +0200
+++ b/Pipfile	Fri Jun 14 21:30:58 2019 +0200
@@ -10,6 +10,10 @@
 google-auth-httplib2 = "*"
 google-auth-oauthlib = "*"
 python-dateutil = "*"
+arguments = "*"
+future = "*"
+pyyaml = "*"
+consoleprinter = "*"
 
 [requires]
 python_version = "3.7"
--- a/Pipfile.lock	Fri Jun 14 20:28:43 2019 +0200
+++ b/Pipfile.lock	Fri Jun 14 21:30:58 2019 +0200
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "92e1fcff9dc3bf8ed8a7de79e9fc8f419f16b9644ec2bf80975549f81c296044"
+            "sha256": "113add46aadeed9b44a7aa8ecedc1be15fbc5e9cd1853e037901f72f90ae286b"
         },
         "pipfile-spec": 6,
         "requires": {
@@ -16,6 +16,13 @@
         ]
     },
     "default": {
+        "arguments": {
+            "hashes": [
+                "sha256:5de390ba2212c227f0b4f43db175b623db1d1a25d4fb001f4e20013827ba829c"
+            ],
+            "index": "pypi",
+            "version": "==76"
+        },
         "cachetools": {
             "hashes": [
                 "sha256:428266a1c0d36dc5aca63a2d7c5942e88c2c898d72139fca0e97fdd2380517ae",
@@ -37,6 +44,20 @@
             ],
             "version": "==3.0.4"
         },
+        "consoleprinter": {
+            "hashes": [
+                "sha256:a5a91a7b52cd459b38840d9558fa59b19eb94a860cbad5e12551ceaec8068a95"
+            ],
+            "index": "pypi",
+            "version": "==93"
+        },
+        "future": {
+            "hashes": [
+                "sha256:67045236dcfd6816dc439556d009594abf643e5eb48992e36beac09c2ca659b8"
+            ],
+            "index": "pypi",
+            "version": "==0.17.1"
+        },
         "google-api-python-client": {
             "hashes": [
                 "sha256:048da0d68564380ee23b449e5a67d4666af1b3b536d2fb0a02cee1ad540fa5ec",
@@ -111,6 +132,23 @@
             "index": "pypi",
             "version": "==2.8.0"
         },
+        "pyyaml": {
+            "hashes": [
+                "sha256:57acc1d8533cbe51f6662a55434f0dbecfa2b9eaf115bede8f6fd00115a0c0d3",
+                "sha256:588c94b3d16b76cfed8e0be54932e5729cc185caffaa5a451e7ad2f7ed8b4043",
+                "sha256:68c8dd247f29f9a0d09375c9c6b8fdc64b60810ebf07ba4cdd64ceee3a58c7b7",
+                "sha256:70d9818f1c9cd5c48bb87804f2efc8692f1023dac7f1a1a5c61d454043c1d265",
+                "sha256:86a93cccd50f8c125286e637328ff4eef108400dd7089b46a7be3445eecfa391",
+                "sha256:a0f329125a926876f647c9fa0ef32801587a12328b4a3c741270464e3e4fa778",
+                "sha256:a3c252ab0fa1bb0d5a3f6449a4826732f3eb6c0270925548cac342bc9b22c225",
+                "sha256:b4bb4d3f5e232425e25dda21c070ce05168a786ac9eda43768ab7f3ac2770955",
+                "sha256:cd0618c5ba5bda5f4039b9398bb7fb6a317bb8298218c3de25c47c4740e4b95e",
+                "sha256:ceacb9e5f8474dcf45b940578591c7f3d960e82f926c707788a570b51ba59190",
+                "sha256:fe6a88094b64132c4bb3b631412e90032e8cfe9745a58370462240b8cb7553cd"
+            ],
+            "index": "pypi",
+            "version": "==5.1.1"
+        },
         "requests": {
             "hashes": [
                 "sha256:11e007a8a2aa0323f5a921e9e6a2d7e4e67d9877e85773fba9ba6419025cbeb4",
@@ -139,6 +177,18 @@
             ],
             "version": "==1.12.0"
         },
+        "terminaltables": {
+            "hashes": [
+                "sha256:f3eb0eb92e3833972ac36796293ca0906e998dc3be91fbe1f8615b331b853b81"
+            ],
+            "version": "==3.1.0"
+        },
+        "ujson": {
+            "hashes": [
+                "sha256:f66073e5506e91d204ab0c614a148d5aa938bdbf104751be66f8ad7a222f5f86"
+            ],
+            "version": "==1.35"
+        },
         "uritemplate": {
             "hashes": [
                 "sha256:01c69f4fe8ed503b2951bef85d996a9d22434d2431584b5b107b2981ff416fbd",
--- a/photosync.py	Fri Jun 14 20:28:43 2019 +0200
+++ b/photosync.py	Fri Jun 14 21:30:58 2019 +0200
@@ -1,4 +1,5 @@
 
+import arguments
 import datetime
 import dateutil.parser
 import json
@@ -103,14 +104,14 @@
             if pagetoken is None:
                 return
 
-    def download_photo(self, id, path):
-        """Download a photo and store it under its file name in the directory `path`.
+    def download_item(self, id, path):
+        """Download an item and store it under its file name in the directory `path`.
         """
-        photo = self._service.mediaItems().get(mediaItemId=id).execute()
-        rawurl = photo['baseUrl']
+        item = self._service.mediaItems().get(mediaItemId=id).execute()
+        rawurl = item['baseUrl']
         rawurl = '{url}=d'.format(url=rawurl)
         os.makedirs(path, exist_ok=True)
-        p = os.path.join(path, photo['filename'])
+        p = os.path.join(path, item['filename'])
         with open(p, 'wb') as f:
             f.write(self._http.request('GET', rawurl).data)
 
@@ -124,8 +125,9 @@
 
     def initdb(self):
         cur = self._db.cursor()
-        cur.execute('CREATE TABLE IF NOT EXISTS photos (id TEXT PRIMARY KEY, creationTime TEXT, path TEXT, filename TEXT, offline INTEGER)')
-        cur.execute('CREATE TABLE IF NOT EXISTS transactions (id TEXT, type TEXT, time INTEGER, path TEXT, filename TEXT)')
+        cur.execute('CREATE TABLE IF NOT EXISTS items (id TEXT PRIMARY KEY, creationTime TEXT, path TEXT, mimetype \
+                TEXT, filename TEXT, video INTEGER, offline INTEGER)')
+        cur.execute('CREATE TABLE IF NOT EXISTS transactions (id TEXT, type TEXT, time INTEGER)')
         cur.execute('CREATE TABLE IF NOT EXISTS oauth (id TEXT PRIMARY KEY, credentials BLOB)')
         self._db.commit()
 
@@ -149,102 +151,163 @@
                 return row[0]
             return None
 
-    def add_online_photo(self, media_item, path):
+    def add_online_item(self, media_item, path):
         with self._db as conn:
             cur = conn.cursor()
-            cur.execute('SELECT id FROM photos WHERE id = "{}"'.format(media_item['id']))
+            cur.execute('SELECT id FROM items WHERE id = "{}"'.format(media_item['id']))
             if cur.fetchone():
                 log('INFO', 'Photo already in store.')
                 cur.close()
                 return False
-            log('INFO', 'Inserting photo {}'.format(media_item['id']))
+            log('INFO', 'Inserting item {}'.format(media_item['id']))
             cur.close()
 
             creation_time = int(self._dtparse.isoparse(media_item['mediaMetadata']['creationTime']).timestamp())
-            conn.cursor().execute('INSERT INTO photos (id, creationTime, path, filename, offline) VALUES (?, ?, ?, ?, 0)', (media_item['id'], creation_time, path, media_item['filename']))
-            conn.commit()
-            return True
+            is_video = 1 if 'video' in media_item['mediaMetadata'] else 0
+            conn.cursor().execute('INSERT INTO items (id, creationTime, path, mimetype, filename, video, offline) VALUES (?, ?, ?, ?, ?, ?, 0)', (media_item['id'], creation_time, path, media_item['mimeType'], media_item['filename'], is_video))
+        self.record_transaction(media_item['id'], 'ADD')
+        return True
 
-    def get_not_downloaded_photos(self):
-        """Yield photos (as [id, path]) that are not yet present locally."""
+    def get_not_downloaded_items(self):
+        """Generate items (as [id, path, filename]) that are not yet present locally."""
         with self._db as conn:
             cur = conn.cursor()
-            cur.execute('SELECT id, path, filename FROM photos WHERE offline = 0 ORDER BY creationTime ASC')
+            cur.execute('SELECT id, path, filename FROM items WHERE offline = 0 ORDER BY creationTime ASC')
             while True:
                 row = cur.fetchone()
                 if not row:
                     break
                 yield row
 
-    def mark_photo_downloaded(self, id):
+    def mark_item_downloaded(self, id):
         with self._db as conn:
-            conn.cursor().execute('UPDATE photos SET offline = 1 WHERE id = ?', (id,))
+            conn.cursor().execute('UPDATE items SET offline = 1 WHERE id = ?', (id,))
+        self.record_transaction(id, 'DOWNLOAD')
 
-    def most_recent_creation_date(self):
+    def existing_items_range(self):
         with self._db as conn:
             cursor = conn.cursor()
-            cursor.execute('SELECT creationTime FROM photos ORDER BY creationTime DESC LIMIT 1')
-            row = cursor.fetchone()
-            cursor.close()
-            if row:
-                return datetime.datetime.fromtimestamp(int(row[0]))
-            return datetime.datetime.fromtimestamp(0)
+            cursor.execute('SELECT creationTime FROM items ORDER BY creationTime DESC LIMIT 1')
+            newest = cursor.fetchone()
+            cursor.execute('SELECT creationTime FROM items ORDER BY creationTime ASC LIMIT 1')
+            oldest = cursor.fetchone()
+
+            # Safe defaults that will lead to all items being selected
+            old_default = datetime.datetime.now()
+            new_default = datetime.datetime.fromtimestamp(0)
+            return (
+                datetime.datetime.fromtimestamp(int(oldest[0])) if oldest else old_default,
+                datetime.datetime.fromtimestamp(int(newest[0])) if newest else new_default
+            )
+
+    def record_transaction(self, id, typ):
+        """Append one row (id, typ, current unix time in seconds) to the transactions table.
+
+        typ should be one of 'ADD', 'DOWNLOAD'.
+        """
+        with self._db as conn:
+            now = int(datetime.datetime.now().timestamp())
+            conn.cursor().execute('INSERT INTO transactions (id, type, time) VALUES (?, ?, ?)', (id, typ, now))
 
 
 class Driver:
     """Coordinates synchronization.
 
-    1. Fetch photo metadata (list_library). This takes a long time on first try.
-    2. Check for photos not yet downloaded, download them.
+    1. Fetch item metadata (list_library). This takes a long time on first try.
+    2. Check for items not yet downloaded, download them.
     3. Start again.
     """
 
-    def __init__(self, db, photosservice, path_mapper=None):
+    def __init__(self, db, photosservice, root='', path_mapper=None):
+        self._root = root
         self._db = db
         self._svc = photosservice
         self._path_mapper = path_mapper if path_mapper else Driver.path_from_date
 
-    def fetch_metadata(self, date_range=(None, None), start_at_recent=False):
+    def fetch_metadata(self, date_range=(None, None), window_heuristic=False):
         """Fetch media metadata and write it to the database."""
+
+        # First, figure out which ranges we need to fetch.
+        ranges = [date_range]
         if not (date_range[0] or date_range[1]):
-            if start_at_recent:
-                date_range = (self._db.most_recent_creation_date(), datetime.datetime.now())
+            if window_heuristic:
+                (oldest, newest) = self._db.existing_items_range()
+                # Special case where no previous items exist.
+                if newest == datetime.datetime.fromtimestamp(0):
+                    ranges = [(datetime.datetime.fromtimestamp(0), datetime.datetime.now())]
+                else:
+                    # Fetch only the windows before the oldest and after the newest known item.
+                    # Items uploaded later whose creation date falls between existing
+                    # items are not picked up by this heuristic.
+                    ranges = [
+                        (datetime.datetime.fromtimestamp(0), oldest),
+                        (newest, datetime.datetime.now())
+                    ]
+            else:
+                ranges = [(datetime.datetime.fromtimestamp(0), datetime.datetime.now())]
+
         log('INFO', 'Running starting for {}'.format(date_range))
 
-        for photo in self._svc.list_library(start=date_range[0], to=date_range[1]):
-            log('INFO', 'Fetched metadata for {}'.format(photo['filename']))
-            if self._db.add_online_photo(photo, self._path_mapper(photo)):
-                log('INFO', 'Added {} to DB'.format(photo['filename']))
+        for rng in ranges:
+            for item in self._svc.list_library(start=rng[0], to=rng[1]):
+                log('INFO', 'Fetched metadata for {}'.format(item['filename']))
+                if self._db.add_online_item(item, self._path_mapper(item)):
+                    log('INFO', 'Added {} to DB'.format(item['filename']))
         return True
 
-    def download_photos(self):
-        """Scans database for photos not yet downloaded and downloads them."""
-        for photo in self._db.get_not_downloaded_photos():
-            (id, path, filename) = photo
+    def download_items(self):
+        """Scans database for items not yet downloaded and downloads them."""
+        for item in self._db.get_not_downloaded_items():
+            (id, path, filename) = item
+            path = os.path.join(self._root, path)
             log('INFO', 'Downloading {fn} into {p}'.format(fn=filename, p=path))
-            self._svc.download_photo(id, path)
+            self._svc.download_item(id, path)
             log('INFO', 'Downloading {fn} successful'.format(fn=filename))
-            self._db.mark_photo_downloaded(id)
+            self._db.mark_item_downloaded(id)
 
-    def drive(self, date_range=(None, None), start_at_recent=True):
-        """First, download all metadata since most recently fetched photo.
+    def drive(self, date_range=(None, None), window_heuristic=True):
+        """First, download all metadata since most recently fetched item.
         Then, download content."""
         # This possibly takes a long time and it may be that the user aborts in
-        # between. It returns fast if most photos are already present locally.
-        if self.fetch_metadata(date_range, start_at_recent):
-            self.download_photos()
+        # between. It returns fast if most items are already present locally.
+        # window_heuristic asks the metadata fetching logic to only fetch
+        # items older than the oldest or newer than the newest item, which is
+        # what we want for updating the items library.
+        if self.fetch_metadata(date_range, window_heuristic):
+            self.download_items()
 
     def path_from_date(item):
-        """By default, map photos to year/month/day directory."""
+        """By default, map items to year/month/day directory."""
         dt = dateutil.parser.isoparser().isoparse(item['mediaMetadata']['creationTime']).date()
         return '{y}/{m:02d}/{d:02d}/'.format(y=dt.year, m=dt.month, d=dt.day)
 
 
+class Main(arguments.BaseArguments):  # command-line front end; the usage text below is handed to BaseArguments as `doc`
+    def __init__(self):
+        doc = '''
+        Download photos and videos from Google Photos.
+
+        Usage:
+            photosync.py [options]
+
+        Options:
+            -h --help       Show this screen
+            -d --dir=<dir>  Root directory; where to download photos and store the database.
+        '''
+        super().__init__(doc=doc)  # zero-arg super(): naming BaseArguments here would skip BaseArguments.__init__ itself
+        self.dir = self.dir if self.dir else '.'  # fall back to the current directory when --dir is not given
+
+    def main(self):  # open <dir>/sync.db, build service and driver, then run the driver over epoch..now
+        print(self.dir)
+        db = DB(os.path.join(self.dir, 'sync.db'))
+        s = PhotosService(tokens=TokenSource(db=db))
+        d = Driver(db, s, root=self.dir)
+        d.drive(date_range=(datetime.datetime.fromtimestamp(0), datetime.datetime.now()))
+
+
 def main():
-    db = DB('photosync.db')
-    s = PhotosService(tokens=TokenSource(db=db))
-    d = Driver(db, s)
-    d.drive()
+    Main().main()
+
 
 if __name__ == '__main__':
     main()