changeset 17:9e077bd7c541

Troubleshoot bad media item id situation.
author Lewin Bormann <lbo@spheniscida.de>
date Sun, 16 Jun 2019 01:53:06 +0200
parents 53e94b301d31
children fab76e6c2568
files README.md photosync.py
diffstat 2 files changed, 52 insertions(+), 6 deletions(-)
--- a/README.md	Sat Jun 15 12:25:11 2019 +0200
+++ b/README.md	Sun Jun 16 01:53:06 2019 +0200
@@ -55,3 +55,15 @@
 
 Consult the help text printed by the last command. Usually you will need to set `--dir` so that your photos don't end up
 in the repository.
+
+## Troubleshooting
+
+* I have seen `Invalid media item ID.` errors for valid-looking media item IDs. This happened to a handful of photos,
+  all from the same day. The affected media item IDs all started with the same prefix, which differed from the shared
+  prefix of all other media item IDs (all IDs from one account usually start with the same 4-6 characters). I'm not
+  sure why the API returned those IDs at one point.
+  * To clean this up, first check that only a small number of items have this kind of ID (`sqlite3 sync.db "SELECT *
+    FROM items WHERE id LIKE 'wrongprefix%'"`), then remove the invalid IDs from the database (`sqlite3 sync.db
+    "DELETE FROM items WHERE id LIKE 'wrongprefix%'"`). A scripted version of this check is sketched below.
+  * Re-fetch metadata for the affected days, for example:
+    `python3 photosync.py --dir=.../directory --dates=2012-12-12:2012-12-14`
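For reference, the check-then-delete step above can also be scripted rather than typed as shell one-liners. A minimal sketch using Python's built-in sqlite3 module, assuming the sync.db file and items table named in the commands above; 'wrongprefix' and the count threshold are placeholders:

    import sqlite3

    # Placeholder prefix, exactly as in the shell examples above.
    PREFIX = 'wrongprefix'

    con = sqlite3.connect('sync.db')
    rows = con.execute("SELECT id FROM items WHERE id LIKE ?", (PREFIX + '%',)).fetchall()
    print("%d items match prefix %r" % (len(rows), PREFIX))

    # Only delete once the count looks plausibly small (threshold is arbitrary).
    if 0 < len(rows) < 20:
        con.execute("DELETE FROM items WHERE id LIKE ?", (PREFIX + '%',))
        con.commit()
    con.close()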
--- a/photosync.py	Sat Jun 15 12:25:11 2019 +0200
+++ b/photosync.py	Sun Jun 16 01:53:06 2019 +0200
@@ -73,6 +73,10 @@
         self._service = build('photoslibrary', 'v1', credentials=tokens.creds())
         self._http = httplib2.Http()
 
+    def get_item(self, id):
+        item = self._service.mediaItems().get(mediaItemId=id).execute()
+        return item
+
     def list_library(self, start=None, to=None):
         """Yields items from the library.
 
@@ -299,13 +303,18 @@
 
             if len(chunk) > chunksize:
                 ok = self._svc.download_items(chunk)
-                wantids = list(map(lambda i: i[0], chunk))
                 self._db.mark_items_downloaded(ok)
+                wantids = set(map(lambda i: i[0], chunk))
+                missing = wantids ^ set(ok)
+                for item in chunk:
+                    if item[0] in missing:
+                        retry.append(item)
                 chunk = []
-                retry.extend(set(wantids) ^ set(ok))
 
-        if len(chunk) + len(retry) > 0:
-            chunk.extend(retry)
+        chunk.extend(retry)
+        n = chunksize
+        smalls = [chunk[i:i + n] for i in range(0, len(chunk), n)]
+        for chunk in smalls:
             ok = self._svc.download_items(chunk)
             self._db.mark_items_downloaded(ok)
             if len(ok) < len(chunk):
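The reworked download loop above collects the items whose IDs were not returned by download_items and re-chunks the leftovers before retrying. A standalone sketch of that pattern, with simplified names and plain tuples standing in for the database rows the real code handles:

    def missing_items(chunk, ok_ids):
        # Items whose ID (the first tuple field) was not confirmed by the download call.
        # Plain set difference; equivalent to the '^' used in the diff as long as
        # the confirmed IDs are a subset of the requested ones.
        missing = set(item[0] for item in chunk) - set(ok_ids)
        return [item for item in chunk if item[0] in missing]

    def rechunk(items, chunksize):
        # Split leftover items into consecutive chunks of at most chunksize entries.
        return [items[i:i + chunksize] for i in range(0, len(items), chunksize)]

    # Tiny example with tuples standing in for database rows: (id, filename).
    chunk = [('id1', 'a.jpg'), ('id2', 'b.jpg'), ('id3', 'c.jpg')]
    retry = missing_items(chunk, ['id1', 'id3'])
    print(retry)                      # [('id2', 'b.jpg')]
    print(rechunk(chunk + retry, 2))  # two chunks: the first two rows, then the remaining two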
@@ -354,7 +363,21 @@
 class Main(arguments.BaseArguments):
     def __init__(self):
         doc = '''
-        Download photos and videos from Google Photos.
+        Download photos and videos from Google Photos. Without any arguments, photosync checks for new
+        photos and downloads them along with any photos that are still marked as not yet downloaded.
+
+        In general, photosync works like this:
+
+        * Download metadata for all items (initial run, or --all), for items in
+          a specified date range (--dates), or for items before the oldest and
+          after the newest known item (default)
+            -> items are marked as "online", i.e. not yet downloaded.
+        * Check the database for all items that are "online" and start downloading them.
+        * Exit.
+
+        This means that if you interrupt photosync during any phase of
+        synchronization, it will pick up where it left off without redoing a
+        lot of work, as long as you don't use the --all option.
 
         Usage:
             photosync.py [options]
@@ -362,8 +385,10 @@
         Options:
             -h --help                   Show this screen
             -d --dir=<dir>              Root directory; where to download photos and store the database.
-            --all                       Synchronize *all* photos instead of just before the oldest/after the newest photo. Needed if you have uploaded photos somewhere in the middle.
+            --all                       Synchronize metadata for *all* photos instead of just those before the oldest/after the newest known photo. Needed if you have uploaded photos whose dates fall somewhere in the middle of the already-synced range. Consider using --dates instead.
             --creds=clientsecret.json   Path to the client credentials JSON file. Defaults to
+            --dates=<dates>             Similar to --all, but only considers photos in the given date range (format: yyyy-mm-dd:yyyy-mm-dd).
+            --query=<item id>           Query metadata for the given item ID and print it to the console.
             --resync                    Check local filesystem for files that should be downloaded but are not there (anymore).
         '''
         super(arguments.BaseArguments, self).__init__(doc=doc)
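The "online" bookkeeping described in the new help text can be pictured with a toy state table. This is only an illustration of the idea: the offline flag and schema below are invented for the sketch and are not photosync's actual database layout, which this changeset does not show.

    import sqlite3

    # Toy schema: 'offline' is an invented flag meaning "already downloaded".
    con = sqlite3.connect(':memory:')
    con.execute("CREATE TABLE items (id TEXT PRIMARY KEY, offline INTEGER)")
    con.executemany("INSERT INTO items VALUES (?, 0)", [('id1',), ('id2',), ('id3',)])

    # Second phase of the help text: pick up everything still marked "online"
    # (offline = 0) and mark it downloaded; repeating this loop is harmless.
    pending = [r[0] for r in con.execute("SELECT id FROM items WHERE offline = 0")]
    for item_id in pending:
        # ... the actual download would happen here ...
        con.execute("UPDATE items SET offline = 1 WHERE id = ?", (item_id,))
    con.commit()
    print(pending)   # ['id1', 'id2', 'id3']; repeating the SELECT now finds nothing left to do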
@@ -376,6 +401,9 @@
         s = PhotosService(tokens=TokenSource(db=db, clientsecret=self.creds))
         d = Driver(db, s, root=self.dir)
 
+        if self.query:
+            print(s.get_item(self.query))
+            return
         if self.resync:
             if d.find_vanished_items(self.dir):
                 d.download_items()
@@ -383,6 +411,12 @@
             return
         if self.all:
             d.drive(window_heuristic=False)
+        elif self.dates:
+            (a, b) = self.dates.split(':')
+            p = dateutil.parser.isoparser()
+            (a, b) = p.isoparse(a), p.isoparse(b)
+            window = (a, b)
+            d.drive(window_heuristic=False, date_range=window)
         else:
             d.drive(window_heuristic=True)
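The --dates branch above splits its argument on ':' and parses each half with dateutil's ISO parser. A minimal standalone illustration of that parsing, using the same dateutil.parser.isoparser API and the date range from the README example:

    import dateutil.parser

    def parse_date_range(arg):
        # 'yyyy-mm-dd:yyyy-mm-dd' -> (start, end) as datetime objects.
        start, end = arg.split(':')
        p = dateutil.parser.isoparser()
        return p.isoparse(start), p.isoparse(end)

    print(parse_date_range('2012-12-12:2012-12-14'))
    # (datetime.datetime(2012, 12, 12, 0, 0), datetime.datetime(2012, 12, 14, 0, 0))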