From bc2c9950e375400ac5acee3180b9d0cfab8b90dc Mon Sep 17 00:00:00 2001
From: Eliot Berriot <contact@eliotberriot.com>
Date: Sun, 6 May 2018 15:36:49 +0200
Subject: [PATCH] Fix #189: federation cache should now delete properly,
 including orphaned files

---
 api/funkwhale_api/federation/tasks.py | 44 +++++++++++++++++-----
 api/tests/federation/test_tasks.py    | 53 +++++++++++++++++++++------
 changes/changelog.d/189.bugfix        |  1 +
 3 files changed, 77 insertions(+), 21 deletions(-)
 create mode 100644 changes/changelog.d/189.bugfix

diff --git a/api/funkwhale_api/federation/tasks.py b/api/funkwhale_api/federation/tasks.py
index adc354c4..8f931b0e 100644
--- a/api/funkwhale_api/federation/tasks.py
+++ b/api/funkwhale_api/federation/tasks.py
@@ -1,8 +1,10 @@
 import datetime
 import json
 import logging
+import os
 
 from django.conf import settings
+from django.db.models import Q
 from django.utils import timezone
 
 from requests.exceptions import RequestException
@@ -96,16 +98,38 @@ def clean_music_cache():
     delay = preferences['federation__music_cache_duration']
     if delay < 1:
         return  # cache clearing disabled
+    limit = timezone.now() - datetime.timedelta(minutes=delay)
 
     candidates = models.LibraryTrack.objects.filter(
-        audio_file__isnull=False
-    ).values_list('local_track_file__track', flat=True)
-    listenings = Listening.objects.filter(
-        creation_date__gte=timezone.now() - datetime.timedelta(minutes=delay),
-        track__pk__in=candidates).values_list('track', flat=True)
-    too_old = set(candidates) - set(listenings)
-
-    to_remove = models.LibraryTrack.objects.filter(
-        local_track_file__track__pk__in=too_old).only('audio_file')
-    for lt in to_remove:
+        Q(audio_file__isnull=False) & (
+            Q(local_track_file__accessed_date__lt=limit) |
+            Q(local_track_file__accessed_date=None)
+        )
+    ).exclude(audio_file='').only('audio_file', 'id')
+    for lt in candidates:
         lt.audio_file.delete()
+
+    # we also delete orphaned files, if any
+    storage = models.LibraryTrack._meta.get_field('audio_file').storage
+    files = get_files(storage, 'federation_cache')
+    existing = models.LibraryTrack.objects.filter(audio_file__in=files)
+    missing = set(files) - set(existing.values_list('audio_file', flat=True))
+    for m in missing:
+        storage.delete(m)
+
+
+def get_files(storage, *parts):
+    """
+    This is a recursive function that return all files available
+    in a given directory using django's storage.
+    """
+    if not parts:
+        raise ValueError('Missing path')
+
+    dirs, files = storage.listdir(os.path.join(*parts))
+    for dir in dirs:
+        files += get_files(storage, *(list(parts) + [dir]))
+    return [
+        os.path.join(parts[-1], path)
+        for path in files
+    ]
diff --git a/api/tests/federation/test_tasks.py b/api/tests/federation/test_tasks.py
index 506fbc1f..3517e8fe 100644
--- a/api/tests/federation/test_tasks.py
+++ b/api/tests/federation/test_tasks.py
@@ -1,4 +1,7 @@
 import datetime
+import os
+import pathlib
+import pytest
 
 from django.core.paginator import Paginator
 from django.utils import timezone
@@ -117,17 +120,16 @@ def test_clean_federation_music_cache_if_no_listen(preferences, factories):
     lt1 = factories['federation.LibraryTrack'](with_audio_file=True)
     lt2 = factories['federation.LibraryTrack'](with_audio_file=True)
     lt3 = factories['federation.LibraryTrack'](with_audio_file=True)
-    tf1 = factories['music.TrackFile'](library_track=lt1)
-    tf2 = factories['music.TrackFile'](library_track=lt2)
-    tf3 = factories['music.TrackFile'](library_track=lt3)
-
-    # we listen to the first one, and the second one (but weeks ago)
-    listening1 = factories['history.Listening'](
-        track=tf1.track,
-        creation_date=timezone.now())
-    listening2 = factories['history.Listening'](
-        track=tf2.track,
-        creation_date=timezone.now() - datetime.timedelta(minutes=61))
+    tf1 = factories['music.TrackFile'](
+        accessed_date=timezone.now(), library_track=lt1)
+    tf2 = factories['music.TrackFile'](
+        accessed_date=timezone.now()-datetime.timedelta(minutes=61),
+        library_track=lt2)
+    tf3 = factories['music.TrackFile'](
+        accessed_date=None, library_track=lt3)
+    path1 = lt1.audio_file.path
+    path2 = lt2.audio_file.path
+    path3 = lt3.audio_file.path
 
     tasks.clean_music_cache()
 
@@ -138,3 +140,32 @@ def test_clean_federation_music_cache_if_no_listen(preferences, factories):
     assert bool(lt1.audio_file) is True
     assert bool(lt2.audio_file) is False
     assert bool(lt3.audio_file) is False
+    assert os.path.exists(path1) is True
+    assert os.path.exists(path2) is False
+    assert os.path.exists(path3) is False
+
+
+def test_clean_federation_music_cache_orphaned(
+        settings, preferences, factories):
+    preferences['federation__music_cache_duration'] = 60
+    path = os.path.join(settings.MEDIA_ROOT, 'federation_cache')
+    keep_path = os.path.join(os.path.join(path, '1a', 'b2'), 'keep.ogg')
+    remove_path = os.path.join(os.path.join(path, 'c3', 'd4'), 'remove.ogg')
+    os.makedirs(os.path.dirname(keep_path), exist_ok=True)
+    os.makedirs(os.path.dirname(remove_path), exist_ok=True)
+    pathlib.Path(keep_path).touch()
+    pathlib.Path(remove_path).touch()
+    lt = factories['federation.LibraryTrack'](
+        with_audio_file=True,
+        audio_file__path=keep_path)
+    tf = factories['music.TrackFile'](
+        library_track=lt,
+        accessed_date=timezone.now())
+
+    tasks.clean_music_cache()
+
+    lt.refresh_from_db()
+
+    assert bool(lt.audio_file) is True
+    assert os.path.exists(lt.audio_file.path) is True
+    assert os.path.exists(remove_path) is False
diff --git a/changes/changelog.d/189.bugfix b/changes/changelog.d/189.bugfix
new file mode 100644
index 00000000..076058e6
--- /dev/null
+++ b/changes/changelog.d/189.bugfix
@@ -0,0 +1 @@
+Federation cache suppression is now simpler and also deletes orphaned files (#189)
-- 
GitLab