diff --git a/api/funkwhale_api/federation/tasks.py b/api/funkwhale_api/federation/tasks.py index adc354c4fdaba7f1195e960246e6ff2f6161b107..8f931b0ed741ff13017475c1ac47411ff67e5df0 100644 --- a/api/funkwhale_api/federation/tasks.py +++ b/api/funkwhale_api/federation/tasks.py @@ -1,8 +1,10 @@ import datetime import json import logging +import os from django.conf import settings +from django.db.models import Q from django.utils import timezone from requests.exceptions import RequestException @@ -96,16 +98,38 @@ def clean_music_cache(): delay = preferences['federation__music_cache_duration'] if delay < 1: return # cache clearing disabled + limit = timezone.now() - datetime.timedelta(minutes=delay) candidates = models.LibraryTrack.objects.filter( - audio_file__isnull=False - ).values_list('local_track_file__track', flat=True) - listenings = Listening.objects.filter( - creation_date__gte=timezone.now() - datetime.timedelta(minutes=delay), - track__pk__in=candidates).values_list('track', flat=True) - too_old = set(candidates) - set(listenings) - - to_remove = models.LibraryTrack.objects.filter( - local_track_file__track__pk__in=too_old).only('audio_file') - for lt in to_remove: + Q(audio_file__isnull=False) & ( + Q(local_track_file__accessed_date__lt=limit) | + Q(local_track_file__accessed_date=None) + ) + ).exclude(audio_file='').only('audio_file', 'id') + for lt in candidates: lt.audio_file.delete() + + # we also delete orphaned files, if any + storage = models.LibraryTrack._meta.get_field('audio_file').storage + files = get_files(storage, 'federation_cache') + existing = models.LibraryTrack.objects.filter(audio_file__in=files) + missing = set(files) - set(existing.values_list('audio_file', flat=True)) + for m in missing: + storage.delete(m) + + +def get_files(storage, *parts): + """ + This is a recursive function that return all files available + in a given directory using django's storage. + """ + if not parts: + raise ValueError('Missing path') + + dirs, files = storage.listdir(os.path.join(*parts)) + for dir in dirs: + files += get_files(storage, *(list(parts) + [dir])) + return [ + os.path.join(parts[-1], path) + for path in files + ] diff --git a/api/funkwhale_api/music/migrations/0026_trackfile_accessed_date.py b/api/funkwhale_api/music/migrations/0026_trackfile_accessed_date.py new file mode 100644 index 0000000000000000000000000000000000000000..1d5327d93969e821a6fa3e20e55e56964fc1f841 --- /dev/null +++ b/api/funkwhale_api/music/migrations/0026_trackfile_accessed_date.py @@ -0,0 +1,18 @@ +# Generated by Django 2.0.3 on 2018-05-06 12:47 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('music', '0025_auto_20180419_2023'), + ] + + operations = [ + migrations.AddField( + model_name='trackfile', + name='accessed_date', + field=models.DateTimeField(blank=True, null=True), + ), + ] diff --git a/api/funkwhale_api/music/models.py b/api/funkwhale_api/music/models.py index 18f181e884aec2877c07f3b6dac533411d653e94..655d38755e1deba8e4bd3e99942c0ea7b586af5d 100644 --- a/api/funkwhale_api/music/models.py +++ b/api/funkwhale_api/music/models.py @@ -415,6 +415,7 @@ class TrackFile(models.Model): source = models.URLField(null=True, blank=True, max_length=500) creation_date = models.DateTimeField(default=timezone.now) modification_date = models.DateTimeField(auto_now=True) + accessed_date = models.DateTimeField(null=True, blank=True) duration = models.IntegerField(null=True, blank=True) acoustid_track_id = models.UUIDField(null=True, blank=True) mimetype = models.CharField(null=True, blank=True, max_length=200) diff --git a/api/funkwhale_api/music/views.py b/api/funkwhale_api/music/views.py index f53de1b0a73a2126091fc0998325d27ca958f8f7..76fc8bc3e7fb257962c3388cd904223ff0c5d9d3 100644 --- a/api/funkwhale_api/music/views.py +++ b/api/funkwhale_api/music/views.py @@ -14,6 +14,7 @@ from django.db.models.functions import Length from django.db.models import Count from django.http import StreamingHttpResponse from django.urls import reverse +from django.utils import timezone from django.utils.decorators import method_decorator from rest_framework import viewsets, views, mixins @@ -264,6 +265,10 @@ class TrackFileViewSet(viewsets.ReadOnlyModelViewSet): except models.TrackFile.DoesNotExist: return Response(status=404) + # we update the accessed_date + f.accessed_date = timezone.now() + f.save(update_fields=['accessed_date']) + mt = f.mimetype audio_file = f.audio_file try: diff --git a/api/tests/federation/test_tasks.py b/api/tests/federation/test_tasks.py index 506fbc1fe95be93320161e28f626f450f7b4000f..3517e8feb2c317f319192991dd09917a3070e553 100644 --- a/api/tests/federation/test_tasks.py +++ b/api/tests/federation/test_tasks.py @@ -1,4 +1,7 @@ import datetime +import os +import pathlib +import pytest from django.core.paginator import Paginator from django.utils import timezone @@ -117,17 +120,16 @@ def test_clean_federation_music_cache_if_no_listen(preferences, factories): lt1 = factories['federation.LibraryTrack'](with_audio_file=True) lt2 = factories['federation.LibraryTrack'](with_audio_file=True) lt3 = factories['federation.LibraryTrack'](with_audio_file=True) - tf1 = factories['music.TrackFile'](library_track=lt1) - tf2 = factories['music.TrackFile'](library_track=lt2) - tf3 = factories['music.TrackFile'](library_track=lt3) - - # we listen to the first one, and the second one (but weeks ago) - listening1 = factories['history.Listening']( - track=tf1.track, - creation_date=timezone.now()) - listening2 = factories['history.Listening']( - track=tf2.track, - creation_date=timezone.now() - datetime.timedelta(minutes=61)) + tf1 = factories['music.TrackFile']( + accessed_date=timezone.now(), library_track=lt1) + tf2 = factories['music.TrackFile']( + accessed_date=timezone.now()-datetime.timedelta(minutes=61), + library_track=lt2) + tf3 = factories['music.TrackFile']( + accessed_date=None, library_track=lt3) + path1 = lt1.audio_file.path + path2 = lt2.audio_file.path + path3 = lt3.audio_file.path tasks.clean_music_cache() @@ -138,3 +140,32 @@ def test_clean_federation_music_cache_if_no_listen(preferences, factories): assert bool(lt1.audio_file) is True assert bool(lt2.audio_file) is False assert bool(lt3.audio_file) is False + assert os.path.exists(path1) is True + assert os.path.exists(path2) is False + assert os.path.exists(path3) is False + + +def test_clean_federation_music_cache_orphaned( + settings, preferences, factories): + preferences['federation__music_cache_duration'] = 60 + path = os.path.join(settings.MEDIA_ROOT, 'federation_cache') + keep_path = os.path.join(os.path.join(path, '1a', 'b2'), 'keep.ogg') + remove_path = os.path.join(os.path.join(path, 'c3', 'd4'), 'remove.ogg') + os.makedirs(os.path.dirname(keep_path), exist_ok=True) + os.makedirs(os.path.dirname(remove_path), exist_ok=True) + pathlib.Path(keep_path).touch() + pathlib.Path(remove_path).touch() + lt = factories['federation.LibraryTrack']( + with_audio_file=True, + audio_file__path=keep_path) + tf = factories['music.TrackFile']( + library_track=lt, + accessed_date=timezone.now()) + + tasks.clean_music_cache() + + lt.refresh_from_db() + + assert bool(lt.audio_file) is True + assert os.path.exists(lt.audio_file.path) is True + assert os.path.exists(remove_path) is False diff --git a/api/tests/music/test_views.py b/api/tests/music/test_views.py index 2cdee4e8cd5199ce6772558a72ce38f50e4ba151..b22ab7fd504fe1afd54b713cec55d9912e9890d8 100644 --- a/api/tests/music/test_views.py +++ b/api/tests/music/test_views.py @@ -2,6 +2,7 @@ import io import pytest from django.urls import reverse +from django.utils import timezone from funkwhale_api.music import views from funkwhale_api.federation import actors @@ -149,6 +150,19 @@ def test_can_proxy_remote_track( assert library_track.audio_file.read() == b'test' +def test_serve_updates_access_date(factories, settings, api_client): + settings.PROTECT_AUDIO_FILES = False + track_file = factories['music.TrackFile']() + now = timezone.now() + assert track_file.accessed_date is None + + response = api_client.get(track_file.path) + track_file.refresh_from_db() + + assert response.status_code == 200 + assert track_file.accessed_date > now + + def test_can_create_import_from_federation_tracks( factories, superuser_api_client, mocker): lts = factories['federation.LibraryTrack'].create_batch(size=5) diff --git a/changes/changelog.d/189.bugfix b/changes/changelog.d/189.bugfix new file mode 100644 index 0000000000000000000000000000000000000000..076058e636736b4bb9344ce566a31943c2c1a0bf --- /dev/null +++ b/changes/changelog.d/189.bugfix @@ -0,0 +1 @@ +Federation cache suppression is now simpler and also deletes orphaned files (#189)