Skip to content
Snippets Groups Projects
Commit 72c3c0fd authored by Eliot Berriot's avatar Eliot Berriot
Browse files

Merge branch '189-cache-deletion' into 'develop'

Resolve "Federation cached tracks not cleaned by celery task"

Closes #189

See merge request funkwhale/funkwhale!183
parents 480b6d7f bc2c9950
No related branches found
No related tags found
No related merge requests found
import datetime
import json
import logging
import os
from django.conf import settings
from django.db.models import Q
from django.utils import timezone
from requests.exceptions import RequestException
......@@ -96,16 +98,38 @@ def clean_music_cache():
delay = preferences['federation__music_cache_duration']
if delay < 1:
return # cache clearing disabled
limit = timezone.now() - datetime.timedelta(minutes=delay)
candidates = models.LibraryTrack.objects.filter(
audio_file__isnull=False
).values_list('local_track_file__track', flat=True)
listenings = Listening.objects.filter(
creation_date__gte=timezone.now() - datetime.timedelta(minutes=delay),
track__pk__in=candidates).values_list('track', flat=True)
too_old = set(candidates) - set(listenings)
to_remove = models.LibraryTrack.objects.filter(
local_track_file__track__pk__in=too_old).only('audio_file')
for lt in to_remove:
Q(audio_file__isnull=False) & (
Q(local_track_file__accessed_date__lt=limit) |
Q(local_track_file__accessed_date=None)
)
).exclude(audio_file='').only('audio_file', 'id')
for lt in candidates:
lt.audio_file.delete()
# we also delete orphaned files, if any
storage = models.LibraryTrack._meta.get_field('audio_file').storage
files = get_files(storage, 'federation_cache')
existing = models.LibraryTrack.objects.filter(audio_file__in=files)
missing = set(files) - set(existing.values_list('audio_file', flat=True))
for m in missing:
storage.delete(m)
def get_files(storage, *parts):
"""
This is a recursive function that return all files available
in a given directory using django's storage.
"""
if not parts:
raise ValueError('Missing path')
dirs, files = storage.listdir(os.path.join(*parts))
for dir in dirs:
files += get_files(storage, *(list(parts) + [dir]))
return [
os.path.join(parts[-1], path)
for path in files
]
# Generated by Django 2.0.3 on 2018-05-06 12:47
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('music', '0025_auto_20180419_2023'),
]
operations = [
migrations.AddField(
model_name='trackfile',
name='accessed_date',
field=models.DateTimeField(blank=True, null=True),
),
]
......@@ -415,6 +415,7 @@ class TrackFile(models.Model):
source = models.URLField(null=True, blank=True, max_length=500)
creation_date = models.DateTimeField(default=timezone.now)
modification_date = models.DateTimeField(auto_now=True)
accessed_date = models.DateTimeField(null=True, blank=True)
duration = models.IntegerField(null=True, blank=True)
acoustid_track_id = models.UUIDField(null=True, blank=True)
mimetype = models.CharField(null=True, blank=True, max_length=200)
......
......@@ -14,6 +14,7 @@ from django.db.models.functions import Length
from django.db.models import Count
from django.http import StreamingHttpResponse
from django.urls import reverse
from django.utils import timezone
from django.utils.decorators import method_decorator
from rest_framework import viewsets, views, mixins
......@@ -264,6 +265,10 @@ class TrackFileViewSet(viewsets.ReadOnlyModelViewSet):
except models.TrackFile.DoesNotExist:
return Response(status=404)
# we update the accessed_date
f.accessed_date = timezone.now()
f.save(update_fields=['accessed_date'])
mt = f.mimetype
audio_file = f.audio_file
try:
......
import datetime
import os
import pathlib
import pytest
from django.core.paginator import Paginator
from django.utils import timezone
......@@ -117,17 +120,16 @@ def test_clean_federation_music_cache_if_no_listen(preferences, factories):
lt1 = factories['federation.LibraryTrack'](with_audio_file=True)
lt2 = factories['federation.LibraryTrack'](with_audio_file=True)
lt3 = factories['federation.LibraryTrack'](with_audio_file=True)
tf1 = factories['music.TrackFile'](library_track=lt1)
tf2 = factories['music.TrackFile'](library_track=lt2)
tf3 = factories['music.TrackFile'](library_track=lt3)
# we listen to the first one, and the second one (but weeks ago)
listening1 = factories['history.Listening'](
track=tf1.track,
creation_date=timezone.now())
listening2 = factories['history.Listening'](
track=tf2.track,
creation_date=timezone.now() - datetime.timedelta(minutes=61))
tf1 = factories['music.TrackFile'](
accessed_date=timezone.now(), library_track=lt1)
tf2 = factories['music.TrackFile'](
accessed_date=timezone.now()-datetime.timedelta(minutes=61),
library_track=lt2)
tf3 = factories['music.TrackFile'](
accessed_date=None, library_track=lt3)
path1 = lt1.audio_file.path
path2 = lt2.audio_file.path
path3 = lt3.audio_file.path
tasks.clean_music_cache()
......@@ -138,3 +140,32 @@ def test_clean_federation_music_cache_if_no_listen(preferences, factories):
assert bool(lt1.audio_file) is True
assert bool(lt2.audio_file) is False
assert bool(lt3.audio_file) is False
assert os.path.exists(path1) is True
assert os.path.exists(path2) is False
assert os.path.exists(path3) is False
def test_clean_federation_music_cache_orphaned(
settings, preferences, factories):
preferences['federation__music_cache_duration'] = 60
path = os.path.join(settings.MEDIA_ROOT, 'federation_cache')
keep_path = os.path.join(os.path.join(path, '1a', 'b2'), 'keep.ogg')
remove_path = os.path.join(os.path.join(path, 'c3', 'd4'), 'remove.ogg')
os.makedirs(os.path.dirname(keep_path), exist_ok=True)
os.makedirs(os.path.dirname(remove_path), exist_ok=True)
pathlib.Path(keep_path).touch()
pathlib.Path(remove_path).touch()
lt = factories['federation.LibraryTrack'](
with_audio_file=True,
audio_file__path=keep_path)
tf = factories['music.TrackFile'](
library_track=lt,
accessed_date=timezone.now())
tasks.clean_music_cache()
lt.refresh_from_db()
assert bool(lt.audio_file) is True
assert os.path.exists(lt.audio_file.path) is True
assert os.path.exists(remove_path) is False
......@@ -2,6 +2,7 @@ import io
import pytest
from django.urls import reverse
from django.utils import timezone
from funkwhale_api.music import views
from funkwhale_api.federation import actors
......@@ -149,6 +150,19 @@ def test_can_proxy_remote_track(
assert library_track.audio_file.read() == b'test'
def test_serve_updates_access_date(factories, settings, api_client):
settings.PROTECT_AUDIO_FILES = False
track_file = factories['music.TrackFile']()
now = timezone.now()
assert track_file.accessed_date is None
response = api_client.get(track_file.path)
track_file.refresh_from_db()
assert response.status_code == 200
assert track_file.accessed_date > now
def test_can_create_import_from_federation_tracks(
factories, superuser_api_client, mocker):
lts = factories['federation.LibraryTrack'].create_batch(size=5)
......
Federation cache suppression is now simpler and also deletes orphaned files (#189)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment