Verified Commit 5d2dbbc8 authored by Agate's avatar Agate 💬

Upgraded celery to 4.1, added endpoint logic for fingerprinting audio files

parent 4834b9e4
......@@ -6,8 +6,8 @@ ENV PYTHONUNBUFFERED 1
COPY ./requirements.apt /requirements.apt
RUN apt-get update -qq && grep "^[^#;]" requirements.apt | xargs apt-get install -y
# Install the prebuilt fpcalc binary from the Chromaprint 1.4.2 release into /usr/local/bin.
RUN curl -L https://github.com/acoustid/chromaprint/releases/download/v1.4.2/chromaprint-fpcalc-1.4.2-linux-x86_64.tar.gz | tar -xz -C /usr/local/bin --strip 1
# Fail the build early if the fingerprinting binary is missing or not runnable.
RUN fpcalc -version
COPY ./requirements/base.txt /requirements/base.txt
RUN pip install -r /requirements/base.txt
COPY ./requirements/production.txt /requirements/production.txt
......
......@@ -47,7 +47,6 @@ THIRD_PARTY_APPS = (
'corsheaders',
'rest_framework',
'rest_framework.authtoken',
'djcelery',
'taggit',
'cachalot',
'rest_auth',
......@@ -68,6 +67,7 @@ LOCAL_APPS = (
'funkwhale_api.playlists',
'funkwhale_api.providers.audiofile',
'funkwhale_api.providers.youtube',
'funkwhale_api.providers.acoustid',
)
# See: https://docs.djangoproject.com/en/dev/ref/settings/#installed-apps
......@@ -266,14 +266,14 @@ CACHES["default"]["OPTIONS"] = {
########## CELERY
INSTALLED_APPS += ('funkwhale_api.taskapp.celery.CeleryConfig',)
BROKER_URL = env(
CELERY_BROKER_URL = env(
"CELERY_BROKER_URL", default=env('CACHE_URL', default=CACHE_DEFAULT))
########## END CELERY
# Location of root django.contrib.admin URL, use {% url 'admin:index' %}
ADMIN_URL = r'^admin/'
# Your common stuff: Below this line define 3rd party library settings
CELERY_DEFAULT_RATE_LIMIT = 1
CELERYD_TASK_TIME_LIMIT = 300
CELERY_TASK_DEFAULT_RATE_LIMIT = 1
CELERY_TASK_TIME_LIMIT = 300
import datetime
JWT_AUTH = {
'JWT_ALLOW_REFRESH': True,
......
......@@ -54,7 +54,7 @@ TEST_RUNNER = 'django.test.runner.DiscoverRunner'
########## CELERY
# Eager mode (tasks executed locally, blocking until the task returns) is disabled here
CELERY_ALWAYS_EAGER = False
CELERY_TASK_ALWAYS_EAGER = False
########## END CELERY
# Your local stuff: Below this line define 3rd party library settings
......
......@@ -23,7 +23,7 @@ CACHES = {
}
}
BROKER_URL = 'memory://'
CELERY_BROKER_URL = 'memory://'
# TESTING
# ------------------------------------------------------------------------------
......@@ -31,7 +31,7 @@ TEST_RUNNER = 'django.test.runner.DiscoverRunner'
########## CELERY
# In development, all tasks will be executed locally by blocking until the task returns
CELERY_ALWAYS_EAGER = True
CELERY_TASK_ALWAYS_EAGER = True
########## END CELERY
# Your local stuff: Below this line define 3rd party library settings
......
......@@ -7,6 +7,7 @@ ENV PYTHONDONTWRITEBYTECODE 1
COPY ./requirements.apt /requirements.apt
COPY ./install_os_dependencies.sh /install_os_dependencies.sh
RUN bash install_os_dependencies.sh install
RUN curl -L https://github.com/acoustid/chromaprint/releases/download/v1.4.2/chromaprint-fpcalc-1.4.2-linux-x86_64.tar.gz | tar -xz -C /usr/local/bin --strip 1
RUN mkdir /requirements
COPY ./requirements/base.txt /requirements/base.txt
......
# Generated by Django 2.0 on 2017-12-26 16:39
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the optional ``acoustid_track_id`` UUID field to ``trackfile``."""

    # Must run after the migration binding track files to import jobs.
    dependencies = [('music', '0015_bind_track_file_to_import_job')]

    operations = [
        migrations.AddField(
            model_name='trackfile',
            name='acoustid_track_id',
            # Nullable and blank-able.
            field=models.UUIDField(null=True, blank=True),
        ),
    ]
......@@ -15,11 +15,9 @@ from django.utils import timezone
from taggit.managers import TaggableManager
from versatileimagefield.fields import VersatileImageField
from funkwhale_api.taskapp import celery
from funkwhale_api import downloader
from funkwhale_api import musicbrainz
from . import importers
from . import lyrics as lyrics_utils
class APIModelMixin(models.Model):
......@@ -255,14 +253,6 @@ class Lyrics(models.Model):
url = models.URLField(unique=True)
content = models.TextField(null=True, blank=True)
@celery.app.task(name='Lyrics.fetch_content', filter=celery.task_method)
def fetch_content(self):
html = lyrics_utils._get_html(self.url)
content = lyrics_utils.extract_content(html)
cleaned_content = lyrics_utils.clean_content(content)
self.content = cleaned_content
self.save()
@property
def content_rendered(self):
return markdown.markdown(
......@@ -362,6 +352,7 @@ class TrackFile(models.Model):
audio_file = models.FileField(upload_to='tracks/%Y/%m/%d', max_length=255)
source = models.URLField(null=True, blank=True)
duration = models.IntegerField(null=True, blank=True)
acoustid_track_id = models.UUIDField(null=True, blank=True)
def download_file(self):
# import the track file, since there is not any
......@@ -429,26 +420,3 @@ class ImportJob(models.Model):
class Meta:
ordering = ('id', )
@celery.app.task(name='ImportJob.run', filter=celery.task_method)
def run(self, replace=False):
try:
track, created = Track.get_or_create_from_api(mbid=self.mbid)
track_file = None
if replace:
track_file = track.files.first()
elif track.files.count() > 0:
return
track_file = track_file or TrackFile(
track=track, source=self.source)
track_file.download_file()
track_file.save()
self.status = 'finished'
self.track_file = track_file
self.save()
return track.pk
except Exception as exc:
if not settings.DEBUG:
raise ImportJob.run.retry(args=[self], exc=exc, countdown=30, max_retries=3)
raise
from funkwhale_api.taskapp import celery
from funkwhale_api.providers.acoustid import get_acoustid_client
from django.conf import settings
from . import models
from . import lyrics as lyrics_utils
@celery.app.task(name='acoustid.set_on_track_file')
@celery.require_instance(models.TrackFile, 'track_file')
def set_acoustid_on_track_file(track_file):
    """Look up the track file's audio on AcoustID and store the matched id.

    Returns the matched AcoustID id, or ``None`` (leaving the track file
    untouched) when the client reports no match.
    """
    match = get_acoustid_client().get_best_match(track_file.audio_file.path)
    if not match:
        return None

    acoustid_id = match['id']
    track_file.acoustid_track_id = acoustid_id
    # Only write the column we changed.
    track_file.save(update_fields=['acoustid_track_id'])
    return acoustid_id
@celery.app.task(name='ImportJob.run', bind=True)
@celery.require_instance(models.ImportJob, 'import_job')
def import_job_run(self, import_job, replace=False):
    """Import the track described by ``import_job`` and attach its file.

    :param self: the bound celery task instance (``bind=True``).
    :param import_job: the ``ImportJob`` being processed.
    :param replace: when true, reuse the track's first existing file instead
        of skipping tracks that already have one.
    :returns: the primary key of the imported track, or ``None`` when the
        track already has a file and ``replace`` is false.

    On failure the exception is re-raised when ``settings.DEBUG`` is set;
    otherwise the task is retried after 30 seconds, up to 3 times.
    """
    try:
        track, _ = models.Track.get_or_create_from_api(mbid=import_job.mbid)
        track_file = None
        if replace:
            track_file = track.files.first()
        elif track.files.count() > 0:
            # A file already exists and we were not asked to replace it.
            return
        track_file = track_file or models.TrackFile(
            track=track, source=import_job.source)
        track_file.download_file()
        track_file.save()
        import_job.status = 'finished'
        import_job.track_file = track_file
        import_job.save()
        return track.pk
    except Exception as exc:
        if not settings.DEBUG:
            # Retry through the bound task so the original request arguments
            # are reused; the task instance itself must never be passed as a
            # task argument.
            raise self.retry(exc=exc, countdown=30, max_retries=3)
        raise
@celery.app.task(name='Lyrics.fetch_content')
@celery.require_instance(models.Lyrics, 'lyrics')
def fetch_content(lyrics):
    """Download the page at ``lyrics.url`` and store its cleaned content."""
    raw_html = lyrics_utils._get_html(lyrics.url)
    extracted = lyrics_utils.extract_content(raw_html)
    lyrics.content = lyrics_utils.clean_content(extracted)
    # Persist only the field that was recomputed.
    lyrics.save(update_fields=['content'])
......@@ -22,6 +22,7 @@ from . import models
from . import serializers
from . import importers
from . import filters
from . import tasks
from . import utils
......@@ -129,7 +130,8 @@ class TrackViewSet(TagViewSetMixin, SearchMixin, viewsets.ReadOnlyModelViewSet):
lyrics = work.fetch_lyrics()
try:
if not lyrics.content:
lyrics.fetch_content()
tasks.fetch_content(lyrics_id=lyrics.pk)
lyrics.refresh_from_db()
except AttributeError:
return Response({'error': 'unavailable lyrics'}, status=404)
serializer = serializers.LyricsSerializer(lyrics)
......@@ -244,7 +246,7 @@ class SubmitViewSet(viewsets.ViewSet):
pass
batch = models.ImportBatch.objects.create(submitted_by=request.user)
job = models.ImportJob.objects.create(mbid=request.POST['mbid'], batch=batch, source=request.POST['import_url'])
job.run.delay()
tasks.import_job_run.delay(import_job_id=job.pk)
serializer = serializers.ImportBatchSerializer(batch)
return Response(serializer.data)
......@@ -272,7 +274,7 @@ class SubmitViewSet(viewsets.ViewSet):
models.TrackFile.objects.get(track__mbid=row['mbid'])
except models.TrackFile.DoesNotExist:
job = models.ImportJob.objects.create(mbid=row['mbid'], batch=batch, source=row['source'])
job.run.delay()
tasks.import_job_run.delay(import_job_id=job.pk)
serializer = serializers.ImportBatchSerializer(batch)
return serializer.data, batch
......
import acoustid
from dynamic_preferences.registries import global_preferences_registry
class Client(object):
    """Small wrapper around the ``acoustid`` module bound to one API key."""

    # Default minimum score a result row needs to be considered a match.
    MIN_SCORE_FOR_MATCH = 0.8

    def __init__(self, api_key):
        self.api_key = api_key

    def match(self, file_path):
        """Return the unparsed (``parse=False``) lookup response for a file."""
        return acoustid.match(self.api_key, file_path, parse=False)

    def get_best_match(self, file_path, min_score=None):
        """Return the highest-scoring result row for ``file_path``.

        :param file_path: path of the audio file to look up.
        :param min_score: minimum score a row must reach; defaults to
            ``MIN_SCORE_FOR_MATCH``.
        :returns: the result row (a dict) with the highest score at or above
            the threshold, or ``None`` when the response has no ``results``
            key or no row qualifies.
        """
        if min_score is None:
            min_score = self.MIN_SCORE_FOR_MATCH
        results = self.match(file_path=file_path)
        try:
            rows = results['results']
        except KeyError:
            return None
        # Rows without a score can never qualify as a match.
        candidates = [
            row for row in rows if row.get('score', 0) >= min_score
        ]
        # Pick the best row explicitly instead of relying on response order;
        # on ties ``max`` keeps the earliest row.
        return max(candidates, key=lambda row: row['score'], default=None)
def get_acoustid_client():
    """Build a ``Client`` using the API key stored in the global preferences."""
    api_key = global_preferences_registry.manager()['providers_acoustid__api_key']
    return Client(api_key=api_key)
from dynamic_preferences.types import StringPreference, Section
from dynamic_preferences.registries import global_preferences_registry
acoustid = Section('providers_acoustid')
@global_preferences_registry.register
class APIKey(StringPreference):
    """Global string preference for the AcoustID API key; empty by default."""

    section = acoustid
    name = 'api_key'
    verbose_name = 'Acoustid API key'
    default = ''
    help_text = (
        'The API key used to query AcoustID. Get one at https://acoustid.org/new-application.'
    )
import acoustid
import os
import datetime
from django.core.files import File
from funkwhale_api.taskapp import celery
from funkwhale_api.providers.acoustid import get_acoustid_client
from funkwhale_api.music import models, metadata
@celery.app.task(name='audiofile.from_path')
def from_path(path):
def import_metadata_without_musicbrainz(path):
data = metadata.Metadata(path)
artist = models.Artist.objects.get_or_create(
name__iexact=data.get('artist'),
defaults={
'name': data.get('artist'),
'mbid': data.get('musicbrainz_artistid', None),
},
)[0]
......@@ -39,11 +39,33 @@ def from_path(path):
'mbid': data.get('musicbrainz_recordingid', None),
},
)[0]
return track
def import_metadata_with_musicbrainz(path):
pass
@celery.app.task(name='audiofile.from_path')
def from_path(path):
acoustid_track_id = None
try:
client = get_acoustid_client()
result = client.get_best_match(path)
acoustid_track_id = result['id']
except acoustid.WebServiceError:
track = import_metadata_without_musicbrainz(path)
except (TypeError, KeyError):
track = import_metadata_without_musicbrainz(path)
else:
track, created = models.Track.get_or_create_from_api(
mbid=result['recordings'][0]['id']
)
if track.files.count() > 0:
raise ValueError('File already exists for track {}'.format(track.pk))
track_file = models.TrackFile(track=track)
track_file = models.TrackFile(
track=track, acoustid_track_id=acoustid_track_id)
track_file.audio_file.save(
os.path.basename(path),
File(open(path, 'rb'))
......
from __future__ import absolute_import
import os
import functools
from celery import Celery
from django.apps import AppConfig
from django.conf import settings
from celery.contrib.methods import task_method
if not settings.configured:
# set the default Django settings module for the 'celery' program.
......@@ -21,12 +23,20 @@ class CeleryConfig(AppConfig):
def ready(self):
# Using a string here means the worker will not have to
# pickle the object when using Windows.
app.config_from_object('django.conf:settings')
app.config_from_object('django.conf:settings', namespace='CELERY')
app.autodiscover_tasks(lambda: settings.INSTALLED_APPS, force=True)
@app.task(bind=True)
def debug_task(self):
    """Print the repr of the current task request."""
    message = 'Request: {0!r}'.format(self.request)  # pragma: no cover
    print(message)  # pragma: no cover
def require_instance(model_or_qs, parameter_name):
    """Decorator factory swapping a ``<parameter_name>_id`` kwarg for an instance.

    The wrapped function is called with ``<parameter_name>_id=<pk>``; the
    wrapper removes that keyword argument, fetches the object with that
    primary key and passes it on as ``<parameter_name>``.

    :param model_or_qs: either an object exposing ``get(pk=...)`` directly,
        or one exposing it through an ``objects`` attribute.
    :param parameter_name: name of the keyword argument the wrapped function
        expects the instance under.
    :raises KeyError: at call time, when ``<parameter_name>_id`` is missing.
    """
    id_parameter = '_'.join([parameter_name, 'id'])

    def decorator(function):
        @functools.wraps(function)
        def inner(*args, **kwargs):
            pk = kwargs.pop(id_parameter)
            # Resolve the lookup callable first so that an AttributeError
            # raised *inside* the lookup is not mistaken for "this object
            # has no get()" and masked by the ``objects`` fallback.
            lookup = getattr(model_or_qs, 'get', None)
            if lookup is None:
                lookup = model_or_qs.objects.get
            kwargs[parameter_name] = lookup(pk=pk)
            return function(*args, **kwargs)
        return inner
    return decorator
......@@ -79,4 +79,3 @@ case "$1" in
help) usage_message;;
*) wrong_command $1;;
esac
......@@ -7,3 +7,4 @@ libpq-dev
postgresql-client
libav-tools
python3-dev
curl
......@@ -24,7 +24,7 @@ django-redis>=4.5,<4.6
redis>=2.10,<2.11
celery>=3.1,<3.2
celery>=4.1,<4.2
# Your custom requirements go here
......@@ -33,7 +33,6 @@ musicbrainzngs==0.6
youtube_dl>=2017.12.14
djangorestframework>=3.7,<3.8
djangorestframework-jwt>=1.11,<1.12
django-celery>=3.2,<3.3
django-mptt>=0.9,<0.10
google-api-python-client>=1.6,<1.7
arrow>=0.12,<0.13
......@@ -57,3 +56,4 @@ git+https://github.com/EliotBerriot/PyMemoize.git@django
git+https://github.com/EliotBerriot/django-cachalot.git@django-2
django-dynamic-preferences>=1.5,<1.6
pyacoustid>=1.1.5,<1.2
import tempfile
import shutil
import pytest
from django.core.cache import cache as django_cache
from dynamic_preferences.registries import global_preferences_registry
from funkwhale_api.taskapp import celery
@pytest.fixture(scope="session", autouse=True)
......@@ -11,12 +15,23 @@ def factories_autodiscover():
factories.registry.autodiscover(app_names)
@pytest.fixture(autouse=True)
def cache():
    """Yield Django's cache, then clear it after the test."""
    shared_cache = django_cache
    yield shared_cache
    shared_cache.clear()
@pytest.fixture
def factories(db):
    """Yield the ``funkwhale_api.factories`` registry (requests ``db``)."""
    # Imported lazily, inside the fixture, as in the original code.
    from funkwhale_api import factories as factories_module
    yield factories_module.registry
@pytest.fixture
def preferences(db):
    """Yield the global preferences manager (requests ``db``)."""
    manager = global_preferences_registry.manager()
    yield manager
@pytest.fixture
def tmpdir():
d = tempfile.mkdtemp()
......
......@@ -34,11 +34,11 @@ def test_can_submit_youtube_url_for_track_import(mocker, superuser_client):
assert track.album.title == 'Marsupial Madness'
def test_import_creates_an_import_with_correct_data(superuser_client, settings):
def test_import_creates_an_import_with_correct_data(mocker, superuser_client):
mocker.patch('funkwhale_api.music.tasks.import_job_run')
mbid = '9968a9d6-8d92-4051-8f76-674e157b6eed'
video_id = 'tPEE9ZwTmy0'
url = reverse('api:v1:submit-single')
settings.CELERY_ALWAYS_EAGER = False
response = superuser_client.post(
url,
{'import_url': 'https://www.youtube.com/watch?v={0}'.format(video_id),
......@@ -54,7 +54,8 @@ def test_import_creates_an_import_with_correct_data(superuser_client, settings):
assert job.source == 'https://www.youtube.com/watch?v={0}'.format(video_id)
def test_can_import_whole_album(mocker, superuser_client, settings):
def test_can_import_whole_album(mocker, superuser_client):
mocker.patch('funkwhale_api.music.tasks.import_job_run')
mocker.patch(
'funkwhale_api.musicbrainz.api.artists.get',
return_value=api_data.artists['get']['soad'])
......@@ -82,7 +83,6 @@ def test_can_import_whole_album(mocker, superuser_client, settings):
]
}
url = reverse('api:v1:submit-album')
settings.CELERY_ALWAYS_EAGER = False
response = superuser_client.post(
url, json.dumps(payload), content_type="application/json")
......@@ -109,7 +109,8 @@ def test_can_import_whole_album(mocker, superuser_client, settings):
assert job.source == row['source']
def test_can_import_whole_artist(mocker, superuser_client, settings):
def test_can_import_whole_artist(mocker, superuser_client):
mocker.patch('funkwhale_api.music.tasks.import_job_run')
mocker.patch(
'funkwhale_api.musicbrainz.api.artists.get',
return_value=api_data.artists['get']['soad'])
......@@ -142,7 +143,6 @@ def test_can_import_whole_artist(mocker, superuser_client, settings):
]
}
url = reverse('api:v1:submit-artist')
settings.CELERY_ALWAYS_EAGER = False
response = superuser_client.post(
url, json.dumps(payload), content_type="application/json")
......
......@@ -4,6 +4,7 @@ from django.urls import reverse
from funkwhale_api.music import models
from funkwhale_api.musicbrainz import api
from funkwhale_api.music import serializers
from funkwhale_api.music import tasks
from funkwhale_api.music import lyrics as lyrics_utils
from .mocking import lyricswiki
......@@ -18,7 +19,8 @@ def test_works_import_lyrics_if_any(mocker, factories):
lyrics = factories['music.Lyrics'](
url='http://lyrics.wikia.com/System_Of_A_Down:Chop_Suey!')
lyrics.fetch_content()
tasks.fetch_content(lyrics_id=lyrics.pk)
lyrics.refresh_from_db()
self.assertIn(
'Grab a brush and put on a little makeup',
lyrics.content,
......
......@@ -2,6 +2,7 @@ import pytest
from funkwhale_api.music import models
from funkwhale_api.music import importers
from funkwhale_api.music import tasks
def test_can_store_release_group_id_on_album(factories):
......@@ -44,6 +45,6 @@ def test_import_job_is_bound_to_track_file(factories, mocker):
job = factories['music.ImportJob'](mbid=track.mbid)
mocker.patch('funkwhale_api.music.models.TrackFile.download_file')
job.run()
tasks.import_job_run(import_job_id=job.pk)
job.refresh_from_db()
assert job.track_file.track == track
from funkwhale_api.providers.acoustid import get_acoustid_client
from funkwhale_api.music import tasks
def test_set_acoustid_on_track_file(factories, mocker):
    """A result scoring above the threshold is stored on the track file."""
    expected_id = 'e475bf79-c1ce-4441-bed7-1e33f226c0a2'
    recording = {
        'artists': [
            {'id': '9c6bddde-6228-4d9f-ad0d-03f6fcb19e13',
             'name': 'Binärpilot'},
        ],
        'duration': 268,
        'id': 'f269d497-1cc0-4ae4-a0c4-157ec7d73fcb',
        'title': 'Bend',
    }
    payload = {
        'results': [
            {'id': expected_id,
             'recordings': [recording],
             'score': 0.860825},
        ],
        'status': 'ok',
    }
    track_file = factories['music.TrackFile'](acoustid_track_id=None)
    match_mock = mocker.patch('acoustid.match', return_value=payload)

    returned = tasks.set_acoustid_on_track_file(track_file_id=track_file.pk)
    track_file.refresh_from_db()

    assert str(track_file.acoustid_track_id) == expected_id
    assert returned == expected_id
    match_mock.assert_called_once_with(
        '', track_file.audio_file.path, parse=False)
def test_set_acoustid_on_track_file_required_high_score(factories, mocker):
    """A result scoring 0.79 is not stored on the track file."""
    track_file = factories['music.TrackFile'](acoustid_track_id=None)
    payload = {
        'results': [{'score': 0.79}],
        'status': 'ok'
    }
    match_mock = mocker.patch('acoustid.match', return_value=payload)

    result = tasks.set_acoustid_on_track_file(track_file_id=track_file.pk)
    track_file.refresh_from_db()

    # The lookup must have happened, yet nothing may be returned or saved.
    assert match_mock.call_count == 1
    assert result is None
    assert track_file.acoustid_track_id is None
from funkwhale_api.providers.acoustid import get_acoustid_client
def test_client_is_configured_with_correct_api_key(preferences):
    """The client picks up the API key stored in the preferences."""
    expected_key = 'hello world'
    preferences['providers_acoustid__api_key'] = expected_key

    assert get_acoustid_client().api_key == expected_key
def test_client_returns_raw_results(db, mocker, preferences):
    """``Client.match`` returns the ``acoustid.match`` payload unchanged."""
    preferences['providers_acoustid__api_key'] = 'test'
    recording = {
        'artists': [
            {'id': '9c6bddde-6228-4d9f-ad0d-03f6fcb19e13',
             'name': 'Binärpilot'},
        ],
        'duration': 268,
        'id': 'f269d497-1cc0-4ae4-a0c4-157ec7d73fcb',
        'title': 'Bend',
    }
    payload = {
        'results': [
            {'id': 'e475bf79-c1ce-4441-bed7-1e33f226c0a2',
             'recordings': [recording],
             'score': 0.860825},
        ],
        'status': 'ok',
    }
    match_mock = mocker.patch('acoustid.match', return_value=payload)

    response = get_acoustid_client().match('/tmp/noopfile.mp3')

    assert response == payload
    match_mock.assert_called_once_with(
        'test', '/tmp/noopfile.mp3', parse=False)