Skip to content
Snippets Groups Projects
Verified Commit 5d2dbbc8 authored by Eliot Berriot
Browse files

Upgraded celery to 4.1, added endpoint logic for fingerprinting audio files

parent 4834b9e4
No related branches found
No related tags found
No related merge requests found
Showing
with 202 additions and 67 deletions
......@@ -6,8 +6,8 @@ ENV PYTHONUNBUFFERED 1
COPY ./requirements.apt /requirements.apt
RUN apt-get update -qq && grep "^[^#;]" requirements.apt | xargs apt-get install -y
RUN curl -L https://github.com/acoustid/chromaprint/releases/download/v1.4.2/chromaprint-fpcalc-1.4.2-linux-x86_64.tar.gz | tar -xz -C /usr/local/bin --strip 1
RUN fcalc yolofkjdssdhf
COPY ./requirements/base.txt /requirements/base.txt
RUN pip install -r /requirements/base.txt
COPY ./requirements/production.txt /requirements/production.txt
......
......@@ -47,7 +47,6 @@ THIRD_PARTY_APPS = (
'corsheaders',
'rest_framework',
'rest_framework.authtoken',
'djcelery',
'taggit',
'cachalot',
'rest_auth',
......@@ -68,6 +67,7 @@ LOCAL_APPS = (
'funkwhale_api.playlists',
'funkwhale_api.providers.audiofile',
'funkwhale_api.providers.youtube',
'funkwhale_api.providers.acoustid',
)
# See: https://docs.djangoproject.com/en/dev/ref/settings/#installed-apps
......@@ -266,14 +266,14 @@ CACHES["default"]["OPTIONS"] = {
########## CELERY
INSTALLED_APPS += ('funkwhale_api.taskapp.celery.CeleryConfig',)
BROKER_URL = env(
CELERY_BROKER_URL = env(
"CELERY_BROKER_URL", default=env('CACHE_URL', default=CACHE_DEFAULT))
########## END CELERY
# Location of root django.contrib.admin URL, use {% url 'admin:index' %}
ADMIN_URL = r'^admin/'
# Your common stuff: Below this line define 3rd party library settings
CELERY_DEFAULT_RATE_LIMIT = 1
CELERYD_TASK_TIME_LIMIT = 300
CELERY_TASK_DEFAULT_RATE_LIMIT = 1
CELERY_TASK_TIME_LIMIT = 300
import datetime
JWT_AUTH = {
'JWT_ALLOW_REFRESH': True,
......
......@@ -54,7 +54,7 @@ TEST_RUNNER = 'django.test.runner.DiscoverRunner'
########## CELERY
# In development, all tasks will be executed locally by blocking until the task returns
CELERY_ALWAYS_EAGER = False
CELERY_TASK_ALWAYS_EAGER = False
########## END CELERY
# Your local stuff: Below this line define 3rd party library settings
......
......@@ -23,7 +23,7 @@ CACHES = {
}
}
BROKER_URL = 'memory://'
CELERY_BROKER_URL = 'memory://'
# TESTING
# ------------------------------------------------------------------------------
......@@ -31,7 +31,7 @@ TEST_RUNNER = 'django.test.runner.DiscoverRunner'
########## CELERY
# In development, all tasks will be executed locally by blocking until the task returns
CELERY_ALWAYS_EAGER = True
CELERY_TASK_ALWAYS_EAGER = True
########## END CELERY
# Your local stuff: Below this line define 3rd party library settings
......
......@@ -7,6 +7,7 @@ ENV PYTHONDONTWRITEBYTECODE 1
COPY ./requirements.apt /requirements.apt
COPY ./install_os_dependencies.sh /install_os_dependencies.sh
RUN bash install_os_dependencies.sh install
RUN curl -L https://github.com/acoustid/chromaprint/releases/download/v1.4.2/chromaprint-fpcalc-1.4.2-linux-x86_64.tar.gz | tar -xz -C /usr/local/bin --strip 1
RUN mkdir /requirements
COPY ./requirements/base.txt /requirements/base.txt
......
# Generated by Django 2.0 on 2017-12-26 16:39
from django.db import migrations, models
# Schema migration for the ``music`` app: adds an optional ``acoustid_track_id``
# UUID field to the ``trackfile`` model.
class Migration(migrations.Migration):
# Applied after the migration that bound track files to import jobs.
dependencies = [
('music', '0015_bind_track_file_to_import_job'),
]
# Single operation: the new field accepts blank values and NULL.
operations = [
migrations.AddField(
model_name='trackfile',
name='acoustid_track_id',
field=models.UUIDField(blank=True, null=True),
),
]
......@@ -15,11 +15,9 @@ from django.utils import timezone
from taggit.managers import TaggableManager
from versatileimagefield.fields import VersatileImageField
from funkwhale_api.taskapp import celery
from funkwhale_api import downloader
from funkwhale_api import musicbrainz
from . import importers
from . import lyrics as lyrics_utils
class APIModelMixin(models.Model):
......@@ -255,14 +253,6 @@ class Lyrics(models.Model):
url = models.URLField(unique=True)
content = models.TextField(null=True, blank=True)
@celery.app.task(name='Lyrics.fetch_content', filter=celery.task_method)
def fetch_content(self):
html = lyrics_utils._get_html(self.url)
content = lyrics_utils.extract_content(html)
cleaned_content = lyrics_utils.clean_content(content)
self.content = cleaned_content
self.save()
@property
def content_rendered(self):
return markdown.markdown(
......@@ -362,6 +352,7 @@ class TrackFile(models.Model):
audio_file = models.FileField(upload_to='tracks/%Y/%m/%d', max_length=255)
source = models.URLField(null=True, blank=True)
duration = models.IntegerField(null=True, blank=True)
acoustid_track_id = models.UUIDField(null=True, blank=True)
def download_file(self):
# import the track file, since there is not any
......@@ -429,26 +420,3 @@ class ImportJob(models.Model):
class Meta:
ordering = ('id', )
@celery.app.task(name='ImportJob.run', filter=celery.task_method)
def run(self, replace=False):
try:
track, created = Track.get_or_create_from_api(mbid=self.mbid)
track_file = None
if replace:
track_file = track.files.first()
elif track.files.count() > 0:
return
track_file = track_file or TrackFile(
track=track, source=self.source)
track_file.download_file()
track_file.save()
self.status = 'finished'
self.track_file = track_file
self.save()
return track.pk
except Exception as exc:
if not settings.DEBUG:
raise ImportJob.run.retry(args=[self], exc=exc, countdown=30, max_retries=3)
raise
from funkwhale_api.taskapp import celery
from funkwhale_api.providers.acoustid import get_acoustid_client
from django.conf import settings
from . import models
from . import lyrics as lyrics_utils
@celery.app.task(name='acoustid.set_on_track_file')
@celery.require_instance(models.TrackFile, 'track_file')
def set_acoustid_on_track_file(track_file):
    """Look up the AcoustID of a track file's audio and store it on the row.

    Args:
        track_file: the ``TrackFile`` instance injected by
            ``require_instance``.

    Returns:
        The matched AcoustID identifier, or ``None`` when the client found
        no usable match (in which case nothing is saved).
    """
    match = get_acoustid_client().get_best_match(track_file.audio_file.path)
    if not match:
        return None

    acoustid_id = match['id']
    track_file.acoustid_track_id = acoustid_id
    # Only touch the column we changed.
    track_file.save(update_fields=['acoustid_track_id'])
    return acoustid_id
@celery.app.task(name='ImportJob.run', bind=True)
@celery.require_instance(models.ImportJob, 'import_job')
def import_job_run(self, import_job, replace=False):
    """Import the track described by ``import_job`` and attach its file.

    Args:
        self: the bound Celery task (``bind=True``), used to schedule retries.
        import_job: the ``ImportJob`` instance injected by
            ``require_instance``.
        replace: when true, reuse the track's first existing file (if any)
            instead of skipping tracks that already have one.

    Returns:
        The primary key of the imported track, or ``None`` when the track
        already has a file and ``replace`` is false (the job is then left
        unchanged).

    Raises:
        Any exception from the import is re-raised as-is when
        ``settings.DEBUG`` is set; otherwise a retry is scheduled.
    """
    try:
        track, _ = models.Track.get_or_create_from_api(mbid=import_job.mbid)
        track_file = None
        if replace:
            track_file = track.files.first()
        elif track.files.count() > 0:
            return
        track_file = track_file or models.TrackFile(
            track=track, source=import_job.source)
        track_file.download_file()
        track_file.save()
        import_job.status = 'finished'
        import_job.track_file = track_file
        import_job.save()
        return track.pk
    except Exception as exc:
        if not settings.DEBUG:
            # Retry with the arguments of the original request. Passing
            # ``args=[self]`` would send the Task object itself as the
            # positional arguments of the retried call.
            raise self.retry(exc=exc, countdown=30, max_retries=3)
        raise
@celery.app.task(name='Lyrics.fetch_content')
@celery.require_instance(models.Lyrics, 'lyrics')
def fetch_content(lyrics):
    """Download, extract and clean the lyrics text, then store it.

    Args:
        lyrics: the ``Lyrics`` instance injected by ``require_instance``;
            its ``url`` is fetched and its ``content`` is overwritten.
    """
    raw_html = lyrics_utils._get_html(lyrics.url)
    lyrics.content = lyrics_utils.clean_content(
        lyrics_utils.extract_content(raw_html))
    # Persist only the column that changed.
    lyrics.save(update_fields=['content'])
......@@ -22,6 +22,7 @@ from . import models
from . import serializers
from . import importers
from . import filters
from . import tasks
from . import utils
......@@ -129,7 +130,8 @@ class TrackViewSet(TagViewSetMixin, SearchMixin, viewsets.ReadOnlyModelViewSet):
lyrics = work.fetch_lyrics()
try:
if not lyrics.content:
lyrics.fetch_content()
tasks.fetch_content(lyrics_id=lyrics.pk)
lyrics.refresh_from_db()
except AttributeError:
return Response({'error': 'unavailable lyrics'}, status=404)
serializer = serializers.LyricsSerializer(lyrics)
......@@ -244,7 +246,7 @@ class SubmitViewSet(viewsets.ViewSet):
pass
batch = models.ImportBatch.objects.create(submitted_by=request.user)
job = models.ImportJob.objects.create(mbid=request.POST['mbid'], batch=batch, source=request.POST['import_url'])
job.run.delay()
tasks.import_job_run.delay(import_job_id=job.pk)
serializer = serializers.ImportBatchSerializer(batch)
return Response(serializer.data)
......@@ -272,7 +274,7 @@ class SubmitViewSet(viewsets.ViewSet):
models.TrackFile.objects.get(track__mbid=row['mbid'])
except models.TrackFile.DoesNotExist:
job = models.ImportJob.objects.create(mbid=row['mbid'], batch=batch, source=row['source'])
job.run.delay()
tasks.import_job_run.delay(import_job_id=job.pk)
serializer = serializers.ImportBatchSerializer(batch)
return serializer.data, batch
......
import acoustid
from dynamic_preferences.registries import global_preferences_registry
class Client(object):
    """Small wrapper around the ``acoustid`` module bound to one API key."""

    # Result rows scoring below this value are not treated as matches.
    MIN_SCORE_FOR_MATCH = 0.8

    def __init__(self, api_key):
        self.api_key = api_key

    def match(self, file_path):
        """Return the unparsed ``acoustid.match`` response for ``file_path``."""
        return acoustid.match(self.api_key, file_path, parse=False)

    def get_best_match(self, file_path):
        """Return the highest-scoring result row for ``file_path``.

        The response is expected to be a mapping with a ``results`` list of
        rows, each carrying a ``score``.

        Returns:
            The row with the highest score among those reaching
            ``MIN_SCORE_FOR_MATCH``, or ``None`` when the response has no
            ``results`` key or no row qualifies. On equal scores the
            earliest row wins.
        """
        response = self.match(file_path=file_path)
        try:
            rows = response['results']
        except KeyError:
            return None

        # Rows without a score are treated as non-matching instead of
        # aborting the whole lookup.
        candidates = [
            row for row in rows
            if row.get('score', 0) >= self.MIN_SCORE_FOR_MATCH
        ]
        if not candidates:
            return None
        return max(candidates, key=lambda row: row['score'])
def get_acoustid_client():
    """Build a ``Client`` from the API key stored in the global preferences."""
    api_key = global_preferences_registry.manager()[
        'providers_acoustid__api_key']
    return Client(api_key=api_key)
from dynamic_preferences.types import StringPreference, Section
from dynamic_preferences.registries import global_preferences_registry
# Preference section under which the AcoustID provider settings are grouped.
acoustid = Section('providers_acoustid')
# Global string preference holding the AcoustID API key. It is registered on
# the global preferences registry and defaults to an empty string.
@global_preferences_registry.register
class APIKey(StringPreference):
section = acoustid
name = 'api_key'
default = ''
verbose_name = 'Acoustid API key'
help_text = 'The API key used to query AcoustID. Get one at https://acoustid.org/new-application.'
import acoustid
import os
import datetime
from django.core.files import File
from funkwhale_api.taskapp import celery
from funkwhale_api.providers.acoustid import get_acoustid_client
from funkwhale_api.music import models, metadata
@celery.app.task(name='audiofile.from_path')
def from_path(path):
def import_metadata_without_musicbrainz(path):
data = metadata.Metadata(path)
artist = models.Artist.objects.get_or_create(
name__iexact=data.get('artist'),
defaults={
'name': data.get('artist'),
'mbid': data.get('musicbrainz_artistid', None),
},
)[0]
......@@ -39,11 +39,33 @@ def from_path(path):
'mbid': data.get('musicbrainz_recordingid', None),
},
)[0]
return track
def import_metadata_with_musicbrainz(path):
pass
@celery.app.task(name='audiofile.from_path')
def from_path(path):
acoustid_track_id = None
try:
client = get_acoustid_client()
result = client.get_best_match(path)
acoustid_track_id = result['id']
except acoustid.WebServiceError:
track = import_metadata_without_musicbrainz(path)
except (TypeError, KeyError):
track = import_metadata_without_musicbrainz(path)
else:
track, created = models.Track.get_or_create_from_api(
mbid=result['recordings'][0]['id']
)
if track.files.count() > 0:
raise ValueError('File already exists for track {}'.format(track.pk))
track_file = models.TrackFile(track=track)
track_file = models.TrackFile(
track=track, acoustid_track_id=acoustid_track_id)
track_file.audio_file.save(
os.path.basename(path),
File(open(path, 'rb'))
......
from __future__ import absolute_import
import os
import functools
from celery import Celery
from django.apps import AppConfig
from django.conf import settings
from celery.contrib.methods import task_method
if not settings.configured:
# set the default Django settings module for the 'celery' program.
......@@ -21,12 +23,20 @@ class CeleryConfig(AppConfig):
def ready(self):
# Using a string here means the worker will not have to
# pickle the object when using Windows.
app.config_from_object('django.conf:settings')
app.config_from_object('django.conf:settings', namespace='CELERY')
app.autodiscover_tasks(lambda: settings.INSTALLED_APPS, force=True)
# Diagnostic task: prints the repr of the request it was invoked with.
# ``bind=True`` makes the task instance available as ``self``.
@app.task(bind=True)
def debug_task(self):
print('Request: {0!r}'.format(self.request)) # pragma: no cover
def require_instance(model_or_qs, parameter_name):
    """Decorator factory that swaps a ``<parameter_name>_id`` keyword for an object.

    The decorated function must be called with a ``<parameter_name>_id``
    keyword argument. That value is used as ``pk`` to fetch an instance,
    which is then passed to the wrapped function as ``<parameter_name>``.

    Args:
        model_or_qs: either an object exposing ``get(pk=...)`` directly, or
            one exposing it through an ``objects`` attribute.
        parameter_name: name of the keyword the wrapped function receives.

    Raises:
        KeyError: when the ``<parameter_name>_id`` keyword is missing.
        Whatever the underlying ``get`` raises is propagated unchanged.
    """
    id_parameter = '_'.join([parameter_name, 'id'])

    def decorator(function):
        @functools.wraps(function)
        def inner(*args, **kwargs):
            pk = kwargs.pop(id_parameter)
            # Only the attribute lookup is guarded, so an AttributeError
            # raised inside ``get`` itself is not mistaken for "this object
            # has no ``get``" and hidden behind the ``objects`` fallback.
            try:
                lookup = model_or_qs.get
            except AttributeError:
                lookup = model_or_qs.objects.get
            kwargs[parameter_name] = lookup(pk=pk)
            return function(*args, **kwargs)
        return inner
    return decorator
......@@ -79,4 +79,3 @@ case "$1" in
help) usage_message;;
*) wrong_command $1;;
esac
......@@ -7,3 +7,4 @@ libpq-dev
postgresql-client
libav-tools
python3-dev
curl
......@@ -24,7 +24,7 @@ django-redis>=4.5,<4.6
redis>=2.10,<2.11
celery>=3.1,<3.2
celery>=4.1,<4.2
# Your custom requirements go here
......@@ -33,7 +33,6 @@ musicbrainzngs==0.6
youtube_dl>=2017.12.14
djangorestframework>=3.7,<3.8
djangorestframework-jwt>=1.11,<1.12
django-celery>=3.2,<3.3
django-mptt>=0.9,<0.10
google-api-python-client>=1.6,<1.7
arrow>=0.12,<0.13
......@@ -57,3 +56,4 @@ git+https://github.com/EliotBerriot/PyMemoize.git@django
git+https://github.com/EliotBerriot/django-cachalot.git@django-2
django-dynamic-preferences>=1.5,<1.6
pyacoustid>=1.1.5,<1.2
import tempfile
import shutil
import pytest
from django.core.cache import cache as django_cache
from dynamic_preferences.registries import global_preferences_registry
from funkwhale_api.taskapp import celery
@pytest.fixture(scope="session", autouse=True)
......@@ -11,12 +15,23 @@ def factories_autodiscover():
factories.registry.autodiscover(app_names)
# Autouse fixture: hands the Django cache to the test, then clears it once
# the test has finished.
@pytest.fixture(autouse=True)
def cache():
yield django_cache
django_cache.clear()
# Fixture yielding the project's factory registry. It requests the ``db``
# fixture and imports ``funkwhale_api.factories`` only when the fixture runs.
@pytest.fixture
def factories(db):
from funkwhale_api import factories
yield factories.registry
# Fixture yielding the global preferences manager; requests the ``db`` fixture.
@pytest.fixture
def preferences(db):
yield global_preferences_registry.manager()
@pytest.fixture
def tmpdir():
d = tempfile.mkdtemp()
......
......@@ -34,11 +34,11 @@ def test_can_submit_youtube_url_for_track_import(mocker, superuser_client):
assert track.album.title == 'Marsupial Madness'
def test_import_creates_an_import_with_correct_data(superuser_client, settings):
def test_import_creates_an_import_with_correct_data(mocker, superuser_client):
mocker.patch('funkwhale_api.music.tasks.import_job_run')
mbid = '9968a9d6-8d92-4051-8f76-674e157b6eed'
video_id = 'tPEE9ZwTmy0'
url = reverse('api:v1:submit-single')
settings.CELERY_ALWAYS_EAGER = False
response = superuser_client.post(
url,
{'import_url': 'https://www.youtube.com/watch?v={0}'.format(video_id),
......@@ -54,7 +54,8 @@ def test_import_creates_an_import_with_correct_data(superuser_client, settings):
assert job.source == 'https://www.youtube.com/watch?v={0}'.format(video_id)
def test_can_import_whole_album(mocker, superuser_client, settings):
def test_can_import_whole_album(mocker, superuser_client):
mocker.patch('funkwhale_api.music.tasks.import_job_run')
mocker.patch(
'funkwhale_api.musicbrainz.api.artists.get',
return_value=api_data.artists['get']['soad'])
......@@ -82,7 +83,6 @@ def test_can_import_whole_album(mocker, superuser_client, settings):
]
}
url = reverse('api:v1:submit-album')
settings.CELERY_ALWAYS_EAGER = False
response = superuser_client.post(
url, json.dumps(payload), content_type="application/json")
......@@ -109,7 +109,8 @@ def test_can_import_whole_album(mocker, superuser_client, settings):
assert job.source == row['source']
def test_can_import_whole_artist(mocker, superuser_client, settings):
def test_can_import_whole_artist(mocker, superuser_client):
mocker.patch('funkwhale_api.music.tasks.import_job_run')
mocker.patch(
'funkwhale_api.musicbrainz.api.artists.get',
return_value=api_data.artists['get']['soad'])
......@@ -142,7 +143,6 @@ def test_can_import_whole_artist(mocker, superuser_client, settings):
]
}
url = reverse('api:v1:submit-artist')
settings.CELERY_ALWAYS_EAGER = False
response = superuser_client.post(
url, json.dumps(payload), content_type="application/json")
......
......@@ -4,6 +4,7 @@ from django.urls import reverse
from funkwhale_api.music import models
from funkwhale_api.musicbrainz import api
from funkwhale_api.music import serializers
from funkwhale_api.music import tasks
from funkwhale_api.music import lyrics as lyrics_utils
from .mocking import lyricswiki
......@@ -18,7 +19,8 @@ def test_works_import_lyrics_if_any(mocker, factories):
lyrics = factories['music.Lyrics'](
url='http://lyrics.wikia.com/System_Of_A_Down:Chop_Suey!')
lyrics.fetch_content()
tasks.fetch_content(lyrics_id=lyrics.pk)
lyrics.refresh_from_db()
self.assertIn(
'Grab a brush and put on a little makeup',
lyrics.content,
......
......@@ -2,6 +2,7 @@ import pytest
from funkwhale_api.music import models
from funkwhale_api.music import importers
from funkwhale_api.music import tasks
def test_can_store_release_group_id_on_album(factories):
......@@ -44,6 +45,6 @@ def test_import_job_is_bound_to_track_file(factories, mocker):
job = factories['music.ImportJob'](mbid=track.mbid)
mocker.patch('funkwhale_api.music.models.TrackFile.download_file')
job.run()
tasks.import_job_run(import_job_id=job.pk)
job.refresh_from_db()
assert job.track_file.track == track
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment