From 2e616282fd6b0f9e8793912b81d54ab6757ff460 Mon Sep 17 00:00:00 2001 From: Eliot Berriot <contact@eliotberriot.com> Date: Wed, 27 Dec 2017 20:29:26 +0100 Subject: [PATCH] Now use import job everywhere, even for direct file imports --- api/funkwhale_api/music/factories.py | 8 ++ .../migrations/0017_auto_20171227_1728.py | 28 +++++ api/funkwhale_api/music/models.py | 18 ++- api/funkwhale_api/music/tasks.py | 77 +++++++++--- .../management/commands/import_files.py | 56 +++++++-- .../providers/audiofile/tasks.py | 4 +- api/tests/music/test_tasks.py | 113 +++++++++++++++++- api/tests/test_import_audio_file.py | 72 ++++++----- dev.yml | 2 +- 9 files changed, 315 insertions(+), 63 deletions(-) create mode 100644 api/funkwhale_api/music/migrations/0017_auto_20171227_1728.py diff --git a/api/funkwhale_api/music/factories.py b/api/funkwhale_api/music/factories.py index d776cd94..303e4522 100644 --- a/api/funkwhale_api/music/factories.py +++ b/api/funkwhale_api/music/factories.py @@ -72,6 +72,14 @@ class ImportJobFactory(factory.django.DjangoModelFactory): model = 'music.ImportJob' +@registry.register(name='music.FileImportJob') +class FileImportJobFactory(ImportJobFactory): + source = 'file://' + mbid = None + audio_file = factory.django.FileField( + from_path=os.path.join(SAMPLES_PATH, 'test.ogg')) + + @registry.register class WorkFactory(factory.django.DjangoModelFactory): mbid = factory.Faker('uuid4') diff --git a/api/funkwhale_api/music/migrations/0017_auto_20171227_1728.py b/api/funkwhale_api/music/migrations/0017_auto_20171227_1728.py new file mode 100644 index 00000000..dfca6643 --- /dev/null +++ b/api/funkwhale_api/music/migrations/0017_auto_20171227_1728.py @@ -0,0 +1,28 @@ +# Generated by Django 2.0 on 2017-12-27 17:28 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('music', '0016_trackfile_acoustid_track_id'), + ] + + operations = [ + migrations.AddField( + model_name='importbatch', + name='source', + field=models.CharField(choices=[('api', 'api'), ('shell', 'shell')], default='api', max_length=30), + ), + migrations.AddField( + model_name='importjob', + name='audio_file', + field=models.FileField(blank=True, max_length=255, null=True, upload_to='imports/%Y/%m/%d'), + ), + migrations.AlterField( + model_name='importjob', + name='mbid', + field=models.UUIDField(blank=True, editable=False, null=True), + ), + ] diff --git a/api/funkwhale_api/music/models.py b/api/funkwhale_api/music/models.py index f80a0c25..fdb88c33 100644 --- a/api/funkwhale_api/music/models.py +++ b/api/funkwhale_api/music/models.py @@ -384,9 +384,17 @@ class TrackFile(models.Model): class ImportBatch(models.Model): + IMPORT_BATCH_SOURCES = [ + ('api', 'api'), + ('shell', 'shell') + ] + source = models.CharField( + max_length=30, default='api', choices=IMPORT_BATCH_SOURCES) creation_date = models.DateTimeField(default=timezone.now) submitted_by = models.ForeignKey( - 'users.User', related_name='imports', on_delete=models.CASCADE) + 'users.User', + related_name='imports', + on_delete=models.CASCADE) class Meta: ordering = ['-creation_date'] @@ -411,12 +419,16 @@ class ImportJob(models.Model): blank=True, on_delete=models.CASCADE) source = models.URLField() - mbid = models.UUIDField(editable=False) + mbid = models.UUIDField(editable=False, null=True, blank=True) STATUS_CHOICES = ( ('pending', 'Pending'), - ('finished', 'finished'), + ('finished', 'Finished'), + ('errored', 'Errored'), + ('skipped', 'Skipped'), ) status = models.CharField(choices=STATUS_CHOICES, default='pending', max_length=30) + audio_file = models.FileField( + upload_to='imports/%Y/%m/%d', max_length=255, null=True, blank=True) class Meta: ordering = ('id', ) diff --git a/api/funkwhale_api/music/tasks.py b/api/funkwhale_api/music/tasks.py index 64399106..009f0068 100644 --- a/api/funkwhale_api/music/tasks.py +++ b/api/funkwhale_api/music/tasks.py @@ -1,3 +1,5 @@ +from django.core.files.base import ContentFile + from funkwhale_api.taskapp import celery from funkwhale_api.providers.acoustid import get_acoustid_client @@ -20,29 +22,72 @@ def set_acoustid_on_track_file(track_file): return update(result['id']) +def _do_import(import_job, replace): + from_file = bool(import_job.audio_file) + mbid = import_job.mbid + acoustid_track_id = None + duration = None + track = None + if not mbid and from_file: + # we try to deduce mbid from acoustid + client = get_acoustid_client() + match = client.get_best_match(import_job.audio_file.path) + if not match: + raise ValueError('Cannot get match') + duration = match['recordings'][0]['duration'] + mbid = match['recordings'][0]['id'] + acoustid_track_id = match['id'] + if mbid: + track, _ = models.Track.get_or_create_from_api(mbid=mbid) + else: + track = import_track_data_from_path(import_job.audio_file.path) + + track_file = None + if replace: + track_file = track.files.first() + elif track.files.count() > 0: + if import_job.audio_file: + import_job.audio_file.delete() + import_job.status = 'skipped' + import_job.save() + return + + track_file = track_file or models.TrackFile( + track=track, source=import_job.source) + track_file.acoustid_track_id = acoustid_track_id + if from_file: + track_file.audio_file = ContentFile(import_job.audio_file.read()) + track_file.audio_file.name = import_job.audio_file.name + track_file.duration = duration + else: + track_file.download_file() + track_file.save() + import_job.status = 'finished' + import_job.track_file = track_file + if import_job.audio_file: + # it's imported on the track, we don't need it anymore + import_job.audio_file.delete() + import_job.save() + return track.pk + + @celery.app.task(name='ImportJob.run', bind=True) @celery.require_instance(models.ImportJob, 'import_job') def import_job_run(self, import_job, replace=False): - try: - track, created = models.Track.get_or_create_from_api(mbid=import_job.mbid) - track_file = None - if replace: - track_file = track.files.first() - elif track.files.count() > 0: - return - - track_file = track_file or models.TrackFile( - track=track, source=import_job.source) - track_file.download_file() - track_file.save() - import_job.status = 'finished' - import_job.track_file = track_file + def mark_errored(): + import_job.status = 'errored' import_job.save() - return track.pk + try: + return _do_import(import_job, replace) except Exception as exc: if not settings.DEBUG: - raise import_job_run.retry(args=[self], exc=exc, countdown=30, max_retries=3) + try: + self.retry(exc=exc, countdown=30, max_retries=3) + except: + mark_errored() + raise + mark_errored() raise diff --git a/api/funkwhale_api/providers/audiofile/management/commands/import_files.py b/api/funkwhale_api/providers/audiofile/management/commands/import_files.py index a4bfe555..4130e93f 100644 --- a/api/funkwhale_api/providers/audiofile/management/commands/import_files.py +++ b/api/funkwhale_api/providers/audiofile/management/commands/import_files.py @@ -1,6 +1,10 @@ import glob +import os + +from django.core.files import File from django.core.management.base import BaseCommand, CommandError -from funkwhale_api.providers.audiofile import tasks +from funkwhale_api.music import tasks +from funkwhale_api.users.models import User class Command(BaseCommand): @@ -15,6 +19,11 @@ class Command(BaseCommand): default=False, help='Will match the pattern recursively (including subdirectories)', ) + parser.add_argument( + '--username', + dest='username', + help='The username of the user you want to be bound to the import', + ) parser.add_argument( '--async', action='store_true', @@ -46,6 +55,20 @@ class Command(BaseCommand): if not matching: raise CommandError('No file matching pattern, aborting') + user = None + if options['username']: + try: + user = User.objects.get(username=options['username']) + except User.DoesNotExist: + raise CommandError('Invalid username') + else: + # we bind the import to the first registered superuser + try: + user = User.objects.filter(is_superuser=True).order_by('pk').first() + assert user is not None + except AssertionError: + raise CommandError( + 'No superuser available, please provide a --username') if options['interactive']: message = ( 'Are you sure you want to do this?\n\n' @@ -54,18 +77,35 @@ class Command(BaseCommand): if input(''.join(message)) != 'yes': raise CommandError("Import cancelled.") + batch = self.do_import(matching, user=user, options=options) + + message = 'Successfully imported {} tracks' + if options['async']: + message = 'Successfully launched import for {} tracks' + self.stdout.write(message.format(len(matching))) + self.stdout.write( + "For details, please refer to import batch #".format(batch.pk)) + + def do_import(self, matching, user, options): message = 'Importing {}...' if options['async']: message = 'Launching import for {}...' + # we create an import batch binded to the user + batch = user.imports.create(source='shell') + async = options['async'] + handler = tasks.import_job_run.delay if async else tasks.import_job_run for path in matching: - self.stdout.write(message.format(path)) + job = batch.jobs.create( + source='file://' + path, + ) + name = os.path.basename(path) + with open(path, 'rb') as f: + job.audio_file.save(name, File(f)) + + job.save() try: - tasks.from_path(path) + handler(import_job_id=job.pk) except Exception as e: self.stdout.write('Error: {}'.format(e)) - - message = 'Successfully imported {} tracks' - if options['async']: - message = 'Successfully launched import for {} tracks' - self.stdout.write(message.format(len(matching))) + return batch diff --git a/api/funkwhale_api/providers/audiofile/tasks.py b/api/funkwhale_api/providers/audiofile/tasks.py index eec12922..bc18456c 100644 --- a/api/funkwhale_api/providers/audiofile/tasks.py +++ b/api/funkwhale_api/providers/audiofile/tasks.py @@ -8,7 +8,7 @@ from funkwhale_api.providers.acoustid import get_acoustid_client from funkwhale_api.music import models, metadata -def import_metadata_without_musicbrainz(path): +def import_track_data_from_path(path): data = metadata.Metadata(path) artist = models.Artist.objects.get_or_create( name__iexact=data.get('artist'), @@ -53,7 +53,7 @@ def from_path(path): result = client.get_best_match(path) acoustid_track_id = result['id'] except acoustid.WebServiceError: - track = import_metadata_without_musicbrainz(path) + track = import_track_data_from_path(path) except (TypeError, KeyError): track = import_metadata_without_musicbrainz(path) else: diff --git a/api/tests/music/test_tasks.py b/api/tests/music/test_tasks.py index 6052fd34..873ca18f 100644 --- a/api/tests/music/test_tasks.py +++ b/api/tests/music/test_tasks.py @@ -1,7 +1,13 @@ -from funkwhale_api.providers.acoustid import get_acoustid_client +import os +import pytest +from funkwhale_api.providers.acoustid import get_acoustid_client from funkwhale_api.music import tasks +from . import data as api_data + +DATA_DIR = os.path.dirname(os.path.abspath(__file__)) + def test_set_acoustid_on_track_file(factories, mocker): track_file = factories['music.TrackFile'](acoustid_track_id=None) @@ -40,3 +46,108 @@ def test_set_acoustid_on_track_file_required_high_score(factories, mocker): track_file.refresh_from_db() assert track_file.acoustid_track_id is None + + +def test_import_job_can_run_with_file_and_acoustid(factories, mocker): + path = os.path.join(DATA_DIR, 'test.ogg') + mbid = '9968a9d6-8d92-4051-8f76-674e157b6eed' + acoustid_payload = { + 'results': [ + {'id': 'e475bf79-c1ce-4441-bed7-1e33f226c0a2', + 'recordings': [ + { + 'duration': 268, + 'id': mbid}], + 'score': 0.860825}], + 'status': 'ok' + } + mocker.patch( + 'funkwhale_api.musicbrainz.api.artists.get', + return_value=api_data.artists['get']['adhesive_wombat']) + mocker.patch( + 'funkwhale_api.musicbrainz.api.releases.get', + return_value=api_data.albums['get']['marsupial']) + mocker.patch( + 'funkwhale_api.musicbrainz.api.recordings.search', + return_value=api_data.tracks['search']['8bitadventures']) + mocker.patch('acoustid.match', return_value=acoustid_payload) + + job = factories['music.FileImportJob'](audio_file__path=path) + f = job.audio_file + tasks.import_job_run(import_job_id=job.pk) + job.refresh_from_db() + + track_file = job.track_file + + with open(path, 'rb') as f: + assert track_file.audio_file.read() == f.read() + assert track_file.duration == 268 + # audio file is deleted from import job once persisted to audio file + assert not job.audio_file + assert job.status == 'finished' + assert job.source == 'file://' + + +def test_import_job_can_be_skipped(factories, mocker): + path = os.path.join(DATA_DIR, 'test.ogg') + mbid = '9968a9d6-8d92-4051-8f76-674e157b6eed' + track_file = factories['music.TrackFile'](track__mbid=mbid) + acoustid_payload = { + 'results': [ + {'id': 'e475bf79-c1ce-4441-bed7-1e33f226c0a2', + 'recordings': [ + { + 'duration': 268, + 'id': mbid}], + 'score': 0.860825}], + 'status': 'ok' + } + mocker.patch( + 'funkwhale_api.musicbrainz.api.artists.get', + return_value=api_data.artists['get']['adhesive_wombat']) + mocker.patch( + 'funkwhale_api.musicbrainz.api.releases.get', + return_value=api_data.albums['get']['marsupial']) + mocker.patch( + 'funkwhale_api.musicbrainz.api.recordings.search', + return_value=api_data.tracks['search']['8bitadventures']) + mocker.patch('acoustid.match', return_value=acoustid_payload) + + job = factories['music.FileImportJob'](audio_file__path=path) + f = job.audio_file + tasks.import_job_run(import_job_id=job.pk) + job.refresh_from_db() + + assert job.track_file is None + # audio file is deleted from import job once persisted to audio file + assert not job.audio_file + assert job.status == 'skipped' + + +def test_import_job_can_be_errored(factories, mocker): + path = os.path.join(DATA_DIR, 'test.ogg') + mbid = '9968a9d6-8d92-4051-8f76-674e157b6eed' + track_file = factories['music.TrackFile'](track__mbid=mbid) + acoustid_payload = { + 'results': [ + {'id': 'e475bf79-c1ce-4441-bed7-1e33f226c0a2', + 'recordings': [ + { + 'duration': 268, + 'id': mbid}], + 'score': 0.860825}], + 'status': 'ok' + } + class MyException(Exception): + pass + mocker.patch('acoustid.match', side_effect=MyException()) + + job = factories['music.FileImportJob']( + audio_file__path=path, track_file=None) + + with pytest.raises(MyException): + tasks.import_job_run(import_job_id=job.pk) + job.refresh_from_db() + + assert job.track_file is None + assert job.status == 'errored' diff --git a/api/tests/test_import_audio_file.py b/api/tests/test_import_audio_file.py index 26198686..e81e9b81 100644 --- a/api/tests/test_import_audio_file.py +++ b/api/tests/test_import_audio_file.py @@ -1,8 +1,10 @@ +import pytest import acoustid import datetime import os +from django.core.management import call_command +from django.core.management.base import CommandError -from .music import data as api_data from funkwhale_api.providers.audiofile import tasks DATA_DIR = os.path.join( @@ -11,35 +13,7 @@ DATA_DIR = os.path.join( ) -def test_import_file_with_acoustid(db, mocker, preferences): - mbid = api_data.tracks['get']['8bitadventures']['recording']['id'] - payload = { - 'results': [{ - 'id': 'e475bf79-c1ce-4441-bed7-1e33f226c0a2', - 'recordings': [{'id': mbid}], - 'score': 0.86 - }] - } - path = os.path.join(DATA_DIR, 'dummy_file.ogg') - m = mocker.patch('acoustid.match', return_value=payload) - mocker.patch( - 'funkwhale_api.musicbrainz.api.artists.get', - return_value=api_data.artists['get']['adhesive_wombat']) - mocker.patch( - 'funkwhale_api.musicbrainz.api.releases.get', - return_value=api_data.albums['get']['marsupial']) - mocker.patch( - 'funkwhale_api.musicbrainz.api.recordings.get', - return_value=api_data.tracks['get']['8bitadventures']) - track_file = tasks.from_path(path) - result = payload['results'][0] - - assert track_file.acoustid_track_id == result['id'] - assert track_file.track.mbid == result['recordings'][0]['id'] - m.assert_called_once_with('', path, parse=False) - - -def test_can_import_single_audio_file_without_acoustid(db, mocker): +def test_can_create_track_from_file_metadata(db, mocker): mocker.patch('acoustid.match', side_effect=acoustid.WebServiceError('test')) metadata = { 'artist': ['Test artist'], @@ -56,8 +30,8 @@ def test_can_import_single_audio_file_without_acoustid(db, mocker): 'funkwhale_api.music.metadata.Metadata.get_file_type', return_value='OggVorbis', ) - track_file = tasks.from_path(os.path.join(DATA_DIR, 'dummy_file.ogg')) - track = track_file.track + track = tasks.import_track_data_from_path( + os.path.join(DATA_DIR, 'dummy_file.ogg')) assert track.title == metadata['title'][0] assert track.mbid == metadata['musicbrainz_trackid'][0] @@ -67,3 +41,37 @@ def test_can_import_single_audio_file_without_acoustid(db, mocker): assert track.album.release_date == datetime.date(2012, 8, 15) assert track.artist.name == metadata['artist'][0] assert track.artist.mbid == metadata['musicbrainz_artistid'][0] + + +def test_management_command_requires_a_valid_username(factories, mocker): + path = os.path.join(DATA_DIR, 'dummy_file.ogg') + user = factories['users.User'](username='me') + mocker.patch('funkwhale_api.providers.audiofile.management.commands.import_files.Command.do_import') # NOQA + with pytest.raises(CommandError): + call_command('import_files', path, username='not_me', interactive=False) + call_command('import_files', path, username='me', interactive=False) + + +def test_import_files_creates_a_batch_and_job(factories, mocker): + m = mocker.patch('funkwhale_api.music.tasks.import_job_run.delay') + user = factories['users.User'](username='me') + path = os.path.join(DATA_DIR, 'dummy_file.ogg') + call_command( + 'import_files', + path, + username='me', + async=True, + interactive=False) + + batch = user.imports.latest('id') + assert batch.source == 'shell' + assert batch.jobs.count() == 1 + + job = batch.jobs.first() + + assert job.status == 'pending' + with open(path, 'rb') as f: + assert job.audio_file.read() == f.read() + + assert job.source == 'file://' + path + m.assert_called_once_with(import_job_id=job.pk) diff --git a/dev.yml b/dev.yml index 77d288de..befc4b24 100644 --- a/dev.yml +++ b/dev.yml @@ -30,7 +30,7 @@ services: links: - postgres - redis - command: celery -A funkwhale_api.taskapp worker + command: celery -A funkwhale_api.taskapp worker -l debug environment: - "DJANGO_ALLOWED_HOSTS=localhost" - "DJANGO_SETTINGS_MODULE=config.settings.local" -- GitLab