From 2e616282fd6b0f9e8793912b81d54ab6757ff460 Mon Sep 17 00:00:00 2001
From: Eliot Berriot <contact@eliotberriot.com>
Date: Wed, 27 Dec 2017 20:29:26 +0100
Subject: [PATCH] Now use import job everywhere, even for direct file imports

---
 api/funkwhale_api/music/factories.py          |   8 ++
 .../migrations/0017_auto_20171227_1728.py     |  28 +++++
 api/funkwhale_api/music/models.py             |  18 ++-
 api/funkwhale_api/music/tasks.py              |  77 +++++++++---
 .../management/commands/import_files.py       |  56 +++++++--
 .../providers/audiofile/tasks.py              |   4 +-
 api/tests/music/test_tasks.py                 | 113 +++++++++++++++++-
 api/tests/test_import_audio_file.py           |  72 ++++++-----
 dev.yml                                       |   2 +-
 9 files changed, 315 insertions(+), 63 deletions(-)
 create mode 100644 api/funkwhale_api/music/migrations/0017_auto_20171227_1728.py

diff --git a/api/funkwhale_api/music/factories.py b/api/funkwhale_api/music/factories.py
index d776cd94..303e4522 100644
--- a/api/funkwhale_api/music/factories.py
+++ b/api/funkwhale_api/music/factories.py
@@ -72,6 +72,14 @@ class ImportJobFactory(factory.django.DjangoModelFactory):
         model = 'music.ImportJob'
 
 
+@registry.register(name='music.FileImportJob')
+class FileImportJobFactory(ImportJobFactory):
+    source = 'file://'
+    mbid = None
+    audio_file = factory.django.FileField(
+        from_path=os.path.join(SAMPLES_PATH, 'test.ogg'))
+
+
 @registry.register
 class WorkFactory(factory.django.DjangoModelFactory):
     mbid = factory.Faker('uuid4')
diff --git a/api/funkwhale_api/music/migrations/0017_auto_20171227_1728.py b/api/funkwhale_api/music/migrations/0017_auto_20171227_1728.py
new file mode 100644
index 00000000..dfca6643
--- /dev/null
+++ b/api/funkwhale_api/music/migrations/0017_auto_20171227_1728.py
@@ -0,0 +1,28 @@
+# Generated by Django 2.0 on 2017-12-27 17:28
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('music', '0016_trackfile_acoustid_track_id'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='importbatch',
+            name='source',
+            field=models.CharField(choices=[('api', 'api'), ('shell', 'shell')], default='api', max_length=30),
+        ),
+        migrations.AddField(
+            model_name='importjob',
+            name='audio_file',
+            field=models.FileField(blank=True, max_length=255, null=True, upload_to='imports/%Y/%m/%d'),
+        ),
+        migrations.AlterField(
+            model_name='importjob',
+            name='mbid',
+            field=models.UUIDField(blank=True, editable=False, null=True),
+        ),
+    ]
diff --git a/api/funkwhale_api/music/models.py b/api/funkwhale_api/music/models.py
index f80a0c25..fdb88c33 100644
--- a/api/funkwhale_api/music/models.py
+++ b/api/funkwhale_api/music/models.py
@@ -384,9 +384,17 @@ class TrackFile(models.Model):
 
 
 class ImportBatch(models.Model):
+    IMPORT_BATCH_SOURCES = [
+        ('api', 'api'),
+        ('shell', 'shell')
+    ]
+    source = models.CharField(
+        max_length=30, default='api', choices=IMPORT_BATCH_SOURCES)
     creation_date = models.DateTimeField(default=timezone.now)
     submitted_by = models.ForeignKey(
-        'users.User', related_name='imports', on_delete=models.CASCADE)
+        'users.User',
+        related_name='imports',
+        on_delete=models.CASCADE)
 
     class Meta:
         ordering = ['-creation_date']
@@ -411,12 +419,16 @@ class ImportJob(models.Model):
         blank=True,
         on_delete=models.CASCADE)
     source = models.URLField()
-    mbid = models.UUIDField(editable=False)
+    mbid = models.UUIDField(editable=False, null=True, blank=True)
     STATUS_CHOICES = (
         ('pending', 'Pending'),
-        ('finished', 'finished'),
+        ('finished', 'Finished'),
+        ('errored', 'Errored'),
+        ('skipped', 'Skipped'),
     )
     status = models.CharField(choices=STATUS_CHOICES, default='pending', max_length=30)
+    audio_file = models.FileField(
+        upload_to='imports/%Y/%m/%d', max_length=255, null=True, blank=True)
 
     class Meta:
         ordering = ('id', )
diff --git a/api/funkwhale_api/music/tasks.py b/api/funkwhale_api/music/tasks.py
index 64399106..009f0068 100644
--- a/api/funkwhale_api/music/tasks.py
+++ b/api/funkwhale_api/music/tasks.py
@@ -1,3 +1,5 @@
+from django.core.files.base import ContentFile
+
 from funkwhale_api.taskapp import celery
 from funkwhale_api.providers.acoustid import get_acoustid_client
 
@@ -20,29 +22,72 @@ def set_acoustid_on_track_file(track_file):
         return update(result['id'])
 
 
+def _do_import(import_job, replace):
+    from_file = bool(import_job.audio_file)
+    mbid = import_job.mbid
+    acoustid_track_id = None
+    duration = None
+    track = None
+    if not mbid and from_file:
+        # we try to deduce mbid from acoustid
+        client = get_acoustid_client()
+        match = client.get_best_match(import_job.audio_file.path)
+        if not match:
+            raise ValueError('Cannot get match')
+        duration = match['recordings'][0]['duration']
+        mbid = match['recordings'][0]['id']
+        acoustid_track_id = match['id']
+    if mbid:
+        track, _ = models.Track.get_or_create_from_api(mbid=mbid)
+    else:
+        track = import_track_data_from_path(import_job.audio_file.path)
+
+    track_file = None
+    if replace:
+        track_file = track.files.first()
+    elif track.files.count() > 0:
+        if import_job.audio_file:
+            import_job.audio_file.delete()
+        import_job.status = 'skipped'
+        import_job.save()
+        return
+
+    track_file = track_file or models.TrackFile(
+        track=track, source=import_job.source)
+    track_file.acoustid_track_id = acoustid_track_id
+    if from_file:
+        track_file.audio_file = ContentFile(import_job.audio_file.read())
+        track_file.audio_file.name = import_job.audio_file.name
+        track_file.duration = duration
+    else:
+        track_file.download_file()
+    track_file.save()
+    import_job.status = 'finished'
+    import_job.track_file = track_file
+    if import_job.audio_file:
+        # it's imported on the track, we don't need it anymore
+        import_job.audio_file.delete()
+    import_job.save()
+    return track.pk
+
+
 @celery.app.task(name='ImportJob.run', bind=True)
 @celery.require_instance(models.ImportJob, 'import_job')
 def import_job_run(self, import_job, replace=False):
-    try:
-        track, created = models.Track.get_or_create_from_api(mbid=import_job.mbid)
-        track_file = None
-        if replace:
-            track_file = track.files.first()
-        elif track.files.count() > 0:
-            return
-
-        track_file = track_file or models.TrackFile(
-            track=track, source=import_job.source)
-        track_file.download_file()
-        track_file.save()
-        import_job.status = 'finished'
-        import_job.track_file = track_file
+    def mark_errored():
+        import_job.status = 'errored'
         import_job.save()
-        return track.pk
 
+    try:
+        return _do_import(import_job, replace)
     except Exception as exc:
         if not settings.DEBUG:
-            raise import_job_run.retry(args=[self], exc=exc, countdown=30, max_retries=3)
+            try:
+                self.retry(exc=exc, countdown=30, max_retries=3)
+            except:
+                mark_errored()
+                raise
+        mark_errored()
         raise
 
 
diff --git a/api/funkwhale_api/providers/audiofile/management/commands/import_files.py b/api/funkwhale_api/providers/audiofile/management/commands/import_files.py
index a4bfe555..4130e93f 100644
--- a/api/funkwhale_api/providers/audiofile/management/commands/import_files.py
+++ b/api/funkwhale_api/providers/audiofile/management/commands/import_files.py
@@ -1,6 +1,10 @@
 import glob
+import os
+
+from django.core.files import File
 from django.core.management.base import BaseCommand, CommandError
-from funkwhale_api.providers.audiofile import tasks
+from funkwhale_api.music import tasks
+from funkwhale_api.users.models import User
 
 
 class Command(BaseCommand):
@@ -15,6 +19,11 @@ class Command(BaseCommand):
             default=False,
             help='Will match the pattern recursively (including subdirectories)',
         )
+        parser.add_argument(
+            '--username',
+            dest='username',
+            help='The username of the user you want to be bound to the import',
+        )
         parser.add_argument(
             '--async',
             action='store_true',
@@ -46,6 +55,20 @@ class Command(BaseCommand):
         if not matching:
             raise CommandError('No file matching pattern, aborting')
 
+        user = None
+        if options['username']:
+            try:
+                user = User.objects.get(username=options['username'])
+            except User.DoesNotExist:
+                raise CommandError('Invalid username')
+        else:
+            # we bind the import to the first registered superuser
+            try:
+                user = User.objects.filter(is_superuser=True).order_by('pk').first()
+                assert user is not None
+            except AssertionError:
+                raise CommandError(
+                    'No superuser available, please provide a --username')
         if options['interactive']:
             message = (
                 'Are you sure you want to do this?\n\n'
@@ -54,18 +77,35 @@ class Command(BaseCommand):
             if input(''.join(message)) != 'yes':
                 raise CommandError("Import cancelled.")
 
+        batch = self.do_import(matching, user=user, options=options)
+
+        message = 'Successfully imported {} tracks'
+        if options['async']:
+            message = 'Successfully launched import for {} tracks'
+        self.stdout.write(message.format(len(matching)))
+        self.stdout.write(
+            "For details, please refer to import batch #".format(batch.pk))
+
+    def do_import(self, matching, user, options):
         message = 'Importing {}...'
         if options['async']:
             message = 'Launching import for {}...'
 
+        # we create an import batch binded to the user
+        batch = user.imports.create(source='shell')
+        async = options['async']
+        handler = tasks.import_job_run.delay if async else tasks.import_job_run
         for path in matching:
-            self.stdout.write(message.format(path))
+            job = batch.jobs.create(
+                source='file://' + path,
+            )
+            name = os.path.basename(path)
+            with open(path, 'rb') as f:
+                job.audio_file.save(name, File(f))
+
+            job.save()
             try:
-                tasks.from_path(path)
+                handler(import_job_id=job.pk)
             except Exception as e:
                 self.stdout.write('Error: {}'.format(e))
-
-        message = 'Successfully imported {} tracks'
-        if options['async']:
-            message = 'Successfully launched import for {} tracks'
-        self.stdout.write(message.format(len(matching)))
+        return batch
diff --git a/api/funkwhale_api/providers/audiofile/tasks.py b/api/funkwhale_api/providers/audiofile/tasks.py
index eec12922..bc18456c 100644
--- a/api/funkwhale_api/providers/audiofile/tasks.py
+++ b/api/funkwhale_api/providers/audiofile/tasks.py
@@ -8,7 +8,7 @@ from funkwhale_api.providers.acoustid import get_acoustid_client
 from funkwhale_api.music import models, metadata
 
 
-def import_metadata_without_musicbrainz(path):
+def import_track_data_from_path(path):
     data = metadata.Metadata(path)
     artist = models.Artist.objects.get_or_create(
         name__iexact=data.get('artist'),
@@ -53,7 +53,7 @@ def from_path(path):
         result = client.get_best_match(path)
         acoustid_track_id = result['id']
     except acoustid.WebServiceError:
-        track = import_metadata_without_musicbrainz(path)
+        track = import_track_data_from_path(path)
     except (TypeError, KeyError):
         track = import_metadata_without_musicbrainz(path)
     else:
diff --git a/api/tests/music/test_tasks.py b/api/tests/music/test_tasks.py
index 6052fd34..873ca18f 100644
--- a/api/tests/music/test_tasks.py
+++ b/api/tests/music/test_tasks.py
@@ -1,7 +1,13 @@
-from funkwhale_api.providers.acoustid import get_acoustid_client
+import os
+import pytest
 
+from funkwhale_api.providers.acoustid import get_acoustid_client
 from funkwhale_api.music import tasks
 
+from . import data as api_data
+
+DATA_DIR = os.path.dirname(os.path.abspath(__file__))
+
 
 def test_set_acoustid_on_track_file(factories, mocker):
     track_file = factories['music.TrackFile'](acoustid_track_id=None)
@@ -40,3 +46,108 @@ def test_set_acoustid_on_track_file_required_high_score(factories, mocker):
     track_file.refresh_from_db()
 
     assert track_file.acoustid_track_id is None
+
+
+def test_import_job_can_run_with_file_and_acoustid(factories, mocker):
+    path = os.path.join(DATA_DIR, 'test.ogg')
+    mbid = '9968a9d6-8d92-4051-8f76-674e157b6eed'
+    acoustid_payload = {
+        'results': [
+            {'id': 'e475bf79-c1ce-4441-bed7-1e33f226c0a2',
+             'recordings': [
+                {
+                 'duration': 268,
+                 'id': mbid}],
+            'score': 0.860825}],
+        'status': 'ok'
+    }
+    mocker.patch(
+        'funkwhale_api.musicbrainz.api.artists.get',
+        return_value=api_data.artists['get']['adhesive_wombat'])
+    mocker.patch(
+        'funkwhale_api.musicbrainz.api.releases.get',
+        return_value=api_data.albums['get']['marsupial'])
+    mocker.patch(
+        'funkwhale_api.musicbrainz.api.recordings.search',
+        return_value=api_data.tracks['search']['8bitadventures'])
+    mocker.patch('acoustid.match', return_value=acoustid_payload)
+
+    job = factories['music.FileImportJob'](audio_file__path=path)
+    f = job.audio_file
+    tasks.import_job_run(import_job_id=job.pk)
+    job.refresh_from_db()
+
+    track_file = job.track_file
+
+    with open(path, 'rb') as f:
+        assert track_file.audio_file.read() == f.read()
+    assert track_file.duration == 268
+    # audio file is deleted from import job once persisted to audio file
+    assert not job.audio_file
+    assert job.status == 'finished'
+    assert job.source == 'file://'
+
+
+def test_import_job_can_be_skipped(factories, mocker):
+    path = os.path.join(DATA_DIR, 'test.ogg')
+    mbid = '9968a9d6-8d92-4051-8f76-674e157b6eed'
+    track_file = factories['music.TrackFile'](track__mbid=mbid)
+    acoustid_payload = {
+        'results': [
+            {'id': 'e475bf79-c1ce-4441-bed7-1e33f226c0a2',
+             'recordings': [
+                {
+                 'duration': 268,
+                 'id': mbid}],
+            'score': 0.860825}],
+        'status': 'ok'
+    }
+    mocker.patch(
+        'funkwhale_api.musicbrainz.api.artists.get',
+        return_value=api_data.artists['get']['adhesive_wombat'])
+    mocker.patch(
+        'funkwhale_api.musicbrainz.api.releases.get',
+        return_value=api_data.albums['get']['marsupial'])
+    mocker.patch(
+        'funkwhale_api.musicbrainz.api.recordings.search',
+        return_value=api_data.tracks['search']['8bitadventures'])
+    mocker.patch('acoustid.match', return_value=acoustid_payload)
+
+    job = factories['music.FileImportJob'](audio_file__path=path)
+    f = job.audio_file
+    tasks.import_job_run(import_job_id=job.pk)
+    job.refresh_from_db()
+
+    assert job.track_file is None
+    # audio file is deleted from import job once persisted to audio file
+    assert not job.audio_file
+    assert job.status == 'skipped'
+
+
+def test_import_job_can_be_errored(factories, mocker):
+    path = os.path.join(DATA_DIR, 'test.ogg')
+    mbid = '9968a9d6-8d92-4051-8f76-674e157b6eed'
+    track_file = factories['music.TrackFile'](track__mbid=mbid)
+    acoustid_payload = {
+        'results': [
+            {'id': 'e475bf79-c1ce-4441-bed7-1e33f226c0a2',
+             'recordings': [
+                {
+                 'duration': 268,
+                 'id': mbid}],
+            'score': 0.860825}],
+        'status': 'ok'
+    }
+    class MyException(Exception):
+        pass
+    mocker.patch('acoustid.match', side_effect=MyException())
+
+    job = factories['music.FileImportJob'](
+        audio_file__path=path, track_file=None)
+
+    with pytest.raises(MyException):
+        tasks.import_job_run(import_job_id=job.pk)
+    job.refresh_from_db()
+
+    assert job.track_file is None
+    assert job.status == 'errored'
diff --git a/api/tests/test_import_audio_file.py b/api/tests/test_import_audio_file.py
index 26198686..e81e9b81 100644
--- a/api/tests/test_import_audio_file.py
+++ b/api/tests/test_import_audio_file.py
@@ -1,8 +1,10 @@
+import pytest
 import acoustid
 import datetime
 import os
+from django.core.management import call_command
+from django.core.management.base import CommandError
 
-from .music import data as api_data
 from funkwhale_api.providers.audiofile import tasks
 
 DATA_DIR = os.path.join(
@@ -11,35 +13,7 @@ DATA_DIR = os.path.join(
 )
 
 
-def test_import_file_with_acoustid(db, mocker, preferences):
-    mbid = api_data.tracks['get']['8bitadventures']['recording']['id']
-    payload = {
-        'results': [{
-            'id': 'e475bf79-c1ce-4441-bed7-1e33f226c0a2',
-            'recordings': [{'id': mbid}],
-            'score': 0.86
-        }]
-    }
-    path = os.path.join(DATA_DIR, 'dummy_file.ogg')
-    m = mocker.patch('acoustid.match', return_value=payload)
-    mocker.patch(
-        'funkwhale_api.musicbrainz.api.artists.get',
-        return_value=api_data.artists['get']['adhesive_wombat'])
-    mocker.patch(
-        'funkwhale_api.musicbrainz.api.releases.get',
-        return_value=api_data.albums['get']['marsupial'])
-    mocker.patch(
-        'funkwhale_api.musicbrainz.api.recordings.get',
-        return_value=api_data.tracks['get']['8bitadventures'])
-    track_file = tasks.from_path(path)
-    result = payload['results'][0]
-
-    assert track_file.acoustid_track_id == result['id']
-    assert track_file.track.mbid == result['recordings'][0]['id']
-    m.assert_called_once_with('', path, parse=False)
-
-
-def test_can_import_single_audio_file_without_acoustid(db, mocker):
+def test_can_create_track_from_file_metadata(db, mocker):
     mocker.patch('acoustid.match', side_effect=acoustid.WebServiceError('test'))
     metadata = {
         'artist': ['Test artist'],
@@ -56,8 +30,8 @@ def test_can_import_single_audio_file_without_acoustid(db, mocker):
         'funkwhale_api.music.metadata.Metadata.get_file_type',
         return_value='OggVorbis',
     )
-    track_file = tasks.from_path(os.path.join(DATA_DIR, 'dummy_file.ogg'))
-    track = track_file.track
+    track = tasks.import_track_data_from_path(
+        os.path.join(DATA_DIR, 'dummy_file.ogg'))
 
     assert track.title == metadata['title'][0]
     assert track.mbid == metadata['musicbrainz_trackid'][0]
@@ -67,3 +41,37 @@ def test_can_import_single_audio_file_without_acoustid(db, mocker):
     assert track.album.release_date == datetime.date(2012, 8, 15)
     assert track.artist.name == metadata['artist'][0]
     assert track.artist.mbid == metadata['musicbrainz_artistid'][0]
+
+
+def test_management_command_requires_a_valid_username(factories, mocker):
+    path = os.path.join(DATA_DIR, 'dummy_file.ogg')
+    user = factories['users.User'](username='me')
+    mocker.patch('funkwhale_api.providers.audiofile.management.commands.import_files.Command.do_import')  # NOQA
+    with pytest.raises(CommandError):
+        call_command('import_files', path, username='not_me', interactive=False)
+    call_command('import_files', path, username='me', interactive=False)
+
+
+def test_import_files_creates_a_batch_and_job(factories, mocker):
+    m = mocker.patch('funkwhale_api.music.tasks.import_job_run.delay')
+    user = factories['users.User'](username='me')
+    path = os.path.join(DATA_DIR, 'dummy_file.ogg')
+    call_command(
+        'import_files',
+        path,
+        username='me',
+        async=True,
+        interactive=False)
+
+    batch = user.imports.latest('id')
+    assert batch.source == 'shell'
+    assert batch.jobs.count() == 1
+
+    job = batch.jobs.first()
+
+    assert job.status == 'pending'
+    with open(path, 'rb') as f:
+        assert job.audio_file.read() == f.read()
+
+    assert job.source == 'file://' + path
+    m.assert_called_once_with(import_job_id=job.pk)
diff --git a/dev.yml b/dev.yml
index 77d288de..befc4b24 100644
--- a/dev.yml
+++ b/dev.yml
@@ -30,7 +30,7 @@ services:
     links:
      - postgres
      - redis
-    command: celery -A funkwhale_api.taskapp worker
+    command: celery -A funkwhale_api.taskapp worker -l debug
     environment:
       - "DJANGO_ALLOWED_HOSTS=localhost"
       - "DJANGO_SETTINGS_MODULE=config.settings.local"
-- 
GitLab