From bbae4e323b31cc2c89e50212cde9c9c49233f09f Mon Sep 17 00:00:00 2001
From: Eliot Berriot <contact@eliotberriot.com>
Date: Wed, 6 Jun 2018 21:45:38 +0200
Subject: [PATCH] Fix #288: Huge performance boost during CLI import that
 queries MusicBrainz

---
 api/funkwhale_api/music/models.py             | 35 ++++++++++-
 .../providers/audiofile/tasks.py              | 53 ++++++++++------
 api/tests/music/test_models.py                | 47 ++++++++++++++
 api/tests/test_import_audio_file.py           | 62 ++++++++++++++++---
 changes/changelog.d/288.enhancement           |  1 +
 5 files changed, 172 insertions(+), 26 deletions(-)
 create mode 100644 changes/changelog.d/288.enhancement

diff --git a/api/funkwhale_api/music/models.py b/api/funkwhale_api/music/models.py
index 3c488fca1..bf3f9e12c 100644
--- a/api/funkwhale_api/music/models.py
+++ b/api/funkwhale_api/music/models.py
@@ -437,7 +437,40 @@ class Track(APIModelMixin):
             title__iexact=title,
             defaults=kwargs)
 
-
+    @classmethod
+    def get_or_create_from_release(cls, release_mbid, mbid):
+        release_mbid = str(release_mbid)
+        mbid = str(mbid)
+        try:
+            return cls.objects.get(mbid=mbid), False
+        except cls.DoesNotExist:
+            pass
+
+        album = Album.get_or_create_from_api(release_mbid)[0]
+        data = musicbrainz.client.api.releases.get(
+            str(album.mbid), includes=Album.api_includes)
+        tracks = [
+            t
+            for m in data['release']['medium-list']
+            for t in m['track-list']
+        ]
+        track_data = None
+        for track in tracks:
+            if track['recording']['id'] == mbid:
+                track_data = track
+                break
+        if not track_data:
+            raise ValueError('No track found matching this ID')
+
+        return cls.objects.update_or_create(
+            mbid=mbid,
+            defaults={
+                'position': int(track['position']),
+                'title': track['recording']['title'],
+                'album': album,
+                'artist': album.artist,
+            }
+        )
 class TrackFile(models.Model):
     uuid = models.UUIDField(
         unique=True, db_index=True, default=uuid.uuid4)
diff --git a/api/funkwhale_api/providers/audiofile/tasks.py b/api/funkwhale_api/providers/audiofile/tasks.py
index 40114c877..fb6306735 100644
--- a/api/funkwhale_api/providers/audiofile/tasks.py
+++ b/api/funkwhale_api/providers/audiofile/tasks.py
@@ -12,25 +12,43 @@ from funkwhale_api.music import models, metadata
 @transaction.atomic
 def import_track_data_from_path(path):
     data = metadata.Metadata(path)
-    artist = models.Artist.objects.get_or_create(
-        name__iexact=data.get('artist'),
-        defaults={
-            'name': data.get('artist'),
-            'mbid': data.get('musicbrainz_artistid', None),
-        },
-    )[0]
+    album = None
+    track_mbid = data.get('musicbrainz_recordingid', None)
+    album_mbid = data.get('musicbrainz_albumid', None)
 
-    release_date = data.get('date', default=None)
-    album = models.Album.objects.get_or_create(
-        title__iexact=data.get('album'),
-        artist=artist,
-        defaults={
-            'title': data.get('album'),
-            'release_date': release_date,
-            'mbid': data.get('musicbrainz_albumid', None),
-        },
-    )[0]
+    if album_mbid and track_mbid:
+        # to gain performance and avoid additional mb lookups,
+        # we import from the release data, which is already cached
+        return models.Track.get_or_create_from_release(
+            album_mbid, track_mbid)[0]
+    elif track_mbid:
+        return models.Track.get_or_create_from_api(track_mbid)[0]
+    elif album_mbid:
+        album = models.Album.get_or_create_from_api(album_mbid)[0]
 
+    artist = album.artist if album else None
+    artist_mbid = data.get('musicbrainz_artistid', None)
+    if not artist:
+        if artist_mbid:
+            artist = models.Artist.get_or_create_from_api(artist_mbid)[0]
+        else:
+            artist = models.Artist.objects.get_or_create(
+                name__iexact=data.get('artist'),
+                defaults={
+                    'name': data.get('artist'),
+                },
+            )[0]
+
+    release_date = data.get('date', default=None)
+    if not album:
+        album = models.Album.objects.get_or_create(
+            title__iexact=data.get('album'),
+            artist=artist,
+            defaults={
+                'title': data.get('album'),
+                'release_date': release_date,
+            },
+        )[0]
     position = data.get('track_number', default=None)
     track = models.Track.objects.get_or_create(
         title__iexact=data.get('title'),
@@ -38,7 +56,6 @@ def import_track_data_from_path(path):
         defaults={
             'title': data.get('title'),
             'position': position,
-            'mbid': data.get('musicbrainz_recordingid', None),
         },
     )[0]
     return track
diff --git a/api/tests/music/test_models.py b/api/tests/music/test_models.py
index feb68ea33..0ef54eb66 100644
--- a/api/tests/music/test_models.py
+++ b/api/tests/music/test_models.py
@@ -43,6 +43,53 @@ def test_import_album_stores_release_group(factories):
     assert album.artist == artist
 
 
+def test_import_track_from_release(factories, mocker):
+    album = factories['music.Album'](
+        mbid='430347cb-0879-3113-9fde-c75b658c298e')
+    album_data = {
+        'release': {
+            'id': album.mbid,
+            'title': 'Daydream Nation',
+            'status': 'Official',
+            'medium-count': 1,
+            'medium-list': [
+                {
+                    'position': '1',
+                    'format': 'CD',
+                    'track-list': [
+                        {
+                            'id': '03baca8b-855a-3c05-8f3d-d3235287d84d',
+                            'position': '4',
+                            'number': '4',
+                            'length': '417973',
+                            'recording': {
+                                'id': '2109e376-132b-40ad-b993-2bb6812e19d4',
+                                'title': 'Teen Age Riot',
+                                'length': '417973'},
+                            'track_or_recording_length': '417973'
+                        }
+                    ],
+                    'track-count': 1
+                }
+            ],
+        }
+    }
+    mocked_get = mocker.patch(
+        'funkwhale_api.musicbrainz.api.releases.get',
+        return_value=album_data)
+    track_data = album_data['release']['medium-list'][0]['track-list'][0]
+    track = models.Track.get_or_create_from_release(
+        '430347cb-0879-3113-9fde-c75b658c298e',
+        track_data['recording']['id'],
+    )[0]
+    mocked_get.assert_called_once_with(
+        album.mbid, includes=models.Album.api_includes)
+    assert track.title == track_data['recording']['title']
+    assert track.mbid == track_data['recording']['id']
+    assert track.album == album
+    assert track.artist == album.artist
+    assert track.position == int(track_data['position'])
+
 def test_import_job_is_bound_to_track_file(factories, mocker):
     track = factories['music.Track']()
     job = factories['music.ImportJob'](mbid=track.mbid)
diff --git a/api/tests/test_import_audio_file.py b/api/tests/test_import_audio_file.py
index de8186075..da3d1959c 100644
--- a/api/tests/test_import_audio_file.py
+++ b/api/tests/test_import_audio_file.py
@@ -15,16 +15,13 @@ DATA_DIR = os.path.join(
 )
 
 
-def test_can_create_track_from_file_metadata(db, mocker):
+def test_can_create_track_from_file_metadata_no_mbid(db, mocker):
     metadata = {
         'artist': ['Test artist'],
         'album': ['Test album'],
         'title': ['Test track'],
         'TRACKNUMBER': ['4'],
         'date': ['2012-08-15'],
-        'musicbrainz_albumid': ['a766da8b-8336-47aa-a3ee-371cc41ccc75'],
-        'musicbrainz_trackid': ['bd21ac48-46d8-4e78-925f-d9cc2a294656'],
-        'musicbrainz_artistid': ['013c8e5b-d72a-4cd3-8dee-6c64d6125823'],
     }
     m1 = mocker.patch('mutagen.File', return_value=metadata)
     m2 = mocker.patch(
@@ -35,13 +32,64 @@ def test_can_create_track_from_file_metadata(db, mocker):
         os.path.join(DATA_DIR, 'dummy_file.ogg'))
 
     assert track.title == metadata['title'][0]
-    assert track.mbid == uuid.UUID(metadata['musicbrainz_trackid'][0])
+    assert track.mbid is None
     assert track.position == 4
     assert track.album.title == metadata['album'][0]
-    assert track.album.mbid == uuid.UUID(metadata['musicbrainz_albumid'][0])
+    assert track.album.mbid is None
     assert track.album.release_date == datetime.date(2012, 8, 15)
     assert track.artist.name == metadata['artist'][0]
-    assert track.artist.mbid == uuid.UUID(metadata['musicbrainz_artistid'][0])
+    assert track.artist.mbid is None
+
+
+def test_can_create_track_from_file_metadata_mbid(factories, mocker):
+    album = factories['music.Album']()
+    mocker.patch(
+        'funkwhale_api.music.models.Album.get_or_create_from_api',
+        return_value=(album, True),
+    )
+
+    album_data = {
+        'release': {
+            'id': album.mbid,
+            'medium-list': [
+                {
+                    'track-list': [
+                        {
+                            'id': '03baca8b-855a-3c05-8f3d-d3235287d84d',
+                            'position': '4',
+                            'number': '4',
+                            'recording': {
+                                'id': '2109e376-132b-40ad-b993-2bb6812e19d4',
+                                'title': 'Teen Age Riot',
+                            },
+                        }
+                    ],
+                    'track-count': 1
+                }
+            ],
+        }
+    }
+    mocker.patch(
+        'funkwhale_api.musicbrainz.api.releases.get',
+        return_value=album_data)
+    track_data = album_data['release']['medium-list'][0]['track-list'][0]
+    metadata = {
+        'musicbrainz_albumid': [album.mbid],
+        'musicbrainz_trackid': [track_data['recording']['id']],
+    }
+    m1 = mocker.patch('mutagen.File', return_value=metadata)
+    m2 = mocker.patch(
+        'funkwhale_api.music.metadata.Metadata.get_file_type',
+        return_value='OggVorbis',
+    )
+    track = tasks.import_track_data_from_path(
+        os.path.join(DATA_DIR, 'dummy_file.ogg'))
+
+    assert track.title == track_data['recording']['title']
+    assert track.mbid == track_data['recording']['id']
+    assert track.position == 4
+    assert track.album == album
+    assert track.artist == album.artist
 
 
 def test_management_command_requires_a_valid_username(factories, mocker):
diff --git a/changes/changelog.d/288.enhancement b/changes/changelog.d/288.enhancement
new file mode 100644
index 000000000..7125bdb71
--- /dev/null
+++ b/changes/changelog.d/288.enhancement
@@ -0,0 +1 @@
+Huge performance boost (~x5 to x7) during CLI import that queries MusicBrainz (#288)
-- 
GitLab