From 61cf04b3764522ae24cf0104e7ac7965687c5a4f Mon Sep 17 00:00:00 2001
From: Eliot Berriot <contact@eliotberriot.com>
Date: Wed, 29 Jan 2020 15:33:50 +0100
Subject: [PATCH] Fix #348, #474, #557, #740, #928: improved deduplication
 logic to prevent skipped files during import

---
 .../migrations/0050_auto_20200129_1344.py     |  18 +++
 api/funkwhale_api/music/models.py             |   1 +
 api/funkwhale_api/music/tasks.py              |  13 +-
 api/tests/music/test_tasks.py                 | 131 ++++++++++++++++++
 changes/changelog.d/348.bugfix                |   1 +
 5 files changed, 162 insertions(+), 2 deletions(-)
 create mode 100644 api/funkwhale_api/music/migrations/0050_auto_20200129_1344.py
 create mode 100644 changes/changelog.d/348.bugfix

diff --git a/api/funkwhale_api/music/migrations/0050_auto_20200129_1344.py b/api/funkwhale_api/music/migrations/0050_auto_20200129_1344.py
new file mode 100644
index 000000000..ecd74003d
--- /dev/null
+++ b/api/funkwhale_api/music/migrations/0050_auto_20200129_1344.py
@@ -0,0 +1,18 @@
+# Generated by Django 2.2.9 on 2020-01-29 13:44
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('music', '0049_auto_20200122_1020'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='track',
+            name='mbid',
+            field=models.UUIDField(blank=True, db_index=True, null=True),
+        ),
+    ]
diff --git a/api/funkwhale_api/music/models.py b/api/funkwhale_api/music/models.py
index e1852870e..1d156a6c6 100644
--- a/api/funkwhale_api/music/models.py
+++ b/api/funkwhale_api/music/models.py
@@ -475,6 +475,7 @@ def get_artist(release_list):
 
 
 class Track(APIModelMixin):
+    mbid = models.UUIDField(db_index=True, null=True, blank=True)
     title = models.CharField(max_length=MAX_LENGTHS["TRACK_TITLE"])
     artist = models.ForeignKey(Artist, related_name="tracks", on_delete=models.CASCADE)
     disc_number = models.PositiveIntegerField(null=True, blank=True)
diff --git a/api/funkwhale_api/music/tasks.py b/api/funkwhale_api/music/tasks.py
index 3e95e5325..636a54cf4 100644
--- a/api/funkwhale_api/music/tasks.py
+++ b/api/funkwhale_api/music/tasks.py
@@ -625,9 +625,18 @@ def _get_track(data, attributed_to=None, **forced_values):
         else truncate(data.get("copyright"), models.MAX_LENGTHS["COPYRIGHT"])
     )
 
-    query = Q(title__iexact=track_title, artist=artist, album=album, position=position)
+    query = Q(
+        title__iexact=track_title,
+        artist=artist,
+        album=album,
+        position=position,
+        disc_number=disc_number,
+    )
     if track_mbid:
-        query |= Q(mbid=track_mbid)
+        if album_mbid:
+            query |= Q(mbid=track_mbid, album__mbid=album_mbid)
+        else:
+            query |= Q(mbid=track_mbid)
     if track_fid:
         query |= Q(fid=track_fid)
 
diff --git a/api/tests/music/test_tasks.py b/api/tests/music/test_tasks.py
index 053f62ede..bbbc9c745 100644
--- a/api/tests/music/test_tasks.py
+++ b/api/tests/music/test_tasks.py
@@ -1164,3 +1164,134 @@ def test_can_download_image_file_for_album_mbid(binary_cover, mocker, factories)
 
     assert album.attachment_cover.file.read() == binary_cover
     assert album.attachment_cover.mimetype == "image/jpeg"
+
+
+def test_can_import_track_with_same_mbid_in_different_albums(factories, mocker):
+    artist = factories["music.Artist"]()
+    upload = factories["music.Upload"](
+        playable=True, track__artist=artist, track__album__artist=artist
+    )
+    assert upload.track.mbid is not None
+    data = {
+        "title": upload.track.title,
+        "artists": [{"name": artist.name, "mbid": artist.mbid}],
+        "album": {
+            "title": "The Slip",
+            "mbid": uuid.UUID("12b57d46-a192-499e-a91f-7da66790a1c1"),
+            "release_date": datetime.date(2008, 5, 5),
+            "artists": [{"name": artist.name, "mbid": artist.mbid}],
+        },
+        "position": 1,
+        "disc_number": 1,
+        "mbid": upload.track.mbid,
+    }
+
+    mocker.patch.object(metadata.TrackMetadataSerializer, "validated_data", data)
+    mocker.patch.object(tasks, "populate_album_cover")
+
+    new_upload = factories["music.Upload"](library=upload.library)
+
+    tasks.process_upload(upload_id=new_upload.pk)
+
+    new_upload.refresh_from_db()
+
+    assert new_upload.import_status == "finished"
+
+
+def test_import_track_with_same_mbid_in_same_albums_skipped(factories, mocker):
+    artist = factories["music.Artist"]()
+    upload = factories["music.Upload"](
+        playable=True, track__artist=artist, track__album__artist=artist
+    )
+    assert upload.track.mbid is not None
+    data = {
+        "title": upload.track.title,
+        "artists": [{"name": artist.name, "mbid": artist.mbid}],
+        "album": {
+            "title": upload.track.album.title,
+            "mbid": upload.track.album.mbid,
+            "artists": [{"name": artist.name, "mbid": artist.mbid}],
+        },
+        "position": 1,
+        "disc_number": 1,
+        "mbid": upload.track.mbid,
+    }
+
+    mocker.patch.object(metadata.TrackMetadataSerializer, "validated_data", data)
+    mocker.patch.object(tasks, "populate_album_cover")
+
+    new_upload = factories["music.Upload"](library=upload.library)
+
+    tasks.process_upload(upload_id=new_upload.pk)
+
+    new_upload.refresh_from_db()
+
+    assert new_upload.import_status == "skipped"
+
+
+def test_can_import_track_with_same_position_in_different_discs(factories, mocker):
+    upload = factories["music.Upload"](playable=True)
+    artist_data = [
+        {
+            "name": upload.track.album.artist.name,
+            "mbid": upload.track.album.artist.mbid,
+        }
+    ]
+    data = {
+        "title": upload.track.title,
+        "artists": artist_data,
+        "album": {
+            "title": "The Slip",
+            "mbid": upload.track.album.mbid,
+            "release_date": datetime.date(2008, 5, 5),
+            "artists": artist_data,
+        },
+        "position": upload.track.position,
+        "disc_number": 2,
+        "mbid": None,
+    }
+
+    mocker.patch.object(metadata.TrackMetadataSerializer, "validated_data", data)
+    mocker.patch.object(tasks, "populate_album_cover")
+
+    new_upload = factories["music.Upload"](library=upload.library)
+
+    tasks.process_upload(upload_id=new_upload.pk)
+
+    new_upload.refresh_from_db()
+
+    assert new_upload.import_status == "finished"
+
+
+def test_can_import_track_with_same_position_in_same_discs_skipped(factories, mocker):
+    upload = factories["music.Upload"](playable=True)
+    artist_data = [
+        {
+            "name": upload.track.album.artist.name,
+            "mbid": upload.track.album.artist.mbid,
+        }
+    ]
+    data = {
+        "title": upload.track.title,
+        "artists": artist_data,
+        "album": {
+            "title": "The Slip",
+            "mbid": upload.track.album.mbid,
+            "release_date": datetime.date(2008, 5, 5),
+            "artists": artist_data,
+        },
+        "position": upload.track.position,
+        "disc_number": upload.track.disc_number,
+        "mbid": None,
+    }
+
+    mocker.patch.object(metadata.TrackMetadataSerializer, "validated_data", data)
+    mocker.patch.object(tasks, "populate_album_cover")
+
+    new_upload = factories["music.Upload"](library=upload.library)
+
+    tasks.process_upload(upload_id=new_upload.pk)
+
+    new_upload.refresh_from_db()
+
+    assert new_upload.import_status == "skipped"
diff --git a/changes/changelog.d/348.bugfix b/changes/changelog.d/348.bugfix
new file mode 100644
index 000000000..10c04b982
--- /dev/null
+++ b/changes/changelog.d/348.bugfix
@@ -0,0 +1 @@
+Improved deduplication logic to prevent skipped files during import (#348, #474, #557, #740, #928)
-- 
GitLab