From 618c6d8bb0212cd43789d92da3e318797be6b2c8 Mon Sep 17 00:00:00 2001
From: Eliot Berriot <contact@eliotberriot.com>
Date: Fri, 27 Sep 2019 12:39:23 +0200
Subject: [PATCH] Truncate too long long values when importing instead of
 crashing

---
 api/funkwhale_api/music/models.py        | 17 ++++++++++----
 api/funkwhale_api/music/tasks.py         | 18 ++++++++++-----
 api/tests/music/test_tasks.py            | 28 +++++++++++++++++++++++-
 changes/changelog.d/imports-small.bugfix |  1 +
 4 files changed, 54 insertions(+), 10 deletions(-)
 create mode 100644 changes/changelog.d/imports-small.bugfix

diff --git a/api/funkwhale_api/music/models.py b/api/funkwhale_api/music/models.py
index 831289cda..49d22fa85 100644
--- a/api/funkwhale_api/music/models.py
+++ b/api/funkwhale_api/music/models.py
@@ -34,6 +34,13 @@ from . import importers, metadata, utils
 
 logger = logging.getLogger(__name__)
 
+MAX_LENGTHS = {
+    "ARTIST_NAME": 255,
+    "ALBUM_TITLE": 255,
+    "TRACK_TITLE": 255,
+    "COPYRIGHT": 500,
+}
+
 
 def empty_dict():
     return {}
@@ -187,7 +194,7 @@ class ArtistQuerySet(common_models.LocalFromFidQuerySet, models.QuerySet):
 
 
 class Artist(APIModelMixin):
-    name = models.CharField(max_length=255)
+    name = models.CharField(max_length=MAX_LENGTHS["ARTIST_NAME"])
     federation_namespace = "artists"
     musicbrainz_model = "artist"
     musicbrainz_mapping = {
@@ -275,7 +282,7 @@ class AlbumQuerySet(common_models.LocalFromFidQuerySet, models.QuerySet):
 
 
 class Album(APIModelMixin):
-    title = models.CharField(max_length=255)
+    title = models.CharField(max_length=MAX_LENGTHS["ALBUM_TITLE"])
     artist = models.ForeignKey(Artist, related_name="albums", on_delete=models.CASCADE)
     release_date = models.DateField(null=True, blank=True)
     release_group_id = models.UUIDField(null=True, blank=True)
@@ -448,7 +455,7 @@ def get_artist(release_list):
 
 
 class Track(APIModelMixin):
-    title = models.CharField(max_length=255)
+    title = models.CharField(max_length=MAX_LENGTHS["TRACK_TITLE"])
     artist = models.ForeignKey(Artist, related_name="tracks", on_delete=models.CASCADE)
     disc_number = models.PositiveIntegerField(null=True, blank=True)
     position = models.PositiveIntegerField(null=True, blank=True)
@@ -472,7 +479,9 @@ class Track(APIModelMixin):
         on_delete=models.SET_NULL,
         related_name="attributed_tracks",
     )
-    copyright = models.CharField(max_length=500, null=True, blank=True)
+    copyright = models.CharField(
+        max_length=MAX_LENGTHS["COPYRIGHT"], null=True, blank=True
+    )
     federation_namespace = "tracks"
     musicbrainz_model = "recording"
     api = musicbrainz.api.recordings
diff --git a/api/funkwhale_api/music/tasks.py b/api/funkwhale_api/music/tasks.py
index 0c1220e40..ca32c808e 100644
--- a/api/funkwhale_api/music/tasks.py
+++ b/api/funkwhale_api/music/tasks.py
@@ -406,6 +406,12 @@ def get_track_from_import_metadata(data, update_cover=False, attributed_to=None)
     return track
 
 
+def truncate(v, length):
+    if v is None:
+        return v
+    return v[:length]
+
+
 def _get_track(data, attributed_to=None):
     track_uuid = getter(data, "funkwhale", "track", "uuid")
 
@@ -447,7 +453,7 @@ def _get_track(data, attributed_to=None):
     artist_data = artists[0]
     artist_mbid = artist_data.get("mbid", None)
     artist_fid = artist_data.get("fid", None)
-    artist_name = artist_data["name"]
+    artist_name = truncate(artist_data["name"], models.MAX_LENGTHS["ARTIST_NAME"])
 
     if artist_mbid:
         query = Q(mbid=artist_mbid)
@@ -473,7 +479,9 @@ def _get_track(data, attributed_to=None):
 
     album_artists = getter(data, "album", "artists", default=artists) or artists
     album_artist_data = album_artists[0]
-    album_artist_name = album_artist_data.get("name")
+    album_artist_name = truncate(
+        album_artist_data.get("name"), models.MAX_LENGTHS["ARTIST_NAME"]
+    )
     if album_artist_name == artist_name:
         album_artist = artist
     else:
@@ -502,7 +510,7 @@ def _get_track(data, attributed_to=None):
 
     # get / create album
     album_data = data["album"]
-    album_title = album_data["title"]
+    album_title = truncate(album_data["title"], models.MAX_LENGTHS["ALBUM_TITLE"])
     album_fid = album_data.get("fid", None)
 
     if album_mbid:
@@ -531,7 +539,7 @@ def _get_track(data, attributed_to=None):
         tags_models.add_tags(album, *album_data.get("tags", []))
 
     # get / create track
-    track_title = data["title"]
+    track_title = truncate(data["title"], models.MAX_LENGTHS["TRACK_TITLE"])
     position = data.get("position", 1)
     query = Q(title__iexact=track_title, artist=artist, album=album, position=position)
     if track_mbid:
@@ -549,7 +557,7 @@ def _get_track(data, attributed_to=None):
         "from_activity_id": from_activity_id,
         "attributed_to": data.get("attributed_to", attributed_to),
         "license": licenses.match(data.get("license"), data.get("copyright")),
-        "copyright": data.get("copyright"),
+        "copyright": truncate(data.get("copyright"), models.MAX_LENGTHS["COPYRIGHT"]),
     }
     if data.get("fdate"):
         defaults["creation_date"] = data.get("fdate")
diff --git a/api/tests/music/test_tasks.py b/api/tests/music/test_tasks.py
index 439c74b72..8eae4a3be 100644
--- a/api/tests/music/test_tasks.py
+++ b/api/tests/music/test_tasks.py
@@ -9,7 +9,7 @@ from django.utils import timezone
 
 from funkwhale_api.federation import serializers as federation_serializers
 from funkwhale_api.federation import jsonld
-from funkwhale_api.music import licenses, metadata, signals, tasks
+from funkwhale_api.music import licenses, metadata, models, signals, tasks
 
 DATA_DIR = os.path.dirname(os.path.abspath(__file__))
 
@@ -79,6 +79,32 @@ def test_can_create_track_from_file_metadata_attributed_to(factories, mocker):
     assert track.artist.attributed_to == actor
 
 
+def test_can_create_track_from_file_metadata_truncates_too_long_values(
+    factories, mocker
+):
+    metadata = {
+        "title": "a" * 5000,
+        "artists": [{"name": "b" * 5000}],
+        "album": {"title": "c" * 5000, "release_date": datetime.date(2012, 8, 15)},
+        "position": 4,
+        "disc_number": 2,
+        "copyright": "d" * 5000,
+    }
+
+    track = tasks.get_track_from_import_metadata(metadata)
+
+    assert track.title == metadata["title"][: models.MAX_LENGTHS["TRACK_TITLE"]]
+    assert track.copyright == metadata["copyright"][: models.MAX_LENGTHS["COPYRIGHT"]]
+    assert (
+        track.album.title
+        == metadata["album"]["title"][: models.MAX_LENGTHS["ALBUM_TITLE"]]
+    )
+    assert (
+        track.artist.name
+        == metadata["artists"][0]["name"][: models.MAX_LENGTHS["ARTIST_NAME"]]
+    )
+
+
 def test_can_create_track_from_file_metadata_featuring(factories):
     metadata = {
         "title": "Whole Lotta Love",
diff --git a/changes/changelog.d/imports-small.bugfix b/changes/changelog.d/imports-small.bugfix
new file mode 100644
index 000000000..aed6bb902
--- /dev/null
+++ b/changes/changelog.d/imports-small.bugfix
@@ -0,0 +1 @@
+Fixed import crashing with empty cover file or too long values on some fields
-- 
GitLab