From 4781e7821ebbac2e23707d7d776093046b97eb49 Mon Sep 17 00:00:00 2001
From: Eliot Berriot <contact@eliotberriot.com>
Date: Tue, 24 Jul 2018 23:05:18 +0200
Subject: [PATCH] Fix #376: Smarter date parsing during import by replacing
 arrow with pendulum

---
 api/funkwhale_api/music/metadata.py      | 22 +++++++++++-----------
 api/funkwhale_api/music/models.py        |  7 ++-----
 api/requirements/base.txt                |  2 +-
 api/tests/federation/test_actors.py      |  6 +++---
 api/tests/federation/test_serializers.py |  4 ++--
 api/tests/music/test_metadata.py         |  8 ++++++++
 changes/changelog.d/376.bugfix           |  1 +
 7 files changed, 28 insertions(+), 22 deletions(-)
 create mode 100644 changes/changelog.d/376.bugfix

diff --git a/api/funkwhale_api/music/metadata.py b/api/funkwhale_api/music/metadata.py
index d2534f6b..9aa08820 100644
--- a/api/funkwhale_api/music/metadata.py
+++ b/api/funkwhale_api/music/metadata.py
@@ -1,5 +1,6 @@
-import arrow
+import datetime
 import mutagen
+import pendulum
 from django import forms
 
 NODEFAULT = object()
@@ -101,6 +102,11 @@ class FirstUUIDField(forms.UUIDField):
         return super().to_python(value)
 
 
+def get_date(value):
+    parsed = pendulum.parse(str(value))
+    return datetime.date(parsed.year, parsed.month, parsed.day)
+
+
 VALIDATION = {
     "musicbrainz_artistid": FirstUUIDField(),
     "musicbrainz_albumid": FirstUUIDField(),
@@ -118,7 +124,7 @@ CONF = {
             "title": {},
             "artist": {},
             "album": {},
-            "date": {"field": "date", "to_application": lambda v: arrow.get(v).date()},
+            "date": {"field": "date", "to_application": get_date},
             "musicbrainz_albumid": {},
             "musicbrainz_artistid": {},
             "musicbrainz_recordingid": {"field": "musicbrainz_trackid"},
@@ -134,7 +140,7 @@ CONF = {
             "title": {},
             "artist": {},
             "album": {},
-            "date": {"field": "date", "to_application": lambda v: arrow.get(v).date()},
+            "date": {"field": "date", "to_application": get_date},
             "musicbrainz_albumid": {"field": "MusicBrainz Album Id"},
             "musicbrainz_artistid": {"field": "MusicBrainz Artist Id"},
             "musicbrainz_recordingid": {"field": "MusicBrainz Track Id"},
@@ -148,10 +154,7 @@ CONF = {
             "title": {"field": "TIT2"},
             "artist": {"field": "TPE1"},
             "album": {"field": "TALB"},
-            "date": {
-                "field": "TDRC",
-                "to_application": lambda v: arrow.get(str(v)).date(),
-            },
+            "date": {"field": "TDRC", "to_application": get_date},
             "musicbrainz_albumid": {"field": "MusicBrainz Album Id"},
             "musicbrainz_artistid": {"field": "MusicBrainz Artist Id"},
             "musicbrainz_recordingid": {
@@ -172,10 +175,7 @@ CONF = {
             "title": {},
             "artist": {},
             "album": {},
-            "date": {
-                "field": "date",
-                "to_application": lambda v: arrow.get(str(v)).date(),
-            },
+            "date": {"field": "date", "to_application": get_date},
             "musicbrainz_albumid": {},
             "musicbrainz_artistid": {},
             "musicbrainz_recordingid": {"field": "musicbrainz_trackid"},
diff --git a/api/funkwhale_api/music/models.py b/api/funkwhale_api/music/models.py
index c8dd6131..3aa249f0 100644
--- a/api/funkwhale_api/music/models.py
+++ b/api/funkwhale_api/music/models.py
@@ -1,11 +1,10 @@
-import datetime
 import os
 import shutil
 import tempfile
 import uuid
 
-import arrow
 import markdown
+import pendulum
 from django.conf import settings
 from django.core.files import File
 from django.core.files.base import ContentFile
@@ -125,9 +124,7 @@ def import_artist(v):
 
 
 def parse_date(v):
-    if len(v) == 4:
-        return datetime.date(int(v), 1, 1)
-    d = arrow.get(v).date()
+    d = pendulum.parse(v).date()
     return d
 
 
diff --git a/api/requirements/base.txt b/api/requirements/base.txt
index e4a85d99..bb441ac3 100644
--- a/api/requirements/base.txt
+++ b/api/requirements/base.txt
@@ -35,7 +35,7 @@ djangorestframework>=3.7,<3.8
 djangorestframework-jwt>=1.11,<1.12
 oauth2client<4
 google-api-python-client>=1.6,<1.7
-arrow>=0.12,<0.13
+pendulum>=2,<3
 persisting-theory>=0.2,<0.3
 django-versatileimagefield>=1.9,<1.10
 django-filter>=1.1,<1.2
diff --git a/api/tests/federation/test_actors.py b/api/tests/federation/test_actors.py
index 736ec8bf..1244533d 100644
--- a/api/tests/federation/test_actors.py
+++ b/api/tests/federation/test_actors.py
@@ -1,4 +1,4 @@
-import arrow
+import pendulum
 import pytest
 from django.urls import reverse
 from django.utils import timezone
@@ -455,7 +455,7 @@ def test_library_actor_handle_create_audio(mocker, factories):
         assert lt.title == a["metadata"]["recording"]["title"]
         assert lt.artist_name == a["metadata"]["artist"]["name"]
         assert lt.album_title == a["metadata"]["release"]["title"]
-        assert lt.published_date == arrow.get(a["published"])
+        assert lt.published_date == pendulum.parse(a["published"])
 
 
 def test_library_actor_handle_create_audio_autoimport(mocker, factories):
@@ -494,7 +494,7 @@ def test_library_actor_handle_create_audio_autoimport(mocker, factories):
         assert lt.title == a["metadata"]["recording"]["title"]
         assert lt.artist_name == a["metadata"]["artist"]["name"]
         assert lt.album_title == a["metadata"]["release"]["title"]
-        assert lt.published_date == arrow.get(a["published"])
+        assert lt.published_date == pendulum.parse(a["published"])
 
     batch = music_models.ImportBatch.objects.latest("id")
 
diff --git a/api/tests/federation/test_serializers.py b/api/tests/federation/test_serializers.py
index 6b14656b..b42c843e 100644
--- a/api/tests/federation/test_serializers.py
+++ b/api/tests/federation/test_serializers.py
@@ -1,4 +1,4 @@
-import arrow
+import pendulum
 import pytest
 from django.core.paginator import Paginator
 
@@ -492,7 +492,7 @@ def test_activity_pub_audio_serializer_to_library_track(factories):
     assert lt.title == audio["metadata"]["recording"]["title"]
     assert lt.artist_name == audio["metadata"]["artist"]["name"]
     assert lt.album_title == audio["metadata"]["release"]["title"]
-    assert lt.published_date == arrow.get(audio["published"])
+    assert lt.published_date == pendulum.parse(audio["published"])
 
 
 def test_activity_pub_audio_serializer_to_library_track_no_duplicate(factories):
diff --git a/api/tests/music/test_metadata.py b/api/tests/music/test_metadata.py
index fbdf5b81..1dfe927a 100644
--- a/api/tests/music/test_metadata.py
+++ b/api/tests/music/test_metadata.py
@@ -122,3 +122,11 @@ def test_mbid_clean_keeps_only_first(field_name):
     result = field.to_python("/".join([u1, u2]))
 
     assert str(result) == u1
+
+
+@pytest.mark.parametrize(
+    "raw,expected",
+    [("2017", datetime.date(2017, 1, 1)), ("2017-12-31", datetime.date(2017, 12, 31))],
+)
+def test_date_parsing(raw, expected):
+    assert metadata.get_date(raw) == expected
diff --git a/changes/changelog.d/376.bugfix b/changes/changelog.d/376.bugfix
new file mode 100644
index 00000000..b5e8379d
--- /dev/null
+++ b/changes/changelog.d/376.bugfix
@@ -0,0 +1 @@
+Smarter date parsing during import by replacing arrow with pendulum (#376)
-- 
GitLab