From 8e84e2bf39c09aa960ac990674cd47c8464a0e33 Mon Sep 17 00:00:00 2001
From: Eliot Berriot <contact@eliotberriot.com>
Date: Mon, 26 Nov 2018 17:07:55 +0100
Subject: [PATCH] Fix #622: More resilient date parsing during audio import,
 will not crash anymore on invalid dates

---
 api/funkwhale_api/music/metadata.py | 32 ++++++++++++++++++++++++++---
 api/tests/music/test_metadata.py    | 26 ++++++++++++++++++++++-
 changes/changelog.d/622.enhancement |  2 ++
 3 files changed, 56 insertions(+), 4 deletions(-)
 create mode 100644 changes/changelog.d/622.enhancement

diff --git a/api/funkwhale_api/music/metadata.py b/api/funkwhale_api/music/metadata.py
index 21daf2747..112782e41 100644
--- a/api/funkwhale_api/music/metadata.py
+++ b/api/funkwhale_api/music/metadata.py
@@ -1,8 +1,11 @@
 import datetime
+import logging
 import mutagen
 import pendulum
+
 from django import forms
 
+logger = logging.getLogger(__name__)
 NODEFAULT = object()
 
 
@@ -14,6 +17,10 @@ class UnsupportedTag(KeyError):
     pass
 
 
+class ParseError(ValueError):
+    """Raised when a raw tag value cannot be parsed (e.g. an invalid date)."""
+
+
 def get_id3_tag(f, k):
     if k == "pictures":
         return f.tags.getall("APIC")
@@ -103,8 +110,22 @@ class FirstUUIDField(forms.UUIDField):
 
 
 def get_date(value):
-    parsed = pendulum.parse(str(value))
-    return datetime.date(parsed.year, parsed.month, parsed.day)
+    ADDITIONAL_FORMATS = ["%Y-%d-%m %H:%M"]  # deezer date format (year-day-month)
+    try:
+        parsed = pendulum.parse(str(value))
+        return datetime.date(parsed.year, parsed.month, parsed.day)
+    except pendulum.exceptions.ParserError:
+        pass  # pendulum could not parse it: try ADDITIONAL_FORMATS below
+
+    for date_format in ADDITIONAL_FORMATS:
+        try:  # coerce to str: strptime() raises TypeError on non-str values
+            parsed = datetime.datetime.strptime(str(value), date_format)
+        except ValueError:  # value does not match this format, try the next
+            continue
+        else:
+            return datetime.date(parsed.year, parsed.month, parsed.day)
+
+    raise ParseError("{} cannot be parsed as a date".format(value))
 
 
 def split_and_return_first(separator):
@@ -275,7 +296,7 @@ class Metadata(object):
             v = field.to_python(v)
         return v
 
-    def all(self):
+    def all(self, ignore_parse_errors=True):
         """
         Return a dict containing all metadata of the file
         """
@@ -286,6 +307,11 @@ class Metadata(object):
                 data[field] = self.get(field, None)
             except (TagNotFound, forms.ValidationError):
                 data[field] = None
+            except ParseError as e:
+                if not ignore_parse_errors:
+                    raise
+                logger.warning("Unparsable field {}: {}".format(field, str(e)))
+                data[field] = None
 
         return data
 
diff --git a/api/tests/music/test_metadata.py b/api/tests/music/test_metadata.py
index 82c991c0b..e386c41fc 100644
--- a/api/tests/music/test_metadata.py
+++ b/api/tests/music/test_metadata.py
@@ -196,7 +196,31 @@ def test_mbid_clean_keeps_only_first(field_name):
 
 @pytest.mark.parametrize(
     "raw,expected",
-    [("2017", datetime.date(2017, 1, 1)), ("2017-12-31", datetime.date(2017, 12, 31))],
+    [
+        ("2017", datetime.date(2017, 1, 1)),
+        ("2017-12-31", datetime.date(2017, 12, 31)),
+        ("2017-14-01 01:32", datetime.date(2017, 1, 14)),  # deezer format
+    ],
 )
 def test_date_parsing(raw, expected):
     assert metadata.get_date(raw) == expected
+
+
+def test_date_parsing_failure():
+    with pytest.raises(metadata.ParseError):
+        metadata.get_date("noop")
+
+
+def test_metadata_all_ignore_parse_errors_true(mocker):
+    path = os.path.join(DATA_DIR, "sample.flac")
+    data = metadata.Metadata(path)
+    mocker.patch.object(data, "get", side_effect=metadata.ParseError("Failure"))
+    assert data.all()["date"] is None
+
+
+def test_metadata_all_ignore_parse_errors_false(mocker):
+    path = os.path.join(DATA_DIR, "sample.flac")
+    data = metadata.Metadata(path)
+    mocker.patch.object(data, "get", side_effect=metadata.ParseError("Failure"))
+    with pytest.raises(metadata.ParseError):
+        data.all(ignore_parse_errors=False)
diff --git a/changes/changelog.d/622.enhancement b/changes/changelog.d/622.enhancement
new file mode 100644
index 000000000..dafebf44f
--- /dev/null
+++ b/changes/changelog.d/622.enhancement
@@ -0,0 +1,2 @@
+More resilient date parsing during audio import: invalid dates no longer
+crash the import (#622)
-- 
GitLab