diff --git a/api/funkwhale_api/music/metadata.py b/api/funkwhale_api/music/metadata.py
index 21daf2747c6f8cfe01ff3c12ea19f86b7bc5bc3d..112782e4136aef27a5579c0dbb4cd70211788d64 100644
--- a/api/funkwhale_api/music/metadata.py
+++ b/api/funkwhale_api/music/metadata.py
@@ -1,8 +1,11 @@
 import datetime
+import logging
 import mutagen
 import pendulum
+
 from django import forms
 
+logger = logging.getLogger(__name__)
 NODEFAULT = object()
 
 
@@ -14,6 +17,10 @@ class UnsupportedTag(KeyError):
     pass
 
 
+class ParseError(ValueError):
+    pass
+
+
 def get_id3_tag(f, k):
     if k == "pictures":
         return f.tags.getall("APIC")
@@ -103,8 +110,35 @@ class FirstUUIDField(forms.UUIDField):
 
 
 def get_date(value):
-    parsed = pendulum.parse(str(value))
-    return datetime.date(parsed.year, parsed.month, parsed.day)
+    """
+    Parse a raw tag value into a datetime.date
+
+    The value is stringified, then handed to pendulum first and to each
+    strptime pattern listed in ADDITIONAL_FORMATS afterwards.
+
+    Raises ParseError when none of the parsers accepts the value.
+    """
+    ADDITIONAL_FORMATS = ["%Y-%d-%m %H:%M"]  # deezer date format
+    # strptime only accepts str (anything else raises TypeError, which is
+    # not caught below), so stringify once and feed both parsers with it
+    raw = str(value)
+    try:
+        parsed = pendulum.parse(raw)
+        return datetime.date(parsed.year, parsed.month, parsed.day)
+    except (pendulum.exceptions.ParserError, ValueError):
+        # a plain ValueError is treated like a ParserError so that every
+        # parsing failure ends up as a ParseError for the caller
+        pass
+
+    for date_format in ADDITIONAL_FORMATS:
+        try:
+            parsed = datetime.datetime.strptime(raw, date_format)
+        except ValueError:
+            continue
+        else:
+            return datetime.date(parsed.year, parsed.month, parsed.day)
+
+    raise ParseError("{} cannot be parsed as a date".format(value))
 
 
 def split_and_return_first(separator):
@@ -275,7 +309,10 @@ class Metadata(object):
             v = field.to_python(v)
         return v
 
-    def all(self):
+    def all(self, ignore_parse_errors=True):
         """
         Return a dict containing all metadata of the file
+
+        Fields that raise ParseError are logged and set to None, unless
+        ignore_parse_errors is False, in which case the error is re-raised.
         """
@@ -286,6 +323,13 @@ class Metadata(object):
                 data[field] = self.get(field, None)
             except (TagNotFound, forms.ValidationError):
                 data[field] = None
+            except ParseError as e:
+                if not ignore_parse_errors:
+                    raise
+                # best effort: report the unparsable value and keep going
+                # with the remaining fields
+                logger.warning("Unparsable field {}: {}".format(field, str(e)))
+                data[field] = None
 
         return data
 
diff --git a/api/tests/music/test_metadata.py b/api/tests/music/test_metadata.py
index 82c991c0b2ea493824b2ce1a956869721426cf9f..e386c41fc7a030b99425ff93cf085cfba0c9d425 100644
--- a/api/tests/music/test_metadata.py
+++ b/api/tests/music/test_metadata.py
@@ -196,7 +196,38 @@ def test_mbid_clean_keeps_only_first(field_name):
 
 @pytest.mark.parametrize(
     "raw,expected",
-    [("2017", datetime.date(2017, 1, 1)), ("2017-12-31", datetime.date(2017, 12, 31))],
+    [
+        ("2017", datetime.date(2017, 1, 1)),
+        ("2017-12-31", datetime.date(2017, 12, 31)),
+        ("2017-14-01 01:32", datetime.date(2017, 1, 14)),  # deezer format
+    ],
 )
 def test_date_parsing(raw, expected):
     assert metadata.get_date(raw) == expected
+
+
+def test_date_parsing_failure():
+    with pytest.raises(metadata.ParseError):
+        metadata.get_date("noop")
+
+
+def test_date_parsing_failure_non_string():
+    # a non-str value rejected by pendulum must still end up as a
+    # ParseError, not as a TypeError coming out of strptime
+    with pytest.raises(metadata.ParseError):
+        metadata.get_date(None)
+
+
+def test_metadata_all_ignore_parse_errors_true(mocker):
+    path = os.path.join(DATA_DIR, "sample.flac")
+    data = metadata.Metadata(path)
+    mocker.patch.object(data, "get", side_effect=metadata.ParseError("Failure"))
+    assert data.all()["date"] is None
+
+
+def test_metadata_all_ignore_parse_errors_false(mocker):
+    path = os.path.join(DATA_DIR, "sample.flac")
+    data = metadata.Metadata(path)
+    mocker.patch.object(data, "get", side_effect=metadata.ParseError("Failure"))
+    with pytest.raises(metadata.ParseError):
+        data.all(ignore_parse_errors=False)
diff --git a/changes/changelog.d/622.enhancement b/changes/changelog.d/622.enhancement
new file mode 100644
index 0000000000000000000000000000000000000000..dafebf44f9ce02fb7ae6d90d76f6943383ef8246
--- /dev/null
+++ b/changes/changelog.d/622.enhancement
@@ -0,0 +1,2 @@
+More resilient date parsing during audio import, will not crash anymore on
+invalid dates (#622)