Commit 1bee3a46 authored by Agate

Import trust source

parent ad7e6a97
......@@ -4,7 +4,6 @@ import urllib.parse
from django.core.exceptions import ObjectDoesNotExist
from django.core.paginator import Paginator
from django.db.models import F, Q
from rest_framework import serializers
from funkwhale_api.common import utils as funkwhale_utils
......@@ -21,6 +20,31 @@ AP_CONTEXT = [
logger = logging.getLogger(__name__)
class LinkSerializer(serializers.Serializer):
    """
    Serializer for a ``Link`` payload made of ``type``, ``href`` and
    ``mediaType``.

    An optional ``allowed_mimetypes`` keyword argument restricts the accepted
    ``mediaType`` values. Each entry is either an exact mimetype or a
    wildcard ending in ``/*`` (for instance ``image/*``). An empty list, the
    default, disables the restriction.
    """

    type = serializers.ChoiceField(choices=["Link"])
    href = serializers.URLField(max_length=500)
    mediaType = serializers.CharField()

    def __init__(self, *args, **kwargs):
        # remove our custom option before the parent constructor sees it
        self.allowed_mimetypes = kwargs.pop("allowed_mimetypes", [])
        super().__init__(*args, **kwargs)

    def validate_mediaType(self, v):
        """
        Return ``v`` unchanged when it matches one of the allowed mimetypes
        (or when no restriction is configured), raise a ValidationError
        otherwise.
        """
        allowed = self.allowed_mimetypes
        if not allowed:
            # no restrictions
            return v

        def matches(pattern):
            if pattern.endswith("/*"):
                # wildcard entry: compare against the prefix left once the
                # star is removed
                return v.startswith(pattern.replace("*", ""))
            return v == pattern

        if any(matches(pattern) for pattern in allowed):
            return v

        raise serializers.ValidationError(
            "Invalid mimetype {}. Allowed: {}".format(v, self.allowed_mimetypes)
        )
class ActorSerializer(serializers.Serializer):
id = serializers.URLField(max_length=500)
outbox = serializers.URLField(max_length=500)
......@@ -626,32 +650,8 @@ class MusicEntitySerializer(serializers.Serializer):
musicbrainzId = serializers.UUIDField(allow_null=True, required=False)
name = serializers.CharField(max_length=1000)
def create(self, validated_data):
mbid = validated_data.get("musicbrainzId")
candidates = self.model.objects.filter(
Q(mbid=mbid) | Q(fid=validated_data["id"])
).order_by(F("fid").desc(nulls_last=True))
existing = candidates.first()
if existing:
return existing
# nothing matching in our database, let's create a new object
return self.model.objects.create(**self.get_create_data(validated_data))
def get_create_data(self, validated_data):
return {
"mbid": validated_data.get("musicbrainzId"),
"fid": validated_data["id"],
"name": validated_data["name"],
"creation_date": validated_data["published"],
"from_activity": self.context.get("activity"),
}
class ArtistSerializer(MusicEntitySerializer):
model = music_models.Artist
def to_representation(self, instance):
d = {
"type": "Artist",
......@@ -667,9 +667,11 @@ class ArtistSerializer(MusicEntitySerializer):
class AlbumSerializer(MusicEntitySerializer):
model = music_models.Album
released = serializers.DateField(allow_null=True, required=False)
artists = serializers.ListField(child=ArtistSerializer(), min_length=1)
cover = LinkSerializer(
allowed_mimetypes=["image/*"], allow_null=True, required=False
)
def to_representation(self, instance):
d = {
......@@ -688,7 +690,12 @@ class AlbumSerializer(MusicEntitySerializer):
],
}
if instance.cover:
d["cover"] = {"type": "Image", "url": utils.full_url(instance.cover.url)}
d["cover"] = {
"type": "Link",
"href": utils.full_url(instance.cover.url),
"mediaType": mimetypes.guess_type(instance.cover.path)[0]
or "image/jpeg",
}
if self.context.get("include_ap_context", self.parent is None):
d["@context"] = AP_CONTEXT
return d
......@@ -711,7 +718,6 @@ class AlbumSerializer(MusicEntitySerializer):
class TrackSerializer(MusicEntitySerializer):
model = music_models.Track
position = serializers.IntegerField(min_value=0, allow_null=True, required=False)
artists = serializers.ListField(child=ArtistSerializer(), min_length=1)
album = AlbumSerializer()
......@@ -738,32 +744,22 @@ class TrackSerializer(MusicEntitySerializer):
d["@context"] = AP_CONTEXT
return d
def get_create_data(self, validated_data):
artist_data = validated_data["artists"][0]
artist = ArtistSerializer(
context={"activity": self.context.get("activity")}
).create(artist_data)
album = AlbumSerializer(
context={"activity": self.context.get("activity")}
).create(validated_data["album"])
def create(self, validated_data):
from funkwhale_api.music import tasks as music_tasks
return {
"mbid": validated_data.get("musicbrainzId"),
"fid": validated_data["id"],
"title": validated_data["name"],
"position": validated_data.get("position"),
"creation_date": validated_data["published"],
"artist": artist,
"album": album,
"from_activity": self.context.get("activity"),
}
metadata = music_tasks.federation_audio_track_to_metadata(validated_data)
from_activity = self.context.get("activity")
if from_activity:
metadata["from_activity_id"] = from_activity.pk
track = music_tasks.get_track_from_import_metadata(metadata)
return track
class UploadSerializer(serializers.Serializer):
type = serializers.ChoiceField(choices=["Audio"])
id = serializers.URLField(max_length=500)
library = serializers.URLField(max_length=500)
url = serializers.JSONField()
url = LinkSerializer(allowed_mimetypes=["audio/*"])
published = serializers.DateTimeField()
updated = serializers.DateTimeField(required=False, allow_null=True)
bitrate = serializers.IntegerField(min_value=0)
......
......@@ -93,9 +93,9 @@ def convert_track_number(v):
class FirstUUIDField(forms.UUIDField):
def to_python(self, value):
try:
# sometimes, Picard leaves to uuids in the field, separated
# by a slash
value = value.split("/")[0]
# sometimes, Picard leaves two uuids in the field, separated
# by a slash or a ;
value = value.split(";")[0].split("/")[0].strip()
except (AttributeError, IndexError, TypeError):
pass
......@@ -107,10 +107,18 @@ def get_date(value):
return datetime.date(parsed.year, parsed.month, parsed.day)
def split_and_return_first(separator):
    """
    Build a converter that keeps only the first ``separator``-delimited
    chunk of a string, with surrounding whitespace removed.
    """

    def first_item(v):
        # only the leading chunk matters, so a single split is enough
        head = v.split(separator, 1)[0]
        return head.strip()

    return first_item
# Maps the MusicBrainz identifier field names to a FirstUUIDField instance.
# NOTE(review): presumably consulted by Metadata.get to coerce raw tag values
# through `to_python` -- confirm against the caller.
VALIDATION = {
"musicbrainz_artistid": FirstUUIDField(),
"musicbrainz_albumid": FirstUUIDField(),
"musicbrainz_recordingid": FirstUUIDField(),
"musicbrainz_albumartistid": FirstUUIDField(),
}
CONF = {
......@@ -123,10 +131,15 @@ CONF = {
},
"title": {},
"artist": {},
"album_artist": {
"field": "albumartist",
"to_application": split_and_return_first(";"),
},
"album": {},
"date": {"field": "date", "to_application": get_date},
"musicbrainz_albumid": {},
"musicbrainz_artistid": {},
"musicbrainz_albumartistid": {},
"musicbrainz_recordingid": {"field": "musicbrainz_trackid"},
},
},
......@@ -139,10 +152,15 @@ CONF = {
},
"title": {},
"artist": {},
"album_artist": {
"field": "albumartist",
"to_application": split_and_return_first(";"),
},
"album": {},
"date": {"field": "date", "to_application": get_date},
"musicbrainz_albumid": {},
"musicbrainz_artistid": {},
"musicbrainz_albumartistid": {},
"musicbrainz_recordingid": {"field": "musicbrainz_trackid"},
},
},
......@@ -155,10 +173,12 @@ CONF = {
},
"title": {},
"artist": {},
"album_artist": {"field": "albumartist"},
"album": {},
"date": {"field": "date", "to_application": get_date},
"musicbrainz_albumid": {"field": "MusicBrainz Album Id"},
"musicbrainz_artistid": {"field": "MusicBrainz Artist Id"},
"musicbrainz_albumartistid": {"field": "MusicBrainz Album Artist Id"},
"musicbrainz_recordingid": {"field": "MusicBrainz Track Id"},
},
},
......@@ -169,10 +189,12 @@ CONF = {
"track_number": {"field": "TRCK", "to_application": convert_track_number},
"title": {"field": "TIT2"},
"artist": {"field": "TPE1"},
"album_artist": {"field": "TPE2"},
"album": {"field": "TALB"},
"date": {"field": "TDRC", "to_application": get_date},
"musicbrainz_albumid": {"field": "MusicBrainz Album Id"},
"musicbrainz_artistid": {"field": "MusicBrainz Artist Id"},
"musicbrainz_albumartistid": {"field": "MusicBrainz Album Artist Id"},
"musicbrainz_recordingid": {
"field": "UFID",
"getter": get_mp3_recording_id,
......@@ -190,10 +212,12 @@ CONF = {
},
"title": {},
"artist": {},
"album_artist": {"field": "albumartist"},
"album": {},
"date": {"field": "date", "to_application": get_date},
"musicbrainz_albumid": {},
"musicbrainz_artistid": {},
"musicbrainz_albumartistid": {},
"musicbrainz_recordingid": {"field": "musicbrainz_trackid"},
"test": {},
"pictures": {},
......@@ -201,6 +225,19 @@ CONF = {
},
}
# Names of the metadata fields gathered by Metadata.all(), in the order in
# which they are queried.
ALL_FIELDS = [
"track_number",
"title",
"artist",
"album_artist",
"album",
"date",
"musicbrainz_albumid",
"musicbrainz_artistid",
"musicbrainz_albumartistid",
"musicbrainz_recordingid",
]
class Metadata(object):
def __init__(self, path):
......@@ -238,6 +275,20 @@ class Metadata(object):
v = field.to_python(v)
return v
def all(self):
    """
    Return a dict with one entry per name in ALL_FIELDS.

    A field whose lookup raises TagNotFound or forms.ValidationError is
    reported as None.
    """
    # start with every field set to None, failed lookups keep that value
    collected = dict.fromkeys(ALL_FIELDS)
    for name in ALL_FIELDS:
        try:
            collected[name] = self.get(name, None)
        except (TagNotFound, forms.ValidationError):
            continue
    return collected
def get_picture(self, picture_type="cover_front"):
ptype = getattr(mutagen.id3.PictureType, picture_type.upper())
try:
......
import datetime
import logging
import os
import tempfile
import uuid
......@@ -21,11 +22,14 @@ from versatileimagefield.image_warmer import VersatileImageFieldWarmer
from funkwhale_api import musicbrainz
from funkwhale_api.common import fields
from funkwhale_api.common import session
from funkwhale_api.common import utils as common_utils
from funkwhale_api.federation import models as federation_models
from funkwhale_api.federation import utils as federation_utils
from . import importers, metadata, utils
logger = logging.getLogger(__file__)
def empty_dict():
return {}
......@@ -240,14 +244,35 @@ class Album(APIModelMixin):
def get_image(self, data=None):
    """
    Store a cover on this album and return the saved file.

    When ``data`` is provided, it is a dict with a ``mimetype`` key and
    either:

    - ``content``: the raw image, saved as is
    - ``url``: a location the image is downloaded from (3 seconds timeout,
      SSL verification driven by settings.EXTERNAL_REQUESTS_VERIFY_SSL)

    Without ``data``, the front cover is requested from the MusicBrainz API
    using the album mbid, if any.

    Returns ``self.cover.file`` once a cover was saved. Returns None when
    the download fails, when ``data`` carries neither ``content`` nor
    ``url``, or when there is no ``data`` and no mbid.
    """
    if data:
        extensions = {"image/jpeg": "jpg", "image/png": "png", "image/gif": "gif"}
        extension = extensions.get(data["mimetype"], "jpg")
        if data.get("content"):
            # we have the cover itself
            f = ContentFile(data["content"])
        elif data.get("url"):
            # we can fetch from a url
            try:
                response = session.get_session().get(
                    data.get("url"),
                    timeout=3,
                    verify=settings.EXTERNAL_REQUESTS_VERIFY_SSL,
                )
                response.raise_for_status()
            except Exception as e:
                # best effort: a missing cover must not break the caller
                logger.warning(
                    "Cannot download cover at url %s: %s", data.get("url"), e
                )
                return
            else:
                f = ContentFile(response.content)
        else:
            # neither inline content nor a url: there is nothing to save,
            # bail out instead of failing on an unbound file object
            return
        # save=False + explicit update_fields so only the cover column is
        # written
        self.cover.save("{}.{}".format(self.uuid, extension), f, save=False)
        self.save(update_fields=["cover"])
        return self.cover.file
    if self.mbid:
        image_data = musicbrainz.api.images.get_front(str(self.mbid))
        f = ContentFile(image_data)
        self.cover.save("{0}.jpg".format(self.mbid), f, save=False)
        self.save(update_fields=["cover"])
        return self.cover.file
def __str__(self):
......
This diff is collapsed.
......@@ -77,6 +77,29 @@ class Command(BaseCommand):
"with their newest version."
),
)
# Opt-in flag: notify library followers about the files imported by this run.
parser.add_argument(
    "--outbox",
    action="store_true",
    dest="outbox",
    default=False,
    help=(
        "Use this flag to notify library followers of newly imported files. "
        "You'll likely want to keep this disabled for CLI imports, especially "
        "if you plan to import hundreds or thousands of files, as it will cause "
        "a lot of overhead on your server and on servers you are federating with."
    ),
)
# Opt-in flag: enable realtime updates about the import in the UI.
parser.add_argument(
    "--broadcast",
    action="store_true",
    dest="broadcast",
    default=False,
    help=(
        "Use this flag to enable realtime updates about the import in the UI. "
        "This causes some overhead, so it's disabled by default."
    ),
)
parser.add_argument(
"--reference",
......@@ -261,6 +284,8 @@ class Command(BaseCommand):
async_,
options["replace"],
options["in_place"],
options["outbox"],
options["broadcast"],
)
except Exception as e:
if options["exit_on_failure"]:
......@@ -272,11 +297,29 @@ class Command(BaseCommand):
errors.append((path, "{} {}".format(e.__class__.__name__, e)))
return errors
def create_upload(self, path, reference, library, async_, replace, in_place):
def create_upload(
self,
path,
reference,
library,
async_,
replace,
in_place,
dispatch_outbox,
broadcast,
):
import_handler = tasks.process_upload.delay if async_ else tasks.process_upload
upload = models.Upload(library=library, import_reference=reference)
upload.source = "file://" + path
upload.import_metadata = {"replace": replace}
upload.import_metadata = {
"funkwhale": {
"config": {
"replace": replace,
"dispatch_outbox": dispatch_outbox,
"broadcast": broadcast,
}
}
}
if not in_place:
name = os.path.basename(path)
with open(path, "rb") as f:
......
......@@ -10,3 +10,4 @@ django-debug-toolbar>=1.9,<1.10
# improved REPL
ipdb==0.8.1
black
profiling
......@@ -11,7 +11,7 @@ import uuid
from faker.providers import internet as internet_provider
import factory
import pytest
import requests_mock
from django.contrib.auth.models import AnonymousUser
from django.core.cache import cache as django_cache
from django.core.files import uploadedfile
......@@ -271,14 +271,13 @@ def media_root(settings):
shutil.rmtree(tmp_dir)
@pytest.fixture
def r_mock():
@pytest.fixture(autouse=True)
def r_mock(requests_mock):
"""
Returns a requests_mock.mock() object you can use to mock HTTP calls made
using python-requests
"""
with requests_mock.mock() as m:
yield m
yield requests_mock
@pytest.fixture
......
import io
import pytest
import uuid
......@@ -588,42 +589,6 @@ def test_music_library_serializer_from_private(factories, mocker):
)
@pytest.mark.parametrize(
"model,serializer_class",
[
("music.Artist", serializers.ArtistSerializer),
("music.Album", serializers.AlbumSerializer),
("music.Track", serializers.TrackSerializer),
],
)
def test_music_entity_serializer_create_existing_mbid(
model, serializer_class, factories
):
entity = factories[model]()
data = {"musicbrainzId": str(entity.mbid), "id": "https://noop"}
serializer = serializer_class()
assert serializer.create(data) == entity
@pytest.mark.parametrize(
"model,serializer_class",
[
("music.Artist", serializers.ArtistSerializer),
("music.Album", serializers.AlbumSerializer),
("music.Track", serializers.TrackSerializer),
],
)
def test_music_entity_serializer_create_existing_fid(
model, serializer_class, factories
):
entity = factories[model](fid="https://entity.url")
data = {"musicbrainzId": None, "id": "https://entity.url"}
serializer = serializer_class()
assert serializer.create(data) == entity
def test_activity_pub_artist_serializer_to_ap(factories):
artist = factories["music.Artist"]()
expected = {
......@@ -639,30 +604,6 @@ def test_activity_pub_artist_serializer_to_ap(factories):
assert serializer.data == expected
def test_activity_pub_artist_serializer_from_ap(factories):
activity = factories["federation.Activity"]()
published = timezone.now()
data = {
"type": "Artist",
"id": "http://hello.artist",
"name": "John Smith",
"musicbrainzId": str(uuid.uuid4()),
"published": published.isoformat(),
}
serializer = serializers.ArtistSerializer(data=data, context={"activity": activity})
assert serializer.is_valid(raise_exception=True)
artist = serializer.save()
assert artist.from_activity == activity
assert artist.name == data["name"]
assert artist.fid == data["id"]
assert str(artist.mbid) == data["musicbrainzId"]
assert artist.creation_date == published
def test_activity_pub_album_serializer_to_ap(factories):
album = factories["music.Album"]()
......@@ -671,7 +612,11 @@ def test_activity_pub_album_serializer_to_ap(factories):
"type": "Album",
"id": album.fid,
"name": album.title,
"cover": {"type": "Image", "url": utils.full_url(album.cover.url)},
"cover": {
"type": "Link",
"mediaType": "image/jpeg",
"href": utils.full_url(album.cover.url),
},
"musicbrainzId": album.mbid,
"published": album.creation_date.isoformat(),
"released": album.release_date.isoformat(),
......@@ -686,49 +631,6 @@ def test_activity_pub_album_serializer_to_ap(factories):
assert serializer.data == expected
def test_activity_pub_album_serializer_from_ap(factories):
activity = factories["federation.Activity"]()
published = timezone.now()
released = timezone.now().date()
data = {
"type": "Album",
"id": "http://hello.album",
"name": "Purple album",
"musicbrainzId": str(uuid.uuid4()),
"published": published.isoformat(),
"released": released.isoformat(),
"artists": [
{
"type": "Artist",
"id": "http://hello.artist",
"name": "John Smith",
"musicbrainzId": str(uuid.uuid4()),
"published": published.isoformat(),
}
],
}
serializer = serializers.AlbumSerializer(data=data, context={"activity": activity})
assert serializer.is_valid(raise_exception=True)
album = serializer.save()
artist = album.artist
assert album.from_activity == activity
assert album.title == data["name"]
assert album.fid == data["id"]
assert str(album.mbid) == data["musicbrainzId"]
assert album.creation_date == published
assert album.release_date == released
assert artist.from_activity == activity
assert artist.name == data["artists"][0]["name"]
assert artist.fid == data["artists"][0]["id"]
assert str(artist.mbid) == data["artists"][0]["musicbrainzId"]
assert artist.creation_date == published
def test_activity_pub_track_serializer_to_ap(factories):
track = factories["music.Track"]()
expected = {
......@@ -753,7 +655,7 @@ def test_activity_pub_track_serializer_to_ap(factories):
assert serializer.data == expected
def test_activity_pub_track_serializer_from_ap(factories):
def test_activity_pub_track_serializer_from_ap(factories, r_mock):
activity = factories["federation.Activity"]()
published = timezone.now()
released = timezone.now().date()
......@@ -771,6 +673,11 @@ def test_activity_pub_track_serializer_from_ap(factories):
"musicbrainzId": str(uuid.uuid4()),
"published": published.isoformat(),
"released": released.isoformat(),
"cover": {
"type": "Link",
"href": "https://cover.image/test.png",
"mediaType": "image/png",
},
"artists": [
{
"type": "Artist",
......@@ -791,12 +698,14 @@ def test_activity_pub_track_serializer_from_ap(factories):
}
],
}
r_mock.get(data["album"]["cover"]["href"], body=io.BytesIO(b"coucou"))
serializer = serializers.TrackSerializer(data=data, context={"activity": activity})
assert serializer.is_valid(raise_exception=True)
track = serializer.save()
album = track.album
artist = track.artist
album_artist = track.album.artist
assert track.from_activity == activity
assert track.fid == data["id"]
......@@ -806,7 +715,8 @@ def test_activity_pub_track_serializer_from_ap(factories):
assert str(track.mbid) == data["musicbrainzId"]
assert album.from_activity == activity
assert album.cover.read() == b"coucou"
assert album.cover.path.endswith(".png")
assert album.title == data["album"]["name"]
assert album.fid == data["album"]["id"]
assert str(album.mbid) == data["album"]["musicbrainzId"]
......@@ -819,6 +729,12 @@ def test_activity_pub_track_serializer_from_ap(factories):
assert str(artist.mbid) == data["artists"][0]["musicbrainzId"]
assert artist.creation_date == published
assert album_artist.from_activity == activity
assert album_artist.name == data["album"]["artists"][0]["name"]
assert album_artist.fid == data["album"]["artists"][0]["id"]
assert str(album_artist.mbid) == data["album"]["artists"][0]["musicbrainzId"]
assert album_artist.creation_date == published
def test_activity_pub_upload_serializer_from_ap(factories, mocker):
activity = factories["federation.Activity"]()
......
No preview for this file type
......@@ -9,21 +9,46 @@ from funkwhale_api.music import metadata
DATA_DIR = os.path.dirname(os.path.abspath(__file__))
def test_get_all_metadata_at_once():
    """Metadata.all() exposes every known field of test.ogg in one dict."""
    # the artist and the album artist share the same MusicBrainz identifier
    grieg_mbid = uuid.UUID("013c8e5b-d72a-4cd3-8dee-6c64d6125823")
    ogg_metadata = metadata.Metadata(os.path.join(DATA_DIR, "test.ogg"))

    assert ogg_metadata.all() == {
        "title": "Peer Gynt Suite no. 1, op. 46: I. Morning",
        "artist": "Edvard Grieg",
        "album_artist": "Edvard Grieg",
        "album": "Peer Gynt Suite no. 1, op. 46",
        "date": datetime.date(2012, 8, 15),
        "track_number": 1,
        "musicbrainz_albumid": uuid.UUID("a766da8b-8336-47aa-a3ee-371cc41ccc75"),
        "musicbrainz_recordingid": uuid.UUID("bd21ac48-46d8-4e78-925f-d9cc2a294656"),
        "musicbrainz_artistid": grieg_mbid,
        "musicbrainz_albumartistid": grieg_mbid,
    }
@pytest.mark.parametrize(
"field,value",
[
("title", "Peer Gynt Suite no. 1, op. 46: I. Morning"),
("artist", "Edvard Grieg"),
("album_artist", "Edvard Grieg"),
("album", "Peer Gynt Suite no. 1, op. 46"),
("date", datetime.date(2012, 8, 15)),
("track_number", 1),
("musicbrainz_albumid", uuid.UUID("a766da8b-8336-47aa-a3ee-371cc41ccc75")),
("musicbrainz_recordingid", uuid.UUID("bd21ac48-46d8-4e78-925f-d9cc2a294656")),
("musicbrainz_artistid", uuid.UUID("013c8e5b-d72a-4cd3-8dee-6c64d6125823")),
(
"musicbrainz_albumartistid",
uuid.UUID("013c8e5b-d72a-4cd3-8dee-6c64d6125823"),
),
],
)
def test_can_get_metadata_from_opus_file(field, value):
path = os.path.join(DATA_DIR, "test.opus")
def test_can_get_metadata_from_ogg_file(field, value):
path = os.path.join(DATA_DIR, "test.ogg")
data = metadata.Metadata(path)