From 4d6e894b6291684b97a42d0d864020137f800715 Mon Sep 17 00:00:00 2001
From: Eliot Berriot <contact@eliotberriot.com>
Date: Thu, 5 Apr 2018 23:22:28 +0200
Subject: [PATCH] AudioCollection to import job and track file creation

---
 api/funkwhale_api/music/serializers.py |  81 ++++++++++++++++++++
 api/funkwhale_api/music/tasks.py       |  48 +++++++++++-
 api/tests/music/test_import.py         | 101 +++++++++++++++++++++++++
 api/tests/music/test_serializers.py    |  32 ++++++++
 4 files changed, 261 insertions(+), 1 deletion(-)
 create mode 100644 api/tests/music/test_serializers.py

diff --git a/api/funkwhale_api/music/serializers.py b/api/funkwhale_api/music/serializers.py
index 48419bbe..9f0b7af5 100644
--- a/api/funkwhale_api/music/serializers.py
+++ b/api/funkwhale_api/music/serializers.py
@@ -1,3 +1,4 @@
+from django.db import transaction
 from rest_framework import serializers
 from taggit.models import Tag
 
@@ -150,3 +151,83 @@ class TrackActivitySerializer(activity_serializers.ModelSerializer):
 
     def get_type(self, obj):
         return 'Audio'
+
+
+class AudioMetadataSerializer(serializers.Serializer):
+    artist = serializers.CharField(required=False)
+    release = serializers.CharField(required=False)
+    recording = serializers.CharField(required=False)
+
+
+class AudioSerializer(serializers.Serializer):
+    type = serializers.CharField()
+    id = serializers.URLField()
+    url = serializers.JSONField()
+    metadata = AudioMetadataSerializer()
+
+    def validate_type(self, v):
+        if v != 'Audio':
+            raise serializers.ValidationError('Invalid type for audio')
+        return v
+
+    def validate_url(self, v):
+        try:
+            url = v['href']
+        except (KeyError, TypeError):
+            raise serializers.ValidationError('Missing href')
+
+        try:
+            media_type = v['mediaType']
+        except (KeyError, TypeError):
+            raise serializers.ValidationError('Missing mediaType')
+
+        if not media_type.startswith('audio/'):
+            raise serializers.ValidationError('Invalid mediaType')
+
+        return url
+
+    def validate_url(self, v):
+        try:
+            url = v['href']
+        except (KeyError, TypeError):
+            raise serializers.ValidationError('Missing href')
+
+        try:
+            media_type = v['mediaType']
+        except (KeyError, TypeError):
+            raise serializers.ValidationError('Missing mediaType')
+
+        if not media_type.startswith('audio/'):
+            raise serializers.ValidationError('Invalid mediaType')
+
+        return v
+
+    def create(self, validated_data, batch):
+        metadata = validated_data['metadata'].copy()
+        metadata['mediaType'] = validated_data['url']['mediaType']
+        return models.ImportJob.objects.create(
+            batch=batch,
+            source=validated_data['url']['href'],
+            federation_source=validated_data['id'],
+            metadata=metadata,
+        )
+
+
+class AudioCollectionImportSerializer(serializers.Serializer):
+    id = serializers.URLField()
+    items = serializers.ListField(
+        child=AudioSerializer(),
+        min_length=1,
+    )
+
+    @transaction.atomic
+    def create(self, validated_data):
+        batch = models.ImportBatch.objects.create(
+            federation_actor=self.context['sender'],
+            federation_source=validated_data['id'],
+            source='federation',
+        )
+        for i in validated_data['items']:
+            s = AudioSerializer(data=i)
+            job = s.create(i, batch)
+        return batch
diff --git a/api/funkwhale_api/music/tasks.py b/api/funkwhale_api/music/tasks.py
index bf7a847d..e4214d99 100644
--- a/api/funkwhale_api/music/tasks.py
+++ b/api/funkwhale_api/music/tasks.py
@@ -25,6 +25,44 @@ def set_acoustid_on_track_file(track_file):
         return update(result['id'])
 
 
+def get_mbid(url, type):
+    prefix = 'https://musicbrainz.org/{}/'.format(type)
+    if url.startswith(prefix):
+        return url.replace(prefix, '')
+
+
+def import_track_from_metadata(metadata):
+    raw_track = metadata['recording']
+    if isinstance(raw_track, str):
+        track_mbid = get_mbid(raw_track, 'recording')
+        return models.Track.get_or_create_from_api(mbid=track_mbid)
+
+    raw_album = metadata['release']
+    if isinstance(raw_album, str):
+        album_mbid = get_mbid(raw_album, 'release')
+        album = models.Album.get_or_create_from_api(mbid=album_mbid)
+        return models.Track.get_or_create_from_title(
+            raw_track['title'], artist=album.artist, album=album)
+
+    raw_artist = metadata['artist']
+    if isinstance(raw_artist, str):
+        artist_mbid = get_mbid(raw_artist, 'artist')
+        artist = models.Artist.get_or_create_from_api(mbid=artist_mbid)
+        album = models.Album.get_or_create_from_title(
+            raw_album['title'], artist=artist)
+        return models.Track.get_or_create_from_title(
+            raw_track['title'], artist=artist, album=album)
+
+    # worst case scenario, we have absolutely no way to link to a
+    # musicbrainz resource, we rely on the name/titles
+    artist = models.Artist.get_or_create_from_name(
+        raw_artist['name'])
+    album = models.Album.get_or_create_from_title(
+        raw_album['title'], artist=artist)
+    return models.Track.get_or_create_from_title(
+        raw_track['title'], artist=artist, album=album)
+
+
 def _do_import(import_job, replace, use_acoustid=True):
     from_file = bool(import_job.audio_file)
     mbid = import_job.mbid
@@ -43,9 +81,14 @@ def _do_import(import_job, replace, use_acoustid=True):
             acoustid_track_id = match['id']
     if mbid:
         track, _ = models.Track.get_or_create_from_api(mbid=mbid)
-    else:
+    elif import_job.audio_file:
         track = import_track_data_from_path(import_job.audio_file.path)
+    else:
+        # probably federation, we use metadata stored on the job itself
+        if not import_job.metadata:
+            raise ValueError('We cannot import without at least metadatas')
 
+        track = import_track_from_metadata(import_job.metadata)
     track_file = None
     if replace:
         track_file = track.files.first()
@@ -63,6 +106,9 @@ def _do_import(import_job, replace, use_acoustid=True):
         track_file.audio_file = ContentFile(import_job.audio_file.read())
         track_file.audio_file.name = import_job.audio_file.name
         track_file.duration = duration
+    elif import_job.federation_source:
+        # no downloading, we hotlink
+        pass
     else:
         track_file.download_file()
     track_file.save()
diff --git a/api/tests/music/test_import.py b/api/tests/music/test_import.py
index 0f709e81..e9ad9d0f 100644
--- a/api/tests/music/test_import.py
+++ b/api/tests/music/test_import.py
@@ -1,7 +1,10 @@
 import json
+import pytest
 
 from django.urls import reverse
 
+from funkwhale_api.music import tasks
+
 
 def test_create_import_can_bind_to_request(
         artists, albums, mocker, factories, superuser_api_client):
@@ -33,3 +36,101 @@ def test_create_import_can_bind_to_request(
     batch = request.import_batches.latest('id')
 
     assert batch.import_request == request
+
+
+@pytest.mark.parametrize('url,type,expected', [
+    ('https://musicbrainz.org/artist/test', 'artist', 'test'),
+    ('https://musicbrainz.org/release/test', 'release', 'test'),
+    ('https://musicbrainz.org/recording/test', 'recording', 'test'),
+    ('https://musicbrainz.org/recording/test', 'artist', None),
+])
+def test_get_mbid(url, type, expected):
+    assert tasks.get_mbid(url, type) == expected
+
+
+def test_import_job_from_federation_no_musicbrainz(factories):
+    job = factories['music.ImportJob'](
+        federation=True,
+        metadata__artist={'name': 'Hello'},
+        metadata__release={'title': 'World'},
+        metadata__recording={'title': 'Ping'},
+        mbid=None,
+    )
+
+    tasks.import_job_run(import_job_id=job.pk)
+    job.refresh_from_db()
+
+    tf = job.track_file
+    assert tf.track.title == 'Ping'
+    assert tf.track.artist.name == 'Hello'
+    assert tf.track.album.title == 'World'
+
+
+def test_import_job_from_federation_musicbrainz_recording(factories, mocker):
+    t = factories['music.Track']()
+    track_from_api = mocker.patch(
+        'funkwhale_api.music.models.Track.get_or_create_from_api',
+        return_value=t)
+    job = factories['music.ImportJob'](
+        federation=True,
+        metadata__artist={'name': 'Hello'},
+        metadata__release={'title': 'World'},
+        mbid=None,
+    )
+
+    tasks.import_job_run(import_job_id=job.pk)
+    job.refresh_from_db()
+
+    tf = job.track_file
+    assert tf.track == t
+    track_from_api.assert_called_once_with(
+        mbid=tasks.get_mbid(job.metadata['recording'], 'recording'))
+
+
+def test_import_job_from_federation_musicbrainz_release(factories, mocker):
+    a = factories['music.Album']()
+    album_from_api = mocker.patch(
+        'funkwhale_api.music.models.Album.get_or_create_from_api',
+        return_value=a)
+    job = factories['music.ImportJob'](
+        federation=True,
+        metadata__artist={'name': 'Hello'},
+        metadata__recording={'title': 'Ping'},
+        mbid=None,
+    )
+
+    tasks.import_job_run(import_job_id=job.pk)
+    job.refresh_from_db()
+
+    tf = job.track_file
+    assert tf.track.title == 'Ping'
+    assert tf.track.artist == a.artist
+    assert tf.track.album == a
+
+    album_from_api.assert_called_once_with(
+        mbid=tasks.get_mbid(job.metadata['release'], 'release'))
+
+
+def test_import_job_from_federation_musicbrainz_artist(factories, mocker):
+    a = factories['music.Artist']()
+    artist_from_api = mocker.patch(
+        'funkwhale_api.music.models.Artist.get_or_create_from_api',
+        return_value=a)
+    job = factories['music.ImportJob'](
+        federation=True,
+        metadata__release={'title': 'World'},
+        metadata__recording={'title': 'Ping'},
+        mbid=None,
+    )
+
+    tasks.import_job_run(import_job_id=job.pk)
+    job.refresh_from_db()
+
+    tf = job.track_file
+    assert tf.track.title == 'Ping'
+    assert tf.track.artist == a
+    assert tf.track.album.artist == a
+    assert tf.track.album.title == 'World'
+
+    artist_from_api.assert_called_once_with(
+        mbid=tasks.get_mbid(job.metadata['artist'], 'artist'))
diff --git a/api/tests/music/test_serializers.py b/api/tests/music/test_serializers.py
new file mode 100644
index 00000000..556ac4c0
--- /dev/null
+++ b/api/tests/music/test_serializers.py
@@ -0,0 +1,32 @@
+from funkwhale_api.music import serializers
+
+
+def test_activity_pub_audio_collection_serializer(factories):
+    sender = factories['federation.Actor']()
+
+    collection = {
+        'id': 'https://batch.import',
+        'type': 'Collection',
+        'totalItems': 2,
+        'items': factories['federation.Audio'].create_batch(size=2)
+    }
+
+    serializer = serializers.AudioCollectionImportSerializer(
+        data=collection, context={'sender': sender})
+
+    assert serializer.is_valid(raise_exception=True)
+
+    batch = serializer.save()
+    jobs = list(batch.jobs.all())
+
+    assert batch.source == 'federation'
+    assert batch.federation_source == collection['id']
+    assert batch.federation_actor == sender
+    assert len(jobs) == 2
+
+    for i, a in enumerate(collection['items']):
+        job = jobs[i]
+        assert job.federation_source == a['id']
+        assert job.source == a['url']['href']
+        a['metadata']['mediaType'] = a['url']['mediaType']
+        assert job.metadata == a['metadata']
-- 
GitLab