Commit 125d0eed authored by Eliot Berriot

Federation scanning

parent a3875e39
......@@ -14,9 +14,23 @@ class NestedLibraryFollowSerializer(serializers.ModelSerializer):
fields = ["creation_date", "uuid", "fid", "approved", "modification_date"]
class LibraryScanSerializer(serializers.ModelSerializer):
    """Serialize the file counters, status and timestamps of a library scan."""

    class Meta:
        model = music_models.LibraryScan
        # Counters first, then the scan status and its timestamps.
        fields = [
            "total_files",
            "processed_files",
            "errored_files",
            "status",
            "creation_date",
            "modification_date",
        ]
class LibrarySerializer(serializers.ModelSerializer):
actor = federation_serializers.APIActorSerializer()
uploads_count = serializers.SerializerMethodField()
latest_scan = serializers.SerializerMethodField()
follow = serializers.SerializerMethodField()
class Meta:
......@@ -31,6 +45,7 @@ class LibrarySerializer(serializers.ModelSerializer):
"uploads_count",
"privacy_level",
"follow",
"latest_scan",
]
def get_uploads_count(self, o):
......@@ -42,6 +57,11 @@ class LibrarySerializer(serializers.ModelSerializer):
except (AttributeError, IndexError):
return None
def get_latest_scan(self, o):
    """Return the serialized most recent scan of library ``o``.

    Scans are ordered by descending ``creation_date`` and only the first
    one is serialized with ``LibraryScanSerializer``. ``None`` is returned
    when the library has no scan.
    """
    latest = o.scans.order_by("-creation_date").first()
    if not latest:
        return None
    return LibraryScanSerializer(latest).data
class LibraryFollowSerializer(serializers.ModelSerializer):
target = common_serializers.RelatedField("uuid", LibrarySerializer(), required=True)
......@@ -54,6 +74,9 @@ class LibraryFollowSerializer(serializers.ModelSerializer):
def validate_target(self, v):
    """Validate the library ``v`` that the requesting actor wants to follow.

    The requesting actor is read from ``self.context["actor"]``.

    Returns ``v`` unchanged when it is acceptable. Raises
    ``serializers.ValidationError`` when ``v.actor`` is the requesting
    actor, or when ``v.received_follows`` already holds a follow from it.
    """
    requester = self.context["actor"]

    owns_library = v.actor == requester
    if owns_library:
        raise serializers.ValidationError("You cannot follow your own library")

    # Only queried once the ownership check has passed.
    already_following = v.received_follows.filter(actor=requester).exists()
    if already_following:
        raise serializers.ValidationError("You are already following this library")

    return v
......
......@@ -31,13 +31,14 @@ class LibraryFollowViewSet(
mixins.CreateModelMixin,
mixins.ListModelMixin,
mixins.RetrieveModelMixin,
mixins.DestroyModelMixin,
viewsets.GenericViewSet,
):
lookup_field = "uuid"
queryset = (
models.LibraryFollow.objects.all()
.order_by("-creation_date")
.select_related("target__actor", "actor")
.select_related("actor", "target__actor")
)
serializer_class = api_serializers.LibraryFollowSerializer
permission_classes = [permissions.IsAuthenticated]
......@@ -52,6 +53,13 @@ class LibraryFollowViewSet(
follow = serializer.save(actor=self.request.user.actor)
routes.outbox.dispatch({"type": "Follow"}, context={"follow": follow})
@transaction.atomic
def perform_destroy(self, instance):
    """Dispatch an ``Undo``/``Follow`` activity, then delete the follow.

    The activity is dispatched on the outbox before the deletion, with
    ``instance`` passed as the ``follow`` entry of the dispatch context.
    """
    undo_activity = {"type": "Undo", "object": {"type": "Follow"}}
    routes.outbox.dispatch(undo_activity, context={"follow": instance})
    instance.delete()
def get_serializer_context(self):
context = super().get_serializer_context()
context["actor"] = self.request.user.actor
......@@ -96,8 +104,25 @@ class LibraryViewSet(mixins.RetrieveModelMixin, viewsets.GenericViewSet):
qs = super().get_queryset()
return qs.viewable_by(actor=self.request.user.actor)
@decorators.detail_route(methods=["post"])
def scan(self, request, *args, **kwargs):
    """Schedule a scan of the library selected by the URL lookup.

    This is a detail route only: the handler relies on ``self.get_object()``
    and therefore needs the lookup value from the URL.

    The response always has status 200 and one of two payloads:

    - ``{"status": "skipped"}`` when the library's actor is local, or when
      ``schedule_scan`` returns a falsy value;
    - ``{"status": "scheduled", "scan": ...}`` with the serialized scan
      otherwise.
    """
    library = self.get_object()
    if library.actor.is_local:
        return response.Response({"status": "skipped"}, 200)

    scan = library.schedule_scan(actor=request.user.actor)
    if not scan:
        return response.Response({"status": "skipped"}, 200)

    return response.Response(
        {
            "status": "scheduled",
            "scan": api_serializers.LibraryScanSerializer(scan).data,
        },
        200,
    )
@decorators.list_route(methods=["post"])
def fetch(self, request, *args, **kwargs):
try:
fid = request.data["fid"]
except KeyError:
......@@ -110,7 +135,7 @@ class LibraryViewSet(mixins.RetrieveModelMixin, viewsets.GenericViewSet):
)
except requests.exceptions.RequestException as e:
return response.Response(
{"detail": "Error while scanning the library: {}".format(str(e))},
{"detail": "Error while fetching the library: {}".format(str(e))},
status=400,
)
except serializers.serializers.ValidationError as e:
......
......@@ -90,7 +90,7 @@ def get_library_data(library_url, actor):
return {"errors": ["Permission denied while scanning library"]}
elif scode >= 400:
return {"errors": ["Error {} while fetching the library".format(scode)]}
serializer = serializers.PaginatedCollectionSerializer(data=response.json())
serializer = serializers.LibrarySerializer(data=response.json())
if not serializer.is_valid():
return {"errors": ["Invalid ActivityPub response from remote library"]}
......
......@@ -77,6 +77,38 @@ def outbox_accept(context):
}
@inbox.register({"type": "Undo", "object.type": "Follow"})
def inbox_undo_follow(payload, context):
    """Handle an incoming ``Undo`` of a ``Follow`` activity.

    ``payload`` is validated with ``UndoFollowSerializer``. An invalid
    payload is logged at debug level and discarded, unless
    ``context["raise_exception"]`` is truthy, in which case validation
    raises instead. A valid payload is applied with ``serializer.save()``.
    """
    serializer = serializers.UndoFollowSerializer(data=payload, context=context)
    if not serializer.is_valid(raise_exception=context.get("raise_exception", False)):
        # Both placeholders must be %-style: logging formats the message
        # with ``msg % args``, so a ``{}`` placeholder would leave one
        # argument unconverted and break the log call.
        logger.debug(
            "Discarding invalid follow undo from %s: %s",
            context["actor"].fid,
            serializer.errors,
        )
        return

    serializer.save()
@outbox.register({"type": "Undo", "object.type": "Follow"})
def outbox_undo_follow(context):
    """Yield the outgoing ``Undo`` activity for ``context["follow"]``.

    For a ``federation.LibraryFollow`` the recipient is the ``actor`` of
    the followed target; for any other follow it is the target itself.
    """
    follow = context["follow"]
    sender = follow.actor

    is_library_follow = follow._meta.label == "federation.LibraryFollow"
    recipient = follow.target.actor if is_library_follow else follow.target

    undo_serializer = serializers.UndoFollowSerializer(
        follow, context={"actor": sender}
    )
    undo_payload = undo_serializer.data
    yield {
        "actor": sender,
        "type": "Undo",
        "payload": with_recipients(undo_payload, to=[recipient]),
        "object": follow,
        "related_object": follow.target,
    }
@outbox.register({"type": "Follow"})
def outbox_follow(context):
follow = context["follow"]
......
......@@ -343,7 +343,7 @@ class AcceptFollowSerializer(serializers.Serializer):
follow.approved = True
follow.save()
if follow.target._meta.label == "music.Library":
follow.target.schedule_scan()
follow.target.schedule_scan(actor=follow.actor)
return follow
......@@ -354,7 +354,8 @@ class UndoFollowSerializer(serializers.Serializer):
type = serializers.ChoiceField(choices=["Undo"])
def validate_actor(self, v):
expected = self.context.get("follow_target")
expected = self.context.get("actor")
if expected and expected.fid != v:
raise serializers.ValidationError("Invalid actor")
try:
......@@ -366,11 +367,19 @@ class UndoFollowSerializer(serializers.Serializer):
# we ensure the accept actor actually match the follow actor
if validated_data["actor"] != validated_data["object"]["actor"]:
raise serializers.ValidationError("Actor mismatch")
target = validated_data["object"]["object"]
if target._meta.label == "music.Library":
follow_class = models.LibraryFollow
else:
follow_class = models.Follow
try:
validated_data["follow"] = models.Follow.objects.filter(
actor=validated_data["actor"], target=validated_data["object"]["object"]
validated_data["follow"] = follow_class.objects.filter(
actor=validated_data["actor"], target=target
).get()
except models.Follow.DoesNotExist:
except follow_class.DoesNotExist:
raise serializers.ValidationError("No follow to remove")
return validated_data
......@@ -545,7 +554,7 @@ class LibrarySerializer(PaginatedCollectionSerializer):
"summary": library.description,
"page_size": 100,
"actor": library.actor,
"items": library.uploads.filter(import_status="finished"),
"items": library.uploads.for_federation(),
"type": "Library",
}
r = super().to_representation(conf)
......@@ -599,9 +608,10 @@ class CollectionPageSerializer(serializers.Serializer):
raw_items = [item_serializer(data=i, context=self.context) for i in v]
valid_items = []
for i in raw_items:
if i.is_valid():
try:
i.is_valid(raise_exception=True)
valid_items.append(i)
else:
except serializers.ValidationError:
logger.debug("Invalid item %s: %s", i.data, i.errors)
return valid_items
......
......@@ -191,7 +191,7 @@ class MusicLibraryViewSet(
authentication_classes = [authentication.SignatureAuthentication]
permission_classes = []
renderer_classes = [renderers.ActivityPubRenderer]
serializer_class = serializers.PaginatedCollectionSerializer
serializer_class = serializers.LibrarySerializer
queryset = music_models.Library.objects.all().select_related("actor")
lookup_field = "uuid"
......@@ -203,7 +203,7 @@ class MusicLibraryViewSet(
"actor": lb.actor,
"name": lb.name,
"summary": lb.description,
"items": lb.uploads.order_by("-creation_date"),
"items": lb.uploads.for_federation().order_by("-creation_date"),
"item_serializer": serializers.UploadSerializer,
}
page = request.GET.get("page")
......
......@@ -78,9 +78,37 @@ class UploadAdmin(admin.ModelAdmin):
list_filter = ["mimetype", "import_status", "library__privacy_level"]
def launch_scan(modeladmin, request, queryset):
    """Schedule a forced scan for every library in ``queryset``.

    Each scan is scheduled on behalf of ``request.user.actor`` with
    ``force=True``.
    """
    for selected_library in queryset:
        selected_library.schedule_scan(actor=request.user.actor, force=True)


launch_scan.short_description = "Launch scan"
@admin.register(models.Library)
class LibraryAdmin(admin.ModelAdmin):
    """Admin configuration for ``Library``, including the scan action."""

    list_display = ["id", "name", "actor", "uuid", "privacy_level", "creation_date"]
    list_select_related = True
    search_fields = ["actor__username", "name", "description"]
    list_filter = ["privacy_level"]
    # ``launch_scan`` is the module-level function that schedules a forced
    # scan for each selected library.
    actions = [launch_scan]
@admin.register(models.LibraryScan)
class LibraryScanAdmin(admin.ModelAdmin):
    """Admin configuration for ``LibraryScan``."""

    # Each column is listed exactly once; "status" used to appear twice.
    list_display = [
        "id",
        "library",
        "actor",
        "status",
        "creation_date",
        "modification_date",
        "total_files",
        "processed_files",
        "errored_files",
    ]
    list_select_related = True
    search_fields = ["actor__username", "library__name"]
    list_filter = ["status"]
import datetime
import logging
import mimetypes
import os
import tempfile
import uuid
......@@ -553,25 +554,8 @@ class Track(APIModelMixin):
class UploadQuerySet(models.QuerySet):
def playable_by(self, actor, include=True):
    """Filter uploads by whether ``actor`` can see their library.

    Library visibility is delegated to ``Library.objects.viewable_by`` so
    the privacy rules are defined in one place only.

    With ``include=True`` (the default) only uploads from viewable
    libraries are returned; with ``include=False`` those uploads are
    excluded instead.
    """
    libraries = Library.objects.viewable_by(actor)
    if include:
        return self.filter(library__in=libraries)
    return self.exclude(library__in=libraries)
......@@ -579,6 +563,9 @@ class UploadQuerySet(models.QuerySet):
def local(self, include=True):
    """Exclude uploads according to whether their library's actor has a user.

    With ``include=True`` uploads whose library actor has no user are
    excluded; with ``include=False`` uploads whose library actor has a
    user are excluded.
    """
    lookup = {"library__actor__user__isnull": include}
    return self.exclude(**lookup)
def for_federation(self):
    """Keep only uploads whose import finished and whose mimetype is audio."""
    criteria = {
        "import_status": "finished",
        "mimetype__startswith": "audio/",
    }
    return self.filter(**criteria)
TRACK_FILE_IMPORT_STATUS_CHOICES = (
("pending", "Pending"),
......@@ -731,8 +718,11 @@ class Upload(models.Model):
}
def save(self, **kwargs):
if not self.mimetype and self.audio_file:
self.mimetype = utils.guess_mimetype(self.audio_file)
if not self.mimetype:
if self.audio_file:
self.mimetype = utils.guess_mimetype(self.audio_file)
elif self.source and self.source.startswith("file://"):
self.mimetype = mimetypes.guess_type(self.source)[0]
if not self.size and self.audio_file:
self.size = self.audio_file.size
if not self.pk and not self.fid and self.library.actor.is_local:
......@@ -869,6 +859,24 @@ class LibraryQuerySet(models.QuerySet):
)
)
def viewable_by(self, actor):
    """Restrict this queryset to the libraries ``actor`` is allowed to see.

    With ``actor=None`` only libraries whose ``privacy_level`` is
    ``"everyone"`` are kept. Otherwise a library is kept when any of the
    following holds:

    - its ``privacy_level`` is ``"me"`` and it belongs to ``actor``;
    - its ``privacy_level`` is ``"instance"`` and its actor has the same
      domain as ``actor``;
    - its ``privacy_level`` is ``"everyone"``;
    - ``actor`` has an approved ``LibraryFollow`` targeting it.

    The filter is applied to ``self`` rather than to a fresh
    ``Library.objects`` queryset, so conditions already set on the
    receiver are preserved when the call is chained.
    """
    if actor is None:
        return self.filter(privacy_level="everyone")

    # Function-scope import kept from the original code (presumably to
    # avoid a circular import with the federation app -- confirm).
    from funkwhale_api.federation.models import LibraryFollow

    me_query = models.Q(privacy_level="me", actor=actor)
    instance_query = models.Q(privacy_level="instance", actor__domain=actor.domain)
    followed_libraries = LibraryFollow.objects.filter(
        actor=actor, approved=True
    ).values_list("target", flat=True)
    return self.filter(
        me_query
        | instance_query
        | models.Q(privacy_level="everyone")
        | models.Q(pk__in=followed_libraries)
    )
class Library(federation_models.FederationMixin):
uuid = models.UUIDField(unique=True, db_index=True, default=uuid.uuid4)
......@@ -904,14 +912,20 @@ class Library(federation_models.FederationMixin):
return True
return False
def schedule_scan(self):
latest_scan = self.scans.order_by("-creation_date").first()
def schedule_scan(self, actor, force=False):
latest_scan = (
self.scans.exclude(status="errored").order_by("-creation_date").first()
)
delay_between_scans = datetime.timedelta(seconds=3600 * 24)
now = timezone.now()
if latest_scan and latest_scan.creation_date + delay_between_scans > now:
if (
not force
and latest_scan
and latest_scan.creation_date + delay_between_scans > now
):
return
scan = self.scans.create(total_files=self.uploads_count)
scan = self.scans.create(total_files=self.uploads_count, actor=actor)
from . import tasks
common_utils.on_commit(tasks.start_library_scan.delay, library_scan_id=scan.pk)
......@@ -921,6 +935,7 @@ class Library(federation_models.FederationMixin):
# Statuses of a library scan, as (value, label) pairs.
# "errored" is the status stored by start_library_scan when fetching the
# remote library data raises; schedule_scan ignores errored scans when it
# looks up the latest scan.
SCAN_STATUS = [
    ("pending", "pending"),
    ("scanning", "scanning"),
    ("errored", "errored"),
    ("finished", "finished"),
]
......
......@@ -29,7 +29,6 @@ logger = logging.getLogger(__name__)
def update_album_cover(album, source=None, cover_data=None, replace=False):
if album.cover and not replace:
return
if cover_data:
return album.get_image(data=cover_data)
......@@ -118,17 +117,17 @@ def import_batch_notify_followers(import_batch):
activity.deliver(create, on_behalf_of=library_actor, to=[f.url])
@celery.app.task(
name="music.start_library_scan",
retry_backoff=60,
max_retries=5,
autoretry_for=[RequestException],
)
@celery.app.task(name="music.start_library_scan")
@celery.require_instance(
models.LibraryScan.objects.select_related().filter(status="pending"), "library_scan"
)
def start_library_scan(library_scan):
data = lb.get_library_data(library_scan.library.fid, actor=library_scan.actor)
try:
data = lb.get_library_data(library_scan.library.fid, actor=library_scan.actor)
except Exception:
library_scan.status = "errored"
library_scan.save(update_fields=["status", "modification_date"])
raise
library_scan.modification_date = timezone.now()
library_scan.status = "scanning"
library_scan.total_files = data["totalItems"]
......@@ -152,10 +151,6 @@ def scan_library_page(library_scan, page_url):
for item_serializer in data["items"]:
upload = item_serializer.save(library=library_scan.library)
if upload.import_status == "pending" and not upload.track:
# this track is not matched to any musicbrainz or other musical
# metadata
process_upload.delay(upload_id=upload.pk)
uploads.append(upload)
library_scan.processed_files = F("processed_files") + len(uploads)
......
......@@ -271,6 +271,15 @@ def media_root(settings):
shutil.rmtree(tmp_dir)
@pytest.fixture(autouse=True)
def disabled_musicbrainz(mocker):
    """Autouse fixture that makes MusicBrainz reads raise during tests.

    ``musicbrainzngs.musicbrainz._safe_read`` is patched to raise, and the
    resulting mock is yielded to the test.
    """
    # we ensure no music brainz requests gets out
    network_disabled = Exception("Disabled network calls")
    patched_read = mocker.patch(
        "musicbrainzngs.musicbrainz._safe_read",
        side_effect=network_disabled,
    )
    yield patched_read
@pytest.fixture(autouse=True)
def r_mock(requests_mock):
"""
......
import pytest
from funkwhale_api.federation import api_serializers
from funkwhale_api.federation import serializers
......@@ -14,6 +16,7 @@ def test_library_serializer(factories):
"uploads_count": library.uploads_count,
"privacy_level": library.privacy_level,
"follow": None,
"latest_scan": None,
}
serializer = api_serializers.LibrarySerializer(library)
......@@ -21,6 +24,16 @@ def test_library_serializer(factories):
assert serializer.data == expected
def test_library_serializer_latest_scan(factories):
    """The ``latest_scan`` field holds the serialized scan of the library."""
    library = factories["music.Library"](uploads_count=5678)
    scan = factories["music.LibraryScan"](library=library)
    library.latest_scans = [scan]

    library_data = api_serializers.LibrarySerializer(library).data
    expected = api_serializers.LibraryScanSerializer(scan).data

    assert library_data["latest_scan"] == expected
def test_library_serializer_with_follow(factories):
library = factories["music.Library"](uploads_count=5678)
follow = factories["federation.LibraryFollow"](target=library)
......@@ -36,6 +49,7 @@ def test_library_serializer_with_follow(factories):
"uploads_count": library.uploads_count,
"privacy_level": library.privacy_level,
"follow": api_serializers.NestedLibraryFollowSerializer(follow).data,
"latest_scan": None,
}
serializer = api_serializers.LibrarySerializer(library)
......@@ -43,7 +57,7 @@ def test_library_serializer_with_follow(factories):
assert serializer.data == expected
def test_library_serializer_validates_existing_follow(factories):
def test_library_follow_serializer_validates_existing_follow(factories):
follow = factories["federation.LibraryFollow"]()
serializer = api_serializers.LibraryFollowSerializer(
data={"target": follow.target.uuid}, context={"actor": follow.actor}
......@@ -53,6 +67,16 @@ def test_library_serializer_validates_existing_follow(factories):
assert "target" in serializer.errors
def test_library_follow_serializer_do_not_allow_own_library(factories):
    """``validate_target`` must refuse a library owned by the requesting actor."""
    actor = factories["federation.Actor"]()
    library = factories["music.Library"](actor=actor)
    serializer = api_serializers.LibraryFollowSerializer(context={"actor": actor})

    with pytest.raises(api_serializers.serializers.ValidationError) as e:
        serializer.validate_target(library)

    # ``e`` is pytest's ExceptionInfo wrapper; the raised error is
    # ``e.value``, so the message is checked on that object.
    assert "own library" in str(e.value)
def test_manage_upload_action_read(factories):
ii = factories["federation.InboxItem"]()
s = api_serializers.InboxItemActionSerializer(queryset=None)
......
......@@ -20,12 +20,12 @@ def test_user_can_list_their_library_follows(factories, logged_in_api_client):
assert response.data["results"][0]["uuid"] == str(follow.uuid)
def test_user_can_scan_library_using_url(mocker, factories, logged_in_api_client):
def test_user_can_fetch_library_using_url(mocker, factories, logged_in_api_client):
library = factories["music.Library"]()
mocked_retrieve = mocker.patch(
"funkwhale_api.federation.utils.retrieve", return_value=library
)
url = reverse("api:v1:federation:libraries-scan")
url = reverse("api:v1:federation:libraries-fetch")
response = logged_in_api_client.post(url, {"fid": library.fid})
assert mocked_retrieve.call_count == 1
args = mocked_retrieve.call_args
......@@ -36,6 +36,22 @@ def test_user_can_scan_library_using_url(mocker, factories, logged_in_api_client
assert response.data["results"] == [api_serializers.LibrarySerializer(library).data]
def test_user_can_schedule_library_scan(mocker, factories, logged_in_api_client):
    """POSTing to the scan route of a library schedules a scan for the user's actor."""
    actor = logged_in_api_client.user.create_actor()
    library = factories["music.Library"](privacy_level="everyone")

    # schedule_scan is patched so that only the call made by the view is checked.
    schedule_scan = mocker.patch(
        "funkwhale_api.music.models.Library.schedule_scan", return_value=True
    )
    # The scan route is addressed with the library uuid.
    url = reverse("api:v1:federation:libraries-scan", kwargs={"uuid": library.uuid})

    response = logged_in_api_client.post(url)

    assert response.status_code == 200

    schedule_scan.assert_called_once_with(actor=actor)
def test_can_follow_library(factories, logged_in_api_client, mocker):
dispatch = mocker.patch("funkwhale_api.federation.routes.outbox.dispatch")
actor = logged_in_api_client.user.create_actor()
......@@ -53,6 +69,24 @@ def test_can_follow_library(factories, logged_in_api_client, mocker):
dispatch.assert_called_once_with({"type": "Follow"}, context={"follow": follow})
def test_can_undo_library_follow(factories, logged_in_api_client, mocker):
    """DELETE on a follow dispatches an Undo/Follow activity and deletes the follow."""
    dispatch = mocker.patch("funkwhale_api.federation.routes.outbox.dispatch")
    actor = logged_in_api_client.user.create_actor()
    follow = factories["federation.LibraryFollow"](actor=actor)
    # delete() is patched on the follow's class, after the follow was created.
    delete = mocker.patch.object(follow.__class__, "delete")
    url = reverse(
        "api:v1:federation:library-follows-detail", kwargs={"uuid": follow.uuid}
    )

    response = logged_in_api_client.delete(url)

    assert response.status_code == 204

    delete.assert_called_once_with()
    dispatch.assert_called_once_with(
        {"type": "Undo", "object": {"type": "Follow"}}, context={"follow": follow}
    )
@pytest.mark.parametrize("action", ["accept", "reject"])
def test_user_cannot_edit_someone_else_library_follow(
factories, logged_in_api_client, action
......
......@@ -11,6 +11,7 @@ from funkwhale_api.federation import routes, serializers
({"type": "Create", "object.type": "Audio"}, routes.inbox_create_audio),
({"type": "Delete", "object.type": "Library"}, routes.inbox_delete_library),
({"type": "Delete", "object.type": "Audio"}, routes.inbox_delete_audio),
({"type": "Undo", "object.type": "Follow"}, routes.inbox_undo_follow),
],
)
def test_inbox_routes(route, handler):
......@@ -30,6 +31,7 @@ def test_inbox_routes(route, handler):
({"type": "Create", "object.type": "Audio"}, routes.outbox_create_audio),
({"type": "Delete", "object.type": "Library"}, routes.outbox_delete_library),
({"type": "Delete", "object.type": "Audio"}, routes.outbox_delete_audio),
({"type": "Undo", "object.type": "Follow"}, routes.outbox_undo_follow),
],
)
def test_outbox_routes(route, handler):
......@@ -148,7 +150,7 @@ def test_inbox_accept(factories, mocker):
follow.refresh_from_db()
assert follow.approved is True
mocked_scan.assert_called_once_with()
mocked_scan.assert_called_once_with(actor=follow.actor)
def test_outbox_follow_library(factories, mocker):
......@@ -311,3 +313,43 @@ def test_outbox_delete_audio(factories):
assert dict(activity["payload"]) == dict(expected)
assert activity["actor"] == upload.library.actor
def test_inbox_delete_follow_library(factories):
    """Routing an Undo/Follow payload through the inbox removes the library follow."""
    local_actor = factories["users.User"]().create_actor()
    remote_actor = factories["federation.Actor"]()
    follow = factories["federation.LibraryFollow"](
        actor=local_actor, target__actor=remote_actor, approved=True
    )
    assert follow.approved is True

    # The payload handed to the route is the serialized form of the follow.
    serializer = serializers.UndoFollowSerializer(
        follow, context={"actor": local_actor}
    )
    ii = factories["federation.InboxItem"](actor=local_actor)
    # raise_exception=True makes an invalid payload raise instead of being
    # logged and discarded by the route.
    routes.inbox_undo_follow(
        serializer.data,
        context={"actor": local_actor, "inbox_items": [ii], "raise_exception": True},
    )

    # The follow row must be gone after the route ran.
    with pytest.raises(follow.__class__.DoesNotExist):
        follow.refresh_from_db()
def test_outbox_delete_follow_library(factories):
    """The outbox Undo/Follow route yields an activity addressed to the library's actor."""
    remote_actor = factories["federation.Actor"]()
    local_actor = factories["federation.Actor"](local=True)
    follow = factories["federation.LibraryFollow"](
        actor=local_actor, target__actor=remote_actor
    )

    # outbox_undo_follow is a generator; only its first activity is checked.
    activity = list(routes.outbox_undo_follow({"follow": follow}))[0]

    serializer = serializers.UndoFollowSerializer(
        follow, context={"actor": follow.actor}
    )
    expected = serializer.data
    # For a library follow the recipient is the actor of the followed library.
    expected["to"] = [follow.target.actor]

    assert activity["payload"] == expected
    assert activity["actor"] == follow.actor
    assert activity["object"] == follow
    assert activity["related_object"] == follow.target
......@@ -736,7 +736,7 @@ def test_activity_pub_track_serializer_from_ap(factories, r_mock):
assert album_artist.creation_date == published
def test_activity_pub_upload_serializer_from_ap(factories, mocker):
def test_activity_pub_upload_serializer_from_ap(factories, mocker, r_mock):
activity = factories["federation.Activity"]()
library = factories["music.Library"]()
......@@ -769,6 +769,11 @@ def test_activity_pub_upload_serializer_from_ap(factories, mocker):
"musicbrainzId": str(uuid.uuid4()),
"published": published.isoformat(),
"released": released.isoformat(),
"cover": {
"type": "Link",
"href": "https://cover.image/test.png",
"mediaType": "image/png",
},
"artists": [
{
"type": "Artist",
......@@ -790,6 +795,7 @@ def test_activity_pub_upload_serializer_from_ap(factories, mocker):
],
},
}
r_mock.get(data["track"]["album"]["cover"]["href"], body=io.BytesIO(b"coucou"))
serializer = serializers.UploadSerializer(data=data, context={"activity": activity})
assert serializer.is_valid(raise_exception=True)
......
......@@ -149,7 +149,7 @@ def test_music_library_retrieve(factories, api_client, privacy_level):
def test_music_library_retrieve_page_public(factories, api_client):
library = factories["music.Library"](privacy_level="everyone")
upload = factories["music.Upload"](library=library)
upload = factories["music.Upload"](library=library, import_status="finished")
id = library.get_federation_id()
expected = serializers.CollectionPageSerializer(
{
......
......@@ -388,11 +388,12 @@ def test_library_schedule_scan(factories, now, mocker):
on_commit = mocker.patch("funkwhale_api.common.utils.on_commit")
library = factories["music.Library"](uploads_count=5)
scan = library.schedule_scan()
scan = library.schedule_scan(library.actor)
assert scan.creation_date >= now
assert scan.status == "pending"
assert scan.library == library
assert scan.actor == library.actor