Commit ec99d529 authored by Agate's avatar Agate 💬

Merge branch '170-rss-subscribe' into 'develop'

See #170: subscribe to third-party RSS feeds in Funkwhale

See merge request !1055
parents 7cae1ae5 deb1f357
Pipeline #9928 passed with stages
in 20 minutes and 58 seconds
......@@ -580,6 +580,11 @@ CELERY_BROKER_URL = env(
CELERY_TASK_DEFAULT_RATE_LIMIT = 1
CELERY_TASK_TIME_LIMIT = 300
CELERY_BEAT_SCHEDULE = {
"audio.fetch_rss_feeds": {
"task": "audio.fetch_rss_feeds",
"schedule": crontab(minute="0", hour="*"),
"options": {"expires": 60 * 60},
},
"common.prune_unattached_attachments": {
"task": "common.prune_unattached_attachments",
"schedule": crontab(minute="0", hour="*"),
......@@ -976,3 +981,11 @@ MIN_DELAY_BETWEEN_DOWNLOADS_COUNT = env.int(
MARKDOWN_EXTENSIONS = env.list("MARKDOWN_EXTENSIONS", default=["nl2br", "extra"])
LINKIFIER_SUPPORTED_TLDS = ["audio"] + env.list("LINKINFIER_SUPPORTED_TLDS", default=[])
EXTERNAL_MEDIA_PROXY_ENABLED = env.bool("EXTERNAL_MEDIA_PROXY_ENABLED", default=True)
# By default, only people who subscribe to a podcast RSS feed will have access to it.
# Switch to "instance" or "everyone" to change that.
PODCASTS_THIRD_PARTY_VISIBILITY = env("PODCASTS_THIRD_PARTY_VISIBILITY", default="me")
PODCASTS_RSS_FEED_REFRESH_DELAY = env.int(
"PODCASTS_RSS_FEED_REFRESH_DELAY", default=60 * 60 * 24
)
......@@ -109,3 +109,5 @@ ITUNES_CATEGORIES = {
"TV Reviews",
],
}
ITUNES_SUBCATEGORIES = [s for p in ITUNES_CATEGORIES.values() for s in p]
import uuid
import factory
from funkwhale_api.factories import registry, NoUpdateOnCreate
from funkwhale_api.federation import actors
from funkwhale_api.federation import factories as federation_factories
from funkwhale_api.music import factories as music_factories
......@@ -11,6 +14,10 @@ def set_actor(o):
return models.generate_actor(str(o.uuid))
def get_rss_channel_name():
    """Build a unique actor username for an RSS-backed channel.

    The result is the ``rssfeed-`` prefix followed by a freshly generated
    random UUID4.
    """
    unique_suffix = str(uuid.uuid4())
    return "rssfeed-" + unique_suffix
@registry.register
class ChannelFactory(NoUpdateOnCreate, factory.django.DjangoModelFactory):
uuid = factory.Faker("uuid4")
......@@ -32,10 +39,20 @@ class ChannelFactory(NoUpdateOnCreate, factory.django.DjangoModelFactory):
model = "audio.Channel"
class Params:
external = factory.Trait(
attributed_to=factory.LazyFunction(actors.get_service_actor),
library__privacy_level="me",
actor=factory.SubFactory(
federation_factories.ActorFactory,
local=True,
preferred_username=factory.LazyFunction(get_rss_channel_name),
),
)
local = factory.Trait(
attributed_to=factory.SubFactory(
federation_factories.ActorFactory, local=True
),
library__privacy_level="everyone",
artist__local=True,
)
......
......@@ -19,6 +19,19 @@ def empty_dict():
return {}
class ChannelQuerySet(models.QuerySet):
    """Queryset helpers for channels."""

    def external_rss(self, include=True):
        """Keep only, or drop, channels that mirror an external RSS feed.

        A channel matches when it is attributed to the service actor and its
        own actor has a username starting with ``rssfeed-``.

        With ``include=True`` (the default) matching channels are returned;
        with ``include=False`` they are excluded instead.
        """
        # function-scope import, as in the original
        # (presumably avoids a circular import -- TODO confirm)
        from funkwhale_api.federation import actors

        is_external_rss = models.Q(
            attributed_to=actors.get_service_actor(),
            actor__preferred_username__startswith="rssfeed-",
        )
        narrow = self.filter if include else self.exclude
        return narrow(is_external_rss)
class Channel(models.Model):
uuid = models.UUIDField(default=uuid.uuid4, unique=True)
artist = models.OneToOneField(
......@@ -45,6 +58,8 @@ class Channel(models.Model):
default=empty_dict, max_length=50000, encoder=DjangoJSONEncoder, blank=True
)
objects = ChannelQuerySet.as_manager()
def get_absolute_url(self):
suffix = self.uuid
if self.actor.is_local:
......@@ -54,7 +69,9 @@ class Channel(models.Model):
return federation_utils.full_url("/channels/{}".format(suffix))
def get_rss_url(self):
if not self.artist.is_local:
if not self.artist.is_local or self.actor.preferred_username.startswith(
"rssfeed-"
):
return self.rss_url
return federation_utils.full_url(
......@@ -81,5 +98,6 @@ def generate_actor(username, **kwargs):
@receiver(post_delete, sender=Channel)
def delete_channel_related_objs(instance, **kwargs):
    """Clean up the objects tied to a channel once the channel is deleted.

    Runs as a ``post_delete`` receiver for ``Channel``. The channel's library
    and artist are always deleted.
    """
    instance.library.delete()
    # Only delete the channel's actor when it is distinct from the actor the
    # channel is attributed to; an unconditional delete here would defeat
    # that guard.
    if instance.actor != instance.attributed_to:
        instance.actor.delete()
    instance.artist.delete()
......@@ -21,12 +21,16 @@ class PodcastRSSRenderer(renderers.JSONRenderer):
}
final.update(data)
tree = dict_to_xml_tree("rss", final)
return b'<?xml version="1.0" encoding="UTF-8"?>\n' + ET.tostring(
tree, encoding="utf-8"
)
return render_xml(tree)
class PodcastRSSContentNegociation(negotiation.DefaultContentNegotiation):
    """Content negotiation that always selects the podcast RSS renderer."""

    def select_renderer(self, request, renderers, format_suffix=None):
        """Return a ``(renderer, media_type)`` pair for the RSS renderer.

        The request, the candidate renderers and the format suffix are ignored.
        """
        rss_renderer = PodcastRSSRenderer()
        return (rss_renderer, PodcastRSSRenderer.media_type)
def render_xml(tree):
    """Serialize an ElementTree element to UTF-8 bytes with an XML declaration.

    The declaration line is prepended by hand to the output of
    ``ET.tostring``.
    """
    declaration = b'<?xml version="1.0" encoding="UTF-8"?>\n'
    serialized = ET.tostring(tree, encoding="utf-8")
    return declaration + serialized
This diff is collapsed.
import datetime
import logging
from django.conf import settings
from django.db import transaction
from django.utils import timezone
from funkwhale_api.taskapp import celery
from . import models
from . import serializers
logger = logging.getLogger(__name__)
@celery.app.task(name="audio.fetch_rss_feeds")
def fetch_rss_feeds():
    """Queue a refresh task for every external RSS channel that is due.

    A channel is due when its actor's ``last_fetch_date`` is at least
    ``settings.PODCASTS_RSS_FEED_REFRESH_DELAY`` seconds in the past. One
    ``fetch_rss_feed`` task is dispatched per matching feed URL.
    """
    refresh_delay = datetime.timedelta(
        seconds=settings.PODCASTS_RSS_FEED_REFRESH_DELAY
    )
    limit = timezone.now() - refresh_delay
    stale_channels = models.Channel.objects.external_rss().filter(
        actor__last_fetch_date__lte=limit
    )
    urls = stale_channels.values_list("rss_url", flat=True)
    # len() evaluates the queryset once; the loop below reuses the cached rows
    logger.info("Refreshing %s rss feeds…", len(urls))
    for url in urls:
        fetch_rss_feed.delay(rss_url=url)
@celery.app.task(name="audio.fetch_rss_feed")
@transaction.atomic
def fetch_rss_feed(rss_url):
    """Refresh the external RSS channel registered for ``rss_url``.

    The lookup is restricted to external RSS channels; when several share the
    URL, the one with the lowest id is used. If none exists, a warning is
    logged and nothing else happens. If the feed turns out to be blocked
    (``BlockedFeedException``), the channel is deleted.
    """
    channel = (
        models.Channel.objects.external_rss()
        .filter(rss_url=rss_url)
        .order_by("id")
        .first()
    )
    if not channel:
        # Logger.warn() is a deprecated alias; warning() is the supported call
        logger.warning("Cannot refresh non external feed")
        return

    try:
        serializers.get_channel_from_rss_url(rss_url)
    except serializers.BlockedFeedException:
        # channel was blocked since last fetch, let's delete it
        logger.info("Deleting blocked channel linked to %s", rss_url)
        channel.delete()
......@@ -8,12 +8,12 @@ from rest_framework import viewsets
from django import http
from django.db import transaction
from django.db.models import Count, Prefetch, Q
from django.db.utils import IntegrityError
from funkwhale_api.common import locales
from funkwhale_api.common import permissions
from funkwhale_api.common import preferences
from funkwhale_api.common.mixins import MultipleLookupDetailMixin
from funkwhale_api.federation import actors
from funkwhale_api.federation import models as federation_models
from funkwhale_api.federation import routes
from funkwhale_api.federation import utils as federation_utils
......@@ -100,17 +100,19 @@ class ChannelViewSet(
)
def subscribe(self, request, *args, **kwargs):
object = self.get_object()
subscription = federation_models.Follow(
target=object.actor, approved=True, actor=request.user.actor,
)
subscription = federation_models.Follow(actor=request.user.actor)
subscription.fid = subscription.get_federation_id()
try:
subscription.save()
except IntegrityError:
# there's already a subscription for this actor/channel
subscription = object.actor.received_follows.filter(
actor=request.user.actor
).get()
subscription, created = SubscriptionsViewSet.queryset.get_or_create(
target=object.actor,
actor=request.user.actor,
defaults={
"approved": True,
"fid": subscription.fid,
"uuid": subscription.uuid,
},
)
# prefetch stuff
subscription = SubscriptionsViewSet.queryset.get(pk=subscription.pk)
data = serializers.SubscriptionSerializer(subscription).data
return response.Response(data, status=201)
......@@ -135,6 +137,10 @@ class ChannelViewSet(
if not object.attributed_to.is_local:
return response.Response({"detail": "Not found"}, status=404)
if object.attributed_to == actors.get_service_actor():
# external feed, we redirect to the canonical one
return http.HttpResponseRedirect(object.rss_url)
uploads = (
object.library.uploads.playable_by(None)
.prefetch_related(
......@@ -170,6 +176,49 @@ class ChannelViewSet(
}
return response.Response(data)
@decorators.action(
    methods=["post"],
    detail=False,
    url_path="rss-subscribe",
    url_name="rss_subscribe",
)
@transaction.atomic
def rss_subscribe(self, request, *args, **kwargs):
    """Subscribe the requesting user's actor to the channel behind an RSS URL.

    The payload is validated with ``RssSubscribeSerializer``. An existing
    channel with that ``rss_url`` is reused (lowest id first); otherwise the
    channel is built from the feed. The follow is created only if the actor
    does not already follow the channel's actor.

    Responds with 400 when the payload is invalid or the feed cannot be
    fetched, and with 201 plus the serialized subscription otherwise.
    """
    serializer = serializers.RssSubscribeSerializer(data=request.data)
    if not serializer.is_valid():
        return response.Response(serializer.errors, status=400)

    feed_url = serializer.validated_data["url"]
    channel = models.Channel.objects.filter(rss_url=feed_url).order_by("id").first()
    if not channel:
        # try to retrieve the channel via its URL and create it
        try:
            channel, _uploads = serializers.get_channel_from_rss_url(feed_url)
        except serializers.FeedFetchException as e:
            return response.Response({"detail": str(e)}, status=400)

    follower = request.user.actor
    # unsaved Follow, used only to compute the fid/uuid defaults
    draft = federation_models.Follow(actor=follower)
    draft.fid = draft.get_federation_id()
    follow_defaults = {
        "approved": True,
        "fid": draft.fid,
        "uuid": draft.uuid,
    }
    subscription, _created = SubscriptionsViewSet.queryset.get_or_create(
        target=channel.actor, actor=follower, defaults=follow_defaults
    )
    # prefetch stuff
    subscription = SubscriptionsViewSet.queryset.get(pk=subscription.pk)
    payload = serializers.SubscriptionSerializer(subscription).data
    return response.Response(payload, status=201)
def get_serializer_context(self):
context = super().get_serializer_context()
context["subscriptions_count"] = self.action in [
......
......@@ -310,13 +310,21 @@ def render_plain_text(html):
return bleach.clean(html, tags=[], strip=True)
def same_content(old, text=None, content_type=None):
    """Tell whether ``old`` already holds the given text and content type.

    ``old`` is any object exposing ``text`` and ``content_type`` attributes.
    """
    if old.text != text:
        return False
    return old.content_type == content_type
@transaction.atomic
def attach_content(obj, field, content_data):
from . import models
content_data = content_data or {}
existing = getattr(obj, "{}_id".format(field))
if existing:
if same_content(getattr(obj, field), **content_data):
# optimization to avoid a delete/save if possible
return getattr(obj, field)
getattr(obj, field).delete()
setattr(obj, field, None)
......@@ -376,3 +384,15 @@ def attach_file(obj, field, file_data, fetch=False):
setattr(obj, field, attachment)
obj.save(update_fields=[field])
return attachment
def get_mimetype_from_ext(path):
    """Guess an image mimetype from the text after the last dot of ``path``.

    Matching is case-insensitive. Returns ``None`` when that text is not one
    of jpeg, jpg, png or gif. A path with no dot is matched as a whole.
    """
    known_types = {
        "jpeg": "image/jpeg",
        "jpg": "image/jpeg",
        "png": "image/png",
        "gif": "image/gif",
    }
    _, _, ext = path.lower().rpartition(".")
    return known_types.get(ext)
......@@ -163,6 +163,10 @@ class AttachmentViewSet(
@transaction.atomic
def proxy(self, request, *args, **kwargs):
instance = self.get_object()
if not settings.EXTERNAL_MEDIA_PROXY_ENABLED:
r = response.Response(status=302)
r["Location"] = instance.url
return r
size = request.GET.get("next", "original").lower()
if size not in ["original", "medium_square_crop"]:
......
......@@ -42,21 +42,32 @@ def get_actor(fid, skip_cache=False):
return serializer.save(last_fetch_date=timezone.now())
# Module-level memo for the service actor, stored under "service_actor".
_CACHE = {}


def get_service_actor(cache=True):
    """Return the instance's service actor, creating it if it does not exist.

    The actor is looked up by ``settings.FEDERATION_SERVICE_ACTOR_USERNAME``
    and ``settings.FEDERATION_HOSTNAME``. When missing, it is created with a
    freshly generated key pair and the ``Service`` type.

    ``cache``: when true (the default), a previously memoized actor is
    returned without querying the database. The result is stored in
    ``_CACHE`` in every case.
    """
    if cache and "service_actor" in _CACHE:
        return _CACHE["service_actor"]

    name, domain = (
        settings.FEDERATION_SERVICE_ACTOR_USERNAME,
        settings.FEDERATION_HOSTNAME,
    )
    try:
        actor = models.Actor.objects.select_related().get(
            preferred_username=name, domain__name=domain
        )
    except models.Actor.DoesNotExist:
        # fall through to the creation path below
        pass
    else:
        _CACHE["service_actor"] = actor
        return actor

    args = users_models.get_actor_data(name)
    private, public = keys.get_key_pair()
    args["private_key"] = private.decode("utf-8")
    args["public_key"] = public.decode("utf-8")
    args["type"] = "Service"
    actor = models.Actor.objects.create(**args)
    _CACHE["service_actor"] = actor
    return actor
......@@ -311,6 +311,7 @@ def fetch(fetch_obj):
auth = signing.get_auth(actor.private_key, actor.private_key_id)
else:
auth = None
auth = None
try:
if url.startswith("webfinger://"):
# we first grab the corresponding webfinger representation
......
......@@ -13,7 +13,16 @@ from funkwhale_api.moderation import models as moderation_models
from funkwhale_api.music import models as music_models
from funkwhale_api.music import utils as music_utils
from . import activity, authentication, models, renderers, serializers, utils, webfinger
from . import (
actors,
activity,
authentication,
models,
renderers,
serializers,
utils,
webfinger,
)
def redirect_to_html(public_url):
......@@ -61,6 +70,10 @@ class ActorViewSet(FederationMixin, mixins.RetrieveModelMixin, viewsets.GenericV
queryset = models.Actor.objects.local().select_related("user")
serializer_class = serializers.ActorSerializer
def get_queryset(self):
    """Return the parent queryset without actors of service-actor channels.

    Actors whose channel is attributed to the service actor are excluded.
    """
    base = super().get_queryset()
    service_actor = actors.get_service_actor()
    return base.exclude(channel__attributed_to=service_actor)
def retrieve(self, request, *args, **kwargs):
instance = self.get_object()
if utils.should_redirect_ap_to_html(request.headers.get("accept")):
......
......@@ -23,6 +23,13 @@ class TrackAdmin(admin.ModelAdmin):
list_select_related = ["album__artist", "artist"]
@admin.register(models.TrackActor)
class TrackActorAdmin(admin.ModelAdmin):
    """Admin configuration for ``TrackActor`` rows."""

    # columns shown in the change list
    list_display = [
        "actor",
        "track",
        "upload",
        "internal",
    ]
    # fields matched by the admin search box
    search_fields = [
        "actor__preferred_username",
        "track__name",
    ]
    # relations joined when loading the change list
    list_select_related = [
        "actor",
        "track",
    ]
@admin.register(models.ImportBatch)
class ImportBatchAdmin(admin.ModelAdmin):
list_display = ["submitted_by", "creation_date", "import_request", "status"]
......
......@@ -786,9 +786,13 @@ class Upload(models.Model):
with remote_response as r:
remote_response.raise_for_status()
extension = utils.get_ext_from_type(self.mimetype)
title = " - ".join(
[self.track.title, self.track.album.title, self.track.artist.name]
)
title_parts = []
title_parts.append(self.track.title)
if self.track.album:
title_parts.append(self.track.album.title)
title_parts.append(self.track.artist.name)
title = " - ".join(title_parts)
filename = "{}.{}".format(title, extension)
tmp_file = tempfile.TemporaryFile()
for chunk in r.iter_content(chunk_size=512):
......@@ -1126,7 +1130,7 @@ class LibraryQuerySet(models.QuerySet):
)
def viewable_by(self, actor):
from funkwhale_api.federation.models import LibraryFollow
from funkwhale_api.federation.models import LibraryFollow, Follow
if actor is None:
return self.filter(privacy_level="everyone")
......@@ -1136,11 +1140,17 @@ class LibraryQuerySet(models.QuerySet):
followed_libraries = LibraryFollow.objects.filter(
actor=actor, approved=True
).values_list("target", flat=True)
followed_channels_libraries = (
Follow.objects.exclude(target__channel=None)
.filter(actor=actor, approved=True,)
.values_list("target__channel__library", flat=True)
)
return self.filter(
me_query
| instance_query
| models.Q(privacy_level="everyone")
| models.Q(pk__in=followed_libraries)
| models.Q(pk__in=followed_channels_libraries)
)
......@@ -1174,7 +1184,7 @@ class Library(federation_models.FederationMixin):
return "/library/{}".format(self.uuid)
def save(self, **kwargs):
if not self.pk and not self.fid and self.actor.get_user():
if not self.pk and not self.fid and self.actor.is_local:
self.fid = self.get_federation_id()
self.followers_url = self.fid + "/followers"
......@@ -1266,7 +1276,11 @@ class TrackActor(models.Model):
).values_list("id", "track")
objs = []
if library.privacy_level == "me":
follow_queryset = library.received_follows.filter(approved=True).exclude(
if library.get_channel():
follow_queryset = library.channel.actor.received_follows
else:
follow_queryset = library.received_follows
follow_queryset = follow_queryset.filter(approved=True).exclude(
actor__user__isnull=True
)
if actor_ids:
......
......@@ -79,3 +79,4 @@ click>=7,<8
service_identity==18.1.0
markdown>=3.2,<4
bleach>=3,<4
feedparser==6.0.0b3
......@@ -31,3 +31,4 @@ env =
FUNKWHALE_SPA_HTML_ROOT=http://noop/
PROXY_MEDIA=true
MUSIC_USE_DENORMALIZATION=true
EXTERNAL_MEDIA_PROXY_ENABLED=true
This diff is collapsed.
......@@ -11,7 +11,7 @@ from funkwhale_api.music import serializers
@pytest.mark.parametrize("attribute", ["uuid", "actor.full_username"])
def test_channel_detail(attribute, spa_html, no_api_auth, client, factories, settings):
channel = factories["audio.Channel"]()
channel = factories["audio.Channel"](library__privacy_level="everyone")
factories["music.Upload"](playable=True, library=channel.library)
url = "/channels/{}".format(utils.recursive_getattr(channel, attribute))
detail_url = "/channels/{}".format(channel.actor.full_username)
......
import datetime
import pytest
from funkwhale_api.audio import tasks
def test_fetch_rss_feeds(factories, settings, now, mocker):
    """Only external channels with an old enough last fetch get a refresh queued."""
    settings.PODCASTS_RSS_FEED_REFRESH_DELAY = 5
    refresh_delay = datetime.timedelta(
        seconds=settings.PODCASTS_RSS_FEED_REFRESH_DELAY
    )
    prunable_date = now - refresh_delay
    fetch_rss_feed = mocker.patch.object(tasks.fetch_rss_feed, "delay")
    make_channel = factories["audio.Channel"]

    # recent, not fetched
    make_channel(actor__last_fetch_date=now, external=True)
    # old but not external, not fetched
    make_channel(actor__last_fetch_date=prunable_date)
    # old and external, fetched !
    stale_channels = [
        make_channel(actor__last_fetch_date=prunable_date, external=True)
        for _ in range(2)
    ]

    tasks.fetch_rss_feeds()

    assert fetch_rss_feed.call_count == 2
    for stale in stale_channels:
        fetch_rss_feed.assert_any_call(rss_url=stale.rss_url)
def test_fetch_rss_feed(factories, mocker):
    """Refreshing an external channel fetches its feed URL exactly once."""
    external_channel = factories["audio.Channel"](external=True)
    fetcher = mocker.patch.object(tasks.serializers, "get_channel_from_rss_url")

    tasks.fetch_rss_feed(external_channel.rss_url)

    fetcher.assert_called_once_with(external_channel.rss_url)
def test_fetch_rss_feed_blocked_is_deleted(factories, mocker):
    """A channel whose feed is reported as blocked is deleted by the refresh."""
    blocked_channel = factories["audio.Channel"](external=True)
    blocked_error = tasks.serializers.BlockedFeedException()
    mocker.patch.object(
        tasks.serializers, "get_channel_from_rss_url", side_effect=blocked_error
    )

    tasks.fetch_rss_feed(blocked_channel.rss_url)

    with pytest.raises(blocked_channel.DoesNotExist):
        blocked_channel.refresh_from_db()
......@@ -251,6 +251,19 @@ def test_channel_rss_feed(factories, api_client, preferences):
assert response["Content-Type"] == "application/rss+xml"
def test_channel_rss_feed_redirects_for_external(factories, api_client, preferences):
    """The RSS endpoint of an external channel redirects to its original feed."""
    preferences["common__api_authentication_required"] = False
    external_channel = factories["audio.Channel"](external=True)
    factories["music.Upload"](library=external_channel.library, playable=True)

    feed_url = reverse(
        "api:v1:channels-rss", kwargs={"composite": external_channel.uuid}
    )
    response = api_client.get(feed_url)

    assert response.status_code == 302
    assert response["Location"] == external_channel.rss_url
def test_channel_rss_feed_remote(factories, api_client, preferences):
preferences["common__api_authentication_required"] = False
channel = factories["audio.Channel"]()
......@@ -291,3 +304,65 @@ def test_channel_metadata_choices(factories, api_client):
assert response.status_code == 200
assert response.data == expected
def test_subscribe_to_rss_feed_existing_channel(
    factories, logged_in_api_client, mocker
):
    """Subscribing to a known feed URL follows the existing external channel."""
    actor = logged_in_api_client.user.create_actor()
    rss_url = "http://example.test/rss.url"
    channel = factories["audio.Channel"](rss_url=rss_url, external=True)

    endpoint = reverse("api:v1:channels-rss_subscribe")
    response = logged_in_api_client.post(endpoint, {"url": rss_url})

    assert response.status_code == 201

    related = (
        "target__channel__artist__description",
        "target__channel__artist__attachment_cover",
    )
    subscription = actor.emitted_follows.select_related(*related).latest("id")

    assert subscription.target == channel.actor
    assert subscription.approved is True
    assert subscription.fid == subscription.get_federation_id()

    # set the private attributes by hand before serializing the expected
    # payload (presumably annotations the view's queryset provides -- TODO confirm)
    artist = subscription.target.channel.artist
    artist._tracks_count = 0
    artist._prefetched_tagged_items = []

    expected = serializers.SubscriptionSerializer(subscription).data
    assert response.data == expected
def test_subscribe_to_rss_feed_existing_subscription(
    factories, logged_in_api_client, mocker
):
    """Subscribing twice to the same feed does not create a second follow."""
    actor = logged_in_api_client.user.create_actor()
    rss_url = "http://example.test/rss.url"
    channel = factories["audio.Channel"](rss_url=rss_url, external=True)
    # the subscription already exists before the request is made
    factories["federation.Follow"](target=channel.actor, approved=True, actor=actor)

    endpoint = reverse("api:v1:channels-rss_subscribe")
    response = logged_in_api_client.post(endpoint, {"url": rss_url})

    assert response.status_code == 201
    assert channel.actor.received_follows.count() == 1
def test_subscribe_to_rss_creates_channel(factories, logged_in_api_client, mocker):
    """An unknown feed URL is resolved through ``get_channel_from_rss_url``."""
    logged_in_api_client.user.create_actor()
    rss_url = "http://example.test/rss.url"
    # this channel has a different rss_url, so the view has to call the
    # (mocked) feed fetcher
    fetched_channel = factories["audio.Channel"]()
    fetcher = mocker.patch.object(
        serializers,
        "get_channel_from_rss_url",
        return_value=(fetched_channel, []),
    )

    endpoint = reverse("api:v1:channels-rss_subscribe")
    response = logged_in_api_client.post(endpoint, {"url": rss_url})

    assert response.status_code == 201
    assert response.data["channel"]["uuid"] == fetched_channel.uuid
    fetcher.assert_called_once_with(rss_url)
......@@ -87,6 +87,8 @@ def cache():
"""
yield django_cache
django_cache.clear()
if "service_actor" in actors._CACHE:
del actors._CACHE["service_actor"]
@pytest.fixture(autouse=True)
......
......@@ -1444,7 +1444,7 @@ def test_channel_actor_outbox_serializer(factories):
def test_channel_upload_serializer(factories):
channel = factories["audio.Channel"]()
channel = factories["audio.Channel"](library__privacy_level="everyone")
content = factories["common.Content"]()
upload = factories["music.Upload"](
playable=True,
......
......@@ -186,6 +186,18 @@ def test_music_library_retrieve_excludes_channel_libraries(factories, api_client
assert response.status_code == 404
def test_actor_retrieve_excludes_channel_with_private_library(factories, api_client):
channel = factories["audio.Channel"](external=True, library__privacy_level="me")