From 08262529ecb0d14ab4529f10d0de1bbbac044107 Mon Sep 17 00:00:00 2001 From: Eliot Berriot <contact@eliotberriot.com> Date: Thu, 20 Jun 2019 10:27:07 +0200 Subject: [PATCH] MRF / Dropping of incoming/outgoing messages based on allow-list --- api/config/settings/common.py | 60 ++++++- api/config/settings/production.py | 46 ------ api/funkwhale_api/federation/activity.py | 64 ++++++-- api/funkwhale_api/federation/admin.py | 2 +- api/funkwhale_api/federation/factories.py | 1 + api/funkwhale_api/federation/mrf_policies.py | 15 ++ api/funkwhale_api/federation/utils.py | 25 ++- api/funkwhale_api/moderation/apps.py | 13 ++ .../dynamic_preferences_registry.py | 2 +- .../moderation/management/__init__.py | 0 .../management/commands/__init__.py | 0 .../management/commands/mrf_check.py | 117 ++++++++++++++ api/funkwhale_api/moderation/mrf.py | 78 ++++++++++ api/funkwhale_api/moderation/mrf_policies.py | 47 ++++++ api/setup.cfg | 1 + api/tests/conftest.py | 15 ++ api/tests/federation/test_activity.py | 147 +++++++++++++++++- api/tests/federation/test_utils.py | 26 ++++ api/tests/moderation/test_commands.py | 77 +++++++++ api/tests/moderation/test_mrf.py | 67 ++++++++ api/tests/moderation/test_mrf_policies.py | 63 ++++++++ dev.yml | 2 + docs/admin/index.rst | 1 + docs/admin/mrf.rst | 117 ++++++++++++++ 24 files changed, 897 insertions(+), 89 deletions(-) create mode 100644 api/funkwhale_api/federation/mrf_policies.py create mode 100644 api/funkwhale_api/moderation/apps.py create mode 100644 api/funkwhale_api/moderation/management/__init__.py create mode 100644 api/funkwhale_api/moderation/management/commands/__init__.py create mode 100644 api/funkwhale_api/moderation/management/commands/mrf_check.py create mode 100644 api/funkwhale_api/moderation/mrf.py create mode 100644 api/funkwhale_api/moderation/mrf_policies.py create mode 100644 api/tests/moderation/test_commands.py create mode 100644 api/tests/moderation/test_mrf.py create mode 100644 
api/tests/moderation/test_mrf_policies.py create mode 100644 docs/admin/mrf.rst diff --git a/api/config/settings/common.py b/api/config/settings/common.py index ab060b448d..8f2e00ade7 100644 --- a/api/config/settings/common.py +++ b/api/config/settings/common.py @@ -11,7 +11,8 @@ https://docs.djangoproject.com/en/dev/ref/settings/ from __future__ import absolute_import, unicode_literals import datetime -import logging +import logging.config +import sys from urllib.parse import urlsplit @@ -20,13 +21,44 @@ from celery.schedules import crontab from funkwhale_api import __version__ -logger = logging.getLogger(__name__) +logger = logging.getLogger("funkwhale_api.config") ROOT_DIR = environ.Path(__file__) - 3 # (/a/b/myfile.py - 3 = /) APPS_DIR = ROOT_DIR.path("funkwhale_api") env = environ.Env() + +LOGLEVEL = env("LOGLEVEL", default="info").upper() +LOGGING_CONFIG = None +logging.config.dictConfig( + { + "version": 1, + "disable_existing_loggers": False, + "formatters": { + "console": {"format": "%(asctime)s %(name)-12s %(levelname)-8s %(message)s"} + }, + "handlers": { + "console": {"class": "logging.StreamHandler", "formatter": "console"}, + # # Add Handler for Sentry for `warning` and above + # 'sentry': { + # 'level': 'WARNING', + # 'class': 'raven.contrib.django.raven_compat.handlers.SentryHandler', + # }, + }, + "loggers": { + "funkwhale_api": { + "level": LOGLEVEL, + "handlers": ["console"], + # required to avoid double logging with root logger + "propagate": False, + }, + "": {"level": "WARNING", "handlers": ["console"]}, + }, + } +) + env_file = env("ENV_FILE", default=None) if env_file: + logger.info("Loading specified env file at %s", env_file) # we have an explicitely specified env file # so we try to load and it fail loudly if it does not exist env.read_env(env_file) @@ -49,6 +81,11 @@ else: logger.info("Loaded env file at %s/.env", path) break +FUNKWHALE_PLUGINS_PATH = env( + "FUNKWHALE_PLUGINS_PATH", default="/srv/funkwhale/plugins/" +) 
+sys.path.append(FUNKWHALE_PLUGINS_PATH) + FUNKWHALE_HOSTNAME = None FUNKWHALE_HOSTNAME_SUFFIX = env("FUNKWHALE_HOSTNAME_SUFFIX", default=None) FUNKWHALE_HOSTNAME_PREFIX = env("FUNKWHALE_HOSTNAME_PREFIX", default=None) @@ -146,7 +183,8 @@ if RAVEN_ENABLED: "release": __version__, } THIRD_PARTY_APPS += ("raven.contrib.django.raven_compat",) - + logging.getLogger("").addHandler("sentry") + logging.getLogger("funkwhale_api").addHandler("sentry") # Apps specific for this project go here. LOCAL_APPS = ( @@ -160,7 +198,7 @@ LOCAL_APPS = ( "funkwhale_api.requests", "funkwhale_api.favorites", "funkwhale_api.federation", - "funkwhale_api.moderation", + "funkwhale_api.moderation.apps.ModerationConfig", "funkwhale_api.radios", "funkwhale_api.history", "funkwhale_api.playlists", @@ -169,7 +207,19 @@ LOCAL_APPS = ( # See: https://docs.djangoproject.com/en/dev/ref/settings/#installed-apps -INSTALLED_APPS = DJANGO_APPS + THIRD_PARTY_APPS + LOCAL_APPS + +PLUGINS = [p for p in env.list("FUNKWHALE_PLUGINS", default=[]) if p] +if PLUGINS: + logger.info("Running with the following plugins enabled: %s", ", ".join(PLUGINS)) +else: + logger.info("Running with no plugins") + +INSTALLED_APPS = ( + DJANGO_APPS + + THIRD_PARTY_APPS + + LOCAL_APPS + + tuple(["{}.apps.Plugin".format(p) for p in PLUGINS]) +) # MIDDLEWARE CONFIGURATION # ------------------------------------------------------------------------------ diff --git a/api/config/settings/production.py b/api/config/settings/production.py index 26def9fd03..1ee9b8f7e6 100644 --- a/api/config/settings/production.py +++ b/api/config/settings/production.py @@ -73,50 +73,4 @@ TEMPLATES[0]["OPTIONS"]["loaders"] = [ # ------------------------------------------------------------------------------ # Heroku URL does not pass the DB number, so we parse it in - -# LOGGING CONFIGURATION -# ------------------------------------------------------------------------------ -# See: https://docs.djangoproject.com/en/dev/ref/settings/#logging -# A sample 
logging configuration. The only tangible logging -# performed by this configuration is to send an email to -# the site admins on every HTTP 500 error when DEBUG=False. -# See http://docs.djangoproject.com/en/dev/topics/logging for -# more details on how to customize your logging configuration. -LOGGING = { - "version": 1, - "disable_existing_loggers": False, - "filters": {"require_debug_false": {"()": "django.utils.log.RequireDebugFalse"}}, - "formatters": { - "verbose": { - "format": "%(levelname)s %(asctime)s %(module)s " - "%(process)d %(thread)d %(message)s" - } - }, - "handlers": { - "mail_admins": { - "level": "ERROR", - "filters": ["require_debug_false"], - "class": "django.utils.log.AdminEmailHandler", - }, - "console": { - "level": "DEBUG", - "class": "logging.StreamHandler", - "formatter": "verbose", - }, - }, - "loggers": { - "django.request": { - "handlers": ["mail_admins"], - "level": "ERROR", - "propagate": True, - }, - "django.security.DisallowedHost": { - "level": "ERROR", - "handlers": ["console", "mail_admins"], - "propagate": True, - }, - }, -} - - # Your production stuff: Below this line define 3rd party library settings diff --git a/api/funkwhale_api/federation/activity.py b/api/funkwhale_api/federation/activity.py index 979b8aa1be..0b34e9141f 100644 --- a/api/funkwhale_api/federation/activity.py +++ b/api/funkwhale_api/federation/activity.py @@ -1,5 +1,6 @@ import uuid import logging +import urllib.parse from django.core.cache import cache from django.conf import settings @@ -122,32 +123,38 @@ def receive(activity, on_behalf_of): from . import serializers from . 
import tasks from .routes import inbox + from funkwhale_api.moderation import mrf + logger.debug( + "[federation] Received activity from %s : %s", on_behalf_of.fid, activity + ) # we ensure the activity has the bare minimum structure before storing # it in our database serializer = serializers.BaseActivitySerializer( data=activity, context={"actor": on_behalf_of, "local_recipients": True} ) serializer.is_valid(raise_exception=True) - if not inbox.get_matching_handlers(activity): - # discard unhandlable activity - return - if should_reject( - fid=serializer.validated_data.get("id"), - actor_id=serializer.validated_data["actor"].fid, - payload=activity, - ): + payload, updated = mrf.inbox.apply(activity, sender_id=on_behalf_of.fid) + if not payload: logger.info( - "[federation] Discarding activity due to instance policies %s", + "[federation] Discarding activity due to mrf %s", serializer.validated_data.get("id"), ) return + + if not inbox.get_matching_handlers(payload): + # discard unhandlable activity + logger.debug( + "[federation] No matching route found for activity, discarding: %s", payload + ) + return + try: - copy = serializer.save() + copy = serializer.save(payload=payload, type=payload["type"]) except IntegrityError: logger.warning( - "[federation] Discarding already elivered activity %s", + "[federation] Discarding already delivered activity %s", serializer.validated_data.get("id"), ) return @@ -283,9 +290,19 @@ class OutboxRouter(Router): and may yield data that should be persisted in the Activity model for further delivery. """ + from funkwhale_api.common import preferences from . import models from . 
import tasks + allow_list_enabled = preferences.get("moderation__allow_list_enabled") + allowed_domains = None + if allow_list_enabled: + allowed_domains = set( + models.Domain.objects.filter(allowed=True).values_list( + "name", flat=True + ) + ) + for route, handler in self.routes: if not match_route(route, routing): continue @@ -314,10 +331,10 @@ class OutboxRouter(Router): a = models.Activity(**activity_data) a.uuid = uuid.uuid4() to_inbox_items, to_deliveries, new_to = prepare_deliveries_and_inbox_items( - to, "to" + to, "to", allowed_domains=allowed_domains ) cc_inbox_items, cc_deliveries, new_cc = prepare_deliveries_and_inbox_items( - cc, "cc" + cc, "cc", allowed_domains=allowed_domains ) if not any( [to_inbox_items, to_deliveries, cc_inbox_items, cc_deliveries] @@ -374,7 +391,14 @@ def match_route(route, payload): return True -def prepare_deliveries_and_inbox_items(recipient_list, type): +def is_allowed_url(url, allowed_domains): + return ( + allowed_domains is None + or urllib.parse.urlparse(url).hostname in allowed_domains + ) + + +def prepare_deliveries_and_inbox_items(recipient_list, type, allowed_domains=None): """ Given a list of recipients ( either actor instances, public adresses, a dictionnary with a "type" and "target" @@ -384,10 +408,12 @@ def prepare_deliveries_and_inbox_items(recipient_list, type): """ from . 
import models + if allowed_domains is not None: + allowed_domains = set(allowed_domains) + allowed_domains.add(settings.FEDERATION_HOSTNAME) local_recipients = set() remote_inbox_urls = set() urls = [] - for r in recipient_list: if isinstance(r, models.Actor): if r.is_local: @@ -432,7 +458,13 @@ def prepare_deliveries_and_inbox_items(recipient_list, type): values = actors.values("shared_inbox_url", "inbox_url") for v in values: remote_inbox_urls.add(v["shared_inbox_url"] or v["inbox_url"]) - deliveries = [models.Delivery(inbox_url=url) for url in remote_inbox_urls] + + deliveries = [ + models.Delivery(inbox_url=url) + for url in remote_inbox_urls + if is_allowed_url(url, allowed_domains) + ] + urls = [url for url in urls if is_allowed_url(url, allowed_domains)] inbox_items = [ models.InboxItem(actor=actor, type=type) for actor in local_recipients ] diff --git a/api/funkwhale_api/federation/admin.py b/api/funkwhale_api/federation/admin.py index 40f7b4f69a..8e66708cfc 100644 --- a/api/funkwhale_api/federation/admin.py +++ b/api/funkwhale_api/federation/admin.py @@ -41,7 +41,7 @@ class FetchAdmin(admin.ModelAdmin): @admin.register(models.Activity) class ActivityAdmin(admin.ModelAdmin): - list_display = ["type", "fid", "url", "actor", "creation_date"] + list_display = ["uuid", "type", "fid", "url", "actor", "creation_date"] search_fields = ["payload", "fid", "url", "actor__domain__name"] list_filter = ["type", "actor__domain__name"] actions = [redeliver_activities] diff --git a/api/funkwhale_api/federation/factories.py b/api/funkwhale_api/federation/factories.py index 14bb4e8c96..95d68779b9 100644 --- a/api/funkwhale_api/federation/factories.py +++ b/api/funkwhale_api/federation/factories.py @@ -70,6 +70,7 @@ def create_user(actor): class DomainFactory(NoUpdateOnCreate, factory.django.DjangoModelFactory): name = factory.Faker("domain_name") nodeinfo_fetch_date = factory.LazyFunction(lambda: timezone.now()) + allowed = None class Meta: model = "federation.Domain" diff 
--git a/api/funkwhale_api/federation/mrf_policies.py b/api/funkwhale_api/federation/mrf_policies.py new file mode 100644 index 0000000000..220c0d3cba --- /dev/null +++ b/api/funkwhale_api/federation/mrf_policies.py @@ -0,0 +1,15 @@ +from funkwhale_api.moderation import mrf + + +from . import activity + + +@mrf.inbox.register(name="instance_policies") +def instance_policies(payload, **kwargs): + reject = activity.should_reject( + fid=payload.get("id"), + actor_id=kwargs.get("sender_id", payload.get("id")), + payload=payload, + ) + if reject: + raise mrf.Discard() diff --git a/api/funkwhale_api/federation/utils.py b/api/funkwhale_api/federation/utils.py index 8f73c57350..c66a972665 100644 --- a/api/funkwhale_api/federation/utils.py +++ b/api/funkwhale_api/federation/utils.py @@ -4,7 +4,7 @@ from django.conf import settings from django.db.models import Q from funkwhale_api.common import session -from funkwhale_api.moderation import models as moderation_models +from funkwhale_api.moderation import mrf from . import exceptions from . import signing @@ -64,10 +64,10 @@ def slugify_username(username): def retrieve_ap_object( fid, actor, serializer_class=None, queryset=None, apply_instance_policies=True ): - from . 
import activity - - policies = moderation_models.InstancePolicy.objects.active().filter(block_all=True) - if apply_instance_policies and policies.matching_url(fid): + # we have a duplicate check here because it's less expensive to do those checks + # twice than to trigger a HTTP request + payload, updated = mrf.inbox.apply({"id": fid}) + if not payload: raise exceptions.BlockedActorOrDomain() if queryset: try: @@ -94,15 +94,12 @@ def retrieve_ap_object( response.raise_for_status() data = response.json() - # we match against moderation policies here again, because the FID of the returned - # object may not be the same as the URL used to access it - try: - id = data["id"] - except KeyError: - pass - else: - if apply_instance_policies and activity.should_reject(fid=id, payload=data): - raise exceptions.BlockedActorOrDomain() + # we match against mrf here again, because new data may yield different + # results + data, updated = mrf.inbox.apply(data) + if not data: + raise exceptions.BlockedActorOrDomain() + if not serializer_class: return data serializer = serializer_class(data=data, context={"fetch_actor": actor}) diff --git a/api/funkwhale_api/moderation/apps.py b/api/funkwhale_api/moderation/apps.py new file mode 100644 index 0000000000..840393af10 --- /dev/null +++ b/api/funkwhale_api/moderation/apps.py @@ -0,0 +1,13 @@ +from django.apps import AppConfig, apps + +from . 
import mrf + + +class ModerationConfig(AppConfig): + name = "funkwhale_api.moderation" + + def ready(self): + super().ready() + + app_names = [app.name for app in apps.app_configs.values()] + mrf.inbox.autodiscover(app_names) diff --git a/api/funkwhale_api/moderation/dynamic_preferences_registry.py b/api/funkwhale_api/moderation/dynamic_preferences_registry.py index ff4201b578..8d8237cbb4 100644 --- a/api/funkwhale_api/moderation/dynamic_preferences_registry.py +++ b/api/funkwhale_api/moderation/dynamic_preferences_registry.py @@ -1,7 +1,7 @@ from dynamic_preferences import types from dynamic_preferences.registries import global_preferences_registry -moderation = types.Section("Moderation") +moderation = types.Section("moderation") @global_preferences_registry.register diff --git a/api/funkwhale_api/moderation/management/__init__.py b/api/funkwhale_api/moderation/management/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/funkwhale_api/moderation/management/commands/__init__.py b/api/funkwhale_api/moderation/management/commands/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/funkwhale_api/moderation/management/commands/mrf_check.py b/api/funkwhale_api/moderation/management/commands/mrf_check.py new file mode 100644 index 0000000000..b518daa08c --- /dev/null +++ b/api/funkwhale_api/moderation/management/commands/mrf_check.py @@ -0,0 +1,117 @@ +import json +import sys +import uuid +import logging + +from django.core.management.base import BaseCommand, CommandError +from django.core import validators + +from django.conf import settings + +from funkwhale_api.common import session +from funkwhale_api.federation import models +from funkwhale_api.moderation import mrf + + +def is_uuid(v): + try: + uuid.UUID(v) + except ValueError: + return False + return True + + +def is_url(v): + validator = validators.URLValidator() + try: + validator(v) + except (ValueError, validators.ValidationError): + return False + + 
return True + + +class Command(BaseCommand): + help = "Check a given message against all or a specific MRF rule" + + def add_arguments(self, parser): + parser.add_argument( + "type", + type=str, + choices=["inbox"], + help=("The type of MRF. Only inbox is supported at the moment"), + ) + parser.add_argument( + "input", + nargs="?", + help=( + "The path to a file containing JSON data. Use - to read from stdin. " + "If no input is provided, registered MRF policies will be listed " + "instead.", + ), + ) + parser.add_argument( + "--policy", + "-p", + dest="policies", + nargs="+", + default=False, + help="Restrict to a list of MRF policies that will be applied, in that order", + ) + + def handle(self, *args, **options): + logger = logging.getLogger("funkwhale.mrf") + logger.setLevel(logging.DEBUG) + logger.addHandler(logging.StreamHandler(stream=sys.stderr)) + + input = options["input"] + if not input: + registry = getattr(mrf, options["type"]) + self.stdout.write( + "No input given, listing registered policies for '{}' MRF:".format( + options["type"] + ) + ) + for name in registry.keys(): + self.stdout.write("- {}".format(name)) + return + raw_content = None + content = None + if input == "-": + raw_content = sys.stdin.read() + elif is_uuid(input): + self.stderr.write("UUID provided, retrieving payload from db") + content = models.Activity.objects.get(uuid=input).payload + elif is_url(input): + response = session.get_session().get( + input, + timeout=5, + verify=settings.EXTERNAL_REQUESTS_VERIFY_SSL, + headers={"Content-Type": "application/activity+json"}, + ) + response.raise_for_status() + content = response.json() + else: + with open(input, "rb") as f: + raw_content = f.read() + content = json.loads(raw_content) if content is None else content + + policies = options["policies"] or [] + registry = getattr(mrf, options["type"]) + for policy in policies: + if policy not in registry: + raise CommandError( + "Unknown policy '{}' for MRF '{}'".format(policy, 
options["type"]) + ) + + payload, updated = registry.apply(content, policies=policies) + if not payload: + self.stderr.write("Payload was discarded by MRF") + elif updated: + self.stderr.write("Payload was modified by MRF") + self.stderr.write("Initial payload:\n") + self.stdout.write(json.dumps(content, indent=2, sort_keys=True)) + self.stderr.write("Modified payload:\n") + self.stdout.write(json.dumps(payload, indent=2, sort_keys=True)) + else: + self.stderr.write("Payload left untouched by MRF") diff --git a/api/funkwhale_api/moderation/mrf.py b/api/funkwhale_api/moderation/mrf.py new file mode 100644 index 0000000000..207b63e5d7 --- /dev/null +++ b/api/funkwhale_api/moderation/mrf.py @@ -0,0 +1,78 @@ +""" +Inspired from the MRF logic from Pleroma, see https://docs-develop.pleroma.social/mrf.html +To support pluggable / customizable moderation using a programming language if +our exposed features aren't enough. +""" + +import logging + +import persisting_theory + +logger = logging.getLogger("funkwhale.mrf") + + +class MRFException(Exception): + pass + + +class Discard(MRFException): + pass + + +class Skip(MRFException): + pass + + +class Registry(persisting_theory.Registry): + look_into = "mrf_policies" + + def __init__(self, name=""): + self.name = name + + super().__init__() + + def apply(self, payload, **kwargs): + policy_names = kwargs.pop("policies", []) + if not policy_names: + policies = self.items() + else: + logger.debug( + "[MRF.%s] Running restricted list of policies %s…", + self.name, + ", ".join(policy_names), + ) + policies = [(name, self[name]) for name in policy_names] + updated = False + for policy_name, policy in policies: + logger.debug("[MRF.%s] Applying mrf policy '%s'…", self.name, policy_name) + try: + new_payload = policy(payload, **kwargs) + except Skip as e: + logger.debug( + "[MRF.%s] Skipped policy %s because '%s'", + self.name, + policy_name, + str(e), + ) + continue + except Discard as e: + logger.info( + "[MRF.%s] Discarded message 
per policy '%s' because '%s'", + self.name, + policy_name, + str(e), + ) + return (None, False) + except Exception: + logger.exception( + "[MRF.%s] Error while applying policy '%s'!", self.name, policy_name + ) + continue + if new_payload: + updated = True + payload = new_payload + + return payload, updated + + +inbox = Registry("inbox") diff --git a/api/funkwhale_api/moderation/mrf_policies.py b/api/funkwhale_api/moderation/mrf_policies.py new file mode 100644 index 0000000000..ec6b2f6d89 --- /dev/null +++ b/api/funkwhale_api/moderation/mrf_policies.py @@ -0,0 +1,47 @@ +import urllib.parse + +from funkwhale_api.common import preferences +from funkwhale_api.common import utils +from funkwhale_api.federation import models as federation_models +from funkwhale_api.moderation import mrf + + +@mrf.inbox.register(name="allow_list") +def check_allow_list(payload, **kwargs): + """ + A MRF policy that only works when the moderation__allow_list_enabled + setting is on. + + It will extract domain names from the activity ID, actor ID and activity object ID + and discard the activity if any of those domain names isn't on the allow list. 
+ """ + if not preferences.get("moderation__allow_list_enabled"): + raise mrf.Skip("Allow-listing is disabled") + + allowed_domains = set( + federation_models.Domain.objects.filter(allowed=True).values_list( + "name", flat=True + ) + ) + + relevant_ids = [ + payload.get("actor"), + kwargs.get("sender_id", payload.get("id")), + utils.recursive_getattr(payload, "object.id", permissive=True), + ] + + relevant_domains = set( + [ + domain + for domain in [urllib.parse.urlparse(i).hostname for i in relevant_ids if i] + if domain + ] + ) + + if relevant_domains - allowed_domains: + + raise mrf.Discard( + "These domains are not allowed: {}".format( + ", ".join(relevant_domains - allowed_domains) + ) + ) diff --git a/api/setup.cfg b/api/setup.cfg index 431c4f1ee5..3f7d2f7f07 100644 --- a/api/setup.cfg +++ b/api/setup.cfg @@ -25,3 +25,4 @@ env = CREATE_IMAGE_THUMBNAILS=False FORCE_HTTPS_URLS=False FUNKWHALE_SPA_HTML_ROOT=http://noop/ + PROXY_MEDIA=true diff --git a/api/tests/conftest.py b/api/tests/conftest.py index a1baedcc66..d5b87e724f 100644 --- a/api/tests/conftest.py +++ b/api/tests/conftest.py @@ -29,6 +29,7 @@ from rest_framework.test import APIClient, APIRequestFactory from funkwhale_api.activity import record from funkwhale_api.federation import actors +from funkwhale_api.moderation import mrf pytest_plugins = "aiohttp.pytest_plugin" @@ -422,3 +423,17 @@ def a_responses(): @pytest.fixture def service_actor(db): return actors.get_service_actor() + + +@pytest.fixture +def mrf_inbox_registry(mocker): + registry = mrf.Registry() + mocker.patch("funkwhale_api.moderation.mrf.inbox", registry) + return registry + + +@pytest.fixture +def mrf_outbox_registry(mocker): + registry = mrf.Registry() + mocker.patch("funkwhale_api.moderation.mrf.outbox", registry) + return registry diff --git a/api/tests/federation/test_activity.py b/api/tests/federation/test_activity.py index aaeebbb87c..ccc27c7dfd 100644 --- a/api/tests/federation/test_activity.py +++ 
b/api/tests/federation/test_activity.py @@ -13,10 +13,13 @@ from funkwhale_api.federation import ( ) -def test_receive_validates_basic_attributes_and_stores_activity(factories, now, mocker): +def test_receive_validates_basic_attributes_and_stores_activity( + mrf_inbox_registry, factories, now, mocker +): mocker.patch.object( activity.InboxRouter, "get_matching_handlers", return_value=True ) + mrf_inbox_registry_apply = mocker.spy(mrf_inbox_registry, "apply") mocked_dispatch = mocker.patch("funkwhale_api.common.utils.on_commit") local_to_actor = factories["users.User"]().create_actor() local_cc_actor = factories["users.User"]().create_actor() @@ -31,6 +34,7 @@ def test_receive_validates_basic_attributes_and_stores_activity(factories, now, } copy = activity.receive(activity=a, on_behalf_of=remote_actor) + mrf_inbox_registry_apply.assert_called_once_with(a, sender_id=a["actor"]) assert copy.payload == a assert copy.creation_date >= now @@ -49,6 +53,63 @@ def test_receive_validates_basic_attributes_and_stores_activity(factories, now, assert ii.is_read is False +def test_receive_uses_mrf_returned_payload(mrf_inbox_registry, factories, now, mocker): + mocker.patch.object( + activity.InboxRouter, "get_matching_handlers", return_value=True + ) + + def patched_apply(payload, **kwargs): + payload["type"] = "SomethingElse" + return payload, True + + mrf_inbox_registry_apply = mocker.patch.object( + mrf_inbox_registry, "apply", side_effect=patched_apply + ) + mocked_dispatch = mocker.patch("funkwhale_api.common.utils.on_commit") + local_to_actor = factories["users.User"]().create_actor() + remote_actor = factories["federation.Actor"]() + a = { + "@context": [], + "actor": remote_actor.fid, + "type": "Noop", + "id": "https://test.activity", + "to": [local_to_actor.fid], + } + + copy = activity.receive(activity=a, on_behalf_of=remote_actor) + mrf_inbox_registry_apply.assert_called_once_with(a, sender_id=a["actor"]) + + expected = a.copy() + expected["type"] = "SomethingElse" + 
assert copy.payload == expected + assert copy.creation_date >= now + assert copy.actor == remote_actor + assert copy.fid == a["id"] + assert copy.type == "SomethingElse" + mocked_dispatch.assert_called_once_with( + tasks.dispatch_inbox.delay, activity_id=copy.pk + ) + + +def test_receive_mrf_skip(mrf_inbox_registry, factories, now, mocker): + mocker.patch.object( + activity.InboxRouter, "get_matching_handlers", return_value=True + ) + mocker.patch.object(mrf_inbox_registry, "apply", return_value=(None, False)) + local_to_actor = factories["users.User"]().create_actor() + remote_actor = factories["federation.Actor"]() + a = { + "@context": [], + "actor": remote_actor.fid, + "type": "Noop", + "id": "https://test.activity", + "to": [local_to_actor.fid], + } + + copy = activity.receive(activity=a, on_behalf_of=remote_actor) + assert copy is None + + def test_receive_calls_should_reject(factories, now, mocker): should_reject = mocker.patch.object(activity, "should_reject", return_value=True) mocker.patch.object( @@ -287,7 +348,7 @@ def test_route_matching(route, payload, expected): assert activity.match_route(route, payload) is expected -def test_outbox_router_dispatch(mocker, factories, now): +def test_outbox_router_dispatch(mocker, factories, preferences, now): router = activity.OutboxRouter() actor = factories["federation.Actor"]() r1 = factories["federation.Actor"]() @@ -331,7 +392,39 @@ def test_outbox_router_dispatch(mocker, factories, now): assert delivery.is_delivered is False -def test_prepare_deliveries_and_inbox_items(factories): +def test_outbox_router_dispatch_allow_list(mocker, factories, preferences, now): + preferences["moderation__allow_list_enabled"] = True + router = activity.OutboxRouter() + actor = factories["federation.Actor"]() + r1 = factories["federation.Actor"](domain__allowed=True) + r2 = factories["federation.Actor"]() + prepare_deliveries_and_inbox_items = mocker.spy( + activity, "prepare_deliveries_and_inbox_items" + ) + + def 
handler(context): + yield { + "payload": { + "type": "Noop", + "actor": actor.fid, + "summary": context["summary"], + "to": [r1], + "cc": [r2], + }, + "actor": actor, + } + + router.connect({"type": "Noop"}, handler) + router.dispatch({"type": "Noop"}, {"summary": "hello"}) + prepare_deliveries_and_inbox_items.assert_any_call( + [r1], "to", allowed_domains=set([r1.domain_id]) + ) + prepare_deliveries_and_inbox_items.assert_any_call( + [r2], "cc", allowed_domains=set([r1.domain_id]) + ) + + +def test_prepare_deliveries_and_inbox_items(factories, preferences): local_actor1 = factories["federation.Actor"]( local=True, shared_inbox_url="https://testlocal.inbox" ) @@ -385,7 +478,7 @@ def test_prepare_deliveries_and_inbox_items(factories): ] inbox_items, deliveries, urls = activity.prepare_deliveries_and_inbox_items( - recipients, "to" + recipients, "to", allowed_domains=None ) expected_inbox_items = sorted( [ @@ -436,6 +529,32 @@ def test_prepare_deliveries_and_inbox_items(factories): assert inbox_item.type == "to" +def test_prepare_deliveries_and_inbox_items_allow_list(factories, preferences): + preferences["moderation__allow_list_enabled"] = True + remote_actor1 = factories["federation.Actor"](domain__allowed=True) + remote_actor2 = factories["federation.Actor"](domain__allowed=False) + + recipients = [remote_actor1, remote_actor2] + + inbox_items, deliveries, urls = activity.prepare_deliveries_and_inbox_items( + recipients, "to", allowed_domains=set([remote_actor1.domain_id]) + ) + expected_inbox_items = [] + + expected_deliveries = [models.Delivery(inbox_url=remote_actor1.inbox_url)] + + expected_urls = [remote_actor1.fid] + + assert urls == expected_urls + assert len(expected_inbox_items) == len(inbox_items) + assert len(expected_deliveries) == len(deliveries) + + for delivery, expected_delivery in zip( + sorted(deliveries, key=lambda v: v.inbox_url), expected_deliveries + ): + assert delivery.inbox_url == expected_delivery.inbox_url + + def 
test_prepare_deliveries_and_inbox_items_instances_with_followers(factories): domain1 = factories["federation.Domain"](with_service_actor=True) @@ -460,7 +579,7 @@ def test_prepare_deliveries_and_inbox_items_instances_with_followers(factories): recipients = [activity.PUBLIC_ADDRESS, {"type": "instances_with_followers"}] inbox_items, deliveries, urls = activity.prepare_deliveries_and_inbox_items( - recipients, "to" + recipients, "to", allowed_domains=None ) expected_deliveries = sorted( @@ -483,6 +602,20 @@ def test_prepare_deliveries_and_inbox_items_instances_with_followers(factories): assert delivery.inbox_url == expected_delivery.inbox_url +@pytest.mark.parametrize( + "url, allowed_domains, expected", + [ + ("https://domain.example/test", None, True), + ("https://domain.example/test", [], False), + ("https://allowed.example/test", ["allowed.example"], True), + ("https://domain.example/test", ["allowed.example"], False), + ("https://social.allowed.example/test", ["allowed.example"], False), + ], +) +def test_is_allowed_url(url, allowed_domains, expected): + assert activity.is_allowed_url(url, allowed_domains) is expected + + def test_should_rotate_actor_key(settings, cache, now): actor_id = 42 settings.ACTOR_KEY_ROTATION_DELAY = 10 @@ -507,7 +640,9 @@ def test_schedule_key_rotation(cache, mocker): assert cache.get(activity.ACTOR_KEY_ROTATION_LOCK_CACHE_KEY.format(actor_id), True) -def test_outbox_dispatch_rotate_key_on_delete(mocker, factories, cache, settings): +def test_outbox_dispatch_rotate_key_on_delete( + mocker, factories, cache, settings, preferences +): router = activity.OutboxRouter() actor = factories["federation.Actor"]() r1 = factories["federation.Actor"]() diff --git a/api/tests/federation/test_utils.py b/api/tests/federation/test_utils.py index 9aa850728e..83c5e4f7e0 100644 --- a/api/tests/federation/test_utils.py +++ b/api/tests/federation/test_utils.py @@ -72,6 +72,32 @@ def test_retrieve_ap_object_honor_instance_policy_domain(factories): 
utils.retrieve_ap_object(fid, actor=None) +def test_retrieve_ap_object_honor_mrf_inbox_before_http( + mrf_inbox_registry, factories, mocker +): + apply = mocker.patch.object(mrf_inbox_registry, "apply", return_value=(None, False)) + fid = "http://domain/test" + with pytest.raises(exceptions.BlockedActorOrDomain): + utils.retrieve_ap_object(fid, actor=None) + + apply.assert_called_once_with({"id": fid}) + + +def test_retrieve_ap_object_honor_mrf_inbox_after_http( + r_mock, mrf_inbox_registry, factories, mocker +): + apply = mocker.patch.object( + mrf_inbox_registry, "apply", side_effect=[(True, False), (None, False)] + ) + payload = {"id": "http://domain/test", "actor": "hello"} + r_mock.get(payload["id"], json=payload) + with pytest.raises(exceptions.BlockedActorOrDomain): + utils.retrieve_ap_object(payload["id"], actor=None) + + apply.assert_any_call({"id": payload["id"]}) + apply.assert_any_call(payload) + + def test_retrieve_ap_object_honor_instance_policy_different_url_and_id( r_mock, factories ): diff --git a/api/tests/moderation/test_commands.py b/api/tests/moderation/test_commands.py new file mode 100644 index 0000000000..9984983ba0 --- /dev/null +++ b/api/tests/moderation/test_commands.py @@ -0,0 +1,77 @@ +import json + +from django.core.management import call_command + + +def test_mrf_check_inbox_all(mocker, mrf_inbox_registry, tmpfile): + payload = {"hello": "world"} + tmpfile.write(json.dumps(payload).encode()) + tmpfile.flush() + apply = mocker.spy(mrf_inbox_registry, "apply") + policy1 = mocker.Mock() + policy2 = mocker.Mock(return_value={"hello": "noop"}) + mrf_inbox_registry.register(name="policy1")(policy1) + mrf_inbox_registry.register(name="policy2")(policy2) + + call_command("mrf_check", "inbox", tmpfile.name) + + apply.assert_called_once_with(payload, policies=[]) + policy1.assert_called_once_with(payload) + policy2.assert_called_once_with(policy1.return_value) + + +def test_mrf_check_inbox_list(mocker, mrf_inbox_registry): + apply = 
mocker.spy(mrf_inbox_registry, "apply") + policy1 = mocker.Mock() + policy2 = mocker.Mock(return_value={"hello": "noop"}) + mrf_inbox_registry.register(name="policy1")(policy1) + mrf_inbox_registry.register(name="policy2")(policy2) + + call_command("mrf_check", "inbox") + + apply.assert_not_called() + + +def test_mrf_check_inbox_restrict_policies(mocker, mrf_inbox_registry, tmpfile): + payload = {"hello": "world"} + tmpfile.write(json.dumps(payload).encode()) + tmpfile.flush() + apply = mocker.spy(mrf_inbox_registry, "apply") + policy1 = mocker.Mock() + policy2 = mocker.Mock() + policy3 = mocker.Mock(return_value={"hello": "noop"}) + mrf_inbox_registry.register(name="policy1")(policy1) + mrf_inbox_registry.register(name="policy2")(policy2) + mrf_inbox_registry.register(name="policy3")(policy3) + + call_command("mrf_check", "inbox", tmpfile.name, policies=["policy1", "policy3"]) + + apply.assert_called_once_with(payload, policies=["policy1", "policy3"]) + policy1.assert_called_once_with(payload) + policy2.assert_not_called() + policy3.assert_called_once_with(policy1.return_value) + + +def test_mrf_check_inbox_db_activity(factories, mocker, mrf_inbox_registry): + payload = {"hello": "world"} + activity = factories["federation.Activity"](payload=payload) + + policy1 = mocker.Mock(return_value={"hello": "noop"}) + mrf_inbox_registry.register(name="policy1")(policy1) + + call_command("mrf_check", "inbox", activity.uuid) + + policy1.assert_called_once_with(payload) + + +def test_mrf_check_inbox_url(r_mock, mocker, mrf_inbox_registry): + payload = {"hello": "world"} + url = "http://test.hello/path" + r_mock.get(url, json=payload) + + policy1 = mocker.Mock(return_value={"hello": "noop"}) + mrf_inbox_registry.register(name="policy1")(policy1) + + call_command("mrf_check", "inbox", url) + + policy1.assert_called_once_with(payload) diff --git a/api/tests/moderation/test_mrf.py b/api/tests/moderation/test_mrf.py new file mode 100644 index 0000000000..15a4385a24 --- /dev/null 
+++ b/api/tests/moderation/test_mrf.py @@ -0,0 +1,67 @@ +from funkwhale_api.moderation import mrf + + +def test_mrf_inbox_registry_apply_empty(mrf_inbox_registry, mocker): + payload = {"hello": "world"} + new_payload, updated = mrf_inbox_registry.apply(payload) + + assert new_payload == payload + assert updated is False + + +def test_mrf_inbox_registry_apply_simple(mrf_inbox_registry, mocker): + rule = mocker.Mock(return_value="test") + payload = {"hello": "world"} + mrf_inbox_registry.register(rule, name="rule") + + new_payload, updated = mrf_inbox_registry.apply(payload) + + assert new_payload == "test" + assert updated is True + + +def test_mrf_inbox_registry_apply_skipped(mrf_inbox_registry, mocker): + rule = mocker.Mock(side_effect=mrf.Skip()) + payload = {"hello": "world"} + mrf_inbox_registry.register(rule, name="rule") + + new_payload, updated = mrf_inbox_registry.apply(payload) + + assert new_payload == payload + assert updated is False + + +def test_mrf_inbox_registry_apply_discard(mrf_inbox_registry, mocker): + rule1 = mocker.Mock(return_value=None) + rule2 = mocker.Mock(side_effect=mrf.Discard()) + + mrf_inbox_registry.register(rule1, name="rule1") + mrf_inbox_registry.register(rule2, name="rule2") + + payload = {"hello": "world"} + assert mrf_inbox_registry.apply(payload, arg1="value1") == (None, False) + + rule1.assert_called_once_with(payload, arg1="value1") + rule2.assert_called_once_with(payload, arg1="value1") + + +def test_mrf_inbox_registry_use_returned_payload(mrf_inbox_registry, mocker): + rule1 = mocker.Mock(return_value="payload1") + rule2 = mocker.Mock(return_value="payload2") + + mrf_inbox_registry.register(rule1, name="rule1") + mrf_inbox_registry.register(rule2, name="rule2") + + payload = {"hello": "world"} + + assert mrf_inbox_registry.apply(payload) == ("payload2", True) + rule1.assert_called_once_with(payload) + rule2.assert_called_once_with("payload1") + + +def test_mrf_inbox_registry_skip_errors(mrf_inbox_registry, mocker): + rule1 
= mocker.Mock(side_effect=Exception()) + + mrf_inbox_registry.register(rule1, name="rule1") + + assert mrf_inbox_registry.apply("payload") == ("payload", False) diff --git a/api/tests/moderation/test_mrf_policies.py b/api/tests/moderation/test_mrf_policies.py new file mode 100644 index 0000000000..877123e6ba --- /dev/null +++ b/api/tests/moderation/test_mrf_policies.py @@ -0,0 +1,63 @@ +import pytest + +from funkwhale_api.moderation import mrf +from funkwhale_api.moderation import mrf_policies + + +@pytest.mark.parametrize( + "enabled, payload, kwargs, allowed_domains, expected", + [ + # allow listing enabled, domain on allowed list -> nothing happens + ( + True, + {"id": "http://allowed.example"}, + {"sender_id": "http://allowed.example/actor"}, + ["allowed.example"], + None, + ), + # allow listing enabled, domain NOT on allowed list -> message discarded + ( + True, + {"id": "http://notallowed.example"}, + {"sender_id": "http://notallowed.example/actor"}, + ["allowed.example"], + mrf.Discard, + ), + # allow listing disabled -> policy skipped + ( + False, + {"id": "http://allowed.example"}, + {"sender_id": "http://allowed.example/actor"}, + [], + mrf.Skip, + ), + # multiple domains to check, one is not allowed -> message discarded + ( + True, + {"id": "http://allowed.example"}, + {"sender_id": "http://notallowed.example/actor"}, + ["allowed.example"], + mrf.Discard, + ), + # multiple domains to check, all allowed -> nothing happens + ( + True, + {"id": "http://allowed.example"}, + {"sender_id": "http://anotherallowed.example/actor"}, + ["allowed.example", "anotherallowed.example"], + None, + ), + ], +) +def test_allow_list_policy( + enabled, payload, kwargs, expected, allowed_domains, preferences, factories +): + preferences["moderation__allow_list_enabled"] = enabled + for d in allowed_domains: + factories["federation.Domain"](name=d, allowed=True) + + if expected: + with pytest.raises(expected): + mrf_policies.check_allow_list(payload, **kwargs) + else: + assert 
mrf_policies.check_allow_list(payload, **kwargs) == expected diff --git a/dev.yml b/dev.yml index 7c58b91059..7e4adc8ffb 100644 --- a/dev.yml +++ b/dev.yml @@ -53,6 +53,7 @@ services: volumes: - ./api:/app - "${MUSIC_DIRECTORY_SERVE_PATH-./data/music}:/music:ro" + - "./data/plugins:/srv/funkwhale/plugins" environment: - "FUNKWHALE_HOSTNAME=${FUNKWHALE_HOSTNAME-localhost}" - "FUNKWHALE_HOSTNAME_SUFFIX=funkwhale.test" @@ -90,6 +91,7 @@ services: volumes: - ./api:/app - "${MUSIC_DIRECTORY_SERVE_PATH-./data/music}:/music:ro" + - "./data/plugins:/srv/funkwhale/plugins" networks: - internal nginx: diff --git a/docs/admin/index.rst b/docs/admin/index.rst index 8d80e3e0ad..fa66eeca62 100644 --- a/docs/admin/index.rst +++ b/docs/admin/index.rst @@ -26,6 +26,7 @@ Administration commands url upgrading + mrf Troubleshooting Issues ---------------------- diff --git a/docs/admin/mrf.rst b/docs/admin/mrf.rst new file mode 100644 index 0000000000..1d905e72b1 --- /dev/null +++ b/docs/admin/mrf.rst @@ -0,0 +1,117 @@ +Message Rewrite Facility (MRF) +============================== + +Funkwhale includes a feature that mimics `Pleroma's Message Rewrite Facility <https://docs-develop.pleroma.social/mrf.html>`_. +Using the MRF, instance admins can write and configure custom and automated moderation rules +that couldn't be implemented otherwise using :doc:`our other built-in moderation tools <../moderator/index>`. + +Architecture +------------ + +The MRF is a pluggable system that will process messages and forward those to the list +of registered policies, in turn. Each policy can mutate the message, leave it as is, or discard it entirely. 
+ +Some of our built-in moderation tools are actually implemented as MRF policies, e.g.: + +- Allow-list, when checking incoming messages (`code <https://dev.funkwhale.audio/funkwhale/funkwhale/blob/develop/api/funkwhale_api/moderation/mrf_policies.py>`_) +- Domain and user blocking, when checking incoming messages (`code <https://dev.funkwhale.audio/funkwhale/funkwhale/blob/develop/api/funkwhale_api/federation/mrf_policies.py>`_) + +.. note:: + + While Pleroma MRF policies can also affect outgoing messages, this is not supported yet in Funkwhale. + + +Disclaimer +---------- + +Writing custom MRF policies can negatively impact the performance and stability of your pod, as well as message +delivery. Your policy will be called every time a message is delivered, so ensure you don't execute +any slow operation here. + +Please note that the Funkwhale developers consider custom MRF policy modules to fall under the purview of the AGPL. As such, you are obligated to release the sources to your custom MRF policy modules upon request. + +Writing your first MRF policy +----------------------------- + +MRF Policies are written as Python 3 functions that take at least one ``payload`` parameter. +This payload is the raw ActivityPub message, received via HTTP, after the HTTP signature check. + +In the example below we write a policy that discards all Follow requests from listed domains: + +.. 
code-block:: python + + import urllib.parse + from funkwhale_api.moderation import mrf + + BLOCKED_FOLLOW_DOMAINS = ['domain1.com', 'botdomain.org'] + + # registering the policy is required to have it applied + # the name can be anything you want, it will appear in the mrf logs + @mrf.inbox.register(name='blocked_follow_domains') + def blocked_follow_domains_policy(payload, **kwargs): + actor_id = payload.get('actor') + domain = urllib.parse.urlparse(actor_id).hostname + if domain not in BLOCKED_FOLLOW_DOMAINS: + # raising mrf.Skip isn't strictly necessary but it provides + # more info in the debug logs. Otherwise, you can simply return + raise mrf.Skip("This domain isn't blocked") + + activity_type = payload.get('type') + object_type = payload.get('object', {}).get('type') + + if object_type == 'Follow' and activity_type == 'Create': + raise mrf.Discard('Follow from blocked domain') + + +This code must be stored in a Funkwhale plugin. To create one, just execute the following: + +.. code-block:: shell + + # plugin name must contain only ASCII letters, numbers and underscores + export PLUGIN_NAME="myplugin" + # this is the default path where Funkwhale will look for plugins + # if you want to use another path, update this path and ensure + # FUNKWHALE_PLUGINS_PATH is set to the same value in your .env + export PLUGINS_PATH="/srv/funkwhale/plugins/" + mkdir -p $PLUGINS_PATH/$PLUGIN_NAME + cd $PLUGINS_PATH/$PLUGIN_NAME + + touch __init__.py # required to make the plugin a valid Python package + # create the required apps.py file to register our plugin in Funkwhale + cat > apps.py <<EOF + from django.apps import AppConfig + + class Plugin(AppConfig): + name = "$PLUGIN_NAME" + + EOF + +Once you have a Funkwhale plugin, simply put your MRF policy code inside a ``mrf_policies.py`` +file within the plugin directory. 
Then enable the plugin in your ``.env`` by +adding its name to the comma-separated list of ``FUNKWHALE_PLUGINS`` (add the variable if it's not there). 
+ + +Testing a MRF policy +-------------------- + +To make the job of writing and debugging MRF policies easier, we provide a management +command: + +.. code-block:: shell + + python manage.py mrf_check --help + # list registered MRF policies + python manage.py mrf_check --list + + # check how our MRF would handle a legit follow + export MRF_MESSAGE='{"actor": "https://normal.domain/@alice", "type": "Create", "object": {"type": "Follow"}}' + echo $MRF_MESSAGE | python manage.py mrf_check inbox - -p blocked_follow_domains + + # check how our MRF would handle a problematic follow + export MRF_MESSAGE='{"actor": "https://botdomain.org/@bob", "type": "Create", "object": {"type": "Follow"}}' + echo $MRF_MESSAGE | python manage.py mrf_check inbox - -p blocked_follow_domains + + # check against an activity already present in the database + # you can get the UUID of activities by visiting /api/admin/federation/activity + export ACTIVITY_UUID="06208aea-c687-4e8b-aefd-22f1c3f76039" + python manage.py mrf_check inbox $ACTIVITY_UUID -p blocked_follow_domains -- GitLab