Commit 08262529 authored by Eliot Berriot's avatar Eliot Berriot 💬

MRF / Dropping of incoming/outgoing messages based on allow-list

parent 371dc012
......@@ -11,7 +11,8 @@ https://docs.djangoproject.com/en/dev/ref/settings/
from __future__ import absolute_import, unicode_literals
import datetime
import logging
import logging.config
import sys
from urllib.parse import urlsplit
......@@ -20,13 +21,44 @@ from celery.schedules import crontab
from funkwhale_api import __version__
logger = logging.getLogger(__name__)
logger = logging.getLogger("funkwhale_api.config")
ROOT_DIR = environ.Path(__file__) - 3 # (/a/b/myfile.py - 3 = /)
APPS_DIR = ROOT_DIR.path("funkwhale_api")
env = environ.Env()
# Level applied to the "funkwhale_api" logger below. Read from the LOGLEVEL
# environment variable (defaults to "info") and upper-cased so it matches the
# level names used by the logging module.
LOGLEVEL = env("LOGLEVEL", default="info").upper()
# Logging is configured explicitly through dictConfig() right below, so
# Django's own logging setup is disabled (see Django's LOGGING_CONFIG setting).
LOGGING_CONFIG = None
logging.config.dictConfig(
    {
        "version": 1,
        "disable_existing_loggers": False,
        "formatters": {
            "console": {"format": "%(asctime)s %(name)-12s %(levelname)-8s %(message)s"}
        },
        "handlers": {
            "console": {"class": "logging.StreamHandler", "formatter": "console"},
            # # Add Handler for Sentry for `warning` and above
            # 'sentry': {
            #     'level': 'WARNING',
            #     'class': 'raven.contrib.django.raven_compat.handlers.SentryHandler',
            # },
        },
        "loggers": {
            # project loggers: verbosity is driven by LOGLEVEL
            "funkwhale_api": {
                "level": LOGLEVEL,
                "handlers": ["console"],
                # required to avoid double logging with root logger
                "propagate": False,
            },
            # root logger: everything else is only reported from WARNING upwards
            "": {"level": "WARNING", "handlers": ["console"]},
        },
    }
)
env_file = env("ENV_FILE", default=None)
if env_file:
logger.info("Loading specified env file at %s", env_file)
# we have an explicitly specified env file
# so we try to load it and fail loudly if it does not exist
env.read_env(env_file)
......@@ -49,6 +81,11 @@ else:
logger.info("Loaded env file at %s/.env", path)
break
# Directory holding plugins. Can be overridden with the FUNKWHALE_PLUGINS_PATH
# environment variable and defaults to /srv/funkwhale/plugins/.
FUNKWHALE_PLUGINS_PATH = env(
    "FUNKWHALE_PLUGINS_PATH", default="/srv/funkwhale/plugins/"
)
# Appended to the import path so modules stored in that directory are importable.
sys.path.append(FUNKWHALE_PLUGINS_PATH)
FUNKWHALE_HOSTNAME = None
FUNKWHALE_HOSTNAME_SUFFIX = env("FUNKWHALE_HOSTNAME_SUFFIX", default=None)
FUNKWHALE_HOSTNAME_PREFIX = env("FUNKWHALE_HOSTNAME_PREFIX", default=None)
......@@ -146,7 +183,8 @@ if RAVEN_ENABLED:
"release": __version__,
}
THIRD_PARTY_APPS += ("raven.contrib.django.raven_compat",)
# NOTE(review): Logger.addHandler() expects a logging.Handler instance, but it
# receives the string "sentry" here, and the "sentry" handler entry in the
# dictConfig() call of this file is commented out. TODO confirm whether a
# SentryHandler instance should be attached instead.
logging.getLogger("").addHandler("sentry")
logging.getLogger("funkwhale_api").addHandler("sentry")
# Apps specific for this project go here.
LOCAL_APPS = (
......@@ -160,7 +198,7 @@ LOCAL_APPS = (
"funkwhale_api.requests",
"funkwhale_api.favorites",
"funkwhale_api.federation",
"funkwhale_api.moderation",
"funkwhale_api.moderation.apps.ModerationConfig",
"funkwhale_api.radios",
"funkwhale_api.history",
"funkwhale_api.playlists",
......@@ -169,7 +207,19 @@ LOCAL_APPS = (
# See: https://docs.djangoproject.com/en/dev/ref/settings/#installed-apps
INSTALLED_APPS = DJANGO_APPS + THIRD_PARTY_APPS + LOCAL_APPS
# Plugins are enabled by listing their module names in FUNKWHALE_PLUGINS;
# empty entries are dropped.
PLUGINS = [name for name in env.list("FUNKWHALE_PLUGINS", default=[]) if name]
if not PLUGINS:
    logger.info("Running with no plugins")
else:
    logger.info("Running with the following plugins enabled: %s", ", ".join(PLUGINS))
# Each plugin is installed through its `<plugin>.apps.Plugin` application config,
# after the Django, third-party and local apps.
INSTALLED_APPS = (
    DJANGO_APPS
    + THIRD_PARTY_APPS
    + LOCAL_APPS
    + tuple("{}.apps.Plugin".format(name) for name in PLUGINS)
)
# MIDDLEWARE CONFIGURATION
# ------------------------------------------------------------------------------
......
......@@ -73,50 +73,4 @@ TEMPLATES[0]["OPTIONS"]["loaders"] = [
# ------------------------------------------------------------------------------
# Heroku URL does not pass the DB number, so we parse it in
# LOGGING CONFIGURATION
# ------------------------------------------------------------------------------
# See: https://docs.djangoproject.com/en/dev/ref/settings/#logging
# A sample logging configuration. The only tangible logging
# performed by this configuration is to send an email to
# the site admins on every HTTP 500 error when DEBUG=False.
# See http://docs.djangoproject.com/en/dev/topics/logging for
# more details on how to customize your logging configuration.
LOGGING = {
"version": 1,
"disable_existing_loggers": False,
"filters": {"require_debug_false": {"()": "django.utils.log.RequireDebugFalse"}},
"formatters": {
"verbose": {
"format": "%(levelname)s %(asctime)s %(module)s "
"%(process)d %(thread)d %(message)s"
}
},
"handlers": {
"mail_admins": {
"level": "ERROR",
"filters": ["require_debug_false"],
"class": "django.utils.log.AdminEmailHandler",
},
"console": {
"level": "DEBUG",
"class": "logging.StreamHandler",
"formatter": "verbose",
},
},
"loggers": {
"django.request": {
"handlers": ["mail_admins"],
"level": "ERROR",
"propagate": True,
},
"django.security.DisallowedHost": {
"level": "ERROR",
"handlers": ["console", "mail_admins"],
"propagate": True,
},
},
}
# Your production stuff: Below this line define 3rd party library settings
import uuid
import logging
import urllib.parse
from django.core.cache import cache
from django.conf import settings
......@@ -122,32 +123,38 @@ def receive(activity, on_behalf_of):
from . import serializers
from . import tasks
from .routes import inbox
from funkwhale_api.moderation import mrf
logger.debug(
"[federation] Received activity from %s : %s", on_behalf_of.fid, activity
)
# we ensure the activity has the bare minimum structure before storing
# it in our database
serializer = serializers.BaseActivitySerializer(
data=activity, context={"actor": on_behalf_of, "local_recipients": True}
)
serializer.is_valid(raise_exception=True)
if not inbox.get_matching_handlers(activity):
# discard unhandlable activity
return
if should_reject(
fid=serializer.validated_data.get("id"),
actor_id=serializer.validated_data["actor"].fid,
payload=activity,
):
payload, updated = mrf.inbox.apply(activity, sender_id=on_behalf_of.fid)
if not payload:
logger.info(
"[federation] Discarding activity due to instance policies %s",
"[federation] Discarding activity due to mrf %s",
serializer.validated_data.get("id"),
)
return
if not inbox.get_matching_handlers(payload):
# discard unhandlable activity
logger.debug(
"[federation] No matching route found for activity, discarding: %s", payload
)
return
try:
copy = serializer.save()
copy = serializer.save(payload=payload, type=payload["type"])
except IntegrityError:
logger.warning(
"[federation] Discarding already elivered activity %s",
"[federation] Discarding already delivered activity %s",
serializer.validated_data.get("id"),
)
return
......@@ -283,9 +290,19 @@ class OutboxRouter(Router):
and may yield data that should be persisted in the Activity model
for further delivery.
"""
from funkwhale_api.common import preferences
from . import models
from . import tasks
allow_list_enabled = preferences.get("moderation__allow_list_enabled")
allowed_domains = None
if allow_list_enabled:
allowed_domains = set(
models.Domain.objects.filter(allowed=True).values_list(
"name", flat=True
)
)
for route, handler in self.routes:
if not match_route(route, routing):
continue
......@@ -314,10 +331,10 @@ class OutboxRouter(Router):
a = models.Activity(**activity_data)
a.uuid = uuid.uuid4()
to_inbox_items, to_deliveries, new_to = prepare_deliveries_and_inbox_items(
to, "to"
to, "to", allowed_domains=allowed_domains
)
cc_inbox_items, cc_deliveries, new_cc = prepare_deliveries_and_inbox_items(
cc, "cc"
cc, "cc", allowed_domains=allowed_domains
)
if not any(
[to_inbox_items, to_deliveries, cc_inbox_items, cc_deliveries]
......@@ -374,7 +391,14 @@ def match_route(route, payload):
return True
def prepare_deliveries_and_inbox_items(recipient_list, type):
def is_allowed_url(url, allowed_domains):
    """
    Tell whether ``url`` may be contacted given ``allowed_domains``.

    ``allowed_domains`` set to None means no restriction applies. Otherwise,
    the hostname of ``url`` must be one of the given domains.
    """
    if allowed_domains is None:
        return True
    hostname = urllib.parse.urlparse(url).hostname
    return hostname in allowed_domains
def prepare_deliveries_and_inbox_items(recipient_list, type, allowed_domains=None):
"""
Given a list of recipients (
either actor instances, public adresses, a dictionnary with a "type" and "target"
......@@ -384,10 +408,12 @@ def prepare_deliveries_and_inbox_items(recipient_list, type):
"""
from . import models
if allowed_domains is not None:
allowed_domains = set(allowed_domains)
allowed_domains.add(settings.FEDERATION_HOSTNAME)
local_recipients = set()
remote_inbox_urls = set()
urls = []
for r in recipient_list:
if isinstance(r, models.Actor):
if r.is_local:
......@@ -432,7 +458,13 @@ def prepare_deliveries_and_inbox_items(recipient_list, type):
values = actors.values("shared_inbox_url", "inbox_url")
for v in values:
remote_inbox_urls.add(v["shared_inbox_url"] or v["inbox_url"])
deliveries = [models.Delivery(inbox_url=url) for url in remote_inbox_urls]
deliveries = [
models.Delivery(inbox_url=url)
for url in remote_inbox_urls
if is_allowed_url(url, allowed_domains)
]
urls = [url for url in urls if is_allowed_url(url, allowed_domains)]
inbox_items = [
models.InboxItem(actor=actor, type=type) for actor in local_recipients
]
......
......@@ -41,7 +41,7 @@ class FetchAdmin(admin.ModelAdmin):
@admin.register(models.Activity)
class ActivityAdmin(admin.ModelAdmin):
list_display = ["type", "fid", "url", "actor", "creation_date"]
list_display = ["uuid", "type", "fid", "url", "actor", "creation_date"]
search_fields = ["payload", "fid", "url", "actor__domain__name"]
list_filter = ["type", "actor__domain__name"]
actions = [redeliver_activities]
......
......@@ -70,6 +70,7 @@ def create_user(actor):
class DomainFactory(NoUpdateOnCreate, factory.django.DjangoModelFactory):
name = factory.Faker("domain_name")
nodeinfo_fetch_date = factory.LazyFunction(lambda: timezone.now())
allowed = None
class Meta:
model = "federation.Domain"
......
from funkwhale_api.moderation import mrf
from . import activity
@mrf.inbox.register(name="instance_policies")
def instance_policies(payload, **kwargs):
    """
    MRF inbox policy that delegates the decision to ``activity.should_reject``.

    The actor checked is the ``sender_id`` keyword argument when provided,
    and the payload "id" otherwise.

    Raises:
        mrf.Discard: when ``activity.should_reject`` rejects the payload.
    """
    payload_id = payload.get("id")
    actor_id = kwargs.get("sender_id", payload_id)
    if activity.should_reject(fid=payload_id, actor_id=actor_id, payload=payload):
        raise mrf.Discard()
......@@ -4,7 +4,7 @@ from django.conf import settings
from django.db.models import Q
from funkwhale_api.common import session
from funkwhale_api.moderation import models as moderation_models
from funkwhale_api.moderation import mrf
from . import exceptions
from . import signing
......@@ -64,10 +64,10 @@ def slugify_username(username):
def retrieve_ap_object(
fid, actor, serializer_class=None, queryset=None, apply_instance_policies=True
):
from . import activity
policies = moderation_models.InstancePolicy.objects.active().filter(block_all=True)
if apply_instance_policies and policies.matching_url(fid):
# we have a duplicate check here because it's less expensive to do those checks
# twice than to trigger a HTTP request
payload, updated = mrf.inbox.apply({"id": fid})
if not payload:
raise exceptions.BlockedActorOrDomain()
if queryset:
try:
......@@ -94,15 +94,12 @@ def retrieve_ap_object(
response.raise_for_status()
data = response.json()
# we match against moderation policies here again, because the FID of the returned
# object may not be the same as the URL used to access it
try:
id = data["id"]
except KeyError:
pass
else:
if apply_instance_policies and activity.should_reject(fid=id, payload=data):
raise exceptions.BlockedActorOrDomain()
# we match against mrf here again, because new data may yield different
# results
data, updated = mrf.inbox.apply(data)
if not data:
raise exceptions.BlockedActorOrDomain()
if not serializer_class:
return data
serializer = serializer_class(data=data, context={"fetch_actor": actor})
......
from django.apps import AppConfig, apps
from . import mrf
class ModerationConfig(AppConfig):
    """Application config of the moderation app, in charge of loading MRF policies."""

    name = "funkwhale_api.moderation"

    def ready(self):
        """Run MRF inbox autodiscovery against every installed application."""
        super().ready()
        installed_app_names = [config.name for config in apps.app_configs.values()]
        mrf.inbox.autodiscover(installed_app_names)
from dynamic_preferences import types
from dynamic_preferences.registries import global_preferences_registry
moderation = types.Section("Moderation")
moderation = types.Section("moderation")
@global_preferences_registry.register
......
import json
import sys
import uuid
import logging
from django.core.management.base import BaseCommand, CommandError
from django.core import validators
from django.conf import settings
from funkwhale_api.common import session
from funkwhale_api.federation import models
from funkwhale_api.moderation import mrf
def is_uuid(v):
    """
    Return True when ``v`` is a string that can be parsed as a UUID.

    Any value that cannot be parsed yields False, including non-string values
    such as None or integers.
    """
    try:
        uuid.UUID(v)
    except (ValueError, TypeError, AttributeError):
        # ValueError: malformed string
        # TypeError / AttributeError: v is None or not a string
        return False
    return True
def is_url(v):
    """Return True when ``v`` is accepted by Django's ``URLValidator``, False otherwise."""
    check_url = validators.URLValidator()
    try:
        check_url(v)
    except (ValueError, validators.ValidationError):
        return False
    else:
        return True
class Command(BaseCommand):
    """
    Run a payload through the MRF policies and report the outcome.

    The payload can be read from stdin, from a file, from an URL or from an
    Activity stored in the database. Without any input, the names of the
    registered policies are listed instead.
    """

    help = "Check a given message against all or a specific MRF rule"

    def add_arguments(self, parser):
        """Declare the ``type`` and ``input`` positionals and the ``--policy`` option."""
        parser.add_argument(
            "type",
            type=str,
            choices=["inbox"],
            help="The type of MRF. Only inbox is supported at the moment",
        )
        parser.add_argument(
            "input",
            nargs="?",
            # this has to be a plain string (no trailing comma inside the
            # parentheses): argparse formats the help text itself
            help=(
                "The path to a file containing JSON data. Use - to read from stdin. "
                "If no input is provided, registered MRF policies will be listed "
                "instead."
            ),
        )
        parser.add_argument(
            "--policy",
            "-p",
            dest="policies",
            nargs="+",
            default=False,
            help="Restrict to a list of MRF policies that will be applied, in that order",
        )

    def handle(self, *args, **options):
        """
        Load the payload designated by ``options["input"]`` and apply the MRF to it.

        Raises:
            CommandError: when a policy given with ``--policy`` is not registered.
        """
        # send MRF logs (debug level included) to stderr
        logger = logging.getLogger("funkwhale.mrf")
        logger.setLevel(logging.DEBUG)
        logger.addHandler(logging.StreamHandler(stream=sys.stderr))

        registry = getattr(mrf, options["type"])
        source = options["input"]
        if not source:
            self.stdout.write(
                "No input given, listing registered policies for '{}' MRF:".format(
                    options["type"]
                )
            )
            for name in registry.keys():
                self.stdout.write("- {}".format(name))
            return

        # raw_content holds undecoded JSON, content the decoded payload;
        # depending on the source, only one of them is filled at first
        raw_content = None
        content = None
        if source == "-":
            raw_content = sys.stdin.read()
        elif is_uuid(source):
            self.stderr.write("UUID provided, retrieving payload from db")
            content = models.Activity.objects.get(uuid=source).payload
        elif is_url(source):
            response = session.get_session().get(
                source,
                timeout=5,
                verify=settings.EXTERNAL_REQUESTS_VERIFY_SSL,
                headers={"Content-Type": "application/activity+json"},
            )
            response.raise_for_status()
            content = response.json()
        else:
            with open(source, "rb") as f:
                raw_content = f.read()
        content = json.loads(raw_content) if content is None else content

        policies = options["policies"] or []
        for policy in policies:
            if policy not in registry:
                raise CommandError(
                    "Unknown policy '{}' for MRF '{}'".format(policy, options["type"])
                )

        payload, updated = registry.apply(content, policies=policies)
        # status messages go to stderr, payloads to stdout
        if not payload:
            self.stderr.write("Payload was discarded by MRF")
        elif updated:
            self.stderr.write("Payload was modified by MRF")
            self.stderr.write("Initial payload:\n")
            self.stdout.write(json.dumps(content, indent=2, sort_keys=True))
            self.stderr.write("Modified payload:\n")
            self.stdout.write(json.dumps(payload, indent=2, sort_keys=True))
        else:
            self.stderr.write("Payload left untouched by MRF")
"""
Inspired from the MRF logic from Pleroma, see https://docs-develop.pleroma.social/mrf.html
To support pluggable / customizable moderation using a programming language if
our exposed features aren't enough.
"""
import logging
import persisting_theory
logger = logging.getLogger("funkwhale.mrf")
class MRFException(Exception):
    """Base class of the exceptions MRF policies use to signal a decision."""


class Discard(MRFException):
    """Raised by a policy to have the message dropped."""


class Skip(MRFException):
    """Raised by a policy that does not apply and should be passed over."""
class Registry(persisting_theory.Registry):
    """
    Registry of MRF policies, i.e. callables taking a payload and keyword arguments.

    ``name`` is only used to prefix log messages.
    """

    # presumably the submodule name searched in each app by autodiscover()
    look_into = "mrf_policies"

    def __init__(self, name=""):
        self.name = name
        super().__init__()

    def apply(self, payload, **kwargs):
        """
        Run ``payload`` through the registered policies, one after the other.

        A ``policies`` keyword argument (list of names) restricts the run to
        those policies, in the given order. Remaining keyword arguments are
        forwarded to each policy.

        A policy may return a new payload (a falsy return value leaves the
        payload as is), raise ``Skip`` to be ignored or raise ``Discard`` to
        drop the message. Any other exception is logged and the policy ignored.

        Returns:
            A ``(payload, updated)`` tuple, or ``(None, False)`` when the
            message was discarded.
        """
        restricted_names = kwargs.pop("policies", [])
        if restricted_names:
            logger.debug(
                "[MRF.%s] Running restricted list of policies %s…",
                self.name,
                ", ".join(restricted_names),
            )
            selected = [(name, self[name]) for name in restricted_names]
        else:
            selected = self.items()

        updated = False
        for policy_name, policy in selected:
            logger.debug("[MRF.%s] Applying mrf policy '%s'…", self.name, policy_name)
            try:
                result = policy(payload, **kwargs)
            except Skip as e:
                logger.debug(
                    "[MRF.%s] Skipped policy %s because '%s'",
                    self.name,
                    policy_name,
                    str(e),
                )
                continue
            except Discard as e:
                logger.info(
                    "[MRF.%s] Discarded message per policy '%s' because '%s'",
                    self.name,
                    policy_name,
                    str(e),
                )
                return (None, False)
            except Exception:
                # a failing policy must not prevent the other ones from running
                logger.exception(
                    "[MRF.%s] Error while applying policy '%s'!", self.name, policy_name
                )
                continue

            if not result:
                # nothing returned: the payload is kept as is
                continue
            payload, updated = result, True

        return payload, updated
inbox = Registry("inbox")
import urllib.parse
from funkwhale_api.common import preferences
from funkwhale_api.common import utils
from funkwhale_api.federation import models as federation_models
from funkwhale_api.moderation import mrf
def _get_id_hostname(value):
    """
    Return the hostname of an ActivityPub id, or None if there is none.

    ``value`` is either the id itself (a URL string) or an embedded object,
    in which case its "id" key is used. Any other value yields None.
    """
    if isinstance(value, dict):
        value = value.get("id")
    if not value or not isinstance(value, str):
        return None
    return urllib.parse.urlparse(value).hostname


@mrf.inbox.register(name="allow_list")
def check_allow_list(payload, **kwargs):
    """
    A MRF policy that only works when the moderation__allow_list_enabled
    setting is on.

    It will extract domain names from the actor ID, the sender ID (or the
    activity ID when no ``sender_id`` keyword argument is given) and the
    activity object ID, and discard the activity if any of those domain names
    isn't on the allow list.

    Raises:
        mrf.Skip: when allow-listing is disabled.
        mrf.Discard: when at least one extracted domain is not allowed.
    """
    if not preferences.get("moderation__allow_list_enabled"):
        raise mrf.Skip("Allow-listing is disabled")

    allowed_domains = set(
        federation_models.Domain.objects.filter(allowed=True).values_list(
            "name", flat=True
        )
    )

    relevant_ids = [
        # may be an embedded actor object instead of a URL: passing a dict to
        # urlparse() would raise, and a policy that raises is ignored by the
        # registry, which would let the message through
        payload.get("actor"),
        kwargs.get("sender_id", payload.get("id")),
        utils.recursive_getattr(payload, "object.id", permissive=True),
    ]
    relevant_domains = {
        hostname for hostname in map(_get_id_hostname, relevant_ids) if hostname
    }

    forbidden_domains = relevant_domains - allowed_domains
    if forbidden_domains:
        raise mrf.Discard(
            "These domains are not allowed: {}".format(
                # sorted so the message is stable from one run to the next
                ", ".join(sorted(forbidden_domains))
            )
        )
......@@ -25,3 +25,4 @@ env =
CREATE_IMAGE_THUMBNAILS=False
FORCE_HTTPS_URLS=False
FUNKWHALE_SPA_HTML_ROOT=http://noop/
PROXY_MEDIA=true
......@@ -29,6 +29,7 @@ from rest_framework.test import APIClient, APIRequestFactory
from funkwhale_api.activity import record
from funkwhale_api.federation import actors
from funkwhale_api.moderation import mrf
pytest_plugins = "aiohttp.pytest_plugin"
......@@ -422,3 +423,17 @@ def a_responses():
@pytest.fixture
def service_actor(db):
return actors.get_service_actor()
@pytest.fixture
def mrf_inbox_registry(mocker):
    """Replace ``mrf.inbox`` with a fresh registry and return that registry."""
    fresh_registry = mrf.Registry()
    mocker.patch("funkwhale_api.moderation.mrf.inbox", new=fresh_registry)
    return fresh_registry
@pytest.fixture
def mrf_outbox_registry(mocker):
    """Expose a fresh registry as ``mrf.outbox`` and return that registry."""
    registry = mrf.Registry()
    # the mrf module only declares an `inbox` registry: create=True lets the
    # patch add the missing attribute instead of failing with AttributeError
    mocker.patch("funkwhale_api.moderation.mrf.outbox", registry, create=True)
    return registry
......@@ -13,10 +13,13 @@ from funkwhale_api.federation import (
)
def test_receive_validates_basic_attributes_and_stores_activity(factories, now, mocker):
def test_receive_validates_basic_attributes_and_stores_activity(
mrf_inbox_registry, factories, now, mocker
):
mocker.patch.object(
activity.InboxRouter, "get_matching_handlers", return_value=True
)
mrf_inbox_registry_apply = mocker.spy(mrf_inbox_registry, "apply")
mocked_dispatch = mocker.patch("funkwhale_api.common.utils.on_commit")
local_to_actor = factories["users.User"]().create_actor()
local_cc_actor = factories["users.User"]().create_actor()
......@@ -31,6 +34,7 @@ def test_receive_validates_basic_attributes_and_stores_activity(factories, now,
}
copy = activity.receive(activity=a, on_behalf_of=remote_actor)
mrf_inbox_registry_apply.assert_called_once_with(a, sender_id=a["actor"])
assert copy.payload == a
assert copy.creation_date >= now
......@@ -49,6 +53,63 @@ def test_receive_validates_basic_attributes_and_stores_activity(factories, now,
assert ii.is_read is False
def test_receive_uses_mrf_returned_payload(mrf_inbox_registry, factories, now, mocker):
mocker.patch.object(
activity.InboxRouter, "get_matching_handlers", return_value=True
)
def patched_apply(payload, **kwargs):
payload["type"] = "SomethingElse"
return payload, True
mrf_inbox_registry_apply = mocker.patch.object(
mrf_inbox_registry, "apply", side_effect=patched_apply
)
mocked_dispatch = mocker.patch("funkwhale_api.common.utils.on_commit")
local_to_actor = factories["use