Commit 08262529 authored by Eliot Berriot's avatar Eliot Berriot
Browse files

MRF / Dropping of incoming/outgoing messages based on allow-list

parent 371dc012
......@@ -11,7 +11,8 @@ https://docs.djangoproject.com/en/dev/ref/settings/
from __future__ import absolute_import, unicode_literals
import datetime
import logging
import logging.config
import sys
from urllib.parse import urlsplit
......@@ -20,13 +21,44 @@ from celery.schedules import crontab
from funkwhale_api import __version__
logger = logging.getLogger(__name__)
logger = logging.getLogger("funkwhale_api.config")
ROOT_DIR = environ.Path(__file__) - 3 # (/a/b/myfile.py - 3 = /)
APPS_DIR = ROOT_DIR.path("funkwhale_api")
env = environ.Env()
# Log level applied to the "funkwhale_api" logger, read from the LOGLEVEL
# environment variable (defaults to "info") and upper-cased.
LOGLEVEL = env("LOGLEVEL", default="info").upper()
# Disable Django's automatic logging setup: logging is configured explicitly
# through the dictConfig() call below.
LOGGING_CONFIG = None
logging.config.dictConfig(
    {
        "version": 1,
        "disable_existing_loggers": False,
        "formatters": {
            "console": {"format": "%(asctime)s %(name)-12s %(levelname)-8s %(message)s"}
        },
        "handlers": {
            "console": {"class": "logging.StreamHandler", "formatter": "console"},
            # # Add Handler for Sentry for `warning` and above
            # 'sentry': {
            #     'level': 'WARNING',
            #     'class': 'raven.contrib.django.raven_compat.handlers.SentryHandler',
            # },
        },
        "loggers": {
            "funkwhale_api": {
                "level": LOGLEVEL,
                "handlers": ["console"],
                # required to avoid double logging with root logger
                "propagate": False,
            },
            # root logger: records from any other logger are emitted from
            # WARNING and above
            "": {"level": "WARNING", "handlers": ["console"]},
        },
    }
)
env_file = env("ENV_FILE", default=None)
if env_file:
logger.info("Loading specified env file at %s", env_file)
# we have an explicitly specified env file
# so we try to load it and fail loudly if it does not exist
env.read_env(env_file)
......@@ -49,6 +81,11 @@ else:
logger.info("Loaded env file at %s/.env", path)
break
# Directory holding plugin packages, configurable through the
# FUNKWHALE_PLUGINS_PATH environment variable. It is appended to sys.path so
# that the modules it contains can be imported by name.
FUNKWHALE_PLUGINS_PATH = env(
    "FUNKWHALE_PLUGINS_PATH", default="/srv/funkwhale/plugins/"
)
sys.path.append(FUNKWHALE_PLUGINS_PATH)
FUNKWHALE_HOSTNAME = None
FUNKWHALE_HOSTNAME_SUFFIX = env("FUNKWHALE_HOSTNAME_SUFFIX", default=None)
FUNKWHALE_HOSTNAME_PREFIX = env("FUNKWHALE_HOSTNAME_PREFIX", default=None)
......@@ -146,7 +183,8 @@ if RAVEN_ENABLED:
"release": __version__,
}
THIRD_PARTY_APPS += ("raven.contrib.django.raven_compat",)
logging.getLogger("").addHandler("sentry")
logging.getLogger("funkwhale_api").addHandler("sentry")
# Apps specific for this project go here.
LOCAL_APPS = (
......@@ -160,7 +198,7 @@ LOCAL_APPS = (
"funkwhale_api.requests",
"funkwhale_api.favorites",
"funkwhale_api.federation",
"funkwhale_api.moderation",
"funkwhale_api.moderation.apps.ModerationConfig",
"funkwhale_api.radios",
"funkwhale_api.history",
"funkwhale_api.playlists",
......@@ -169,7 +207,19 @@ LOCAL_APPS = (
# See: https://docs.djangoproject.com/en/dev/ref/settings/#installed-apps
INSTALLED_APPS = DJANGO_APPS + THIRD_PARTY_APPS + LOCAL_APPS
# Names of the enabled plugins, read from the FUNKWHALE_PLUGINS environment
# variable; empty entries are dropped.
PLUGINS = list(filter(None, env.list("FUNKWHALE_PLUGINS", default=[])))
if not PLUGINS:
    logger.info("Running with no plugins")
else:
    logger.info("Running with the following plugins enabled: %s", ", ".join(PLUGINS))
# Built-in applications come first, then one "<plugin>.apps.Plugin" app
# config per enabled plugin.
INSTALLED_APPS = DJANGO_APPS + THIRD_PARTY_APPS + LOCAL_APPS
INSTALLED_APPS += tuple("{}.apps.Plugin".format(p) for p in PLUGINS)
# MIDDLEWARE CONFIGURATION
# ------------------------------------------------------------------------------
......
......@@ -73,50 +73,4 @@ TEMPLATES[0]["OPTIONS"]["loaders"] = [
# ------------------------------------------------------------------------------
# Heroku URL does not pass the DB number, so we parse it in
# LOGGING CONFIGURATION
# ------------------------------------------------------------------------------
# See: https://docs.djangoproject.com/en/dev/ref/settings/#logging
# A sample logging configuration. The only tangible logging
# performed by this configuration is to send an email to
# the site admins on every HTTP 500 error when DEBUG=False.
# See http://docs.djangoproject.com/en/dev/topics/logging for
# more details on how to customize your logging configuration.
LOGGING = {
"version": 1,
"disable_existing_loggers": False,
"filters": {"require_debug_false": {"()": "django.utils.log.RequireDebugFalse"}},
"formatters": {
"verbose": {
"format": "%(levelname)s %(asctime)s %(module)s "
"%(process)d %(thread)d %(message)s"
}
},
"handlers": {
"mail_admins": {
"level": "ERROR",
"filters": ["require_debug_false"],
"class": "django.utils.log.AdminEmailHandler",
},
"console": {
"level": "DEBUG",
"class": "logging.StreamHandler",
"formatter": "verbose",
},
},
"loggers": {
"django.request": {
"handlers": ["mail_admins"],
"level": "ERROR",
"propagate": True,
},
"django.security.DisallowedHost": {
"level": "ERROR",
"handlers": ["console", "mail_admins"],
"propagate": True,
},
},
}
# Your production stuff: Below this line define 3rd party library settings
import uuid
import logging
import urllib.parse
from django.core.cache import cache
from django.conf import settings
......@@ -122,32 +123,38 @@ def receive(activity, on_behalf_of):
from . import serializers
from . import tasks
from .routes import inbox
from funkwhale_api.moderation import mrf
logger.debug(
"[federation] Received activity from %s : %s", on_behalf_of.fid, activity
)
# we ensure the activity has the bare minimum structure before storing
# it in our database
serializer = serializers.BaseActivitySerializer(
data=activity, context={"actor": on_behalf_of, "local_recipients": True}
)
serializer.is_valid(raise_exception=True)
if not inbox.get_matching_handlers(activity):
# discard unhandlable activity
return
if should_reject(
fid=serializer.validated_data.get("id"),
actor_id=serializer.validated_data["actor"].fid,
payload=activity,
):
payload, updated = mrf.inbox.apply(activity, sender_id=on_behalf_of.fid)
if not payload:
logger.info(
"[federation] Discarding activity due to instance policies %s",
"[federation] Discarding activity due to mrf %s",
serializer.validated_data.get("id"),
)
return
if not inbox.get_matching_handlers(payload):
# discard unhandlable activity
logger.debug(
"[federation] No matching route found for activity, discarding: %s", payload
)
return
try:
copy = serializer.save()
copy = serializer.save(payload=payload, type=payload["type"])
except IntegrityError:
logger.warning(
"[federation] Discarding already elivered activity %s",
"[federation] Discarding already delivered activity %s",
serializer.validated_data.get("id"),
)
return
......@@ -283,9 +290,19 @@ class OutboxRouter(Router):
and may yield data that should be persisted in the Activity model
for further delivery.
"""
from funkwhale_api.common import preferences
from . import models
from . import tasks
allow_list_enabled = preferences.get("moderation__allow_list_enabled")
allowed_domains = None
if allow_list_enabled:
allowed_domains = set(
models.Domain.objects.filter(allowed=True).values_list(
"name", flat=True
)
)
for route, handler in self.routes:
if not match_route(route, routing):
continue
......@@ -314,10 +331,10 @@ class OutboxRouter(Router):
a = models.Activity(**activity_data)
a.uuid = uuid.uuid4()
to_inbox_items, to_deliveries, new_to = prepare_deliveries_and_inbox_items(
to, "to"
to, "to", allowed_domains=allowed_domains
)
cc_inbox_items, cc_deliveries, new_cc = prepare_deliveries_and_inbox_items(
cc, "cc"
cc, "cc", allowed_domains=allowed_domains
)
if not any(
[to_inbox_items, to_deliveries, cc_inbox_items, cc_deliveries]
......@@ -374,7 +391,14 @@ def match_route(route, payload):
return True
def prepare_deliveries_and_inbox_items(recipient_list, type):
def is_allowed_url(url, allowed_domains):
    """
    Tell whether ``url`` may be contacted under the given allow-list.

    ``allowed_domains`` is either ``None``, meaning that no allow-list is
    enforced and every URL is accepted, or a collection of hostnames, in
    which case the URL is accepted only if its hostname is in the collection.

    A URL that cannot be parsed is rejected instead of raising, so a single
    malformed URL does not abort the caller.
    """
    if allowed_domains is None:
        return True
    try:
        hostname = urllib.parse.urlparse(url).hostname
    except ValueError:
        # urlparse() raises ValueError on a malformed network location,
        # e.g. unbalanced IPv6 brackets
        return False
    return hostname in allowed_domains
def prepare_deliveries_and_inbox_items(recipient_list, type, allowed_domains=None):
"""
Given a list of recipients (
either actor instances, public adresses, a dictionnary with a "type" and "target"
......@@ -384,10 +408,12 @@ def prepare_deliveries_and_inbox_items(recipient_list, type):
"""
from . import models
if allowed_domains is not None:
allowed_domains = set(allowed_domains)
allowed_domains.add(settings.FEDERATION_HOSTNAME)
local_recipients = set()
remote_inbox_urls = set()
urls = []
for r in recipient_list:
if isinstance(r, models.Actor):
if r.is_local:
......@@ -432,7 +458,13 @@ def prepare_deliveries_and_inbox_items(recipient_list, type):
values = actors.values("shared_inbox_url", "inbox_url")
for v in values:
remote_inbox_urls.add(v["shared_inbox_url"] or v["inbox_url"])
deliveries = [models.Delivery(inbox_url=url) for url in remote_inbox_urls]
deliveries = [
models.Delivery(inbox_url=url)
for url in remote_inbox_urls
if is_allowed_url(url, allowed_domains)
]
urls = [url for url in urls if is_allowed_url(url, allowed_domains)]
inbox_items = [
models.InboxItem(actor=actor, type=type) for actor in local_recipients
]
......
......@@ -41,7 +41,7 @@ class FetchAdmin(admin.ModelAdmin):
@admin.register(models.Activity)
class ActivityAdmin(admin.ModelAdmin):
list_display = ["type", "fid", "url", "actor", "creation_date"]
list_display = ["uuid", "type", "fid", "url", "actor", "creation_date"]
search_fields = ["payload", "fid", "url", "actor__domain__name"]
list_filter = ["type", "actor__domain__name"]
actions = [redeliver_activities]
......
......@@ -70,6 +70,7 @@ def create_user(actor):
class DomainFactory(NoUpdateOnCreate, factory.django.DjangoModelFactory):
name = factory.Faker("domain_name")
nodeinfo_fetch_date = factory.LazyFunction(lambda: timezone.now())
allowed = None
class Meta:
model = "federation.Domain"
......
from funkwhale_api.moderation import mrf
from . import activity
@mrf.inbox.register(name="instance_policies")
def instance_policies(payload, **kwargs):
    """
    Inbox MRF policy delegating the decision to ``activity.should_reject``.

    Raises ``mrf.Discard`` when the payload should be rejected, and returns
    nothing otherwise.
    """
    object_id = payload.get("id")
    # without an explicit sender_id, the payload id is used as the actor id
    sender = kwargs.get("sender_id", object_id)
    if activity.should_reject(fid=object_id, actor_id=sender, payload=payload):
        raise mrf.Discard()
......@@ -4,7 +4,7 @@ from django.conf import settings
from django.db.models import Q
from funkwhale_api.common import session
from funkwhale_api.moderation import models as moderation_models
from funkwhale_api.moderation import mrf
from . import exceptions
from . import signing
......@@ -64,10 +64,10 @@ def slugify_username(username):
def retrieve_ap_object(
fid, actor, serializer_class=None, queryset=None, apply_instance_policies=True
):
from . import activity
policies = moderation_models.InstancePolicy.objects.active().filter(block_all=True)
if apply_instance_policies and policies.matching_url(fid):
# we have a duplicate check here because it's less expensive to do those checks
# twice than to trigger a HTTP request
payload, updated = mrf.inbox.apply({"id": fid})
if not payload:
raise exceptions.BlockedActorOrDomain()
if queryset:
try:
......@@ -94,15 +94,12 @@ def retrieve_ap_object(
response.raise_for_status()
data = response.json()
# we match against moderation policies here again, because the FID of the returned
# object may not be the same as the URL used to access it
try:
id = data["id"]
except KeyError:
pass
else:
if apply_instance_policies and activity.should_reject(fid=id, payload=data):
raise exceptions.BlockedActorOrDomain()
# we match against mrf here again, because new data may yield different
# results
data, updated = mrf.inbox.apply(data)
if not data:
raise exceptions.BlockedActorOrDomain()
if not serializer_class:
return data
serializer = serializer_class(data=data, context={"fetch_actor": actor})
......
from django.apps import AppConfig, apps
from . import mrf
class ModerationConfig(AppConfig):
    """App config of the moderation app, in charge of MRF policy discovery."""

    name = "funkwhale_api.moderation"

    def ready(self):
        """Run the inbox MRF registry autodiscovery over every installed app."""
        super().ready()
        installed = []
        for config in apps.app_configs.values():
            installed.append(config.name)
        mrf.inbox.autodiscover(installed)
from dynamic_preferences import types
from dynamic_preferences.registries import global_preferences_registry
moderation = types.Section("Moderation")
moderation = types.Section("moderation")
@global_preferences_registry.register
......
import json
import sys
import uuid
import logging
from django.core.management.base import BaseCommand, CommandError
from django.core import validators
from django.conf import settings
from funkwhale_api.common import session
from funkwhale_api.federation import models
from funkwhale_api.moderation import mrf
def is_uuid(v):
    """
    Return True when ``v`` is a string that ``uuid.UUID`` accepts.

    Any other value, including non-string input such as ``None``, yields
    False instead of raising.
    """
    try:
        uuid.UUID(v)
    except (ValueError, TypeError, AttributeError):
        # ValueError: badly formed string; TypeError / AttributeError:
        # the value is not a string at all
        return False
    return True
def is_url(v):
    """Return True when ``v`` passes Django's ``URLValidator``, False otherwise."""
    check = validators.URLValidator()
    try:
        check(v)
    except (ValueError, validators.ValidationError):
        return False
    else:
        return True
class Command(BaseCommand):
    """
    Management command running a payload through the MRF policies.

    The payload can come from stdin, from a stored activity (given by UUID),
    from a URL or from a local JSON file. Without any input, the registered
    policies are listed instead.
    """

    help = "Check a given message against all or a specific MRF rule"

    def add_arguments(self, parser):
        """Declare the positional ``type`` / ``input`` arguments and ``--policy``."""
        parser.add_argument(
            "type",
            type=str,
            choices=["inbox"],
            help=("The type of MRF. Only inbox is supported at the moment"),
        )
        parser.add_argument(
            "input",
            nargs="?",
            # this must be a plain string (no trailing comma turning it into a
            # tuple), because argparse interpolates help texts with "%"
            help=(
                "The path to a file containing JSON data. Use - to read from stdin. "
                "If no input is provided, registered MRF policies will be listed "
                "instead."
            ),
        )
        parser.add_argument(
            "--policy",
            "-p",
            dest="policies",
            nargs="+",
            default=False,
            help="Restrict to a list of MRF policies that will be applied, in that order",
        )

    def handle(self, *args, **options):
        """
        List the registered policies, or apply them to the given input.

        Raises ``CommandError`` when the input cannot be loaded or when an
        unknown policy name is requested.
        """
        # make the MRF debug logs visible on stderr while the command runs
        logger = logging.getLogger("funkwhale.mrf")
        logger.setLevel(logging.DEBUG)
        logger.addHandler(logging.StreamHandler(stream=sys.stderr))

        registry = getattr(mrf, options["type"])
        source = options["input"]
        if not source:
            self.stdout.write(
                "No input given, listing registered policies for '{}' MRF:".format(
                    options["type"]
                )
            )
            for name in registry.keys():
                self.stdout.write("- {}".format(name))
            return

        content = self._load_content(source)

        policies = options["policies"] or []
        for policy in policies:
            if policy not in registry:
                raise CommandError(
                    "Unknown policy '{}' for MRF '{}'".format(policy, options["type"])
                )

        payload, updated = registry.apply(content, policies=policies)
        if not payload:
            self.stderr.write("Payload was discarded by MRF")
        elif updated:
            self.stderr.write("Payload was modified by MRF")
            self.stderr.write("Initial payload:\n")
            self.stdout.write(json.dumps(content, indent=2, sort_keys=True))
            self.stderr.write("Modified payload:\n")
            self.stdout.write(json.dumps(payload, indent=2, sort_keys=True))
        else:
            self.stderr.write("Payload left untouched by MRF")

    def _load_content(self, source):
        """
        Return the payload designated by ``source``.

        ``source`` is tried, in that order, as "-" (stdin), an activity UUID,
        a URL and finally a path to a local file.
        """
        if source == "-":
            raw_content = sys.stdin.read()
        elif is_uuid(source):
            self.stderr.write("UUID provided, retrieving payload from db")
            try:
                return models.Activity.objects.get(uuid=source).payload
            except models.Activity.DoesNotExist as e:
                raise CommandError(
                    "No activity found with UUID '{}'".format(source)
                ) from e
        elif is_url(source):
            # NOTE(review): a GET request usually advertises the wanted media
            # type through "Accept" rather than "Content-Type" - confirm this
            # header is the intended one
            response = session.get_session().get(
                source,
                timeout=5,
                verify=settings.EXTERNAL_REQUESTS_VERIFY_SSL,
                headers={"Content-Type": "application/activity+json"},
            )
            response.raise_for_status()
            return response.json()
        else:
            try:
                with open(source, "rb") as f:
                    raw_content = f.read()
            except OSError as e:
                raise CommandError("Cannot read '{}': {}".format(source, e)) from e

        try:
            return json.loads(raw_content)
        except ValueError as e:
            # covers both malformed JSON and undecodable bytes
            raise CommandError("Input is not valid JSON: {}".format(e)) from e
"""
Inspired by the MRF logic from Pleroma, see https://docs-develop.pleroma.social/mrf.html
It supports pluggable / customizable moderation using a programming language when
our exposed features aren't enough.
"""
import logging
import persisting_theory
logger = logging.getLogger("funkwhale.mrf")
class MRFException(Exception):
    """Base class of the MRF-specific exceptions."""
class Discard(MRFException):
    """Raised by a policy to drop the message; no further policy is applied."""
class Skip(MRFException):
    """Raised by a policy to opt out of a message; remaining policies still run."""
class Registry(persisting_theory.Registry):
    """
    Named registry of MRF policies.

    A policy is a callable receiving the payload plus keyword arguments. It
    may return a new payload, return nothing, or raise ``Skip`` / ``Discard``.
    """

    # module name used by persisting_theory's autodiscover() in each app
    look_into = "mrf_policies"

    def __init__(self, name=""):
        self.name = name
        super().__init__()

    def apply(self, payload, **kwargs):
        """
        Run the policies against ``payload`` and return ``(payload, updated)``.

        The ``policies`` keyword argument, when non-empty, restricts the run
        to these policy names, in that order; otherwise every registered
        policy is applied. Remaining keyword arguments are passed to each
        policy.

        ``(None, False)`` is returned as soon as a policy raises ``Discard``.
        A policy raising ``Skip`` or any other exception is logged and
        ignored. ``updated`` is True when at least one policy returned a
        truthy payload.
        """
        restricted = kwargs.pop("policies", [])
        if restricted:
            logger.debug(
                "[MRF.%s] Running restricted list of policies %s…",
                self.name,
                ", ".join(restricted),
            )
            selected = [(name, self[name]) for name in restricted]
        else:
            selected = self.items()

        changed = False
        for policy_name, policy in selected:
            logger.debug("[MRF.%s] Applying mrf policy '%s'…", self.name, policy_name)
            try:
                result = policy(payload, **kwargs)
            except Skip as e:
                logger.debug(
                    "[MRF.%s] Skipped policy %s because '%s'",
                    self.name,
                    policy_name,
                    str(e),
                )
            except Discard as e:
                logger.info(
                    "[MRF.%s] Discarded message per policy '%s' because '%s'",
                    self.name,
                    policy_name,
                    str(e),
                )
                return None, False
            except Exception:
                # a failing policy must not prevent the other ones from running
                logger.exception(
                    "[MRF.%s] Error while applying policy '%s'!", self.name, policy_name
                )
            else:
                if result:
                    payload, changed = result, True

        return payload, changed
# Registry holding the MRF policies registered under the "inbox" name
inbox = Registry("inbox")
import urllib.parse
from funkwhale_api.common import preferences
from funkwhale_api.common import utils
from funkwhale_api.federation import models as federation_models