Verified Commit 4e7f1e63 authored by Eliot Berriot's avatar Eliot Berriot
Browse files

Denormalized audio permission logic in a separate table to enhance performance

parent afbf7151
......@@ -851,7 +851,9 @@ MUSIC_DIRECTORY_PATH = env("MUSIC_DIRECTORY_PATH", default=None)
MUSIC_DIRECTORY_SERVE_PATH = env(
"MUSIC_DIRECTORY_SERVE_PATH", default=MUSIC_DIRECTORY_PATH
)
# When this is set to default=True, we need to reenable migration music/0042
# to ensure data is populated correctly on existing pods
MUSIC_USE_DENORMALIZATION = env.bool("MUSIC_USE_DENORMALIZATION", default=False)
USERS_INVITATION_EXPIRATION_DAYS = env.int(
"USERS_INVITATION_EXPIRATION_DAYS", default=14
)
......
......@@ -41,6 +41,22 @@ DEBUG_TOOLBAR_CONFIG = {
"SHOW_TOOLBAR_CALLBACK": lambda request: True,
"JQUERY_URL": "/staticfiles/admin/js/vendor/jquery/jquery.js",
}
# DEBUG_TOOLBAR_PANELS = [
# 'debug_toolbar.panels.versions.VersionsPanel',
# 'debug_toolbar.panels.timer.TimerPanel',
# 'debug_toolbar.panels.settings.SettingsPanel',
# 'debug_toolbar.panels.headers.HeadersPanel',
# 'debug_toolbar.panels.request.RequestPanel',
# 'debug_toolbar.panels.sql.SQLPanel',
# 'debug_toolbar.panels.staticfiles.StaticFilesPanel',
# 'debug_toolbar.panels.templates.TemplatesPanel',
# 'debug_toolbar.panels.cache.CachePanel',
# 'debug_toolbar.panels.signals.SignalsPanel',
# 'debug_toolbar.panels.logging.LoggingPanel',
# 'debug_toolbar.panels.redirects.RedirectsPanel',
# 'debug_toolbar.panels.profiling.ProfilingPanel',
# 'debug_toolbar_line_profiler.panel.ProfilingPanel'
# ]
# django-extensions
# ------------------------------------------------------------------------------
......
......@@ -165,6 +165,9 @@ class MusicLibraryFactory(NoUpdateOnCreate, factory.django.DjangoModelFactory):
class Meta:
model = "music.Library"
class Params:
local = factory.Trait(actor=factory.SubFactory(ActorFactory, local=True))
@registry.register
class LibraryScanFactory(NoUpdateOnCreate, factory.django.DjangoModelFactory):
......
......@@ -9,6 +9,8 @@ from django.contrib.contenttypes.models import ContentType
from django.core.exceptions import ObjectDoesNotExist
from django.core.serializers.json import DjangoJSONEncoder
from django.db import models
from django.db.models.signals import post_save, pre_save, post_delete
from django.dispatch import receiver
from django.utils import timezone
from django.urls import reverse
......@@ -554,3 +556,41 @@ class LibraryTrack(models.Model):
def get_metadata(self, key):
return self.metadata.get(key)
@receiver(pre_save, sender=LibraryFollow)
def set_approved_updated(sender, instance, update_fields, **kwargs):
if not instance.pk or not instance.actor.is_local:
return
if update_fields is not None and "approved" not in update_fields:
return
db_value = instance.__class__.objects.filter(pk=instance.pk).values_list(
"approved", flat=True
)[0]
if db_value != instance.approved:
# Needed to update denormalized permissions
setattr(instance, "_approved_updated", True)
@receiver(post_save, sender=LibraryFollow)
def update_denormalization_follow_approved(sender, instance, created, **kwargs):
from funkwhale_api.music import models as music_models
updated = getattr(instance, "_approved_updated", False)
if (created or updated) and instance.actor.is_local:
music_models.TrackActor.create_entries(
instance.target,
actor_ids=[instance.actor.pk],
delete_existing=not instance.approved,
)
@receiver(post_delete, sender=LibraryFollow)
def update_denormalization_follow_deleted(sender, instance, **kwargs):
from funkwhale_api.music import models as music_models
if instance.actor.is_local:
music_models.TrackActor.objects.filter(
actor=instance.actor, upload__in=instance.target.uploads.all()
).delete()
from argparse import RawTextHelpFormatter
from django.core.management.base import BaseCommand
from django.core.management.base import CommandError
from django.db import transaction
from django.db.models import Q
from funkwhale_api.music.models import TrackActor, Library
from funkwhale_api.federation.models import Actor
class Command(BaseCommand):
help = """
Rebuild audio permission table. You shouldn't have to do this by hand, but if you face
any weird things (tracks still shown when they shouldn't, or tracks not shown when they should),
this may help.
"""
def create_parser(self, *args, **kwargs):
parser = super().create_parser(*args, **kwargs)
parser.formatter_class = RawTextHelpFormatter
return parser
def add_arguments(self, parser):
parser.add_argument(
"username",
nargs='*',
help="Rebuild only for given users",
)
@transaction.atomic
def handle(self, *args, **options):
actor_ids = []
if options['username']:
actors = Actor.objects.all().local(True)
actor_ids = list(actors.filter(preferred_username__in=options['username']).values_list('id', flat=True))
if len(actor_ids) < len(options['username']):
raise CommandError('Invalid username')
print('Emptying permission table for specified users…')
qs = TrackActor.objects.all().filter(Q(actor__pk__in=actor_ids) | Q(actor=None))
qs._raw_delete(qs.db)
else:
print('Emptying permission table…')
qs = TrackActor.objects.all()
qs._raw_delete(qs.db)
libraries = Library.objects.all()
objs = []
total_libraries = len(libraries)
for i, library in enumerate(libraries):
print('[{}/{}] Populating permission table for library {}'.format(i + 1, total_libraries, library.pk))
objs += TrackActor.get_objs(
library=library,
actor_ids=actor_ids,
upload_and_track_ids=[],
)
print('Commiting changes…')
TrackActor.objects.bulk_create(objs, batch_size=5000, ignore_conflicts=True)
# Generated by Django 2.2.5 on 2019-10-21 13:18
from django.conf import settings
import django.contrib.postgres.fields.jsonb
import django.core.serializers.json
from django.db import migrations, models
import django.db.models.deletion
import funkwhale_api.music.models
class Migration(migrations.Migration):
dependencies = [
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
('music', '0039_auto_20190423_0820'),
]
operations = [
migrations.CreateModel(
name='TrackActor',
fields=[
('id', models.BigAutoField(primary_key=True, serialize=False)),
('internal', models.BooleanField(db_index=True, default=False)),
('track', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='track_actor_items', to='music.Track')),
('upload', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='track_actor_items', to='music.Upload')),
('actor', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='track_actor_items', to='federation.Actor')),
],
options={
'unique_together': {('track', 'actor', 'internal', 'upload')},
},
),
]
# Generated by Django 2.2.5 on 2019-10-21 17:05
import django.contrib.postgres.fields.jsonb
import django.core.serializers.json
from django.db import migrations, models
import django.db.models.deletion
import funkwhale_api.music.models
class Migration(migrations.Migration):
dependencies = [
('music', '0040_auto_20191021_1318'),
]
operations = [
migrations.AlterField(
model_name='album',
name='release_date',
field=models.DateField(blank=True, db_index=True, null=True),
),
migrations.AlterField(
model_name='upload',
name='from_activity',
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='federation.Activity'),
),
migrations.AlterField(
model_name='upload',
name='import_details',
field=django.contrib.postgres.fields.jsonb.JSONField(blank=True, default=funkwhale_api.music.models.empty_dict, encoder=django.core.serializers.json.DjangoJSONEncoder, max_length=50000),
),
migrations.AlterField(
model_name='upload',
name='import_metadata',
field=django.contrib.postgres.fields.jsonb.JSONField(blank=True, default=funkwhale_api.music.models.empty_dict, encoder=django.core.serializers.json.DjangoJSONEncoder, max_length=50000),
),
migrations.AlterField(
model_name='upload',
name='metadata',
field=django.contrib.postgres.fields.jsonb.JSONField(blank=True, default=funkwhale_api.music.models.empty_dict, encoder=django.core.serializers.json.DjangoJSONEncoder, max_length=50000),
),
migrations.AlterField(
model_name='uploadversion',
name='mimetype',
field=models.CharField(choices=[('video/ogg', 'ogg'), ('audio/ogg', 'ogg'), ('audio/opus', 'opus'), ('audio/mpeg', 'mp3'), ('audio/x-m4a', 'aac'), ('audio/x-m4a', 'm4a'), ('audio/x-flac', 'flac'), ('audio/flac', 'flac')], max_length=50),
),
]
# -*- coding: utf-8 -*-
"""
This migration is disabled until settings.MUSIC_USE_DENORMALIZATION is set to default=True
"""
from __future__ import unicode_literals
from django.db import migrations
from funkwhale_api.music.utils import guess_mimetype
def denormalize(apps, schema_editor):
Upload = apps.get_model("music", "Upload")
if not Upload.objects.count():
print('Skipping…')
from funkwhale_api.music.models import TrackActor, Library
libraries = Library.objects.all()
objs = []
total_libraries = len(libraries)
for i, library in enumerate(libraries):
print('[{}/{}] Populating permission table for library {}'.format(i+1, total_libraries, library.pk))
objs += TrackActor.get_objs(
library=library,
actor_ids=[],
upload_and_track_ids=[],
)
print('Commiting changes…')
TrackActor.objects.bulk_create(objs, batch_size=5000, ignore_conflicts=True)
def rewind(apps, schema_editor):
pass
class Migration(migrations.Migration):
dependencies = [("music", "0041_auto_20191021_1705")]
operations = [migrations.RunPython(denormalize, rewind)]
......@@ -14,7 +14,7 @@ from django.contrib.postgres.fields import JSONField
from django.core.files.base import ContentFile
from django.core.serializers.json import DjangoJSONEncoder
from django.db import models, transaction
from django.db.models.signals import post_save
from django.db.models.signals import post_save, pre_save
from django.dispatch import receiver
from django.urls import reverse
from django.utils import timezone
......@@ -185,8 +185,8 @@ class ArtistQuerySet(common_models.LocalFromFidQuerySet, models.QuerySet):
return self.annotate(is_playable_by_actor=subquery)
def playable_by(self, actor, include=True):
tracks = Track.objects.playable_by(actor, include)
matches = self.filter(tracks__in=tracks).values_list("pk")
tracks = Track.objects.playable_by(actor)
matches = self.filter(pk__in=tracks.values("artist_id")).values_list("pk")
if include:
return self.filter(pk__in=matches)
else:
......@@ -269,8 +269,8 @@ class AlbumQuerySet(common_models.LocalFromFidQuerySet, models.QuerySet):
return self.annotate(is_playable_by_actor=subquery)
def playable_by(self, actor, include=True):
tracks = Track.objects.playable_by(actor, include)
matches = self.filter(tracks__in=tracks).values_list("pk")
tracks = Track.objects.playable_by(actor)
matches = self.filter(pk__in=tracks.values("album_id")).values_list("pk")
if include:
return self.filter(pk__in=matches)
else:
......@@ -418,6 +418,7 @@ class TrackQuerySet(common_models.LocalFromFidQuerySet, models.QuerySet):
return self.select_related().select_related("album__artist", "artist")
def annotate_playable_by_actor(self, actor):
files = (
Upload.objects.playable_by(actor)
.filter(track=models.OuterRef("id"))
......@@ -428,6 +429,15 @@ class TrackQuerySet(common_models.LocalFromFidQuerySet, models.QuerySet):
return self.annotate(is_playable_by_actor=subquery)
def playable_by(self, actor, include=True):
if settings.MUSIC_USE_DENORMALIZATION:
if actor is not None:
query = models.Q(actor=None) | models.Q(actor=actor)
else:
query = models.Q(actor=None, internal=False)
if not include:
query = ~query
return self.filter(pk__in=TrackActor.objects.filter(query).values("track"))
files = Upload.objects.playable_by(actor, include)
matches = self.filter(uploads__in=files).values_list("pk")
if include:
......@@ -1147,11 +1157,134 @@ class LibraryScan(models.Model):
modification_date = models.DateTimeField(null=True, blank=True)
class TrackActor(models.Model):
"""
Denormalization table to store all playable tracks for a given user
Empty user means the track is public or internal (cf internal flag too)
"""
id = models.BigAutoField(primary_key=True)
actor = models.ForeignKey(
"federation.Actor",
on_delete=models.CASCADE,
related_name="track_actor_items",
blank=True,
null=True,
)
track = models.ForeignKey(
Track, on_delete=models.CASCADE, related_name="track_actor_items"
)
upload = models.ForeignKey(
Upload, on_delete=models.CASCADE, related_name="track_actor_items"
)
internal = models.BooleanField(default=False, db_index=True)
class Meta:
unique_together = ("track", "actor", "internal", "upload")
@classmethod
def get_objs(cls, library, actor_ids, upload_and_track_ids):
upload_and_track_ids = upload_and_track_ids or library.uploads.filter(
import_status="finished", track__isnull=False
).values_list("id", "track")
objs = []
if library.privacy_level == "me":
follow_queryset = library.received_follows.filter(approved=True).exclude(
actor__user__isnull=True
)
if actor_ids:
follow_queryset = follow_queryset.filter(actor__pk__in=actor_ids)
final_actor_ids = list(follow_queryset.values_list("actor", flat=True))
owner = library.actor if library.actor.is_local else None
if owner and (not actor_ids or owner in final_actor_ids):
final_actor_ids.append(owner.pk)
for actor_id in final_actor_ids:
for upload_id, track_id in upload_and_track_ids:
objs.append(
cls(actor_id=actor_id, track_id=track_id, upload_id=upload_id)
)
elif library.privacy_level == "instance":
for upload_id, track_id in upload_and_track_ids:
objs.append(
cls(
actor_id=None,
track_id=track_id,
upload_id=upload_id,
internal=True,
)
)
elif library.privacy_level == "everyone":
for upload_id, track_id in upload_and_track_ids:
objs.append(cls(actor_id=None, track_id=track_id, upload_id=upload_id))
return objs
@classmethod
def create_entries(
cls, library, delete_existing=True, actor_ids=None, upload_and_track_ids=None
):
if not settings.MUSIC_USE_DENORMALIZATION:
# skip
return
if delete_existing:
to_delete = cls.objects.filter(upload__library=library)
if actor_ids:
to_delete = to_delete.filter(actor__pk__in=actor_ids)
# we don't use .delete() here because we don't want signals to fire
to_delete._raw_delete(to_delete.db)
objs = cls.get_objs(
library, actor_ids=actor_ids, upload_and_track_ids=upload_and_track_ids
)
return cls.objects.bulk_create(objs, ignore_conflicts=True, batch_size=5000)
@receiver(post_save, sender=ImportJob)
def update_batch_status(sender, instance, **kwargs):
instance.batch.update_status()
@receiver(post_save, sender=Upload)
def update_denormalization_track_actor(sender, instance, created, **kwargs):
if (
created
and settings.MUSIC_USE_DENORMALIZATION
and instance.track_id
and instance.import_status == "finished"
):
TrackActor.create_entries(
instance.library,
delete_existing=False,
upload_and_track_ids=[(instance.pk, instance.track_id)],
)
@receiver(pre_save, sender=Library)
def set_privacy_level_updated(sender, instance, update_fields, **kwargs):
if not instance.pk:
return
if update_fields is not None and "privacy_level" not in update_fields:
return
db_value = instance.__class__.objects.filter(pk=instance.pk).values_list(
"privacy_level", flat=True
)[0]
if db_value != instance.privacy_level:
# Needed to update denormalized permissions
setattr(instance, "_privacy_level_updated", True)
@receiver(post_save, sender=Library)
def update_denormalization_track_user_library_privacy_level(
sender, instance, created, **kwargs
):
if created:
return
updated = getattr(instance, "_privacy_level_updated", False)
if updated:
TrackActor.create_entries(instance)
@receiver(post_save, sender=ImportBatch)
def update_request_status(sender, instance, created, **kwargs):
update_fields = kwargs.get("update_fields", []) or []
......
......@@ -170,7 +170,7 @@ def fail_import(upload, error_code, detail=None, **fields):
),
"upload",
)
def process_upload(upload):
def process_upload(upload, update_denormalization=True):
import_metadata = upload.import_metadata or {}
old_status = upload.import_status
audio_file = upload.get_audio_file()
......@@ -249,6 +249,13 @@ def process_upload(upload):
]
)
if update_denormalization:
models.TrackActor.create_entries(
library=upload.library,
upload_and_track_ids=[(upload.pk, upload.track_id)],
delete_existing=False,
)
# update album cover, if needed
if not track.album.cover:
update_album_cover(
......
......@@ -14,3 +14,4 @@ profiling
asynctest==0.12.2
aioresponses==0.6.0
https://github.com/dmclain/django-debug-toolbar-line-profiler/archive/master.zip
......@@ -26,3 +26,4 @@ env =
FORCE_HTTPS_URLS=False
FUNKWHALE_SPA_HTML_ROOT=http://noop/
PROXY_MEDIA=true
MUSIC_USE_DENORMALIZATION=true
......@@ -2,6 +2,7 @@ import pytest
from django import db
from funkwhale_api.federation import models
from funkwhale_api.music import models as music_models
def test_cannot_duplicate_actor(factories):
......@@ -174,3 +175,83 @@ def test_can_create_fetch_for_object(factories):
assert fetch.status == "pending"
assert fetch.detail == {}
assert fetch.object == track
@pytest.mark.parametrize(
"initial_approved, updated_approved, initial_playable_tracks, updated_playable_tracks",
[
(
True,
False,
{"owner": [0], "follower": [0], "local_actor": [], None: []},
{"owner": [0], "follower": [], "local_actor": [], None: []},
),
(
False,
True,
{"owner": [0], "follower": [], "local_actor": [], None: []},
{"owner": [0], "follower": [0], "local_actor": [], None: []},
),
],
)
def test_update_library_follow_approved_create_entries(
initial_approved,
updated_approved,
initial_playable_tracks,
updated_playable_tracks,
factories,
):
actors = {
"owner": factories["federation.Actor"](local=True),
"follower": factories["federation.Actor"](local=True),
"local_actor": factories["federation.Actor"](local=True),
None: None,
}
library = factories["music.Library"](actor=actors["owner"], privacy_level="me")
tracks = [
factories["music.Upload"](playable=True, library=library).track,
factories["music.Upload"](library=library, import_status="pending").track,
]
follow = factories["federation.LibraryFollow"](
target=library, actor=actors["follower"], approved=initial_approved
)
for actor_name, expected in initial_playable_tracks.items():
actor = actors[actor_name]
expected_tracks = [tracks[i] for i in expected]
assert list(music_models.Track.objects.playable_by(actor)) == expected_tracks
follow.approved = updated_approved
follow.save()
for actor_name, expected in updated_playable_tracks.items():
actor = actors[actor_name]
expected_tracks = [tracks[i] for i in expected]
assert list(music_models.Track.objects.playable_by(actor)) == expected_tracks
def test_update_library_follow_delete_delete_denormalization_entries(factories,):
updated_playable_tracks = {"owner": [0], "follower": []}
actors = {
"owner": factories["federation.Actor"](local=True),
"follower": factories["federation.Actor"](local=True),
}
library = factories["music.Library"](actor=actors["owner"], privacy_level="me")
tracks = [
factories["music.Upload"](playable=True, library=library).track,
factories["music.Upload"](library=library, import_status="pending").track,
]