Skip to content
Snippets Groups Projects
Verified Commit 5916a1ba authored by Eliot Berriot's avatar Eliot Berriot
Browse files

Fix #777: Added a prune_library management command to remove obsolete metadata

parent 96010917
No related branches found
No related tags found
No related merge requests found
from argparse import RawTextHelpFormatter
from django.core.management.base import BaseCommand
from django.core.management.base import CommandError
from django.db import transaction
from funkwhale_api.music import models, tasks
class Command(BaseCommand):
    """Management command that prunes tracks, albums and artists lacking files."""

    help = """
Remove tracks, albums and artists that are not associated with any file from the instance library:
- Tracks without uploads are deleted, if the --tracks flag is passed
- Albums without tracks are deleted, if the --albums flag is passed
- Artists without albums are deleted, if the --artists flag is passed
Tracks with associated favorites, playlists or listening won't be deleted
by default, unless you pass the corresponding --ignore-* flags.
"""

    def create_parser(self, *args, **kwargs):
        """Build the default parser, then switch it to RawTextHelpFormatter."""
        command_parser = super().create_parser(*args, **kwargs)
        command_parser.formatter_class = RawTextHelpFormatter
        return command_parser

    def add_arguments(self, parser):
        """Register the dry-run switch, the pruning targets and the --ignore-* flags."""
        # (flag, argparse action, destination key, help text), in display order.
        flag_specs = (
            (
                "--no-dry-run",
                "store_false",
                "dry_run",
                "Disable dry run mode and apply pruning for real on the database",
            ),
            (
                "--artists",
                "store_true",
                "prune_artists",
                "Prune artists without albums/tracks",
            ),
            (
                "--albums",
                "store_true",
                "prune_albums",
                "Prune albums without tracks",
            ),
            (
                "--tracks",
                "store_true",
                "prune_tracks",
                "Prune tracks without uploads",
            ),
            (
                "--ignore-favorites",
                "store_false",
                "exclude_favorites",
                "Allow favorited tracks to be pruned",
            ),
            (
                "--ignore-playlists",
                "store_false",
                "exclude_playlists",
                "Allow tracks included in playlists to be pruned",
            ),
            (
                "--ignore-listenings",
                "store_false",
                "exclude_listenings",
                "Allow tracks with listening history to be pruned",
            ),
        )
        for flag, action, destination, description in flag_specs:
            parser.add_argument(
                flag,
                action=action,
                dest=destination,
                # A store_false flag starts out True, a store_true flag False.
                default=(action == "store_false"),
                help=description,
            )

    @transaction.atomic
    def handle(self, *args, **options):
        """
        Count, and unless dry run is on, delete the requested prunable objects.

        Raises CommandError when none of the prune_tracks, prune_albums or
        prune_artists options is set. Tracks are processed first, then albums,
        then artists.
        """
        requested = [
            options[key] for key in ("prune_albums", "prune_tracks", "prune_artists")
        ]
        if not any(requested):
            raise CommandError(
                "You need to provide at least one of the --tracks, --albums or --artists flags"
            )

        dry_run = options["dry_run"]
        banner = (
            "Dry-run on, will not commit anything"
            if dry_run
            else "Dry-run off, *pruning for real*"
        )
        self.stdout.write(banner)
        self.stdout.write("")

        def find_prunable_tracks():
            # Deferred so the exclude_* options are only read when tracks are pruned.
            return tasks.get_prunable_tracks(
                exclude_favorites=options["exclude_favorites"],
                exclude_playlists=options["exclude_playlists"],
                exclude_listenings=options["exclude_listenings"],
            )

        # (option key, candidates getter, model, dry-run message, deletion message)
        plan = (
            (
                "prune_tracks",
                find_prunable_tracks,
                models.Track,
                "Would prune {}/{} tracks",
                "Deleting {}/{} tracks…",
            ),
            (
                "prune_albums",
                tasks.get_prunable_albums,
                models.Album,
                "Would prune {}/{} albums",
                "Deleting {}/{} albums…",
            ),
            (
                "prune_artists",
                tasks.get_prunable_artists,
                models.Artist,
                "Would prune {}/{} artists",
                "Deleting {}/{} artists…",
            ),
        )
        for option_key, find_candidates, model, preview_msg, delete_msg in plan:
            if not options[option_key]:
                continue
            candidates = find_candidates()
            candidate_count = candidates.count()
            overall_count = model.objects.count()
            if dry_run:
                self.stdout.write(preview_msg.format(candidate_count, overall_count))
                continue
            self.stdout.write(delete_msg.format(candidate_count, overall_count))
            candidates.delete()

        self.stdout.write("")
        closing = (
            "Nothing was pruned, rerun this command with --no-dry-run to apply the changes"
            if dry_run
            else "Pruning completed!"
        )
        self.stdout.write(closing)
        self.stdout.write("")
......@@ -568,3 +568,31 @@ def clean_transcoding_cache():
.order_by("id")
)
return candidates.delete()
def get_prunable_tracks(
    exclude_favorites=True, exclude_playlists=True, exclude_listenings=True
):
    """
    Return a queryset of tracks that have no associated uploads.

    Each ``exclude_*`` flag left enabled additionally drops the tracks that
    have favorites, playlist entries or listenings, respectively.
    """
    candidates = models.Track.objects.all().filter(uploads__isnull=True)
    # Each enabled flag narrows the queryset with its own .filter() call.
    optional_lookups = (
        (exclude_favorites, "track_favorites__isnull"),
        (exclude_playlists, "playlist_tracks__isnull"),
        (exclude_listenings, "listenings__isnull"),
    )
    for enabled, lookup in optional_lookups:
        if enabled:
            candidates = candidates.filter(**{lookup: True})
    return candidates
def get_prunable_albums():
    """Return a queryset of the albums that have no tracks."""
    albums = models.Album.objects
    return albums.filter(tracks__isnull=True)
def get_prunable_artists():
    """Return a queryset of the artists that have neither tracks nor albums."""
    orphan_conditions = {"tracks__isnull": True, "albums__isnull": True}
    return models.Artist.objects.filter(**orphan_conditions)
import os
import pytest
from funkwhale_api.music.management.commands import fix_uploads
from funkwhale_api.music.management.commands import prune_library
DATA_DIR = os.path.dirname(os.path.abspath(__file__))
......@@ -73,3 +75,78 @@ def test_fix_uploads_mimetype(factories, mocker):
assert upload1.mimetype == "audio/mpeg"
assert upload2.mimetype == "audio/something"
def test_prune_library_dry_run(factories):
    """In dry-run mode the command must leave every track in the database."""
    first_track = factories["music.Track"]()
    second_track = factories["music.Track"]()

    command = prune_library.Command()
    command.handle(
        prune_artists=True,
        prune_albums=True,
        prune_tracks=True,
        exclude_favorites=False,
        exclude_listenings=False,
        exclude_playlists=False,
        dry_run=True,
    )

    # refresh_from_db() would raise DoesNotExist if a row had been deleted
    first_track.refresh_from_db()
    second_track.refresh_from_db()
def test_prune_library(factories, mocker):
    """With dry run off, only the objects returned by the task helpers are deleted."""
    doomed_track = factories["music.Track"]()
    kept_track = factories["music.Track"]()
    track_finder = mocker.patch(
        "funkwhale_api.music.tasks.get_prunable_tracks",
        return_value=type(doomed_track).objects.filter(pk=doomed_track.pk),
    )

    doomed_album = factories["music.Album"]()
    kept_album = factories["music.Album"]()
    album_finder = mocker.patch(
        "funkwhale_api.music.tasks.get_prunable_albums",
        return_value=type(doomed_album).objects.filter(pk=doomed_album.pk),
    )

    doomed_artist = factories["music.Artist"]()
    kept_artist = factories["music.Artist"]()
    artist_finder = mocker.patch(
        "funkwhale_api.music.tasks.get_prunable_artists",
        return_value=type(doomed_artist).objects.filter(pk=doomed_artist.pk),
    )

    # The exclude_* values are opaque mocks: the test only checks that the
    # command forwards them untouched to get_prunable_tracks.
    options = {
        "exclude_favorites": mocker.Mock(),
        "exclude_listenings": mocker.Mock(),
        "exclude_playlists": mocker.Mock(),
        "prune_artists": True,
        "prune_albums": True,
        "prune_tracks": True,
        "dry_run": False,
    }
    prune_library.Command().handle(**options)

    track_finder.assert_called_once_with(
        exclude_favorites=options["exclude_favorites"],
        exclude_listenings=options["exclude_listenings"],
        exclude_playlists=options["exclude_playlists"],
    )
    album_finder.assert_called_once()
    artist_finder.assert_called_once()

    for deleted in (doomed_track, doomed_album, doomed_artist):
        with pytest.raises(deleted.DoesNotExist):
            deleted.refresh_from_db()

    for survivor in (kept_track, kept_album, kept_artist):
        survivor.refresh_from_db()
......@@ -637,3 +637,72 @@ def test_clean_transcoding_cache(preferences, now, factories):
with pytest.raises(u1.__class__.DoesNotExist):
u1.refresh_from_db()
def test_get_prunable_tracks(factories):
    """With default flags, only the bare track is reported as prunable."""
    bare_track = factories["music.Track"]()
    # objects expected to keep their tracks out of the result
    for factory_name in (
        "music.Upload",
        "favorites.TrackFavorite",
        "history.Listening",
        "playlists.PlaylistTrack",
    ):
        factories[factory_name]()

    found = list(tasks.get_prunable_tracks())
    assert found == [bare_track]
def test_get_prunable_tracks_include_favorites(factories):
    """With exclude_favorites=False, the favorited track is reported as prunable too."""
    bare_track = factories["music.Track"]()
    favorited_track = factories["favorites.TrackFavorite"]().track
    # objects expected to keep their tracks out of the result
    factories["favorites.TrackFavorite"](track__playable=True)
    for factory_name in ("music.Upload", "history.Listening", "playlists.PlaylistTrack"):
        factories[factory_name]()

    found = tasks.get_prunable_tracks(exclude_favorites=False).order_by("id")
    assert list(found) == [bare_track, favorited_track]
def test_get_prunable_tracks_include_playlists(factories):
    """With exclude_playlists=False, the track in a playlist is reported as prunable too."""
    bare_track = factories["music.Track"]()
    playlist_track = factories["playlists.PlaylistTrack"]().track
    # objects expected to keep their tracks out of the result
    for factory_name in ("favorites.TrackFavorite", "music.Upload", "history.Listening"):
        factories[factory_name]()
    factories["playlists.PlaylistTrack"](track__playable=True)

    found = tasks.get_prunable_tracks(exclude_playlists=False).order_by("id")
    assert list(found) == [bare_track, playlist_track]
def test_get_prunable_tracks_include_listenings(factories):
    """With exclude_listenings=False, the listened track is reported as prunable too."""
    bare_track = factories["music.Track"]()
    listened_track = factories["history.Listening"]().track
    # objects expected to keep their tracks out of the result
    for factory_name in ("favorites.TrackFavorite", "music.Upload"):
        factories[factory_name]()
    factories["history.Listening"](track__playable=True)
    factories["playlists.PlaylistTrack"]()

    found = tasks.get_prunable_tracks(exclude_listenings=False).order_by("id")
    assert list(found) == [bare_track, listened_track]
def test_get_prunable_albums(factories):
    """Only the album created without any track is reported as prunable."""
    empty_album = factories["music.Album"]()
    # a track whose album must not be reported
    factories["music.Track"]().album

    found = list(tasks.get_prunable_albums())
    assert found == [empty_album]
def test_get_prunable_artists(factories):
    """An artist is prunable only when it has neither tracks nor albums."""
    orphan_artist = factories["music.Artist"]()
    # artists kept alive by a track and by an album, respectively
    track_artist = factories["music.Artist"]()
    album_artist = factories["music.Artist"]()
    factories["music.Track"](artist=track_artist)
    factories["music.Track"](album__artist=album_artist)

    found = list(tasks.get_prunable_artists())
    assert found == [orphan_artist]
Added a prune_library management command to remove obsolete metadata from the database (#777)
......@@ -34,3 +34,13 @@ enabled in a future release).
If you want to start building an app on top of Funkwhale's API, please check-out
`https://docs.funkwhale.audio/api.html`_ and `https://docs.funkwhale.audio/developers/authentication.html`_.
Prune library command
^^^^^^^^^^^^^^^^^^^^^
Users are often surprised by Funkwhale's tendency to keep track, album and artist
metadata even if no associated files exist.
To help with that, we now offer a ``prune_library`` management command you can run
to purge obsolete entries from your database. `Please refer to our documentation
for usage instructions <https://docs.funkwhale.audio/admin/commands.html#pruning-library>`_.
Management commands
===================
Pruning library
---------------
Because Funkwhale is a multi-user and federated audio server, we don't delete any artist, album
and track objects in the database when you delete the corresponding files.
This is on purpose, because those objects may be referenced in user playlists, favorites,
listening history or on other instances, or other users could have uploaded files
linked to those entities in their own private libraries.
Therefore, Funkwhale has a really conservative approach and doesn't delete metadata when
audio files are deleted.
This behaviour can be problematic in some situations though, e.g. if you imported
a lot of wrongly tagged files, then deleted the files to reimport them later.
To help with that, we provide a management command you can run on the server that will effectively
prune your library of track, album and artist metadata that is not tied to any file:
.. code-block:: sh
# print help
python manage.py prune_library --help
# prune tracks with no uploads
python manage.py prune_library --tracks
# prune albums with no tracks
python manage.py prune_library --albums
# prune artists with no tracks/albums
python manage.py prune_library --artists
# prune everything (tracks, albums and artists)
python manage.py prune_library --tracks --albums --artists
The ``prune_library`` command will not delete anything by default, and only gives
you an estimate of how many database objects would be affected by the pruning.
Once you have reviewed the output and are comfortable with the changes, you should rerun
the command with the ``--no-dry-run`` flag to disable dry run mode and actually apply
the changes on the database.
.. warning::
Running this command with ``--no-dry-run`` is irreversible. Unless you have a backup,
there will be no way to retrieve the deleted data.
.. note::
The command will exclude tracks that are favorited, included in playlists or listening
history by default. If you want to include those in the pruning process as well,
add the corresponding ``--ignore-favorites``, ``--ignore-playlists`` and ``--ignore-listenings``
flags.
......@@ -22,6 +22,7 @@ Administration
:maxdepth: 2
django
commands
url
upgrading
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment