From aea8e4fc59911abcaa198c0da28318302594b69f Mon Sep 17 00:00:00 2001 From: Eliot Berriot <contact@eliotberriot.com> Date: Wed, 18 Dec 2019 07:52:09 +0100 Subject: [PATCH] Resolve "Tagging artists/albums genres when importing music files" --- api/config/settings/common.py | 14 +++++++ api/funkwhale_api/cli/library.py | 50 +++++++++++++++++++++++++ api/funkwhale_api/cli/main.py | 1 + api/funkwhale_api/federation/utils.py | 9 +++++ api/funkwhale_api/music/tasks.py | 46 +++++++++++++++++++++++ api/funkwhale_api/tags/tasks.py | 54 +++++++++++++++++++++++++++ api/tests/cli/test_main.py | 11 ++++++ api/tests/federation/test_utils.py | 34 +++++++++++++++++ api/tests/music/test_tasks.py | 51 +++++++++++++++++++++++++ api/tests/tags/test_tasks.py | 35 +++++++++++++++++ changes/changelog.d/988.enhancement | 1 + docs/admin/commands.rst | 27 ++++++++++++++ 12 files changed, 333 insertions(+) create mode 100644 api/funkwhale_api/cli/library.py create mode 100644 api/funkwhale_api/tags/tasks.py create mode 100644 api/tests/tags/test_tasks.py create mode 100644 changes/changelog.d/988.enhancement diff --git a/api/config/settings/common.py b/api/config/settings/common.py index 2fb7b496c3..d67a15290f 100644 --- a/api/config/settings/common.py +++ b/api/config/settings/common.py @@ -599,6 +599,20 @@ CELERY_BEAT_SCHEDULE = { }, } +if env.bool("ADD_ALBUM_TAGS_FROM_TRACKS", default=True): + CELERY_BEAT_SCHEDULE["music.albums_set_tags_from_tracks"] = { + "task": "music.albums_set_tags_from_tracks", + "schedule": crontab(minute="0", hour="4", day_of_week="4"), + "options": {"expires": 60 * 60 * 2}, + } + +if env.bool("ADD_ARTIST_TAGS_FROM_TRACKS", default=True): + CELERY_BEAT_SCHEDULE["music.artists_set_tags_from_tracks"] = { + "task": "music.artists_set_tags_from_tracks", + "schedule": crontab(minute="0", hour="4", day_of_week="4"), + "options": {"expires": 60 * 60 * 2}, + } + NODEINFO_REFRESH_DELAY = env.int("NODEINFO_REFRESH_DELAY", default=3600 * 24) diff --git a/api/funkwhale_api/cli/library.py b/api/funkwhale_api/cli/library.py new file mode 100644 index 0000000000..da4dd67ad1 --- /dev/null +++ b/api/funkwhale_api/cli/library.py @@ -0,0 +1,50 @@ +import click + +from funkwhale_api.music import tasks + +from . import base + + +def handler_add_tags_from_tracks( + artists=False, albums=False, +): + result = None + if artists: + result = tasks.artists_set_tags_from_tracks() + elif albums: + result = tasks.albums_set_tags_from_tracks() + else: + raise click.BadOptionUsage("You must specify artists or albums") + + if result is None: + click.echo(" No relevant tags found") + else: + click.echo(" Relevant tags added to {} objects".format(len(result))) + + +@base.cli.group() +def albums(): + """Manage albums""" + pass + + +@base.cli.group() +def artists(): + """Manage artists""" + pass + + +@albums.command(name="add-tags-from-tracks") +def albums_add_tags_from_tracks(): + """ + Associate tags to album with no genre tags, assuming identical tags are found on the album tracks + """ + handler_add_tags_from_tracks(albums=True) + + +@artists.command(name="add-tags-from-tracks") +def artists_add_tags_from_tracks(): + """ + Associate tags to artists with no genre tags, assuming identical tags are found on the artist tracks + """ + handler_add_tags_from_tracks(artists=True) diff --git a/api/funkwhale_api/cli/main.py b/api/funkwhale_api/cli/main.py index a51cca1fd0..9bad5a8eb8 100644 --- a/api/funkwhale_api/cli/main.py +++ b/api/funkwhale_api/cli/main.py @@ -2,6 +2,7 @@ import click import sys from . import base +from . import library # noqa from . import users # noqa from rest_framework.exceptions import ValidationError diff --git a/api/funkwhale_api/federation/utils.py b/api/funkwhale_api/federation/utils.py index 74e9fef692..59b63e2cee 100644 --- a/api/funkwhale_api/federation/utils.py +++ b/api/funkwhale_api/federation/utils.py @@ -118,6 +118,15 @@ def get_domain_query_from_url(domain, url_field="fid"): return query +def local_qs(queryset, url_field="fid", include=True): + query = get_domain_query_from_url( + domain=settings.FEDERATION_HOSTNAME, url_field=url_field + ) + if not include: + query = ~query + return queryset.filter(query) + + def is_local(url): if not url: return True diff --git a/api/funkwhale_api/music/tasks.py b/api/funkwhale_api/music/tasks.py index 153dbe495e..71f7106868 100644 --- a/api/funkwhale_api/music/tasks.py +++ b/api/funkwhale_api/music/tasks.py @@ -14,7 +14,9 @@ from requests.exceptions import RequestException from funkwhale_api.common import channels, preferences from funkwhale_api.federation import routes from funkwhale_api.federation import library as lb +from funkwhale_api.federation import utils as federation_utils from funkwhale_api.tags import models as tags_models +from funkwhale_api.tags import tasks as tags_tasks from funkwhale_api.taskapp import celery from . import licenses @@ -668,6 +670,50 @@ def clean_transcoding_cache(): return candidates.delete() +@celery.app.task(name="music.albums_set_tags_from_tracks") +@transaction.atomic +def albums_set_tags_from_tracks(ids=None, dry_run=False): + qs = models.Album.objects.filter(tagged_items__isnull=True).order_by("id") + qs = federation_utils.local_qs(qs) + qs = qs.values_list("id", flat=True) + if ids is not None: + qs = qs.filter(pk__in=ids) + data = tags_tasks.get_tags_from_foreign_key( + ids=qs, foreign_key_model=models.Track, foreign_key_attr="album", + ) + logger.info("Found automatic tags for %s albums…", len(data)) + if dry_run: + logger.info("Running in dry-run mode, not commiting") + return + + tags_tasks.add_tags_batch( + data, model=models.Album, + ) + return data + + +@celery.app.task(name="music.artists_set_tags_from_tracks") +@transaction.atomic +def artists_set_tags_from_tracks(ids=None, dry_run=False): + qs = models.Artist.objects.filter(tagged_items__isnull=True).order_by("id") + qs = federation_utils.local_qs(qs) + qs = qs.values_list("id", flat=True) + if ids is not None: + qs = qs.filter(pk__in=ids) + data = tags_tasks.get_tags_from_foreign_key( + ids=qs, foreign_key_model=models.Track, foreign_key_attr="artist", + ) + logger.info("Found automatic tags for %s artists…", len(data)) + if dry_run: + logger.info("Running in dry-run mode, not commiting") + return + + tags_tasks.add_tags_batch( + data, model=models.Artist, + ) + return data + + def get_prunable_tracks( exclude_favorites=True, exclude_playlists=True, exclude_listenings=True ): diff --git a/api/funkwhale_api/tags/tasks.py b/api/funkwhale_api/tags/tasks.py new file mode 100644 index 0000000000..13ced9d2d5 --- /dev/null +++ b/api/funkwhale_api/tags/tasks.py @@ -0,0 +1,54 @@ +import collections + +from django.contrib.contenttypes.models import ContentType + +from . import models + + +def get_tags_from_foreign_key( + ids, foreign_key_model, foreign_key_attr, tagged_items_attr="tagged_items" +): + """ + Cf #988, this is useful to tag an artist with #Rock if all its tracks are tagged with + #Rock, for instance. + """ + data = {} + objs = foreign_key_model.objects.filter( + **{"{}__pk__in".format(foreign_key_attr): ids} + ).order_by("-id") + objs = objs.only("id", "{}_id".format(foreign_key_attr)).prefetch_related( + tagged_items_attr + ) + + for obj in objs.iterator(): + # loop on all objects, store the objs tags + counter on the corresponding foreign key + row_data = data.setdefault( + getattr(obj, "{}_id".format(foreign_key_attr)), + {"total_objs": 0, "tags": []}, + ) + row_data["total_objs"] += 1 + for ti in getattr(obj, tagged_items_attr).all(): + row_data["tags"].append(ti.tag_id) + + # now, keep only tags that are present on all objects, i.e tags where the count + # matches total_objs + final_data = {} + for key, row_data in data.items(): + counter = collections.Counter(row_data["tags"]) + tags_to_keep = sorted( + [t for t, c in counter.items() if c >= row_data["total_objs"]] + ) + if tags_to_keep: + final_data[key] = tags_to_keep + return final_data + + +def add_tags_batch(data, model, tagged_items_attr="tagged_items"): + model_ct = ContentType.objects.get_for_model(model) + tagged_items = [ + models.TaggedItem(tag_id=tag_id, content_type=model_ct, object_id=obj_id) + for obj_id, tag_ids in data.items() + for tag_id in tag_ids + ] + + return models.TaggedItem.objects.bulk_create(tagged_items, batch_size=2000) diff --git a/api/tests/cli/test_main.py b/api/tests/cli/test_main.py index 4cea9e414c..1e1a35e609 100644 --- a/api/tests/cli/test_main.py +++ b/api/tests/cli/test_main.py @@ -3,6 +3,7 @@ import pytest from click.testing import CliRunner from funkwhale_api.cli import main +from funkwhale_api.cli import library from funkwhale_api.cli import users @@ -102,6 +103,16 @@ from funkwhale_api.cli import users ) ], ), + ( + ("albums", "add-tags-from-tracks"), + tuple(), + [(library, "handler_add_tags_from_tracks", {"albums": True})], + ), + ( + ("artists", "add-tags-from-tracks"), + tuple(), + [(library, "handler_add_tags_from_tracks", {"artists": True})], + ), ], ) def test_cli(cmd, args, handlers, mocker): diff --git a/api/tests/federation/test_utils.py b/api/tests/federation/test_utils.py index 83c5e4f7e0..9502fb6c36 100644 --- a/api/tests/federation/test_utils.py +++ b/api/tests/federation/test_utils.py @@ -138,3 +138,37 @@ def test_retrieve_with_serializer(db, r_mock): result = utils.retrieve_ap_object(fid, actor=None, serializer_class=S) assert result == {"persisted": "object"} + + +@pytest.mark.parametrize( + "factory_name, fids, kwargs, expected_indexes", + [ + ( + "music.Artist", + ["https://local.domain/test", "http://local.domain/"], + {}, + [0, 1], + ), + ( + "music.Artist", + ["https://local.domain/test", "http://notlocal.domain/"], + {}, + [0], + ), + ( + "music.Artist", + ["https://local.domain/test", "http://notlocal.domain/"], + {"include": False}, + [1], + ), + ], +) +def test_local_qs(factory_name, fids, kwargs, expected_indexes, factories, settings): + settings.FEDERATION_HOSTNAME = "local.domain" + objs = [factories[factory_name](fid=fid) for fid in fids] + + qs = objs[0].__class__.objects.all().order_by("id") + result = utils.local_qs(qs, **kwargs) + + expected_objs = [obj for i, obj in enumerate(objs) if i in expected_indexes] + assert list(result) == expected_objs diff --git a/api/tests/music/test_tasks.py b/api/tests/music/test_tasks.py index 0fadad4c53..449be0c04b 100644 --- a/api/tests/music/test_tasks.py +++ b/api/tests/music/test_tasks.py @@ -9,6 +9,7 @@ from django.utils import timezone from funkwhale_api.federation import serializers as federation_serializers from funkwhale_api.federation import jsonld +from funkwhale_api.federation import utils as federation_utils from funkwhale_api.music import licenses, metadata, models, signals, tasks DATA_DIR = os.path.dirname(os.path.abspath(__file__)) @@ -1049,3 +1050,53 @@ def test_process_upload_skips_import_metadata_if_invalid(factories, mocker): get_track_from_import_metadata.assert_called_once_with( expected_final_metadata, attributed_to=upload.library.actor ) + + +def test_tag_albums_from_tracks(queryset_equal_queries, factories, mocker): + get_tags_from_foreign_key = mocker.patch( + "funkwhale_api.tags.tasks.get_tags_from_foreign_key" + ) + add_tags_batch = mocker.patch("funkwhale_api.tags.tasks.add_tags_batch") + + expected_queryset = ( + federation_utils.local_qs( + models.Album.objects.filter(tagged_items__isnull=True) + ) + .values_list("id", flat=True) + .order_by("id") + ) + tasks.albums_set_tags_from_tracks(ids=[1, 2]) + get_tags_from_foreign_key.assert_called_once_with( + ids=expected_queryset.filter(pk__in=[1, 2]), + foreign_key_model=models.Track, + foreign_key_attr="album", + ) + + add_tags_batch.assert_called_once_with( + get_tags_from_foreign_key.return_value, model=models.Album, + ) + + +def test_tag_artists_from_tracks(queryset_equal_queries, factories, mocker): + get_tags_from_foreign_key = mocker.patch( + "funkwhale_api.tags.tasks.get_tags_from_foreign_key" + ) + add_tags_batch = mocker.patch("funkwhale_api.tags.tasks.add_tags_batch") + + expected_queryset = ( + federation_utils.local_qs( + models.Artist.objects.filter(tagged_items__isnull=True) + ) + .values_list("id", flat=True) + .order_by("id") + ) + tasks.artists_set_tags_from_tracks(ids=[1, 2]) + get_tags_from_foreign_key.assert_called_once_with( + ids=expected_queryset.filter(pk__in=[1, 2]), + foreign_key_model=models.Track, + foreign_key_attr="artist", + ) + + add_tags_batch.assert_called_once_with( + get_tags_from_foreign_key.return_value, model=models.Artist, + ) diff --git a/api/tests/tags/test_tasks.py b/api/tests/tags/test_tasks.py new file mode 100644 index 0000000000..77f2724b41 --- /dev/null +++ b/api/tests/tags/test_tasks.py @@ -0,0 +1,35 @@ +from funkwhale_api.music import models as music_models +from funkwhale_api.tags import tasks + + +def test_get_tags_from_foreign_key(factories): + rock_tag = factories["tags.Tag"](name="Rock") + rap_tag = factories["tags.Tag"](name="Rap") + artist = factories["music.Artist"]() + factories["music.Track"].create_batch(3, artist=artist, set_tags=["rock", "rap"]) + factories["music.Track"].create_batch( + 3, artist=artist, set_tags=["rock", "rap", "techno"] + ) + + result = tasks.get_tags_from_foreign_key( + ids=[artist.pk], + foreign_key_model=music_models.Track, + foreign_key_attr="artist", + ) + + assert result == {artist.pk: [rock_tag.pk, rap_tag.pk]} + + +def test_add_tags_batch(factories): + rock_tag = factories["tags.Tag"](name="Rock") + rap_tag = factories["tags.Tag"](name="Rap") + factories["tags.Tag"]() + artist = factories["music.Artist"]() + + data = {artist.pk: [rock_tag.pk, rap_tag.pk]} + + tasks.add_tags_batch( + data, model=artist.__class__, + ) + + assert artist.get_tags() == ["Rap", "Rock"] diff --git a/changes/changelog.d/988.enhancement b/changes/changelog.d/988.enhancement new file mode 100644 index 0000000000..087fc66d35 --- /dev/null +++ b/changes/changelog.d/988.enhancement @@ -0,0 +1 @@ +Added periodic background task and CLI command to associate genre tags to artists and albums based on identical tags found on corresponding tracks (#988) diff --git a/docs/admin/commands.rst b/docs/admin/commands.rst index b771e4fe9e..f562521fe8 100644 --- a/docs/admin/commands.rst +++ b/docs/admin/commands.rst @@ -168,3 +168,30 @@ database objects. Running this command with ``--no-dry-run`` is irreversible. Unless you have a backup, there will be no way to retrieve the deleted data. + +Adding tags from tracks +----------------------- + +By default, genre tags found imported files are associated with the corresponding track. + +While you can always associate genre information with an artist or album through the web UI, +it may be tedious to do so by hand for a large number of objects. + +We offer a command you can run after an import to do this for you. It will: + +1. Find all local artists or albums with no tags +2. Get all the tags associated with the corresponding tracks +3. Associate tags that are found on all tracks to the corresponding artist or album + +..note:: + + A periodic task also runs in the background every few days to perform the same process. + +Usage: + +.. code-block:: sh + + # For albums + python manage.py fw albums add-tags-from-tracks --help + # For artists + python manage.py fw artists add-tags-from-tracks --help -- GitLab