From aea8e4fc59911abcaa198c0da28318302594b69f Mon Sep 17 00:00:00 2001
From: Eliot Berriot <contact@eliotberriot.com>
Date: Wed, 18 Dec 2019 07:52:09 +0100
Subject: [PATCH] Resolve "Tagging artists/albums genres when importing music
 files"

---
 api/config/settings/common.py         | 14 +++++++
 api/funkwhale_api/cli/library.py      | 50 +++++++++++++++++++++++++
 api/funkwhale_api/cli/main.py         |  1 +
 api/funkwhale_api/federation/utils.py |  9 +++++
 api/funkwhale_api/music/tasks.py      | 46 +++++++++++++++++++++++
 api/funkwhale_api/tags/tasks.py       | 54 +++++++++++++++++++++++++++
 api/tests/cli/test_main.py            | 11 ++++++
 api/tests/federation/test_utils.py    | 34 +++++++++++++++++
 api/tests/music/test_tasks.py         | 51 +++++++++++++++++++++++++
 api/tests/tags/test_tasks.py          | 35 +++++++++++++++++
 changes/changelog.d/988.enhancement   |  1 +
 docs/admin/commands.rst               | 27 ++++++++++++++
 12 files changed, 333 insertions(+)
 create mode 100644 api/funkwhale_api/cli/library.py
 create mode 100644 api/funkwhale_api/tags/tasks.py
 create mode 100644 api/tests/tags/test_tasks.py
 create mode 100644 changes/changelog.d/988.enhancement

diff --git a/api/config/settings/common.py b/api/config/settings/common.py
index 2fb7b496c3..d67a15290f 100644
--- a/api/config/settings/common.py
+++ b/api/config/settings/common.py
@@ -599,6 +599,20 @@ CELERY_BEAT_SCHEDULE = {
     },
 }
 
+if env.bool("ADD_ALBUM_TAGS_FROM_TRACKS", default=True):
+    CELERY_BEAT_SCHEDULE["music.albums_set_tags_from_tracks"] = {
+        "task": "music.albums_set_tags_from_tracks",
+        "schedule": crontab(minute="0", hour="4", day_of_week="4"),
+        "options": {"expires": 60 * 60 * 2},
+    }
+
+if env.bool("ADD_ARTIST_TAGS_FROM_TRACKS", default=True):
+    CELERY_BEAT_SCHEDULE["music.artists_set_tags_from_tracks"] = {
+        "task": "music.artists_set_tags_from_tracks",
+        "schedule": crontab(minute="0", hour="4", day_of_week="4"),
+        "options": {"expires": 60 * 60 * 2},
+    }
+
 NODEINFO_REFRESH_DELAY = env.int("NODEINFO_REFRESH_DELAY", default=3600 * 24)
 
 
diff --git a/api/funkwhale_api/cli/library.py b/api/funkwhale_api/cli/library.py
new file mode 100644
index 0000000000..da4dd67ad1
--- /dev/null
+++ b/api/funkwhale_api/cli/library.py
@@ -0,0 +1,50 @@
+import click
+
+from funkwhale_api.music import tasks
+
+from . import base
+
+
+def handler_add_tags_from_tracks(
+    artists=False, albums=False,
+):
+    result = None
+    if artists:
+        result = tasks.artists_set_tags_from_tracks()
+    elif albums:
+        result = tasks.albums_set_tags_from_tracks()
+    else:
+        raise click.BadOptionUsage("You must specify artists or albums")
+
+    if result is None:
+        click.echo("  No relevant tags found")
+    else:
+        click.echo("  Relevant tags added to {} objects".format(len(result)))
+
+
+@base.cli.group()
+def albums():
+    """Manage albums"""
+    pass
+
+
+@base.cli.group()
+def artists():
+    """Manage artists"""
+    pass
+
+
+@albums.command(name="add-tags-from-tracks")
+def albums_add_tags_from_tracks():
+    """
+    Associate tags to album with no genre tags, assuming identical tags are found on the album tracks
+    """
+    handler_add_tags_from_tracks(albums=True)
+
+
+@artists.command(name="add-tags-from-tracks")
+def artists_add_tags_from_tracks():
+    """
+    Associate tags to artists with no genre tags, assuming identical tags are found on the artist tracks
+    """
+    handler_add_tags_from_tracks(artists=True)
diff --git a/api/funkwhale_api/cli/main.py b/api/funkwhale_api/cli/main.py
index a51cca1fd0..9bad5a8eb8 100644
--- a/api/funkwhale_api/cli/main.py
+++ b/api/funkwhale_api/cli/main.py
@@ -2,6 +2,7 @@ import click
 import sys
 
 from . import base
+from . import library  # noqa
 from . import users  # noqa
 
 from rest_framework.exceptions import ValidationError
diff --git a/api/funkwhale_api/federation/utils.py b/api/funkwhale_api/federation/utils.py
index 74e9fef692..59b63e2cee 100644
--- a/api/funkwhale_api/federation/utils.py
+++ b/api/funkwhale_api/federation/utils.py
@@ -118,6 +118,15 @@ def get_domain_query_from_url(domain, url_field="fid"):
     return query
 
 
+def local_qs(queryset, url_field="fid", include=True):
+    query = get_domain_query_from_url(
+        domain=settings.FEDERATION_HOSTNAME, url_field=url_field
+    )
+    if not include:
+        query = ~query
+    return queryset.filter(query)
+
+
 def is_local(url):
     if not url:
         return True
diff --git a/api/funkwhale_api/music/tasks.py b/api/funkwhale_api/music/tasks.py
index 153dbe495e..71f7106868 100644
--- a/api/funkwhale_api/music/tasks.py
+++ b/api/funkwhale_api/music/tasks.py
@@ -14,7 +14,9 @@ from requests.exceptions import RequestException
 from funkwhale_api.common import channels, preferences
 from funkwhale_api.federation import routes
 from funkwhale_api.federation import library as lb
+from funkwhale_api.federation import utils as federation_utils
 from funkwhale_api.tags import models as tags_models
+from funkwhale_api.tags import tasks as tags_tasks
 from funkwhale_api.taskapp import celery
 
 from . import licenses
@@ -668,6 +670,50 @@ def clean_transcoding_cache():
     return candidates.delete()
 
 
+@celery.app.task(name="music.albums_set_tags_from_tracks")
+@transaction.atomic
+def albums_set_tags_from_tracks(ids=None, dry_run=False):
+    qs = models.Album.objects.filter(tagged_items__isnull=True).order_by("id")
+    qs = federation_utils.local_qs(qs)
+    qs = qs.values_list("id", flat=True)
+    if ids is not None:
+        qs = qs.filter(pk__in=ids)
+    data = tags_tasks.get_tags_from_foreign_key(
+        ids=qs, foreign_key_model=models.Track, foreign_key_attr="album",
+    )
+    logger.info("Found automatic tags for %s albums…", len(data))
+    if dry_run:
+        logger.info("Running in dry-run mode, not commiting")
+        return
+
+    tags_tasks.add_tags_batch(
+        data, model=models.Album,
+    )
+    return data
+
+
+@celery.app.task(name="music.artists_set_tags_from_tracks")
+@transaction.atomic
+def artists_set_tags_from_tracks(ids=None, dry_run=False):
+    qs = models.Artist.objects.filter(tagged_items__isnull=True).order_by("id")
+    qs = federation_utils.local_qs(qs)
+    qs = qs.values_list("id", flat=True)
+    if ids is not None:
+        qs = qs.filter(pk__in=ids)
+    data = tags_tasks.get_tags_from_foreign_key(
+        ids=qs, foreign_key_model=models.Track, foreign_key_attr="artist",
+    )
+    logger.info("Found automatic tags for %s artists…", len(data))
+    if dry_run:
+        logger.info("Running in dry-run mode, not commiting")
+        return
+
+    tags_tasks.add_tags_batch(
+        data, model=models.Artist,
+    )
+    return data
+
+
 def get_prunable_tracks(
     exclude_favorites=True, exclude_playlists=True, exclude_listenings=True
 ):
diff --git a/api/funkwhale_api/tags/tasks.py b/api/funkwhale_api/tags/tasks.py
new file mode 100644
index 0000000000..13ced9d2d5
--- /dev/null
+++ b/api/funkwhale_api/tags/tasks.py
@@ -0,0 +1,54 @@
+import collections
+
+from django.contrib.contenttypes.models import ContentType
+
+from . import models
+
+
+def get_tags_from_foreign_key(
+    ids, foreign_key_model, foreign_key_attr, tagged_items_attr="tagged_items"
+):
+    """
+    Cf #988, this is useful to tag an artist with #Rock if all its tracks are tagged with
+    #Rock, for instance.
+    """
+    data = {}
+    objs = foreign_key_model.objects.filter(
+        **{"{}__pk__in".format(foreign_key_attr): ids}
+    ).order_by("-id")
+    objs = objs.only("id", "{}_id".format(foreign_key_attr)).prefetch_related(
+        tagged_items_attr
+    )
+
+    for obj in objs.iterator():
+        # loop on all objects, store the objs tags + counter on the corresponding foreign key
+        row_data = data.setdefault(
+            getattr(obj, "{}_id".format(foreign_key_attr)),
+            {"total_objs": 0, "tags": []},
+        )
+        row_data["total_objs"] += 1
+        for ti in getattr(obj, tagged_items_attr).all():
+            row_data["tags"].append(ti.tag_id)
+
+    # now, keep only tags that are present on all objects, i.e tags where the count
+    # matches total_objs
+    final_data = {}
+    for key, row_data in data.items():
+        counter = collections.Counter(row_data["tags"])
+        tags_to_keep = sorted(
+            [t for t, c in counter.items() if c >= row_data["total_objs"]]
+        )
+        if tags_to_keep:
+            final_data[key] = tags_to_keep
+    return final_data
+
+
+def add_tags_batch(data, model, tagged_items_attr="tagged_items"):
+    model_ct = ContentType.objects.get_for_model(model)
+    tagged_items = [
+        models.TaggedItem(tag_id=tag_id, content_type=model_ct, object_id=obj_id)
+        for obj_id, tag_ids in data.items()
+        for tag_id in tag_ids
+    ]
+
+    return models.TaggedItem.objects.bulk_create(tagged_items, batch_size=2000)
diff --git a/api/tests/cli/test_main.py b/api/tests/cli/test_main.py
index 4cea9e414c..1e1a35e609 100644
--- a/api/tests/cli/test_main.py
+++ b/api/tests/cli/test_main.py
@@ -3,6 +3,7 @@ import pytest
 from click.testing import CliRunner
 
 from funkwhale_api.cli import main
+from funkwhale_api.cli import library
 from funkwhale_api.cli import users
 
 
@@ -102,6 +103,16 @@ from funkwhale_api.cli import users
                 )
             ],
         ),
+        (
+            ("albums", "add-tags-from-tracks"),
+            tuple(),
+            [(library, "handler_add_tags_from_tracks", {"albums": True})],
+        ),
+        (
+            ("artists", "add-tags-from-tracks"),
+            tuple(),
+            [(library, "handler_add_tags_from_tracks", {"artists": True})],
+        ),
     ],
 )
 def test_cli(cmd, args, handlers, mocker):
diff --git a/api/tests/federation/test_utils.py b/api/tests/federation/test_utils.py
index 83c5e4f7e0..9502fb6c36 100644
--- a/api/tests/federation/test_utils.py
+++ b/api/tests/federation/test_utils.py
@@ -138,3 +138,37 @@ def test_retrieve_with_serializer(db, r_mock):
     result = utils.retrieve_ap_object(fid, actor=None, serializer_class=S)
 
     assert result == {"persisted": "object"}
+
+
+@pytest.mark.parametrize(
+    "factory_name, fids, kwargs, expected_indexes",
+    [
+        (
+            "music.Artist",
+            ["https://local.domain/test", "http://local.domain/"],
+            {},
+            [0, 1],
+        ),
+        (
+            "music.Artist",
+            ["https://local.domain/test", "http://notlocal.domain/"],
+            {},
+            [0],
+        ),
+        (
+            "music.Artist",
+            ["https://local.domain/test", "http://notlocal.domain/"],
+            {"include": False},
+            [1],
+        ),
+    ],
+)
+def test_local_qs(factory_name, fids, kwargs, expected_indexes, factories, settings):
+    settings.FEDERATION_HOSTNAME = "local.domain"
+    objs = [factories[factory_name](fid=fid) for fid in fids]
+
+    qs = objs[0].__class__.objects.all().order_by("id")
+    result = utils.local_qs(qs, **kwargs)
+
+    expected_objs = [obj for i, obj in enumerate(objs) if i in expected_indexes]
+    assert list(result) == expected_objs
diff --git a/api/tests/music/test_tasks.py b/api/tests/music/test_tasks.py
index 0fadad4c53..449be0c04b 100644
--- a/api/tests/music/test_tasks.py
+++ b/api/tests/music/test_tasks.py
@@ -9,6 +9,7 @@ from django.utils import timezone
 
 from funkwhale_api.federation import serializers as federation_serializers
 from funkwhale_api.federation import jsonld
+from funkwhale_api.federation import utils as federation_utils
 from funkwhale_api.music import licenses, metadata, models, signals, tasks
 
 DATA_DIR = os.path.dirname(os.path.abspath(__file__))
@@ -1049,3 +1050,53 @@ def test_process_upload_skips_import_metadata_if_invalid(factories, mocker):
     get_track_from_import_metadata.assert_called_once_with(
         expected_final_metadata, attributed_to=upload.library.actor
     )
+
+
+def test_tag_albums_from_tracks(queryset_equal_queries, factories, mocker):
+    get_tags_from_foreign_key = mocker.patch(
+        "funkwhale_api.tags.tasks.get_tags_from_foreign_key"
+    )
+    add_tags_batch = mocker.patch("funkwhale_api.tags.tasks.add_tags_batch")
+
+    expected_queryset = (
+        federation_utils.local_qs(
+            models.Album.objects.filter(tagged_items__isnull=True)
+        )
+        .values_list("id", flat=True)
+        .order_by("id")
+    )
+    tasks.albums_set_tags_from_tracks(ids=[1, 2])
+    get_tags_from_foreign_key.assert_called_once_with(
+        ids=expected_queryset.filter(pk__in=[1, 2]),
+        foreign_key_model=models.Track,
+        foreign_key_attr="album",
+    )
+
+    add_tags_batch.assert_called_once_with(
+        get_tags_from_foreign_key.return_value, model=models.Album,
+    )
+
+
+def test_tag_artists_from_tracks(queryset_equal_queries, factories, mocker):
+    get_tags_from_foreign_key = mocker.patch(
+        "funkwhale_api.tags.tasks.get_tags_from_foreign_key"
+    )
+    add_tags_batch = mocker.patch("funkwhale_api.tags.tasks.add_tags_batch")
+
+    expected_queryset = (
+        federation_utils.local_qs(
+            models.Artist.objects.filter(tagged_items__isnull=True)
+        )
+        .values_list("id", flat=True)
+        .order_by("id")
+    )
+    tasks.artists_set_tags_from_tracks(ids=[1, 2])
+    get_tags_from_foreign_key.assert_called_once_with(
+        ids=expected_queryset.filter(pk__in=[1, 2]),
+        foreign_key_model=models.Track,
+        foreign_key_attr="artist",
+    )
+
+    add_tags_batch.assert_called_once_with(
+        get_tags_from_foreign_key.return_value, model=models.Artist,
+    )
diff --git a/api/tests/tags/test_tasks.py b/api/tests/tags/test_tasks.py
new file mode 100644
index 0000000000..77f2724b41
--- /dev/null
+++ b/api/tests/tags/test_tasks.py
@@ -0,0 +1,35 @@
+from funkwhale_api.music import models as music_models
+from funkwhale_api.tags import tasks
+
+
+def test_get_tags_from_foreign_key(factories):
+    rock_tag = factories["tags.Tag"](name="Rock")
+    rap_tag = factories["tags.Tag"](name="Rap")
+    artist = factories["music.Artist"]()
+    factories["music.Track"].create_batch(3, artist=artist, set_tags=["rock", "rap"])
+    factories["music.Track"].create_batch(
+        3, artist=artist, set_tags=["rock", "rap", "techno"]
+    )
+
+    result = tasks.get_tags_from_foreign_key(
+        ids=[artist.pk],
+        foreign_key_model=music_models.Track,
+        foreign_key_attr="artist",
+    )
+
+    assert result == {artist.pk: [rock_tag.pk, rap_tag.pk]}
+
+
+def test_add_tags_batch(factories):
+    rock_tag = factories["tags.Tag"](name="Rock")
+    rap_tag = factories["tags.Tag"](name="Rap")
+    factories["tags.Tag"]()
+    artist = factories["music.Artist"]()
+
+    data = {artist.pk: [rock_tag.pk, rap_tag.pk]}
+
+    tasks.add_tags_batch(
+        data, model=artist.__class__,
+    )
+
+    assert artist.get_tags() == ["Rap", "Rock"]
diff --git a/changes/changelog.d/988.enhancement b/changes/changelog.d/988.enhancement
new file mode 100644
index 0000000000..087fc66d35
--- /dev/null
+++ b/changes/changelog.d/988.enhancement
@@ -0,0 +1 @@
+Added periodic background task and CLI command to associate genre tags to artists and albums based on identical tags found on corresponding tracks (#988)
diff --git a/docs/admin/commands.rst b/docs/admin/commands.rst
index b771e4fe9e..f562521fe8 100644
--- a/docs/admin/commands.rst
+++ b/docs/admin/commands.rst
@@ -168,3 +168,30 @@ database objects.
 
     Running this command with ``--no-dry-run`` is irreversible. Unless you have a backup,
     there will be no way to retrieve the deleted data.
+
+Adding tags from tracks
+-----------------------
+
+By default, genre tags found imported files are associated with the corresponding track.
+
+While you can always associate genre information with an artist or album through the web UI,
+it may be tedious to do so by hand for a large number of objects.
+
+We offer a command you can run after an import to do this for you. It will:
+
+1. Find all local artists or albums with no tags
+2. Get all the tags associated with the corresponding tracks
+3. Associate tags that are found on all tracks to the corresponding artist or album
+
+..note::
+
+    A periodic task also runs in the background every few days to perform the same process.
+
+Usage:
+
+.. code-block:: sh
+
+    # For albums
+    python manage.py fw albums add-tags-from-tracks --help
+    # For artists
+    python manage.py fw artists add-tags-from-tracks --help
-- 
GitLab