Skip to content
Snippets Groups Projects
Commit c2cb510e authored by Eliot Berriot's avatar Eliot Berriot
Browse files

Merge branch '988-artist-album-tags' into 'develop'

Resolve "Tagging artists/albums genres when importing music files"

Closes #988

See merge request funkwhale/funkwhale!973
parents 7b0db234 aea8e4fc
Branches
No related tags found
No related merge requests found
...@@ -599,6 +599,20 @@ CELERY_BEAT_SCHEDULE = { ...@@ -599,6 +599,20 @@ CELERY_BEAT_SCHEDULE = {
}, },
} }
# Optional periodic jobs that copy genre tags shared by every track onto the
# parent album / artist. Each job is enabled by default and can be turned off
# through its environment flag. Both run at 04:00 on day_of_week "4"
# (Thursday in Celery's crontab numbering, where Sunday is 0) and expire
# after two hours if no worker picked them up.
if env.bool("ADD_ALBUM_TAGS_FROM_TRACKS", default=True):
    CELERY_BEAT_SCHEDULE["music.albums_set_tags_from_tracks"] = dict(
        task="music.albums_set_tags_from_tracks",
        schedule=crontab(minute="0", hour="4", day_of_week="4"),
        options=dict(expires=60 * 60 * 2),
    )

if env.bool("ADD_ARTIST_TAGS_FROM_TRACKS", default=True):
    CELERY_BEAT_SCHEDULE["music.artists_set_tags_from_tracks"] = dict(
        task="music.artists_set_tags_from_tracks",
        schedule=crontab(minute="0", hour="4", day_of_week="4"),
        options=dict(expires=60 * 60 * 2),
    )
NODEINFO_REFRESH_DELAY = env.int("NODEINFO_REFRESH_DELAY", default=3600 * 24) NODEINFO_REFRESH_DELAY = env.int("NODEINFO_REFRESH_DELAY", default=3600 * 24)
......
import click
from funkwhale_api.music import tasks
from . import base
def handler_add_tags_from_tracks(
    artists=False, albums=False,
):
    """
    Run one of the "set tags from tracks" tasks synchronously and print the outcome.

    Only one kind of object is processed per call: when both flags are truthy,
    ``artists`` wins and albums are not processed.

    :param artists: when truthy, run ``tasks.artists_set_tags_from_tracks``
    :param albums: when truthy (and ``artists`` is falsy), run
        ``tasks.albums_set_tags_from_tracks``
    :raises click.BadOptionUsage: when neither ``artists`` nor ``albums`` is set
    """
    if artists:
        result = tasks.artists_set_tags_from_tracks()
    elif albums:
        result = tasks.albums_set_tags_from_tracks()
    else:
        # Since click 7, BadOptionUsage expects (option_name, message); passing
        # the message alone raises TypeError instead of a usage error.
        raise click.BadOptionUsage(
            "artists/albums", "You must specify artists or albums"
        )

    # The tasks return a mapping of object id -> tag ids, which is empty when
    # nothing could be tagged. They only return None in dry-run mode, which is
    # never requested here, so test for emptiness rather than for None.
    if not result:
        click.echo(" No relevant tags found")
    else:
        click.echo(" Relevant tags added to {} objects".format(len(result)))
@base.cli.group()
def albums():
    """Manage albums"""
    # Container group only: subcommands are attached with @albums.command().
@base.cli.group()
def artists():
    """Manage artists"""
    # Container group only: subcommands are attached with @artists.command().
@albums.command(name="add-tags-from-tracks")
def albums_add_tags_from_tracks():
    """
    Associate tags to album with no genre tags, assuming identical tags are found on the album tracks
    """
    # Thin CLI entry point: the shared handler does the work in "albums" mode
    # and prints the result.
    handler_add_tags_from_tracks(albums=True)
@artists.command(name="add-tags-from-tracks")
def artists_add_tags_from_tracks():
    """
    Associate tags to artists with no genre tags, assuming identical tags are found on the artist tracks
    """
    # Thin CLI entry point: the shared handler does the work in "artists" mode
    # and prints the result.
    handler_add_tags_from_tracks(artists=True)
...@@ -2,6 +2,7 @@ import click ...@@ -2,6 +2,7 @@ import click
import sys import sys
from . import base from . import base
from . import library # noqa
from . import users # noqa from . import users # noqa
from rest_framework.exceptions import ValidationError from rest_framework.exceptions import ValidationError
......
...@@ -118,6 +118,15 @@ def get_domain_query_from_url(domain, url_field="fid"): ...@@ -118,6 +118,15 @@ def get_domain_query_from_url(domain, url_field="fid"):
return query return query
def local_qs(queryset, url_field="fid", include=True):
    """
    Restrict ``queryset`` to objects hosted on this pod, or to all the others.

    The match is built by ``get_domain_query_from_url`` for
    ``settings.FEDERATION_HOSTNAME`` against ``url_field``.

    :param queryset: queryset to filter
    :param url_field: name of the field holding the object URL
    :param include: when truthy keep local objects, otherwise keep only
        the non-local ones (the query is negated)
    :return: the filtered queryset
    """
    local_match = get_domain_query_from_url(
        domain=settings.FEDERATION_HOSTNAME, url_field=url_field
    )
    return queryset.filter(local_match if include else ~local_match)
def is_local(url): def is_local(url):
if not url: if not url:
return True return True
......
...@@ -14,7 +14,9 @@ from requests.exceptions import RequestException ...@@ -14,7 +14,9 @@ from requests.exceptions import RequestException
from funkwhale_api.common import channels, preferences from funkwhale_api.common import channels, preferences
from funkwhale_api.federation import routes from funkwhale_api.federation import routes
from funkwhale_api.federation import library as lb from funkwhale_api.federation import library as lb
from funkwhale_api.federation import utils as federation_utils
from funkwhale_api.tags import models as tags_models from funkwhale_api.tags import models as tags_models
from funkwhale_api.tags import tasks as tags_tasks
from funkwhale_api.taskapp import celery from funkwhale_api.taskapp import celery
from . import licenses from . import licenses
...@@ -668,6 +670,50 @@ def clean_transcoding_cache(): ...@@ -668,6 +670,50 @@ def clean_transcoding_cache():
return candidates.delete() return candidates.delete()
@celery.app.task(name="music.albums_set_tags_from_tracks")
@transaction.atomic
def albums_set_tags_from_tracks(ids=None, dry_run=False):
    """
    Tag local, untagged albums with the tags shared by all of their tracks.

    :param ids: optional iterable of album ids used to narrow the candidates
    :param dry_run: when truthy, only log what was found and write nothing
    :return: mapping of album id -> tag ids that were added, or ``None``
        in dry-run mode
    """
    untagged_albums = models.Album.objects.filter(
        tagged_items__isnull=True
    ).order_by("id")
    album_ids = federation_utils.local_qs(untagged_albums).values_list(
        "id", flat=True
    )
    if ids is not None:
        album_ids = album_ids.filter(pk__in=ids)

    tags_by_album = tags_tasks.get_tags_from_foreign_key(
        ids=album_ids, foreign_key_model=models.Track, foreign_key_attr="album",
    )
    logger.info("Found automatic tags for %s albums…", len(tags_by_album))

    if dry_run:
        logger.info("Running in dry-run mode, not commiting")
        return None

    tags_tasks.add_tags_batch(
        tags_by_album, model=models.Album,
    )
    return tags_by_album
@celery.app.task(name="music.artists_set_tags_from_tracks")
@transaction.atomic
def artists_set_tags_from_tracks(ids=None, dry_run=False):
    """
    Tag local, untagged artists with the tags shared by all of their tracks.

    :param ids: optional iterable of artist ids used to narrow the candidates
    :param dry_run: when truthy, only log what was found and write nothing
    :return: mapping of artist id -> tag ids that were added, or ``None``
        in dry-run mode
    """
    untagged_artists = models.Artist.objects.filter(
        tagged_items__isnull=True
    ).order_by("id")
    artist_ids = federation_utils.local_qs(untagged_artists).values_list(
        "id", flat=True
    )
    if ids is not None:
        artist_ids = artist_ids.filter(pk__in=ids)

    tags_by_artist = tags_tasks.get_tags_from_foreign_key(
        ids=artist_ids, foreign_key_model=models.Track, foreign_key_attr="artist",
    )
    logger.info("Found automatic tags for %s artists…", len(tags_by_artist))

    if dry_run:
        logger.info("Running in dry-run mode, not commiting")
        return None

    tags_tasks.add_tags_batch(
        tags_by_artist, model=models.Artist,
    )
    return tags_by_artist
def get_prunable_tracks( def get_prunable_tracks(
exclude_favorites=True, exclude_playlists=True, exclude_listenings=True exclude_favorites=True, exclude_playlists=True, exclude_listenings=True
): ):
......
import collections
from django.contrib.contenttypes.models import ContentType
from . import models
def get_tags_from_foreign_key(
    ids, foreign_key_model, foreign_key_attr, tagged_items_attr="tagged_items"
):
    """
    Cf #988, this is useful to tag an artist with #Rock if all its tracks are tagged with
    #Rock, for instance.

    :param ids: ids of the parent objects (e.g. artists) to compute tags for
    :param foreign_key_model: model whose rows point to the parents (e.g. Track)
    :param foreign_key_attr: name of the foreign key on ``foreign_key_model``
        that references the parent (e.g. ``"artist"``)
    :param tagged_items_attr: name of the relation on ``foreign_key_model``
        rows that yields their tagged items
    :return: dict mapping parent id -> sorted list of tag ids present on
        every related row; parents with no common tag are omitted
    """
    fk_id_attr = "{}_id".format(foreign_key_attr)
    objs = (
        foreign_key_model.objects.filter(
            **{"{}__pk__in".format(foreign_key_attr): ids}
        )
        .order_by("-id")
        .only("id", fk_id_attr)
        .prefetch_related(tagged_items_attr)
    )

    # Per parent: number of related rows seen, and how often each tag occurs.
    total_objs = collections.Counter()
    tag_counts = {}
    # An explicit chunk_size is needed for iterator() to be usable together
    # with prefetch_related(): recent Django versions honour the prefetch
    # only when it is given and reject the call without it. 2000 is the value
    # iterator() used implicitly before, so batching is unchanged.
    for obj in objs.iterator(chunk_size=2000):
        key = getattr(obj, fk_id_attr)
        total_objs[key] += 1
        tag_counts.setdefault(key, collections.Counter()).update(
            ti.tag_id for ti in getattr(obj, tagged_items_attr).all()
        )

    # Keep only tags that are present on all objects, i.e. tags whose count
    # matches the number of related rows for that parent.
    final_data = {}
    for key, counter in tag_counts.items():
        tags_to_keep = sorted(
            t for t, c in counter.items() if c >= total_objs[key]
        )
        if tags_to_keep:
            final_data[key] = tags_to_keep
    return final_data
def add_tags_batch(data, model, tagged_items_attr="tagged_items"):
    """
    Bulk-create the tagged items described by ``data`` for objects of ``model``.

    :param data: dict mapping object id -> iterable of tag ids to attach
    :param model: model class the object ids belong to
    :param tagged_items_attr: accepted for signature compatibility; not used
        by this function
    :return: whatever ``TaggedItem.objects.bulk_create`` returns
    """
    content_type = ContentType.objects.get_for_model(model)
    new_items = []
    for object_id, tag_ids in data.items():
        new_items.extend(
            models.TaggedItem(
                tag_id=tag_id, content_type=content_type, object_id=object_id
            )
            for tag_id in tag_ids
        )
    return models.TaggedItem.objects.bulk_create(new_items, batch_size=2000)
...@@ -3,6 +3,7 @@ import pytest ...@@ -3,6 +3,7 @@ import pytest
from click.testing import CliRunner from click.testing import CliRunner
from funkwhale_api.cli import main from funkwhale_api.cli import main
from funkwhale_api.cli import library
from funkwhale_api.cli import users from funkwhale_api.cli import users
...@@ -102,6 +103,16 @@ from funkwhale_api.cli import users ...@@ -102,6 +103,16 @@ from funkwhale_api.cli import users
) )
], ],
), ),
(
("albums", "add-tags-from-tracks"),
tuple(),
[(library, "handler_add_tags_from_tracks", {"albums": True})],
),
(
("artists", "add-tags-from-tracks"),
tuple(),
[(library, "handler_add_tags_from_tracks", {"artists": True})],
),
], ],
) )
def test_cli(cmd, args, handlers, mocker): def test_cli(cmd, args, handlers, mocker):
......
...@@ -138,3 +138,37 @@ def test_retrieve_with_serializer(db, r_mock): ...@@ -138,3 +138,37 @@ def test_retrieve_with_serializer(db, r_mock):
result = utils.retrieve_ap_object(fid, actor=None, serializer_class=S) result = utils.retrieve_ap_object(fid, actor=None, serializer_class=S)
assert result == {"persisted": "object"} assert result == {"persisted": "object"}
@pytest.mark.parametrize(
    "factory_name, fids, kwargs, expected_indexes",
    [
        (
            "music.Artist",
            ["https://local.domain/test", "http://local.domain/"],
            {},
            [0, 1],
        ),
        (
            "music.Artist",
            ["https://local.domain/test", "http://notlocal.domain/"],
            {},
            [0],
        ),
        (
            "music.Artist",
            ["https://local.domain/test", "http://notlocal.domain/"],
            {"include": False},
            [1],
        ),
    ],
)
def test_local_qs(factory_name, fids, kwargs, expected_indexes, factories, settings):
    """local_qs keeps (or, with include=False, drops) objects whose fid is on the local domain."""
    settings.FEDERATION_HOSTNAME = "local.domain"
    make_obj = factories[factory_name]
    created = [make_obj(fid=fid) for fid in fids]
    model = type(created[0])
    everything = model.objects.all().order_by("id")

    filtered = utils.local_qs(everything, **kwargs)

    expected = [
        obj for position, obj in enumerate(created) if position in expected_indexes
    ]
    assert list(filtered) == expected
...@@ -9,6 +9,7 @@ from django.utils import timezone ...@@ -9,6 +9,7 @@ from django.utils import timezone
from funkwhale_api.federation import serializers as federation_serializers from funkwhale_api.federation import serializers as federation_serializers
from funkwhale_api.federation import jsonld from funkwhale_api.federation import jsonld
from funkwhale_api.federation import utils as federation_utils
from funkwhale_api.music import licenses, metadata, models, signals, tasks from funkwhale_api.music import licenses, metadata, models, signals, tasks
DATA_DIR = os.path.dirname(os.path.abspath(__file__)) DATA_DIR = os.path.dirname(os.path.abspath(__file__))
...@@ -1049,3 +1050,53 @@ def test_process_upload_skips_import_metadata_if_invalid(factories, mocker): ...@@ -1049,3 +1050,53 @@ def test_process_upload_skips_import_metadata_if_invalid(factories, mocker):
get_track_from_import_metadata.assert_called_once_with( get_track_from_import_metadata.assert_called_once_with(
expected_final_metadata, attributed_to=upload.library.actor expected_final_metadata, attributed_to=upload.library.actor
) )
def test_tag_albums_from_tracks(queryset_equal_queries, factories, mocker):
    """The album task computes tags for local untagged albums, then saves them."""
    mocked_get_tags = mocker.patch(
        "funkwhale_api.tags.tasks.get_tags_from_foreign_key"
    )
    mocked_add_tags = mocker.patch("funkwhale_api.tags.tasks.add_tags_batch")
    untagged_albums = models.Album.objects.filter(tagged_items__isnull=True)
    candidate_ids = (
        federation_utils.local_qs(untagged_albums)
        .values_list("id", flat=True)
        .order_by("id")
    )

    tasks.albums_set_tags_from_tracks(ids=[1, 2])

    # Querysets are compared by their queries (queryset_equal_queries fixture).
    mocked_get_tags.assert_called_once_with(
        ids=candidate_ids.filter(pk__in=[1, 2]),
        foreign_key_model=models.Track,
        foreign_key_attr="album",
    )
    mocked_add_tags.assert_called_once_with(
        mocked_get_tags.return_value, model=models.Album,
    )
def test_tag_artists_from_tracks(queryset_equal_queries, factories, mocker):
    """The artist task computes tags for local untagged artists, then saves them."""
    mocked_get_tags = mocker.patch(
        "funkwhale_api.tags.tasks.get_tags_from_foreign_key"
    )
    mocked_add_tags = mocker.patch("funkwhale_api.tags.tasks.add_tags_batch")
    untagged_artists = models.Artist.objects.filter(tagged_items__isnull=True)
    candidate_ids = (
        federation_utils.local_qs(untagged_artists)
        .values_list("id", flat=True)
        .order_by("id")
    )

    tasks.artists_set_tags_from_tracks(ids=[1, 2])

    # Querysets are compared by their queries (queryset_equal_queries fixture).
    mocked_get_tags.assert_called_once_with(
        ids=candidate_ids.filter(pk__in=[1, 2]),
        foreign_key_model=models.Track,
        foreign_key_attr="artist",
    )
    mocked_add_tags.assert_called_once_with(
        mocked_get_tags.return_value, model=models.Artist,
    )
from funkwhale_api.music import models as music_models
from funkwhale_api.tags import tasks
def test_get_tags_from_foreign_key(factories):
    """Only tags carried by every track of the artist are returned."""
    rock = factories["tags.Tag"](name="Rock")
    rap = factories["tags.Tag"](name="Rap")
    artist = factories["music.Artist"]()
    make_tracks = factories["music.Track"].create_batch
    make_tracks(3, artist=artist, set_tags=["rock", "rap"])
    # "techno" is only on half of the tracks, so it must not be kept.
    make_tracks(3, artist=artist, set_tags=["rock", "rap", "techno"])

    common_tags = tasks.get_tags_from_foreign_key(
        ids=[artist.pk],
        foreign_key_model=music_models.Track,
        foreign_key_attr="artist",
    )

    assert common_tags == {artist.pk: [rock.pk, rap.pk]}
def test_add_tags_batch(factories):
    """add_tags_batch attaches exactly the requested tags to the given object."""
    rock = factories["tags.Tag"](name="Rock")
    rap = factories["tags.Tag"](name="Rap")
    # Extra tag that is not part of the batch and must not be attached.
    factories["tags.Tag"]()
    artist = factories["music.Artist"]()
    tags_by_object = {artist.pk: [rock.pk, rap.pk]}

    tasks.add_tags_batch(
        tags_by_object, model=type(artist),
    )

    assert artist.get_tags() == ["Rap", "Rock"]
Added periodic background task and CLI command to associate genre tags to artists and albums based on identical tags found on corresponding tracks (#988)
...@@ -168,3 +168,30 @@ database objects. ...@@ -168,3 +168,30 @@ database objects.
Running this command with ``--no-dry-run`` is irreversible. Unless you have a backup, Running this command with ``--no-dry-run`` is irreversible. Unless you have a backup,
there will be no way to retrieve the deleted data. there will be no way to retrieve the deleted data.
Adding tags from tracks
-----------------------
By default, genre tags found in imported files are associated with the corresponding track.
While you can always associate genre information with an artist or album through the web UI,
it may be tedious to do so by hand for a large number of objects.
We offer a command you can run after an import to do this for you. It will:
1. Find all local artists or albums with no tags
2. Get all the tags associated with the corresponding tracks
3. Associate tags that are found on all tracks to the corresponding artist or album
.. note::
A periodic task also runs in the background every few days to perform the same process.
Usage:
.. code-block:: sh
# For albums
python manage.py fw albums add-tags-from-tracks --help
# For artists
python manage.py fw artists add-tags-from-tracks --help
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment