import collections
import datetime
import logging
import os

from django.db import transaction
from django.db.models import F, Q
from django.dispatch import receiver
from django.utils import timezone

from musicbrainzngs import ResponseError
from requests.exceptions import RequestException

from funkwhale_api.common import channels, preferences
from funkwhale_api.federation import library as lb
from funkwhale_api.federation import routes
from funkwhale_api.tags import models as tags_models
from funkwhale_api.taskapp import celery

from . import licenses
from . import metadata
from . import models
from . import serializers
from . import signals
logger = logging.getLogger(__name__)
def update_album_cover(
    album, source=None, cover_data=None, musicbrainz=True, replace=False
):
    """Attach a cover image to ``album``, trying sources in priority order.

    Priority: explicit ``cover_data`` bytes, then a ``cover.<ext>`` file
    sitting next to a ``file://`` source, then the MusicBrainz cover art
    archive (when the album has an mbid).

    :param album: album model instance (must expose ``cover``, ``pk``,
        ``mbid`` and ``get_image``)
    :param source: optional upload source URI; only ``file://`` is scanned
    :param cover_data: raw image payload forwarded to ``album.get_image``
    :param musicbrainz: allow falling back to a MusicBrainz lookup
    :param replace: overwrite an existing cover instead of returning early
    :returns: whatever ``album.get_image`` returns, or None when no cover
        could be found or the album already has one
    """
    if album.cover and not replace:
        return
    if cover_data:
        return album.get_image(data=cover_data)

    if source and source.startswith("file://"):
        # let's look for a cover in the same directory
        path = os.path.dirname(source.replace("file://", "", 1))
        logger.info("[Album %s] scanning covers from %s", album.pk, path)
        cover = get_cover_from_fs(path)
        if cover:
            return album.get_image(data=cover)

    if musicbrainz and album.mbid:
        try:
            logger.info(
                "[Album %s] Fetching cover from musicbrainz release %s",
                album.pk,
                str(album.mbid),
            )
            return album.get_image()
        except ResponseError as exc:
            # best-effort: a missing remote cover should not fail the import
            logger.warning(
                "[Album %s] cannot fetch cover from musicbrainz: %s", album.pk, str(exc)
            )
# (extension, mimetype) pairs probed when searching for a local cover file.
IMAGE_TYPES = [("jpg", "image/jpeg"), ("png", "image/png")]


def get_cover_from_fs(dir_path):
    """Look for a ``cover.<ext>`` image inside ``dir_path``.

    Returns a ``{"mimetype": ..., "content": ...}`` dict for the first
    matching extension, or None when the directory or every candidate
    file is missing.
    """
    if not os.path.exists(dir_path):
        return None
    for extension, mimetype in IMAGE_TYPES:
        candidate = os.path.join(dir_path, "cover.{}".format(extension))
        if os.path.exists(candidate):
            with open(candidate, "rb") as handle:
                logger.info("Found cover at %s", candidate)
                return {"mimetype": mimetype, "content": handle.read()}
    return None
@celery.require_instance(
    models.LibraryScan.objects.select_related().filter(status="pending"), "library_scan"
)
def start_library_scan(library_scan):
    """Fetch remote library metadata and enqueue the first scan page.

    Flips the scan from "pending" to "scanning" and records the total
    number of files announced by the remote library; marks the scan
    "errored" (and re-raises) when the initial fetch fails.
    """
    try:
        library_data = lb.get_library_data(
            library_scan.library.fid, actor=library_scan.actor
        )
    except Exception:
        # persist the failure so the scan is not stuck in "pending" forever
        library_scan.status = "errored"
        library_scan.save(update_fields=["status", "modification_date"])
        raise
    library_scan.status = "scanning"
    library_scan.total_files = library_data["totalItems"]
    library_scan.modification_date = timezone.now()
    library_scan.save(update_fields=["status", "modification_date", "total_files"])
    scan_library_page.delay(
        library_scan_id=library_scan.pk, page_url=library_data["first"]
    )
@celery.app.task(
    name="music.scan_library_page",
    retry_backoff=60,
    max_retries=5,
    autoretry_for=[RequestException],
)
@celery.require_instance(
    models.LibraryScan.objects.select_related().filter(status="scanning"),
    "library_scan",
)
def scan_library_page(library_scan, page_url):
    """Import one page of a remote library and chain to the next page.

    Saves every item of the page as an Upload, bumps the scan's
    ``processed_files`` counter atomically (F expression), and either
    enqueues the next page or marks the scan "finished".
    """
    data = lb.get_library_page(library_scan.library, page_url, library_scan.actor)
    # fix: `uploads` was referenced below without ever being initialized,
    # which raised NameError on the first processed item
    uploads = []

    for item_serializer in data["items"]:
        upload = item_serializer.save(library=library_scan.library)
        uploads.append(upload)

    # F() avoids a read-modify-write race with concurrent page tasks
    library_scan.processed_files = F("processed_files") + len(uploads)
    library_scan.modification_date = timezone.now()
    update_fields = ["modification_date", "processed_files"]

    next_page = data.get("next")
    fetch_next = next_page and next_page != page_url

    if not fetch_next:
        update_fields.append("status")
        library_scan.status = "finished"

    library_scan.save(update_fields=update_fields)

    if fetch_next:
        scan_library_page.delay(library_scan_id=library_scan.pk, page_url=next_page)
v = data
for k in keys:
try:
v = v[k]
except KeyError:
def __init__(self, code):
    # Keep the machine-readable error code on the instance and also pass it
    # to the parent exception constructor.
    # NOTE(review): the enclosing class header is not visible in this chunk
    # of the file — presumably an import-error exception type; confirm.
    self.code = code
    super().__init__(code)
def fail_import(upload, error_code, detail=None, **fields):
    """Mark ``upload`` as errored and broadcast the status change.

    :param upload: the Upload model instance to fail
    :param error_code: machine-readable code stored in ``import_details``
    :param detail: optional human/serializer detail payload
    :param fields: extra keys merged into ``import_details``

    The status-change signal is only sent when the upload's import
    metadata does not opt out via ``funkwhale.config.broadcast``.
    """
    old_status = upload.import_status
    upload.import_status = "errored"
    upload.import_details = {"error_code": error_code, "detail": detail}
    upload.import_details.update(fields)
    upload.import_date = timezone.now()
    upload.save(update_fields=["import_details", "import_status", "import_date"])

    # fix: the original call was missing its closing parenthesis
    broadcast = getter(
        upload.import_metadata, "funkwhale", "config", "broadcast", default=True
    )
    if broadcast:
        signals.upload_import_status_updated.send(
            old_status=old_status,
            new_status=upload.import_status,
            upload=upload,
            sender=None,
        )
@celery.app.task(name="music.process_upload")
@celery.require_instance(
models.Upload.objects.filter(import_status="pending").select_related(
"library__actor__user"
),
Eliot Berriot
committed
additional_data = {}
Eliot Berriot
committed
m = metadata.Metadata(audio_file)
try:
serializer = metadata.TrackMetadataSerializer(data=m)
serializer.is_valid()
except Exception:
fail_import(upload, "unknown_error")
raise
if not serializer.is_valid():
detail = serializer.errors
try:
metadata_dump = m.all()
except Exception as e:
logger.warn("Cannot dump metadata for file %s: %s", audio_file, str(e))
return fail_import(
upload, "invalid_metadata", detail=detail, file_metadata=metadata_dump
)
final_metadata = collections.ChainMap(
additional_data, serializer.validated_data, import_metadata
)
additional_data["upload_source"] = upload.source
track = get_track_from_import_metadata(
final_metadata, attributed_to=upload.library.actor
)
except UploadImportError as e:
return fail_import(upload, e.code)
fail_import(upload, "unknown_error")
raise
# under some situations, we want to skip the import (
# for instance if the user already owns the files)
owned_duplicates = get_owned_duplicates(upload, track)
upload.track = track
if owned_duplicates:
upload.import_status = "skipped"
upload.import_details = {
"code": "already_imported_in_owned_libraries",
"duplicates": list(owned_duplicates),
}
update_fields=["import_details", "import_status", "import_date", "track"]
)
sender=None,
)
return
# all is good, let's finalize the import
upload.duration = audio_data["duration"]
upload.size = audio_data["size"]
upload.bitrate = audio_data["bitrate"]
upload.import_status = "finished"
upload.import_date = timezone.now()
upload.save(
update_fields=[
"track",
"import_status",
"import_date",
"size",
"duration",
"bitrate",
]
)
Eliot Berriot
committed
# update album cover, if needed
if not track.album.cover:
update_album_cover(
track.album,
source=final_metadata.get("upload_source"),
cover_data=final_metadata.get("cover_data"),
)
broadcast = getter(
import_metadata, "funkwhale", "config", "broadcast", default=True
if broadcast:
signals.upload_import_status_updated.send(
old_status=old_status,
new_status=upload.import_status,
upload=upload,
sender=None,
)
dispatch_outbox = getter(
import_metadata, "funkwhale", "config", "dispatch_outbox", default=True
if dispatch_outbox:
routes.outbox.dispatch(
{"type": "Create", "object": {"type": "Audio"}}, context={"upload": upload}
)
def federation_audio_track_to_metadata(payload, references):
    """
    Given a valid payload as returned by federation.serializers.TrackSerializer.validated_data,
    returns a correct metadata payload for use with get_track_from_import_metadata.

    :param payload: validated federation Audio payload (dict)
    :param references: mapping of actor URI -> local object, used to resolve
        every ``attributedTo`` reference
    """
    new_data = {
        "title": payload["name"],
        "position": payload.get("position") or 1,
        "disc_number": payload.get("disc"),
        "license": payload.get("license"),
        "copyright": payload.get("copyright"),
        "attributed_to": references.get(payload.get("attributedTo")),
        # fix: the "else None" arm of this conditional was lost, which made
        # the dict literal a syntax error (cf. the intact album-level mbid)
        "mbid": str(payload.get("musicbrainzId"))
        if payload.get("musicbrainzId")
        else None,
        "album": {
            "title": payload["album"]["name"],
            "fdate": payload["album"]["published"],
            "fid": payload["album"]["id"],
            "attributed_to": references.get(payload["album"].get("attributedTo")),
            "mbid": str(payload["album"]["musicbrainzId"])
            if payload["album"].get("musicbrainzId")
            else None,
            "release_date": payload["album"].get("released"),
            "artists": [
                {
                    "fid": a["id"],
                    "name": a["name"],
                    "fdate": a["published"],
                    "attributed_to": references.get(a.get("attributedTo")),
                    "mbid": str(a["musicbrainzId"]) if a.get("musicbrainzId") else None,
                }
                for a in payload["album"]["artists"]
            ],
        },
        "artists": [
            {
                "fid": a["id"],
                "name": a["name"],
                "fdate": a["published"],
                "attributed_to": references.get(a.get("attributedTo")),
                "mbid": str(a["musicbrainzId"]) if a.get("musicbrainzId") else None,
            }
            for a in payload["artists"]
        ],
        # federation
        "fid": payload["id"],
        "fdate": payload["published"],
    }
    cover = payload["album"].get("cover")
    if cover:
        new_data["cover_data"] = {"mimetype": cover["mediaType"], "url": cover["href"]}
    return new_data
"""
Ensure we skip duplicate tracks to avoid wasting user/instance storage
"""
track__isnull=False, library__in=owned_libraries, track=track
)
.values_list("uuid", flat=True)
)
def get_best_candidate_or_create(model, query, defaults, sort_fields):
    """
    Like queryset.get_or_create() but does not crash if multiple objects
    are returned on the get() call: the best-scoring existing match is
    picked instead. Returns an (object, created) pair.
    """
    matches = model.objects.filter(query)
    if not matches:
        return model.objects.create(**defaults), True
    best = sort_candidates(matches, sort_fields)[0]
    return best, False
def sort_candidates(candidates, important_fields):
    """
    Order ``candidates`` by score, highest first.

    An object scores points for every field in ``important_fields`` it has
    a non-empty value for::

        artist1 = Artist(mbid=None, fid=None)
        artist2 = Artist(mbid="something", fid=None)
        # artist2 has a mbid, so is sorted first
        assert sort_candidates([artist1, artist2], ['mbid'])[0] == artist2

    Only supports string fields.
    """
    # per-field weight; note the weights follow the *alphabetical* order of
    # the field names, not their position in important_fields
    weights = {field: rank + 1 for rank, field in enumerate(sorted(important_fields))}
    scored = []
    for candidate in candidates:
        score = sum(
            weight
            for field, weight in weights.items()
            if getattr(candidate, field, "")
        )
        scored.append((candidate, score))
    # reversed(sorted(...)) rather than sorted(reverse=True): among equal
    # scores, later candidates deliberately come first
    ordered = sorted(scored, key=lambda pair: pair[1])
    return [candidate for candidate, _ in reversed(ordered)]
def get_track_from_import_metadata(data, update_cover=False, attributed_to=None):
    """Resolve (or create) the Track matching an import metadata payload.

    Delegates the lookup/creation to ``_get_track`` and, when requested,
    fills in a missing album cover from the payload's ``upload_source`` /
    ``cover_data`` entries.

    :param data: metadata dict (see federation_audio_track_to_metadata)
    :param update_cover: also try to set the album cover when it is absent
    :param attributed_to: fallback actor used for attribution
    """
    track = _get_track(data, attributed_to=attributed_to)
    if update_cover and track and not track.album.cover:
        update_album_cover(
            track.album,
            source=data.get("upload_source"),
            cover_data=data.get("cover_data"),
        )
    return track
track_uuid = getter(data, "funkwhale", "track", "uuid")
if track_uuid:
# easy case, we have a reference to a uuid of a track that
# already exists in our database
try:
track = models.Track.objects.get(uuid=track_uuid)
except models.Track.DoesNotExist:
raise UploadImportError(code="track_uuid_not_found")
return track
from_activity_id = data.get("from_activity_id", None)
Eliot Berriot
committed
track_mbid = data.get("mbid", None)
album_mbid = getter(data, "album", "mbid")
track_fid = getter(data, "fid")
query = None
if album_mbid and track_mbid:
query = Q(mbid=track_mbid, album__mbid=album_mbid)
if track_fid:
query = query | Q(fid=track_fid) if query else Q(fid=track_fid)
if query:
# second easy case: we have a (track_mbid, album_mbid) pair or
# a federation uuid we can check on
try:
return sort_candidates(models.Track.objects.filter(query), ["mbid", "fid"])[
0
]
except IndexError:
pass
# get / create artist and album artist
Eliot Berriot
committed
artists = getter(data, "artists", default=[])
artist = artists[0]
artist_mbid = artist.get("mbid", None)
artist_fid = artist.get("fid", None)
artist_name = artist["name"]
query = Q(mbid=artist_mbid)
else:
query = Q(name__iexact=artist_name)
if artist_fid:
query |= Q(fid=artist_fid)
defaults = {
"name": artist_name,
"mbid": artist_mbid,
"fid": artist_fid,
"from_activity_id": from_activity_id,
"attributed_to": artist.get("attributed_to", attributed_to),
Eliot Berriot
committed
if artist.get("fdate"):
defaults["creation_date"] = artist.get("fdate")
artist = get_best_candidate_or_create(
models.Artist, query, defaults=defaults, sort_fields=["mbid", "fid"]
)[0]
album_artists = getter(data, "album", "artists", default=artists) or artists
Eliot Berriot
committed
album_artist = album_artists[0]
album_artist_name = album_artist.get("name")
if album_artist_name == artist_name:
album_artist = artist
else:
query = Q(name__iexact=album_artist_name)
Eliot Berriot
committed
album_artist_mbid = album_artist.get("mbid", None)
album_artist_fid = album_artist.get("fid", None)
if album_artist_mbid:
query |= Q(mbid=album_artist_mbid)
if album_artist_fid:
query |= Q(fid=album_artist_fid)
defaults = {
"name": album_artist_name,
"mbid": album_artist_mbid,
"fid": album_artist_fid,
"from_activity_id": from_activity_id,
"attributed_to": album_artist.get("attributed_to", attributed_to),
Eliot Berriot
committed
if album_artist.get("fdate"):
defaults["creation_date"] = album_artist.get("fdate")
album_artist = get_best_candidate_or_create(
models.Artist, query, defaults=defaults, sort_fields=["mbid", "fid"]
Eliot Berriot
committed
album = data["album"]
album_title = album["title"]
album_fid = album.get("fid", None)
query = Q(mbid=album_mbid)
else:
query = Q(title__iexact=album_title, artist=album_artist)
if album_fid:
query |= Q(fid=album_fid)
defaults = {
"title": album_title,
"artist": album_artist,
"mbid": album_mbid,
Eliot Berriot
committed
"release_date": album.get("release_date"),
"fid": album_fid,
"from_activity_id": from_activity_id,
"attributed_to": album.get("attributed_to", attributed_to),
Eliot Berriot
committed
if album.get("fdate"):
defaults["creation_date"] = album.get("fdate")
album = get_best_candidate_or_create(
models.Album, query, defaults=defaults, sort_fields=["mbid", "fid"]
# get / create track
track_title = data["title"]
Eliot Berriot
committed
position = data.get("position", 1)
query = Q(title__iexact=track_title, artist=artist, album=album, position=position)
if track_mbid:
query |= Q(mbid=track_mbid)
if track_fid:
query |= Q(fid=track_fid)
defaults = {
"title": track_title,
"album": album,
"mbid": track_mbid,
"artist": artist,
Eliot Berriot
committed
"position": position,
"disc_number": data.get("disc_number"),
"fid": track_fid,
"from_activity_id": from_activity_id,
"attributed_to": data.get("attributed_to", attributed_to),
"license": licenses.match(data.get("license"), data.get("copyright")),
"copyright": data.get("copyright"),
}
if data.get("fdate"):
defaults["creation_date"] = data.get("fdate")
track, created = get_best_candidate_or_create(
models.Track, query, defaults=defaults, sort_fields=["mbid", "fid"]
if created:
tags_models.add_tags(track, *data.get("tags", []))
@receiver(signals.upload_import_status_updated)
def broadcast_import_status_update_to_owner(old_status, new_status, upload, **kwargs):
    """Push an import status change to the owning user's websocket group."""
    owner = upload.library.actor.get_user()
    event_data = {
        "type": "import.status_updated",
        "upload": serializers.UploadForOwnerSerializer(upload).data,
        "old_status": old_status,
        "new_status": new_status,
    }
    channels.group_send(
        "user.{}.imports".format(owner.pk),
        {"type": "event.send", "text": "", "data": event_data},
    )
@celery.app.task(name="music.clean_transcoding_cache")
def clean_transcoding_cache():
    """Delete transcoded upload versions that were not accessed recently.

    The retention window comes from the ``music__transcoding_cache_duration``
    preference (minutes); a value below 1 disables cache clearing entirely.
    """
    max_age_minutes = preferences.get("music__transcoding_cache_duration")
    if max_age_minutes < 1:
        # cache clearing disabled
        return
    threshold = timezone.now() - datetime.timedelta(minutes=max_age_minutes)
    stale = Q(accessed_date__lt=threshold) | Q(accessed_date=None)
    return (
        models.UploadVersion.objects.filter(stale)
        .only("audio_file", "id")
        .order_by("id")
        .delete()
    )
def get_prunable_tracks(
    exclude_favorites=True, exclude_playlists=True, exclude_listenings=True
):
    """
    Returns a queryset of tracks with no associated uploads, excluding the
    ones that were listened to / favorited / included in playlists (each
    exclusion can be toggled off via the corresponding flag).
    """
    queryset = models.Track.objects.all().filter(uploads__isnull=True)
    exclusions = [
        (exclude_favorites, {"track_favorites__isnull": True}),
        (exclude_playlists, {"playlist_tracks__isnull": True}),
        (exclude_listenings, {"listenings__isnull": True}),
    ]
    for enabled, condition in exclusions:
        if enabled:
            queryset = queryset.filter(**condition)
    return queryset
def get_prunable_albums():
    """Albums that no longer have any track attached."""
    orphaned = models.Album.objects.filter(tracks__isnull=True)
    return orphaned
def get_prunable_artists():
    """Artists with neither tracks nor albums attached."""
    orphaned = models.Artist.objects.filter(tracks__isnull=True, albums__isnull=True)
    return orphaned
def update_library_entity(obj, data):
    """
    Given an obj and some updated fields, will persist the changes on the obj
    and also check if the entity need to be aliased with existing objs (i.e
    if a mbid was added on the obj, and match another entity with the same mbid)
    """
    updated_fields = list(data.keys())
    for field in updated_fields:
        setattr(obj, field, data[field])
    # Todo: handle integrity error on unique fields (such as MBID)
    obj.save(update_fields=updated_fields)
    return obj