Skip to content
Snippets Groups Projects
Verified Commit 290cae9a authored by Eliot Berriot's avatar Eliot Berriot
Browse files

Fix #231 and #219: ensure we import covers regarless of the import method

Can now import covers from track metadata and track directory as well
parent 14c8073e
No related branches found
No related tags found
No related merge requests found
......@@ -76,3 +76,4 @@ LOGGING = {
},
},
}
CSRF_TRUSTED_ORIGINS = [o for o in ALLOWED_HOSTS]
......@@ -9,7 +9,13 @@ class TagNotFound(KeyError):
pass
class UnsupportedTag(KeyError):
pass
def get_id3_tag(f, k):
if k == 'pictures':
return f.tags.getall('APIC')
# First we try to grab the standard key
try:
return f.tags[k].text[0]
......@@ -28,13 +34,39 @@ def get_id3_tag(f, k):
raise TagNotFound(k)
def clean_id3_pictures(apic):
pictures = []
for p in list(apic):
pictures.append({
'mimetype': p.mime,
'content': p.data,
'description': p.desc,
'type': p.type.real,
})
return pictures
def get_flac_tag(f, k):
if k == 'pictures':
return f.pictures
try:
return f.get(k, [])[0]
except (KeyError, IndexError):
raise TagNotFound(k)
def clean_flac_pictures(apic):
pictures = []
for p in list(apic):
pictures.append({
'mimetype': p.mime,
'content': p.data,
'description': p.desc,
'type': p.type.real,
})
return pictures
def get_mp3_recording_id(f, k):
try:
return [
......@@ -73,25 +105,15 @@ CONF = {
'field': 'TRACKNUMBER',
'to_application': convert_track_number
},
'title': {
'field': 'title'
},
'artist': {
'field': 'artist'
},
'album': {
'field': 'album'
},
'title': {},
'artist': {},
'album': {},
'date': {
'field': 'date',
'to_application': lambda v: arrow.get(v).date()
},
'musicbrainz_albumid': {
'field': 'musicbrainz_albumid'
},
'musicbrainz_artistid': {
'field': 'musicbrainz_artistid'
},
'musicbrainz_albumid': {},
'musicbrainz_artistid': {},
'musicbrainz_recordingid': {
'field': 'musicbrainz_trackid'
},
......@@ -104,15 +126,9 @@ CONF = {
'field': 'TRACKNUMBER',
'to_application': convert_track_number
},
'title': {
'field': 'title'
},
'artist': {
'field': 'artist'
},
'album': {
'field': 'album'
},
'title': {},
'artist': {},
'album': {},
'date': {
'field': 'date',
'to_application': lambda v: arrow.get(v).date()
......@@ -130,6 +146,7 @@ CONF = {
},
'MP3': {
'getter': get_id3_tag,
'clean_pictures': clean_id3_pictures,
'fields': {
'track_number': {
'field': 'TRCK',
......@@ -158,40 +175,31 @@ CONF = {
'field': 'UFID',
'getter': get_mp3_recording_id,
},
'pictures': {},
}
},
'FLAC': {
'getter': get_flac_tag,
'clean_pictures': clean_flac_pictures,
'fields': {
'track_number': {
'field': 'tracknumber',
'to_application': convert_track_number
},
'title': {
'field': 'title'
},
'artist': {
'field': 'artist'
},
'album': {
'field': 'album'
},
'title': {},
'artist': {},
'album': {},
'date': {
'field': 'date',
'to_application': lambda v: arrow.get(str(v)).date()
},
'musicbrainz_albumid': {
'field': 'musicbrainz_albumid'
},
'musicbrainz_artistid': {
'field': 'musicbrainz_artistid'
},
'musicbrainz_albumid': {},
'musicbrainz_artistid': {},
'musicbrainz_recordingid': {
'field': 'musicbrainz_trackid'
},
'test': {
'field': 'test'
},
'test': {},
'pictures': {},
}
},
}
......@@ -213,8 +221,12 @@ class Metadata(object):
return f.__class__.__name__
def get(self, key, default=NODEFAULT):
field_conf = self._conf['fields'][key]
real_key = field_conf['field']
try:
field_conf = self._conf['fields'][key]
except KeyError:
raise UnsupportedTag(
'{} is not supported for this file format'.format(key))
real_key = field_conf.get('field', key)
try:
getter = field_conf.get('getter', self._conf['getter'])
v = getter(self._file, real_key)
......@@ -230,3 +242,16 @@ class Metadata(object):
if field:
v = field.to_python(v)
return v
def get_picture(self, picture_type='cover_front'):
ptype = getattr(mutagen.id3.PictureType, picture_type.upper())
try:
pictures = self.get('pictures')
except (UnsupportedTag, TagNotFound):
return
cleaner = self._conf.get('clean_pictures', lambda v: v)
pictures = cleaner(pictures)
for p in pictures:
if p['type'] == ptype:
return p
......@@ -23,6 +23,7 @@ from funkwhale_api import downloader
from funkwhale_api import musicbrainz
from funkwhale_api.federation import utils as federation_utils
from . import importers
from . import metadata
from . import utils
......@@ -192,10 +193,20 @@ class Album(APIModelMixin):
}
objects = AlbumQuerySet.as_manager()
def get_image(self):
image_data = musicbrainz.api.images.get_front(str(self.mbid))
f = ContentFile(image_data)
self.cover.save('{0}.jpg'.format(self.mbid), f)
def get_image(self, data=None):
if data:
f = ContentFile(data['content'])
extensions = {
'image/jpeg': 'jpg',
'image/png': 'png',
'image/gif': 'gif',
}
extension = extensions.get(data['mimetype'], 'jpg')
self.cover.save('{}.{}'.format(self.uuid, extension), f)
else:
image_data = musicbrainz.api.images.get_front(str(self.mbid))
f = ContentFile(image_data)
self.cover.save('{0}.jpg'.format(self.mbid), f)
return self.cover.file
def __str__(self):
......@@ -522,6 +533,12 @@ class TrackFile(models.Model):
self.mimetype = utils.guess_mimetype(self.audio_file)
return super().save(**kwargs)
def get_metadata(self):
audio_file = self.get_audio_file()
if not audio_file:
return
return metadata.Metadata(audio_file)
IMPORT_STATUS_CHOICES = (
('pending', 'Pending'),
......
import logging
import os
from django.core.files.base import ContentFile
from musicbrainzngs import ResponseError
from funkwhale_api.common import preferences
from funkwhale_api.federation import activity
from funkwhale_api.federation import actors
......@@ -16,6 +19,8 @@ from . import models
from . import lyrics as lyrics_utils
from . import utils as music_utils
logger = logging.getLogger(__name__)
@celery.app.task(name='acoustid.set_on_track_file')
@celery.require_instance(models.TrackFile, 'track_file')
......@@ -74,6 +79,7 @@ def import_track_from_remote(library_track):
def _do_import(import_job, replace=False, use_acoustid=False):
logger.info('[Import Job %s] starting job', import_job.pk)
from_file = bool(import_job.audio_file)
mbid = import_job.mbid
acoustid_track_id = None
......@@ -91,15 +97,32 @@ def _do_import(import_job, replace=False, use_acoustid=False):
mbid = match['recordings'][0]['id']
acoustid_track_id = match['id']
if mbid:
logger.info(
'[Import Job %s] importing track from musicbrainz recording %s',
import_job.pk,
str(mbid))
track, _ = models.Track.get_or_create_from_api(mbid=mbid)
elif import_job.audio_file:
logger.info(
'[Import Job %s] importing track from uploaded track data at %s',
import_job.pk,
import_job.audio_file.path)
track = audiofile_tasks.import_track_data_from_path(
import_job.audio_file.path)
elif import_job.library_track:
logger.info(
'[Import Job %s] importing track from federated library track %s',
import_job.pk,
import_job.library_track.pk)
track = import_track_from_remote(import_job.library_track)
elif import_job.source.startswith('file://'):
tf_path = import_job.source.replace('file://', '', 1)
logger.info(
'[Import Job %s] importing track from local track data at %s',
import_job.pk,
tf_path)
track = audiofile_tasks.import_track_data_from_path(
import_job.source.replace('file://', '', 1))
tf_path)
else:
raise ValueError(
'Not enough data to process import, '
......@@ -107,8 +130,13 @@ def _do_import(import_job, replace=False, use_acoustid=False):
track_file = None
if replace:
logger.info(
'[Import Job %s] replacing existing audio file', import_job.pk)
track_file = track.files.first()
elif track.files.count() > 0:
logger.info(
'[Import Job %s] skipping, we already have a file for this track',
import_job.pk)
if import_job.audio_file:
import_job.audio_file.delete()
import_job.status = 'skipped'
......@@ -132,6 +160,9 @@ def _do_import(import_job, replace=False, use_acoustid=False):
pass
elif not import_job.audio_file and not import_job.source.startswith('file://'):
# not an implace import, and we have a source, so let's download it
logger.info(
'[Import Job %s] downloading audio file from remote',
import_job.pk)
track_file.download_file()
elif not import_job.audio_file and import_job.source.startswith('file://'):
# in place import, we set mimetype from extension
......@@ -139,23 +170,96 @@ def _do_import(import_job, replace=False, use_acoustid=False):
track_file.mimetype = music_utils.get_type_from_ext(ext)
track_file.set_audio_data()
track_file.save()
# if no cover is set on track album, we try to update it as well:
if not track.album.cover:
logger.info(
'[Import Job %s] retrieving album cover',
import_job.pk)
update_album_cover(track.album, track_file)
import_job.status = 'finished'
import_job.track_file = track_file
if import_job.audio_file:
# it's imported on the track, we don't need it anymore
import_job.audio_file.delete()
import_job.save()
logger.info(
'[Import Job %s] job finished',
import_job.pk)
return track_file
def update_album_cover(album, track_file, replace=False):
if album.cover and not replace:
return
if track_file:
# maybe the file has a cover embedded?
try:
metadata = track_file.get_metadata()
except FileNotFoundError:
metadata = None
if metadata:
cover = metadata.get_picture('cover_front')
if cover:
# best case scenario, cover is embedded in the track
logger.info(
'[Album %s] Using cover embedded in file',
album.pk)
return album.get_image(data=cover)
if track_file.source and track_file.source.startswith('file://'):
# let's look for a cover in the same directory
path = os.path.dirname(track_file.source.replace('file://', '', 1))
logger.info(
'[Album %s] scanning covers from %s',
album.pk,
path)
cover = get_cover_from_fs(path)
if cover:
return album.get_image(data=cover)
if not album.mbid:
return
try:
logger.info(
'[Album %s] Fetching cover from musicbrainz release %s',
album.pk,
str(album.mbid))
return album.get_image()
except ResponseError as exc:
logger.warning(
'[Album %s] cannot fetch cover from musicbrainz: %s',
album.pk,
str(exc))
IMAGE_TYPES = [
('jpg', 'image/jpeg'),
('png', 'image/png'),
]
def get_cover_from_fs(dir_path):
if os.path.exists(dir_path):
for e, m in IMAGE_TYPES:
cover_path = os.path.join(dir_path, 'cover.{}'.format(e))
if not os.path.exists(cover_path):
logger.debug('Cover %s does not exists', cover_path)
continue
with open(cover_path, 'rb') as c:
logger.info('Found cover at %s', cover_path)
return {
'mimetype': m,
'content': c.read(),
}
@celery.app.task(name='ImportJob.run', bind=True)
@celery.require_instance(
models.ImportJob.objects.filter(
status__in=['pending', 'errored']),
'import_job')
def import_job_run(self, import_job, replace=False, use_acoustid=False):
def mark_errored():
def mark_errored(exc):
logger.error('[Import Job %s] Error during import: %s', str(exc))
import_job.status = 'errored'
import_job.save(update_fields=['status'])
......@@ -167,9 +271,9 @@ def import_job_run(self, import_job, replace=False, use_acoustid=False):
try:
self.retry(exc=exc, countdown=30, max_retries=3)
except:
mark_errored()
mark_errored(exc)
raise
mark_errored()
mark_errored(exc)
raise
......
api/tests/music/cover.jpg

107 B

api/tests/music/cover.png

379 B

No preview for this file type
......@@ -58,6 +58,20 @@ def test_can_get_metadata_from_id3_mp3_file(field, value):
assert data.get(field) == value
@pytest.mark.parametrize('name', ['test.mp3', 'sample.flac'])
def test_can_get_pictures(name):
path = os.path.join(DATA_DIR, name)
data = metadata.Metadata(path)
pictures = data.get('pictures')
assert len(pictures) == 1
cover_data = data.get_picture('cover_front')
assert cover_data['mimetype'].startswith('image/')
assert len(cover_data['content']) > 0
assert type(cover_data['content']) == bytes
assert type(cover_data['description']) == str
@pytest.mark.parametrize('field,value', [
('title', '999,999'),
('artist', 'Nine Inch Nails'),
......
......@@ -110,3 +110,11 @@ def test_track_get_file_size_in_place(factories):
in_place=True, source='file://{}'.format(path))
assert tf.get_file_size() == 297745
def test_album_get_image_content(factories):
album = factories['music.Album']()
album.get_image(data={'content': b'test', 'mimetype':'image/jpeg'})
album.refresh_from_db()
assert album.cover.read() == b'test'
......@@ -56,6 +56,7 @@ def test_import_batch_run(factories, mocker):
mocked_job_run.assert_called_once_with(import_job_id=job.pk)
@pytest.mark.skip('Acoustid is disabled')
def test_import_job_can_run_with_file_and_acoustid(
artists, albums, tracks, preferences, factories, mocker):
preferences['providers_acoustid__api_key'] = 'test'
......@@ -183,3 +184,73 @@ def test_import_job_can_be_errored(factories, mocker, preferences):
assert job.track_file is None
assert job.status == 'errored'
def test__do_import_calls_update_album_cover_if_no_cover(factories, mocker):
path = os.path.join(DATA_DIR, 'test.ogg')
album = factories['music.Album'](cover='')
track = factories['music.Track'](album=album)
mocker.patch(
'funkwhale_api.providers.audiofile.tasks.import_track_data_from_path',
return_value=track)
mocked_update = mocker.patch(
'funkwhale_api.music.tasks.update_album_cover')
job = factories['music.FileImportJob'](
audio_file__path=path, track_file=None)
tasks.import_job_run(import_job_id=job.pk)
mocked_update.assert_called_once_with(album, track.files.first())
def test_update_album_cover_mbid(factories, mocker):
album = factories['music.Album'](cover='')
mocked_get = mocker.patch('funkwhale_api.music.models.Album.get_image')
tasks.update_album_cover(album=album, track_file=None)
mocked_get.assert_called_once_with()
def test_update_album_cover_file_data(factories, mocker):
path = os.path.join(DATA_DIR, 'test.mp3')
album = factories['music.Album'](cover='', mbid=None)
tf = factories['music.TrackFile'](track__album=album)
mocked_get = mocker.patch('funkwhale_api.music.models.Album.get_image')
mocker.patch(
'funkwhale_api.music.metadata.Metadata.get_picture',
return_value={'hello': 'world'})
tasks.update_album_cover(album=album, track_file=tf)
md = data = tf.get_metadata()
mocked_get.assert_called_once_with(
data={'hello': 'world'})
@pytest.mark.parametrize('ext,mimetype', [
('jpg', 'image/jpeg'),
('png', 'image/png'),
])
def test_update_album_cover_file_cover_separate_file(
ext, mimetype, factories, mocker):
mocker.patch('funkwhale_api.music.tasks.IMAGE_TYPES', [(ext, mimetype)])
path = os.path.join(DATA_DIR, 'test.mp3')
image_path = os.path.join(DATA_DIR, 'cover.{}'.format(ext))
with open(image_path, 'rb') as f:
image_content = f.read()
album = factories['music.Album'](cover='', mbid=None)
tf = factories['music.TrackFile'](
track__album=album,
source='file://' + image_path)
mocked_get = mocker.patch('funkwhale_api.music.models.Album.get_image')
mocker.patch(
'funkwhale_api.music.metadata.Metadata.get_picture',
return_value=None)
tasks.update_album_cover(album=album, track_file=tf)
md = data = tf.get_metadata()
mocked_get.assert_called_once_with(
data={'mimetype': mimetype, 'content': image_content})
Can now use album covers from flac/mp3 metadata and separate file in track directory (#219)
We now fetch album covers regardless of the import methods (#231)
Smarter album cover importer
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
In earlier versions, covers where only imported when launching a YouTube import.
Starting from this release, covers will be imported regardless of the import mode
(file upload, youtube-dl, CLI, in-place...). Funkwhale will look for covers
in the following order:
1. In the imported file itself (FLAC/MP3 only)
2. In a cover.jpg or cover.png in the file directory
3. By fetching cover art from Musibrainz, assuming the file is tagged correctly
This will only work for newly imported tracks and albums though. In the future,
we may offer an option to refetch album covers from the interface, but in the
meantime, you can use the following snippet:
.. code-block:: python
# Store this in /tmp/update_albums.py
from funkwhale_api.music.models import Album, TrackFile
from funkwhale_api.music.tasks import update_album_cover
albums_without_covers = Album.objects.filter(cover='')
total = albums_without_covers.count()
print('Found {} albums without cover'.format(total))
for i, album in enumerate(albums_without_covers.iterator()):
print('[{}/{}] Fetching cover for {}...'.format(i+1, total, album.title))
f = TrackFile.objects.filter(track__album=album).filter(source__startswith='file://').first()
update_album_cover(album, track_file=f)
Then launch it::
# docker setups
cat /tmp/update_albums.py | docker-compose run --rm api python manage.py shell -i python
# non-docker setups
source /srv/funkwhale/load_env
source /srv/funkwhale/virtualenv/bin/activate
cat /tmp/update_albums.py | python manage.py shell -i python
# cleanup
rm /tmp/update_albums.py
.. note::
Depending on your number of albums, the previous snippet may take some time
to execute. You can interrupt it at any time using ctrl-c and relaunch it later,
as it's idempotent.
......@@ -76,6 +76,15 @@ configuration options to ensure the webserver can serve them properly:
Thus, be especially careful when you manipulate the source files.
Album covers
^^^^^^^^^^^^
Whenever possible, Funkwhale will import album cover, with the following precedence:
1. It will use the cover embedded in the audio files themeselves, if any (Flac/MP3 only)
2. It will use a cover.jpg or a cover.png file from the imported track directory, if any
3. It will fectch cover art from musicbrainz, assuming the file is tagged correctly
Getting demo tracks
^^^^^^^^^^^^^^^^^^^
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment