diff --git a/api/config/settings/local.py b/api/config/settings/local.py index 59260062985cf1e3c554bfe4459d237f5d981e11..df14945cc90538a7fe8ea39e66e3833aa809581c 100644 --- a/api/config/settings/local.py +++ b/api/config/settings/local.py @@ -76,3 +76,4 @@ LOGGING = { }, }, } +CSRF_TRUSTED_ORIGINS = [o for o in ALLOWED_HOSTS] diff --git a/api/funkwhale_api/music/metadata.py b/api/funkwhale_api/music/metadata.py index 522c64c85d11fbbe2697b707f229485d61a8529c..3637b1c8c5d78311dd2e3975cd02bd48d6f9269e 100644 --- a/api/funkwhale_api/music/metadata.py +++ b/api/funkwhale_api/music/metadata.py @@ -9,7 +9,13 @@ class TagNotFound(KeyError): pass +class UnsupportedTag(KeyError): + pass + + def get_id3_tag(f, k): + if k == 'pictures': + return f.tags.getall('APIC') # First we try to grab the standard key try: return f.tags[k].text[0] @@ -28,13 +34,39 @@ def get_id3_tag(f, k): raise TagNotFound(k) +def clean_id3_pictures(apic): + pictures = [] + for p in list(apic): + pictures.append({ + 'mimetype': p.mime, + 'content': p.data, + 'description': p.desc, + 'type': p.type.real, + }) + return pictures + + def get_flac_tag(f, k): + if k == 'pictures': + return f.pictures try: return f.get(k, [])[0] except (KeyError, IndexError): raise TagNotFound(k) +def clean_flac_pictures(apic): + pictures = [] + for p in list(apic): + pictures.append({ + 'mimetype': p.mime, + 'content': p.data, + 'description': p.desc, + 'type': p.type.real, + }) + return pictures + + def get_mp3_recording_id(f, k): try: return [ @@ -73,25 +105,15 @@ CONF = { 'field': 'TRACKNUMBER', 'to_application': convert_track_number }, - 'title': { - 'field': 'title' - }, - 'artist': { - 'field': 'artist' - }, - 'album': { - 'field': 'album' - }, + 'title': {}, + 'artist': {}, + 'album': {}, 'date': { 'field': 'date', 'to_application': lambda v: arrow.get(v).date() }, - 'musicbrainz_albumid': { - 'field': 'musicbrainz_albumid' - }, - 'musicbrainz_artistid': { - 'field': 'musicbrainz_artistid' - }, + 'musicbrainz_albumid': {}, + 'musicbrainz_artistid': {}, 'musicbrainz_recordingid': { 'field': 'musicbrainz_trackid' }, @@ -104,15 +126,9 @@ CONF = { 'field': 'TRACKNUMBER', 'to_application': convert_track_number }, - 'title': { - 'field': 'title' - }, - 'artist': { - 'field': 'artist' - }, - 'album': { - 'field': 'album' - }, + 'title': {}, + 'artist': {}, + 'album': {}, 'date': { 'field': 'date', 'to_application': lambda v: arrow.get(v).date() @@ -130,6 +146,7 @@ CONF = { }, 'MP3': { 'getter': get_id3_tag, + 'clean_pictures': clean_id3_pictures, 'fields': { 'track_number': { 'field': 'TRCK', @@ -158,40 +175,31 @@ CONF = { 'field': 'UFID', 'getter': get_mp3_recording_id, }, + 'pictures': {}, } }, 'FLAC': { 'getter': get_flac_tag, + 'clean_pictures': clean_flac_pictures, 'fields': { 'track_number': { 'field': 'tracknumber', 'to_application': convert_track_number }, - 'title': { - 'field': 'title' - }, - 'artist': { - 'field': 'artist' - }, - 'album': { - 'field': 'album' - }, + 'title': {}, + 'artist': {}, + 'album': {}, 'date': { 'field': 'date', 'to_application': lambda v: arrow.get(str(v)).date() }, - 'musicbrainz_albumid': { - 'field': 'musicbrainz_albumid' - }, - 'musicbrainz_artistid': { - 'field': 'musicbrainz_artistid' - }, + 'musicbrainz_albumid': {}, + 'musicbrainz_artistid': {}, 'musicbrainz_recordingid': { 'field': 'musicbrainz_trackid' }, - 'test': { - 'field': 'test' - }, + 'test': {}, + 'pictures': {}, } }, } @@ -213,8 +221,12 @@ class Metadata(object): return f.__class__.__name__ def get(self, key, default=NODEFAULT): - field_conf = self._conf['fields'][key] - real_key = field_conf['field'] + try: + field_conf = self._conf['fields'][key] + except KeyError: + raise UnsupportedTag( + '{} is not supported for this file format'.format(key)) + real_key = field_conf.get('field', key) try: getter = field_conf.get('getter', self._conf['getter']) v = getter(self._file, real_key) @@ -230,3 +242,16 @@ class Metadata(object): if field: v = field.to_python(v) return v + + def get_picture(self, picture_type='cover_front'): + ptype = getattr(mutagen.id3.PictureType, picture_type.upper()) + try: + pictures = self.get('pictures') + except (UnsupportedTag, TagNotFound): + return + + cleaner = self._conf.get('clean_pictures', lambda v: v) + pictures = cleaner(pictures) + for p in pictures: + if p['type'] == ptype: + return p diff --git a/api/funkwhale_api/music/models.py b/api/funkwhale_api/music/models.py index 51ece7d7b3f98b1888139a22c74a5066c1bd7862..0ba4d22c339f3798945d069c5aff801657cee5dc 100644 --- a/api/funkwhale_api/music/models.py +++ b/api/funkwhale_api/music/models.py @@ -23,6 +23,7 @@ from funkwhale_api import downloader from funkwhale_api import musicbrainz from funkwhale_api.federation import utils as federation_utils from . import importers +from . import metadata from . import utils @@ -192,10 +193,20 @@ class Album(APIModelMixin): } objects = AlbumQuerySet.as_manager() - def get_image(self): - image_data = musicbrainz.api.images.get_front(str(self.mbid)) - f = ContentFile(image_data) - self.cover.save('{0}.jpg'.format(self.mbid), f) + def get_image(self, data=None): + if data: + f = ContentFile(data['content']) + extensions = { + 'image/jpeg': 'jpg', + 'image/png': 'png', + 'image/gif': 'gif', + } + extension = extensions.get(data['mimetype'], 'jpg') + self.cover.save('{}.{}'.format(self.uuid, extension), f) + else: + image_data = musicbrainz.api.images.get_front(str(self.mbid)) + f = ContentFile(image_data) + self.cover.save('{0}.jpg'.format(self.mbid), f) return self.cover.file def __str__(self): @@ -522,6 +533,12 @@ class TrackFile(models.Model): self.mimetype = utils.guess_mimetype(self.audio_file) return super().save(**kwargs) + def get_metadata(self): + audio_file = self.get_audio_file() + if not audio_file: + return + return metadata.Metadata(audio_file) + IMPORT_STATUS_CHOICES = ( ('pending', 'Pending'), diff --git a/api/funkwhale_api/music/tasks.py b/api/funkwhale_api/music/tasks.py index 993456c2701bbff50477eacea02ef3a56cdac4e1..218e374e8239c91861f82dbf75245cd94bee7ead 100644 --- a/api/funkwhale_api/music/tasks.py +++ b/api/funkwhale_api/music/tasks.py @@ -1,7 +1,10 @@ +import logging import os from django.core.files.base import ContentFile +from musicbrainzngs import ResponseError + from funkwhale_api.common import preferences from funkwhale_api.federation import activity from funkwhale_api.federation import actors @@ -16,6 +19,8 @@ from . import models from . import lyrics as lyrics_utils from . import utils as music_utils +logger = logging.getLogger(__name__) + @celery.app.task(name='acoustid.set_on_track_file') @celery.require_instance(models.TrackFile, 'track_file') @@ -74,6 +79,7 @@ def import_track_from_remote(library_track): def _do_import(import_job, replace=False, use_acoustid=False): + logger.info('[Import Job %s] starting job', import_job.pk) from_file = bool(import_job.audio_file) mbid = import_job.mbid acoustid_track_id = None @@ -91,15 +97,32 @@ def _do_import(import_job, replace=False, use_acoustid=False): mbid = match['recordings'][0]['id'] acoustid_track_id = match['id'] if mbid: + logger.info( + '[Import Job %s] importing track from musicbrainz recording %s', + import_job.pk, + str(mbid)) track, _ = models.Track.get_or_create_from_api(mbid=mbid) elif import_job.audio_file: + logger.info( + '[Import Job %s] importing track from uploaded track data at %s', + import_job.pk, + import_job.audio_file.path) track = audiofile_tasks.import_track_data_from_path( import_job.audio_file.path) elif import_job.library_track: + logger.info( + '[Import Job %s] importing track from federated library track %s', + import_job.pk, + import_job.library_track.pk) track = import_track_from_remote(import_job.library_track) elif import_job.source.startswith('file://'): + tf_path = import_job.source.replace('file://', '', 1) + logger.info( + '[Import Job %s] importing track from local track data at %s', + import_job.pk, + tf_path) track = audiofile_tasks.import_track_data_from_path( - import_job.source.replace('file://', '', 1)) + tf_path) else: raise ValueError( 'Not enough data to process import, ' @@ -107,8 +130,13 @@ def _do_import(import_job, replace=False, use_acoustid=False): track_file = None if replace: + logger.info( + '[Import Job %s] replacing existing audio file', import_job.pk) track_file = track.files.first() elif track.files.count() > 0: + logger.info( + '[Import Job %s] skipping, we already have a file for this track', + import_job.pk) if import_job.audio_file: import_job.audio_file.delete() import_job.status = 'skipped' @@ -132,6 +160,9 @@ def _do_import(import_job, replace=False, use_acoustid=False): pass elif not import_job.audio_file and not import_job.source.startswith('file://'): # not an implace import, and we have a source, so let's download it + logger.info( + '[Import Job %s] downloading audio file from remote', + import_job.pk) track_file.download_file() elif not import_job.audio_file and import_job.source.startswith('file://'): # in place import, we set mimetype from extension @@ -139,23 +170,96 @@ def _do_import(import_job, replace=False, use_acoustid=False): track_file.mimetype = music_utils.get_type_from_ext(ext) track_file.set_audio_data() track_file.save() + # if no cover is set on track album, we try to update it as well: + if not track.album.cover: + logger.info( + '[Import Job %s] retrieving album cover', + import_job.pk) + update_album_cover(track.album, track_file) import_job.status = 'finished' import_job.track_file = track_file if import_job.audio_file: # it's imported on the track, we don't need it anymore import_job.audio_file.delete() import_job.save() - + logger.info( + '[Import Job %s] job finished', + import_job.pk) return track_file +def update_album_cover(album, track_file, replace=False): + if album.cover and not replace: + return + + if track_file: + # maybe the file has a cover embedded? + try: + metadata = track_file.get_metadata() + except FileNotFoundError: + metadata = None + if metadata: + cover = metadata.get_picture('cover_front') + if cover: + # best case scenario, cover is embedded in the track + logger.info( + '[Album %s] Using cover embedded in file', + album.pk) + return album.get_image(data=cover) + if track_file.source and track_file.source.startswith('file://'): + # let's look for a cover in the same directory + path = os.path.dirname(track_file.source.replace('file://', '', 1)) + logger.info( + '[Album %s] scanning covers from %s', + album.pk, + path) + cover = get_cover_from_fs(path) + if cover: + return album.get_image(data=cover) + if not album.mbid: + return + try: + logger.info( + '[Album %s] Fetching cover from musicbrainz release %s', + album.pk, + str(album.mbid)) + return album.get_image() + except ResponseError as exc: + logger.warning( + '[Album %s] cannot fetch cover from musicbrainz: %s', + album.pk, + str(exc)) + + +IMAGE_TYPES = [ + ('jpg', 'image/jpeg'), + ('png', 'image/png'), +] + +def get_cover_from_fs(dir_path): + if os.path.exists(dir_path): + for e, m in IMAGE_TYPES: + cover_path = os.path.join(dir_path, 'cover.{}'.format(e)) + if not os.path.exists(cover_path): + logger.debug('Cover %s does not exists', cover_path) + continue + with open(cover_path, 'rb') as c: + logger.info('Found cover at %s', cover_path) + return { + 'mimetype': m, + 'content': c.read(), + } + + + @celery.app.task(name='ImportJob.run', bind=True) @celery.require_instance( models.ImportJob.objects.filter( status__in=['pending', 'errored']), 'import_job') def import_job_run(self, import_job, replace=False, use_acoustid=False): - def mark_errored(): + def mark_errored(exc): + logger.error('[Import Job %s] Error during import: %s', str(exc)) import_job.status = 'errored' import_job.save(update_fields=['status']) @@ -167,9 +271,9 @@ def import_job_run(self, import_job, replace=False, use_acoustid=False): try: self.retry(exc=exc, countdown=30, max_retries=3) except: - mark_errored() + mark_errored(exc) raise - mark_errored() + mark_errored(exc) raise diff --git a/api/tests/music/cover.jpg b/api/tests/music/cover.jpg new file mode 100644 index 0000000000000000000000000000000000000000..71911bf48766c7181518c1070911019fbb00b1fc Binary files /dev/null and b/api/tests/music/cover.jpg differ diff --git a/api/tests/music/cover.png b/api/tests/music/cover.png new file mode 100644 index 0000000000000000000000000000000000000000..32f56c8ac2aa42bb85f38529fcf59d509c724957 Binary files /dev/null and b/api/tests/music/cover.png differ diff --git a/api/tests/music/sample.flac b/api/tests/music/sample.flac index 6eff1c06e43f6f536c2d810d0d059cc6af319f5e..fe3ec6e4a270da0ff0a3f1ca677da4990ddb0ea5 100644 Binary files a/api/tests/music/sample.flac and b/api/tests/music/sample.flac differ diff --git a/api/tests/music/test_metadata.py b/api/tests/music/test_metadata.py index 9f9c9398418aca521e0e27581d2b26b756fe2bab..326f18324bd4ba027d225f28369e42d40e57ee87 100644 --- a/api/tests/music/test_metadata.py +++ b/api/tests/music/test_metadata.py @@ -58,6 +58,20 @@ def test_can_get_metadata_from_id3_mp3_file(field, value): assert data.get(field) == value +@pytest.mark.parametrize('name', ['test.mp3', 'sample.flac']) +def test_can_get_pictures(name): + path = os.path.join(DATA_DIR, name) + data = metadata.Metadata(path) + + pictures = data.get('pictures') + assert len(pictures) == 1 + cover_data = data.get_picture('cover_front') + assert cover_data['mimetype'].startswith('image/') + assert len(cover_data['content']) > 0 + assert type(cover_data['content']) == bytes + assert type(cover_data['description']) == str + + @pytest.mark.parametrize('field,value', [ ('title', '999,999'), ('artist', 'Nine Inch Nails'), diff --git a/api/tests/music/test_models.py b/api/tests/music/test_models.py index e926d07fa2be7c367b50e53acbc72666eca8b06b..feb68ea33ad53f146b8e05d7a47e288af2285d21 100644 --- a/api/tests/music/test_models.py +++ b/api/tests/music/test_models.py @@ -110,3 +110,11 @@ def test_track_get_file_size_in_place(factories): in_place=True, source='file://{}'.format(path)) assert tf.get_file_size() == 297745 + + +def test_album_get_image_content(factories): + album = factories['music.Album']() + album.get_image(data={'content': b'test', 'mimetype':'image/jpeg'}) + album.refresh_from_db() + + assert album.cover.read() == b'test' diff --git a/api/tests/music/test_tasks.py b/api/tests/music/test_tasks.py index dfe649be0f91b460db912078d25ae825063af523..77245e204e364627cd9667b6fa3b5e1a09faf4bf 100644 --- a/api/tests/music/test_tasks.py +++ b/api/tests/music/test_tasks.py @@ -56,6 +56,7 @@ def test_import_batch_run(factories, mocker): mocked_job_run.assert_called_once_with(import_job_id=job.pk) +@pytest.mark.skip('Acoustid is disabled') def test_import_job_can_run_with_file_and_acoustid( artists, albums, tracks, preferences, factories, mocker): preferences['providers_acoustid__api_key'] = 'test' @@ -183,3 +184,73 @@ def test_import_job_can_be_errored(factories, mocker, preferences): assert job.track_file is None assert job.status == 'errored' + + +def test__do_import_calls_update_album_cover_if_no_cover(factories, mocker): + path = os.path.join(DATA_DIR, 'test.ogg') + album = factories['music.Album'](cover='') + track = factories['music.Track'](album=album) + + mocker.patch( + 'funkwhale_api.providers.audiofile.tasks.import_track_data_from_path', + return_value=track) + + mocked_update = mocker.patch( + 'funkwhale_api.music.tasks.update_album_cover') + + job = factories['music.FileImportJob']( + audio_file__path=path, track_file=None) + + tasks.import_job_run(import_job_id=job.pk) + + mocked_update.assert_called_once_with(album, track.files.first()) + + +def test_update_album_cover_mbid(factories, mocker): + album = factories['music.Album'](cover='') + + mocked_get = mocker.patch('funkwhale_api.music.models.Album.get_image') + tasks.update_album_cover(album=album, track_file=None) + + mocked_get.assert_called_once_with() + + +def test_update_album_cover_file_data(factories, mocker): + path = os.path.join(DATA_DIR, 'test.mp3') + album = factories['music.Album'](cover='', mbid=None) + tf = factories['music.TrackFile'](track__album=album) + + mocked_get = mocker.patch('funkwhale_api.music.models.Album.get_image') + mocker.patch( + 'funkwhale_api.music.metadata.Metadata.get_picture', + return_value={'hello': 'world'}) + tasks.update_album_cover(album=album, track_file=tf) + md = data = tf.get_metadata() + mocked_get.assert_called_once_with( + data={'hello': 'world'}) + + +@pytest.mark.parametrize('ext,mimetype', [ + ('jpg', 'image/jpeg'), + ('png', 'image/png'), +]) +def test_update_album_cover_file_cover_separate_file( + ext, mimetype, factories, mocker): + mocker.patch('funkwhale_api.music.tasks.IMAGE_TYPES', [(ext, mimetype)]) + path = os.path.join(DATA_DIR, 'test.mp3') + image_path = os.path.join(DATA_DIR, 'cover.{}'.format(ext)) + with open(image_path, 'rb') as f: + image_content = f.read() + album = factories['music.Album'](cover='', mbid=None) + tf = factories['music.TrackFile']( + track__album=album, + source='file://' + image_path) + + mocked_get = mocker.patch('funkwhale_api.music.models.Album.get_image') + mocker.patch( + 'funkwhale_api.music.metadata.Metadata.get_picture', + return_value=None) + tasks.update_album_cover(album=album, track_file=tf) + md = data = tf.get_metadata() + mocked_get.assert_called_once_with( + data={'mimetype': mimetype, 'content': image_content}) diff --git a/changes/changelog.d/219.enhancement b/changes/changelog.d/219.enhancement new file mode 100644 index 0000000000000000000000000000000000000000..fec9a07ac466c3b991400734b63e444f4a191204 --- /dev/null +++ b/changes/changelog.d/219.enhancement @@ -0,0 +1 @@ +Can now use album covers from flac/mp3 metadata and separate file in track directory (#219) diff --git a/changes/changelog.d/231.bugfix b/changes/changelog.d/231.bugfix new file mode 100644 index 0000000000000000000000000000000000000000..0b37b6274d9f6f4503a2a77b504f75b72dc14b10 --- /dev/null +++ b/changes/changelog.d/231.bugfix @@ -0,0 +1,50 @@ +We now fetch album covers regardless of the import methods (#231) + +Smarter album cover importer +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In earlier versions, covers where only imported when launching a YouTube import. +Starting from this release, covers will be imported regardless of the import mode +(file upload, youtube-dl, CLI, in-place...). Funkwhale will look for covers +in the following order: + +1. In the imported file itself (FLAC/MP3 only) +2. In a cover.jpg or cover.png in the file directory +3. By fetching cover art from Musibrainz, assuming the file is tagged correctly + +This will only work for newly imported tracks and albums though. In the future, +we may offer an option to refetch album covers from the interface, but in the +meantime, you can use the following snippet: + +.. code-block:: python + + # Store this in /tmp/update_albums.py + from funkwhale_api.music.models import Album, TrackFile + from funkwhale_api.music.tasks import update_album_cover + + albums_without_covers = Album.objects.filter(cover='') + total = albums_without_covers.count() + print('Found {} albums without cover'.format(total)) + for i, album in enumerate(albums_without_covers.iterator()): + print('[{}/{}] Fetching cover for {}...'.format(i+1, total, album.title)) + f = TrackFile.objects.filter(track__album=album).filter(source__startswith='file://').first() + update_album_cover(album, track_file=f) + +Then launch it:: + + # docker setups + cat /tmp/update_albums.py | docker-compose run --rm api python manage.py shell -i python + + # non-docker setups + source /srv/funkwhale/load_env + source /srv/funkwhale/virtualenv/bin/activate + cat /tmp/update_albums.py | python manage.py shell -i python + + # cleanup + rm /tmp/update_albums.py + +.. note:: + + Depending on your number of albums, the previous snippet may take some time + to execute. You can interrupt it at any time using ctrl-c and relaunch it later, + as it's idempotent. diff --git a/docs/importing-music.rst b/docs/importing-music.rst index 4d256604b522190941a8bed69a0c507d6bf05f23..b190dff368f1fae04b6a14860aaf799cb4673242 100644 --- a/docs/importing-music.rst +++ b/docs/importing-music.rst @@ -76,6 +76,15 @@ configuration options to ensure the webserver can serve them properly: Thus, be especially careful when you manipulate the source files. +Album covers +^^^^^^^^^^^^ + +Whenever possible, Funkwhale will import album cover, with the following precedence: + +1. It will use the cover embedded in the audio files themeselves, if any (Flac/MP3 only) +2. It will use a cover.jpg or a cover.png file from the imported track directory, if any +3. It will fectch cover art from musicbrainz, assuming the file is tagged correctly + Getting demo tracks ^^^^^^^^^^^^^^^^^^^