diff --git a/api/config/settings/common.py b/api/config/settings/common.py
index 675d3e8ce17fe4095890cd3e43278b07fd8fdb33..9ba1ecd09ab20059e31298c69bd33031bf1f7ab1 100644
--- a/api/config/settings/common.py
+++ b/api/config/settings/common.py
@@ -1309,3 +1309,6 @@ IGNORE_FORWARDED_HOST_AND_PROTO = env.bool(
"""
Use :attr:`FUNKWHALE_HOSTNAME` and :attr:`FUNKWHALE_PROTOCOL ` instead of request header.
"""
+
+HASHING_ALGORITHM = "sha256"
+HASHING_CHUNK_SIZE = 1024 * 100
diff --git a/api/funkwhale_api/common/utils.py b/api/funkwhale_api/common/utils.py
index 0a38ef05f0ea79dd11dc7d91fa418cb94adfb1a0..2a91524d531ea839127435aa4399a7ca34ee3a5f 100644
--- a/api/funkwhale_api/common/utils.py
+++ b/api/funkwhale_api/common/utils.py
@@ -1,4 +1,5 @@
import datetime
+import hashlib
from django.core.files.base import ContentFile
from django.http import request
@@ -458,3 +459,19 @@ def monkey_patch_request_build_absolute_uri():
request.HttpRequest.scheme = property(scheme)
request.HttpRequest.get_host = get_host
+
+
+def get_file_hash(file, algo=None, chunk_size=None, full_read=False):
+ algo = algo or settings.HASHING_ALGORITHM
+ chunk_size = chunk_size or settings.HASHING_CHUNK_SIZE
+ handler = getattr(hashlib, algo)
+ hash = handler()
+ file.seek(0)
+ if full_read:
+ for byte_block in iter(lambda: file.read(chunk_size), b""):
+ hash.update(byte_block)
+ else:
+ # sometimes, it's useful to only hash the beginning of the file, e.g
+ # to avoid a lot of I/O when crawling large libraries
+ hash.update(file.read(chunk_size))
+ return "{}:{}".format(algo, hash.hexdigest())
diff --git a/api/funkwhale_api/music/management/commands/fix_uploads.py b/api/funkwhale_api/music/management/commands/fix_uploads.py
index 582a837c47096ed8c81c684072abdf5593e962ac..02f3d22328a593a704b1637992dca3c7fdf7df04 100644
--- a/api/funkwhale_api/music/management/commands/fix_uploads.py
+++ b/api/funkwhale_api/music/management/commands/fix_uploads.py
@@ -2,6 +2,7 @@ from django.core.management.base import BaseCommand
from django.db import transaction
from django.db.models import Q
+from funkwhale_api.common import utils as common_utils
from funkwhale_api.music import models, utils
@@ -17,9 +18,9 @@ class Command(BaseCommand):
help="Do not execute anything",
)
parser.add_argument(
- "--mimetypes",
+ "--mimetype",
action="store_true",
- dest="mimetypes",
+ dest="mimetype",
default=True,
help="Check and fix mimetypes",
)
@@ -37,16 +38,33 @@ class Command(BaseCommand):
default=False,
help="Check and fix file size, can be really slow because it needs to access files",
)
+ parser.add_argument(
+ "--checksum",
+ action="store_true",
+ dest="checksum",
+ default=False,
+ help="Check and fix file size, can be really slow because it needs to access files",
+ )
+ parser.add_argument(
+ "--batch-size",
+ "-s",
+ dest="batch_size",
+ default=1000,
+ type=int,
+ help="Size of each updated batch",
+ )
def handle(self, *args, **options):
if options["dry_run"]:
self.stdout.write("Dry-run on, will not commit anything")
- if options["mimetypes"]:
+ if options["mimetype"]:
self.fix_mimetypes(**options)
if options["data"]:
self.fix_file_data(**options)
if options["size"]:
self.fix_file_size(**options)
+ if options["checksum"]:
+ self.fix_file_checksum(**options)
@transaction.atomic
def fix_mimetypes(self, dry_run, **kwargs):
@@ -54,11 +72,12 @@ class Command(BaseCommand):
matching = models.Upload.objects.filter(
Q(source__startswith="file://") | Q(source__startswith="upload://")
).exclude(mimetype__startswith="audio/")
+ total = matching.count()
self.stdout.write(
- "[mimetypes] {} entries found with bad or no mimetype".format(
- matching.count()
- )
+ "[mimetypes] {} entries found with bad or no mimetype".format(total)
)
+ if not total:
+ return
for extension, mimetype in utils.EXTENSION_TO_MIMETYPE.items():
qs = matching.filter(source__endswith=".{}".format(extension))
self.stdout.write(
@@ -81,24 +100,36 @@ class Command(BaseCommand):
)
if dry_run:
return
- for i, upload in enumerate(matching.only("audio_file")):
- self.stdout.write(
- "[bitrate/length] {}/{} fixing file #{}".format(i + 1, total, upload.pk)
- )
- try:
- audio_file = upload.get_audio_file()
- if audio_file:
+ chunks = common_utils.chunk_queryset(
+ matching.only("id", "audio_file", "source"), kwargs["batch_size"]
+ )
+ handled = 0
+ for chunk in chunks:
+ updated = []
+ for upload in chunk:
+ handled += 1
+ self.stdout.write(
+ "[bitrate/length] {}/{} fixing file #{}".format(
+ handled, total, upload.pk
+ )
+ )
+
+ try:
+ audio_file = upload.get_audio_file()
data = utils.get_audio_file_data(audio_file)
upload.bitrate = data["bitrate"]
upload.duration = data["length"]
- upload.save(update_fields=["duration", "bitrate"])
+ except Exception as e:
+ self.stderr.write(
+ "[bitrate/length] error with file #{}: {}".format(
+ upload.pk, str(e)
+ )
+ )
else:
- self.stderr.write("[bitrate/length] no file found")
- except Exception as e:
- self.stderr.write(
- "[bitrate/length] error with file #{}: {}".format(upload.pk, str(e))
- )
+ updated.append(upload)
+
+ models.Upload.objects.bulk_update(updated, ["bitrate", "duration"])
def fix_file_size(self, dry_run, **kwargs):
self.stdout.write("Fixing missing size...")
@@ -107,15 +138,64 @@ class Command(BaseCommand):
self.stdout.write("[size] {} entries found with missing values".format(total))
if dry_run:
return
- for i, upload in enumerate(matching.only("size")):
- self.stdout.write(
- "[size] {}/{} fixing file #{}".format(i + 1, total, upload.pk)
- )
- try:
- upload.size = upload.get_file_size()
- upload.save(update_fields=["size"])
- except Exception as e:
- self.stderr.write(
- "[size] error with file #{}: {}".format(upload.pk, str(e))
+ chunks = common_utils.chunk_queryset(
+ matching.only("id", "audio_file", "source"), kwargs["batch_size"]
+ )
+ handled = 0
+ for chunk in chunks:
+ updated = []
+ for upload in chunk:
+ handled += 1
+
+ self.stdout.write(
+ "[size] {}/{} fixing file #{}".format(handled, total, upload.pk)
+ )
+
+ try:
+ upload.size = upload.get_file_size()
+ except Exception as e:
+ self.stderr.write(
+ "[size] error with file #{}: {}".format(upload.pk, str(e))
+ )
+ else:
+ updated.append(upload)
+
+ models.Upload.objects.bulk_update(updated, ["size"])
+
+ def fix_file_checksum(self, dry_run, **kwargs):
+ self.stdout.write("Fixing missing checksums...")
+ matching = models.Upload.objects.filter(
+ Q(checksum=None)
+ & (Q(audio_file__isnull=False) | Q(source__startswith="file://"))
+ )
+ total = matching.count()
+ self.stdout.write(
+ "[checksum] {} entries found with missing values".format(total)
+ )
+ if dry_run:
+ return
+ chunks = common_utils.chunk_queryset(
+ matching.only("id", "audio_file", "source"), kwargs["batch_size"]
+ )
+ handled = 0
+ for chunk in chunks:
+ updated = []
+ for upload in chunk:
+ handled += 1
+ self.stdout.write(
+ "[checksum] {}/{} fixing file #{}".format(handled, total, upload.pk)
)
+
+ try:
+ upload.checksum = common_utils.get_file_hash(
+ upload.get_audio_file()
+ )
+ except Exception as e:
+ self.stderr.write(
+ "[checksum] error with file #{}: {}".format(upload.pk, str(e))
+ )
+ else:
+ updated.append(upload)
+
+ models.Upload.objects.bulk_update(updated, ["checksum"])
diff --git a/api/funkwhale_api/music/management/commands/import_files.py b/api/funkwhale_api/music/management/commands/import_files.py
index cbffca63df02af4cbe8ee29cde10c07aecf474d9..fab980510d77b045df80df803d0fa40642a520ac 100644
--- a/api/funkwhale_api/music/management/commands/import_files.py
+++ b/api/funkwhale_api/music/management/commands/import_files.py
@@ -1,17 +1,29 @@
+import collections
+import datetime
import itertools
import os
-import urllib.parse
+import queue
+import threading
import time
+import urllib.parse
+
+import watchdog.events
+import watchdog.observers
from django.conf import settings
from django.core.files import File
+from django.core.management import call_command
from django.core.management.base import BaseCommand, CommandError
+from django.db.models import Q
from django.utils import timezone
+from rest_framework import serializers
+
+from funkwhale_api.common import utils as common_utils
from funkwhale_api.music import models, tasks, utils
-def crawl_dir(dir, extensions, recursive=True):
+def crawl_dir(dir, extensions, recursive=True, ignored=[]):
if os.path.isfile(dir):
yield dir
return
@@ -20,9 +32,12 @@ def crawl_dir(dir, extensions, recursive=True):
if entry.is_file():
for e in extensions:
if entry.name.lower().endswith(".{}".format(e.lower())):
- yield entry.path
+ if entry.path not in ignored:
+ yield entry.path
elif recursive and entry.is_dir():
- yield from crawl_dir(entry, extensions, recursive=recursive)
+ yield from crawl_dir(
+ entry, extensions, recursive=recursive, ignored=ignored
+ )
def batch(iterable, n=1):
@@ -116,6 +131,17 @@ class Command(BaseCommand):
"of overhead on your server and on servers you are federating with."
),
)
+ parser.add_argument(
+ "--watch",
+ action="store_true",
+ dest="watch",
+ default=False,
+ help=(
+ "Start the command in watch mode. Instead of running a full import, "
+ "and exit, watch the given path and import new files, remove deleted "
+ "files, and update metadata corresponding to updated files."
+ ),
+ )
parser.add_argument("-e", "--extension", nargs="+")
parser.add_argument(
@@ -128,6 +154,15 @@ class Command(BaseCommand):
"This causes some overhead, so it's disabled by default."
),
)
+ parser.add_argument(
+ "--prune",
+ action="store_true",
+ dest="prune",
+ default=False,
+ help=(
+ "Once the import is completed, prune tracks, ablums and artists that aren't linked to any upload."
+ ),
+ )
parser.add_argument(
"--reference",
@@ -157,6 +192,8 @@ class Command(BaseCommand):
)
def handle(self, *args, **options):
+ # handle relative directories
+ options["path"] = [os.path.abspath(path) for path in options["path"]]
self.is_confirmed = False
try:
library = models.Library.objects.select_related("actor__user").get(
@@ -182,22 +219,12 @@ class Command(BaseCommand):
)
if p and not import_path.startswith(p):
raise CommandError(
- "Importing in-place only works if importing"
+ "Importing in-place only works if importing "
"from {} (MUSIC_DIRECTORY_PATH), as this directory"
"needs to be accessible by the webserver."
"Culprit: {}".format(p, import_path)
)
- extensions = options.get("extension") or utils.SUPPORTED_EXTENSIONS
- crawler = itertools.chain(
- *[
- crawl_dir(p, extensions=extensions, recursive=options["recursive"])
- for p in options["path"]
- ]
- )
- errors = []
- total = 0
- start_time = time.time()
reference = options["reference"] or "cli-{}".format(timezone.now().isoformat())
import_url = "{}://{}/library/{}/upload?{}"
@@ -212,8 +239,62 @@ class Command(BaseCommand):
reference, import_url
)
)
+ extensions = options.get("extension") or utils.SUPPORTED_EXTENSIONS
+ if options["watch"]:
+ if len(options["path"]) > 1:
+ raise CommandError("Watch only work with a single directory")
+
+ return self.setup_watcher(
+ extensions=extensions,
+ path=options["path"][0],
+ reference=reference,
+ library=library,
+ in_place=options["in_place"],
+ prune=options["prune"],
+ recursive=options["recursive"],
+ replace=options["replace"],
+ dispatch_outbox=options["outbox"],
+ broadcast=options["broadcast"],
+ )
+
+ update = True
+ checked_paths = set()
+ if options["in_place"] and update:
+ self.stdout.write("Checking existing files for updates…")
+ message = (
+ "Are you sure you want to do this?\n\n"
+ "Type 'yes' to continue, or 'no' to skip checking for updates in "
+ "already imported files: "
+ )
+ if options["interactive"] and input("".join(message)) != "yes":
+ pass
+ else:
+ checked_paths = check_updates(
+ stdout=self.stdout,
+ paths=options["path"],
+ extensions=extensions,
+ library=library,
+ batch_size=options["batch_size"],
+ )
+ self.stdout.write("Existing files checked, moving on to next step!")
+
+ crawler = itertools.chain(
+ *[
+ crawl_dir(
+ p,
+ extensions=extensions,
+ recursive=options["recursive"],
+ ignored=checked_paths,
+ )
+ for p in options["path"]
+ ]
+ )
+ errors = []
+ total = 0
+ start_time = time.time()
batch_start = None
batch_duration = None
+ self.stdout.write("Starting import of new files…")
for i, entries in enumerate(batch(crawler, options["batch_size"])):
total += len(entries)
batch_start = time.time()
@@ -225,7 +306,7 @@ class Command(BaseCommand):
if entries:
self.stdout.write(
"Handling batch {} ({} items){}".format(
- i + 1, options["batch_size"], time_stats,
+ i + 1, len(entries), time_stats,
)
)
batch_errors = self.handle_batch(
@@ -240,9 +321,9 @@ class Command(BaseCommand):
batch_duration = time.time() - batch_start
- message = "Successfully imported {} tracks in {}s"
+ message = "Successfully imported {} new tracks in {}s"
if options["async_"]:
- message = "Successfully launched import for {} tracks in {}s"
+ message = "Successfully launched import for {} new tracks in {}s"
self.stdout.write(
message.format(total - len(errors), int(time.time() - start_time))
@@ -259,6 +340,12 @@ class Command(BaseCommand):
)
)
+ if options["prune"]:
+ self.stdout.write(
+ "Pruning dangling tracks, albums and artists from library…"
+ )
+ prune()
+
def handle_batch(self, library, paths, batch, reference, options):
matching = []
for m in paths:
@@ -362,15 +449,15 @@ class Command(BaseCommand):
message.format(batch=batch, path=path, i=i + 1, total=len(paths))
)
try:
- self.create_upload(
- path,
- reference,
- library,
- async_,
- options["replace"],
- options["in_place"],
- options["outbox"],
- options["broadcast"],
+ create_upload(
+ path=path,
+ reference=reference,
+ library=library,
+ async_=async_,
+ replace=options["replace"],
+ in_place=options["in_place"],
+ dispatch_outbox=options["outbox"],
+ broadcast=options["broadcast"],
)
except Exception as e:
if options["exit_on_failure"]:
@@ -382,34 +469,311 @@ class Command(BaseCommand):
errors.append((path, "{} {}".format(e.__class__.__name__, e)))
return errors
- def create_upload(
- self,
- path,
- reference,
- library,
- async_,
- replace,
- in_place,
- dispatch_outbox,
- broadcast,
- ):
- import_handler = tasks.process_upload.delay if async_ else tasks.process_upload
- upload = models.Upload(library=library, import_reference=reference)
- upload.source = "file://" + path
- upload.import_metadata = {
- "funkwhale": {
- "config": {
- "replace": replace,
- "dispatch_outbox": dispatch_outbox,
- "broadcast": broadcast,
- }
+ def setup_watcher(self, path, extensions, recursive, **kwargs):
+ watchdog_queue = queue.Queue()
+ # Set up a worker thread to process database load
+ worker = threading.Thread(
+ target=process_load_queue(self.stdout, **kwargs), args=(watchdog_queue,),
+ )
+ worker.setDaemon(True)
+ worker.start()
+
+ # setup watchdog to monitor directory for trigger files
+ patterns = ["*.{}".format(e) for e in extensions]
+ event_handler = Watcher(
+ stdout=self.stdout, queue=watchdog_queue, patterns=patterns,
+ )
+ observer = watchdog.observers.Observer()
+ observer.schedule(event_handler, path, recursive=recursive)
+ observer.start()
+
+ try:
+ while True:
+ self.stdout.write(
+ "Watching for changes at {}…".format(path), ending="\r"
+ )
+ time.sleep(10)
+ if kwargs["prune"] and GLOBAL["need_pruning"]:
+ self.stdout.write("Some files were deleted, pruning library…")
+ prune()
+ GLOBAL["need_pruning"] = False
+ except KeyboardInterrupt:
+ self.stdout.write("Exiting…")
+ observer.stop()
+
+ observer.join()
+
+
+GLOBAL = {"need_pruning": False}
+
+
+def prune():
+ call_command(
+ "prune_library",
+ dry_run=False,
+ prune_artists=True,
+ prune_albums=True,
+ prune_tracks=True,
+ )
+
+
+def create_upload(
+ path, reference, library, async_, replace, in_place, dispatch_outbox, broadcast,
+):
+ import_handler = tasks.process_upload.delay if async_ else tasks.process_upload
+ upload = models.Upload(library=library, import_reference=reference)
+ upload.source = "file://" + path
+ upload.import_metadata = {
+ "funkwhale": {
+ "config": {
+ "replace": replace,
+ "dispatch_outbox": dispatch_outbox,
+ "broadcast": broadcast,
}
}
- if not in_place:
- name = os.path.basename(path)
- with open(path, "rb") as f:
- upload.audio_file.save(name, File(f), save=False)
+ }
+ if not in_place:
+ name = os.path.basename(path)
+ with open(path, "rb") as f:
+ upload.audio_file.save(name, File(f), save=False)
+
+ upload.save()
+
+ import_handler(upload_id=upload.pk)
+
+
+def process_load_queue(stdout, **kwargs):
+ def inner(q):
+ # we batch events, to avoid calling same methods multiple times if a file is modified
+ # a lot in a really short time
+ flush_delay = 2
+ batched_events = collections.OrderedDict()
+ while True:
+ while True:
+ if not q.empty():
+ event = q.get()
+ batched_events[event["path"]] = event
+ else:
+ break
+ for path, event in batched_events.copy().items():
+ if time.time() - event["time"] <= flush_delay:
+ continue
+ now = datetime.datetime.utcnow()
+ stdout.write(
+ "{} -- Processing {}:{}...\n".format(
+ now.strftime("%Y/%m/%d %H:%M:%S"), event["type"], event["path"]
+ )
+ )
+ del batched_events[path]
+ handle_event(event, stdout=stdout, **kwargs)
+ time.sleep(1)
+
+ return inner
+
+
+class Watcher(watchdog.events.PatternMatchingEventHandler):
+ def __init__(self, stdout, queue, patterns):
+ self.stdout = stdout
+ self.queue = queue
+ super().__init__(patterns=patterns)
+
+ def enqueue(self, event):
+ e = {
+ "is_directory": event.is_directory,
+ "type": event.event_type,
+ "path": event.src_path,
+ "src_path": event.src_path,
+ "dest_path": getattr(event, "dest_path", None),
+ "time": time.time(),
+ }
+ self.queue.put(e)
+
+ def on_moved(self, event):
+ self.enqueue(event)
+
+ def on_created(self, event):
+ self.enqueue(event)
+
+ def on_deleted(self, event):
+ self.enqueue(event)
+
+ def on_modified(self, event):
+ self.enqueue(event)
+
- upload.save()
+def handle_event(event, stdout, **kwargs):
+ handlers = {
+ "modified": handle_modified,
+ "created": handle_created,
+ "moved": handle_moved,
+ "deleted": handle_deleted,
+ }
+ handlers[event["type"]](event=event, stdout=stdout, **kwargs)
- import_handler(upload_id=upload.pk)
+
+def handle_modified(event, stdout, library, in_place, **kwargs):
+ existing_candidates = library.uploads.filter(import_status="finished")
+ with open(event["path"], "rb") as f:
+ checksum = common_utils.get_file_hash(f)
+
+ existing = existing_candidates.filter(checksum=checksum).first()
+ if existing:
+ # found an existing file with same checksum, nothing to do
+ stdout.write(" File already imported and metadata is up-to-date")
+ return
+
+ to_update = None
+ if in_place:
+ source = "file://{}".format(event["path"])
+ to_update = (
+ existing_candidates.in_place()
+ .filter(source=source)
+ .select_related(
+ "track__attributed_to", "track__artist", "track__album__artist",
+ )
+ .first()
+ )
+ if to_update:
+ if (
+ to_update.track.attributed_to
+ and to_update.track.attributed_to != library.actor
+ ):
+ stdout.write(
+ " Cannot update track metadata, track belongs to someone else".format(
+ to_update.pk
+ )
+ )
+ return
+ else:
+ stdout.write(
+ " Updating existing file #{} with new metadata…".format(
+ to_update.pk
+ )
+ )
+ audio_metadata = to_update.get_metadata()
+ try:
+ tasks.update_track_metadata(audio_metadata, to_update.track)
+ except serializers.ValidationError as e:
+ stdout.write(" Invalid metadata: {}".format(e))
+ else:
+ to_update.checksum = checksum
+ to_update.save(update_fields=["checksum"])
+ return
+
+ stdout.write(" Launching import for new file")
+ create_upload(
+ path=event["path"],
+ reference=kwargs["reference"],
+ library=library,
+ async_=False,
+ replace=kwargs["replace"],
+ in_place=in_place,
+ dispatch_outbox=kwargs["dispatch_outbox"],
+ broadcast=kwargs["broadcast"],
+ )
+
+
+def handle_created(event, stdout, **kwargs):
+ """
+ Created is essentially an alias for modified, because for instance when copying a file in the watched directory,
+ a created event will be fired on the initial touch, then many modified event (as the file is written).
+ """
+ return handle_modified(event, stdout, **kwargs)
+
+
+def handle_moved(event, stdout, library, in_place, **kwargs):
+ if not in_place:
+ return
+
+ old_source = "file://{}".format(event["src_path"])
+ new_source = "file://{}".format(event["dest_path"])
+ existing_candidates = library.uploads.filter(import_status="finished")
+ existing_candidates = existing_candidates.in_place().filter(source=old_source)
+ existing = existing_candidates.first()
+ if existing:
+ stdout.write(" Updating path of existing file #{}".format(existing.pk))
+ existing.source = new_source
+ existing.save(update_fields=["source"])
+
+
+def handle_deleted(event, stdout, library, in_place, **kwargs):
+ if not in_place:
+ return
+ source = "file://{}".format(event["path"])
+ existing_candidates = library.uploads.filter(import_status="finished")
+ existing_candidates = existing_candidates.in_place().filter(source=source)
+ if existing_candidates.count():
+ stdout.write(" Removing file from DB")
+ existing_candidates.delete()
+ GLOBAL["need_pruning"] = True
+
+
+def check_updates(stdout, library, extensions, paths, batch_size):
+ existing = (
+ library.uploads.in_place()
+ .filter(import_status="finished")
+ .exclude(checksum=None)
+ .select_related("library", "track")
+ )
+ queries = []
+ checked_paths = set()
+ for path in paths:
+ for ext in extensions:
+ queries.append(
+ Q(source__startswith="file://{}".format(path))
+ & Q(source__endswith=".{}".format(ext))
+ )
+ query, remainder = queries[0], queries[1:]
+ for q in remainder:
+ query = q | query
+ existing = existing.filter(query)
+ total = existing.count()
+ stdout.write("Found {} files to check in database!".format(total))
+ uploads = existing.order_by("source")
+ for i, rows in enumerate(batch(uploads.iterator(), batch_size)):
+ stdout.write("Handling batch {} ({} items)".format(i + 1, len(rows),))
+
+ for upload in rows:
+
+ check_upload(stdout, upload)
+ checked_paths.add(upload.source.replace("file://", "", 1))
+
+ return checked_paths
+
+
+def check_upload(stdout, upload):
+ try:
+ audio_file = upload.get_audio_file()
+ except FileNotFoundError:
+ stdout.write(
+ " Removing file #{} missing from disk at {}".format(
+ upload.pk, upload.source
+ )
+ )
+ return upload.delete()
+
+ checksum = common_utils.get_file_hash(audio_file)
+ if upload.checksum != checksum:
+ stdout.write(
+ " File #{} at {} was modified, updating metadata…".format(
+ upload.pk, upload.source
+ )
+ )
+ if upload.library.actor_id != upload.track.attributed_to_id:
+ stdout.write(
+ " Cannot update track metadata, track belongs to someone else".format(
+ upload.pk
+ )
+ )
+ else:
+ track = models.Track.objects.select_related("artist", "album__artist").get(
+ pk=upload.track_id
+ )
+ try:
+ tasks.update_track_metadata(upload.get_metadata(), track)
+ except serializers.ValidationError as e:
+ stdout.write(" Invalid metadata: {}".format(e))
+ return
+ else:
+ upload.checksum = checksum
+ upload.save(update_fields=["checksum"])
diff --git a/api/funkwhale_api/music/migrations/0052_auto_20200505_0810.py b/api/funkwhale_api/music/migrations/0052_auto_20200505_0810.py
new file mode 100644
index 0000000000000000000000000000000000000000..2a05266430e9a65149bf538dfc5ddb4817733c32
--- /dev/null
+++ b/api/funkwhale_api/music/migrations/0052_auto_20200505_0810.py
@@ -0,0 +1,23 @@
+# Generated by Django 3.0.4 on 2020-05-05 08:10
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ('music', '0051_auto_20200319_1249'),
+ ]
+
+ operations = [
+ migrations.AddField(
+ model_name='upload',
+ name='checksum',
+ field=models.CharField(blank=True, db_index=True, max_length=100, null=True),
+ ),
+ migrations.AlterField(
+ model_name='uploadversion',
+ name='mimetype',
+ field=models.CharField(choices=[('audio/mp3', 'mp3'), ('audio/mpeg3', 'mp3'), ('audio/x-mp3', 'mp3'), ('audio/mpeg', 'mp3'), ('video/ogg', 'ogg'), ('audio/ogg', 'ogg'), ('audio/opus', 'opus'), ('audio/x-m4a', 'aac'), ('audio/x-m4a', 'm4a'), ('audio/x-flac', 'flac'), ('audio/flac', 'flac')], max_length=50),
+ ),
+ ]
diff --git a/api/funkwhale_api/music/models.py b/api/funkwhale_api/music/models.py
index ecb616b4141529e91aef85fbdffe772ee0233269..43371d5d8c461c6f367fe527096b2e454693a9b1 100644
--- a/api/funkwhale_api/music/models.py
+++ b/api/funkwhale_api/music/models.py
@@ -655,6 +655,14 @@ class Track(APIModelMixin):
class UploadQuerySet(common_models.NullsLastQuerySet):
+ def in_place(self, include=True):
+ query = models.Q(source__startswith="file://") & (
+ models.Q(audio_file="") | models.Q(audio_file=None)
+ )
+ if not include:
+ query = ~query
+ return self.filter(query)
+
def playable_by(self, actor, include=True):
libraries = Library.objects.viewable_by(actor)
@@ -754,6 +762,9 @@ class Upload(models.Model):
)
downloads_count = models.PositiveIntegerField(default=0)
+ # stores checksums such as `sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855`
+ checksum = models.CharField(max_length=100, db_index=True, null=True, blank=True)
+
objects = UploadQuerySet.as_manager()
@property
@@ -833,7 +844,7 @@ class Upload(models.Model):
def get_audio_file(self):
if self.audio_file:
return self.audio_file.open()
- if self.source.startswith("file://"):
+ if self.source and self.source.startswith("file://"):
return open(self.source.replace("file://", "", 1), "rb")
def get_audio_data(self):
@@ -866,6 +877,15 @@ class Upload(models.Model):
self.mimetype = mimetypes.guess_type(self.source)[0]
if not self.size and self.audio_file:
self.size = self.audio_file.size
+ if not self.checksum:
+ try:
+ audio_file = self.get_audio_file()
+ except FileNotFoundError:
+ pass
+ else:
+ if audio_file:
+ self.checksum = common_utils.get_file_hash(audio_file)
+
if not self.pk and not self.fid and self.library.actor.get_user():
self.fid = self.get_federation_id()
return super().save(**kwargs)
diff --git a/api/funkwhale_api/music/tasks.py b/api/funkwhale_api/music/tasks.py
index 89ee1a88f98cc959a5a75263562cdc37332efb49..722a9eefb29bbb94a9d01df23c654dc83d20bd79 100644
--- a/api/funkwhale_api/music/tasks.py
+++ b/api/funkwhale_api/music/tasks.py
@@ -851,3 +851,71 @@ def update_library_entity(obj, data):
obj.save(update_fields=list(data.keys()))
return obj
+
+
+UPDATE_CONFIG = {
+ "track": {
+ "position": {},
+ "title": {},
+ "mbid": {},
+ "disc_number": {},
+ "copyright": {},
+ "license": {
+ "getter": lambda data, field: licenses.match(
+ data.get("license"), data.get("copyright")
+ )
+ },
+ },
+ "album": {"title": {}, "mbid": {}, "release_date": {}},
+ "artist": {"name": {}, "mbid": {}},
+ "album_artist": {"name": {}, "mbid": {}},
+}
+
+
+@transaction.atomic
+def update_track_metadata(audio_metadata, track):
+ # XXX: implement this to support updating metadata when an imported files
+ # is updated by an outside tool (e.g beets).
+ serializer = metadata.TrackMetadataSerializer(data=audio_metadata)
+ serializer.is_valid(raise_exception=True)
+ new_data = serializer.validated_data
+
+ to_update = [
+ ("track", track, lambda data: data),
+ ("album", track.album, lambda data: data["album"]),
+ ("artist", track.artist, lambda data: data["artists"][0]),
+ (
+ "album_artist",
+ track.album.artist if track.album else None,
+ lambda data: data["album"]["artists"][0],
+ ),
+ ]
+ for id, obj, data_getter in to_update:
+ if not obj:
+ continue
+ obj_updated_fields = []
+ try:
+ obj_data = data_getter(new_data)
+ except IndexError:
+ continue
+ for field, config in UPDATE_CONFIG[id].items():
+ getter = config.get(
+ "getter", lambda data, field: data[config.get("field", field)]
+ )
+ try:
+ new_value = getter(obj_data, field)
+ except KeyError:
+ continue
+ old_value = getattr(obj, field)
+ if new_value == old_value:
+ continue
+ obj_updated_fields.append(field)
+ setattr(obj, field, new_value)
+
+ if obj_updated_fields:
+ obj.save(update_fields=obj_updated_fields)
+
+ if track.album and "album" in new_data and new_data["album"].get("cover_data"):
+ common_utils.attach_file(
+ track.album, "attachment_cover", new_data["album"].get("cover_data")
+ )
diff --git a/api/requirements/base.txt b/api/requirements/base.txt
index ecb3d9b5ee69af063f5b0ac708e9b638b6285d8d..6e31f857e6059f5f61139fcfa80f652f8f602eac 100644
--- a/api/requirements/base.txt
+++ b/api/requirements/base.txt
@@ -83,3 +83,4 @@ service_identity==18.1.0
markdown>=3.2,<4
bleach>=3,<4
feedparser==6.0.0b3
+watchdog==0.10.2
diff --git a/api/tests/common/test_utils.py b/api/tests/common/test_utils.py
index ffe9eae3b002771cb5516ae037005571f5d7b78c..303c8cbcad21a3efcce887051b3ad14212769dfb 100644
--- a/api/tests/common/test_utils.py
+++ b/api/tests/common/test_utils.py
@@ -258,3 +258,12 @@ def test_monkey_patch_request_build_absolute_uri(
request = fake_request.get("/", **meta)
assert request.build_absolute_uri(path) == expected
+
+
+def test_get_file_hash(tmpfile, settings):
+ settings.HASHING_ALGORITHM = "sha256"
+ content = b"hello"
+ tmpfile.write(content)
+ # echo -n "hello" | sha256sum
+ expected = "sha256:2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824"
+ assert utils.get_file_hash(tmpfile) == expected
diff --git a/api/tests/music/test_commands.py b/api/tests/music/test_commands.py
index a08f1b10b8f9f90d0f3d8474ce7380bcf45594b7..329dcffb5616dc491e0d829d077fb792d1b5b3e1 100644
--- a/api/tests/music/test_commands.py
+++ b/api/tests/music/test_commands.py
@@ -1,6 +1,7 @@
import os
import pytest
+from funkwhale_api.common import utils as common_utils
from funkwhale_api.music.management.commands import check_inplace_files
from funkwhale_api.music.management.commands import fix_uploads
from funkwhale_api.music.management.commands import prune_library
@@ -18,7 +19,7 @@ def test_fix_uploads_bitrate_length(factories, mocker):
return_value={"bitrate": 42, "length": 43},
)
- c.fix_file_data(dry_run=False)
+ c.fix_file_data(dry_run=False, batch_size=100)
upload1.refresh_from_db()
upload2.refresh_from_db()
@@ -41,7 +42,7 @@ def test_fix_uploads_size(factories, mocker):
mocker.patch("funkwhale_api.music.models.Upload.get_file_size", return_value=2)
- c.fix_file_size(dry_run=False)
+ c.fix_file_size(dry_run=False, batch_size=100)
upload1.refresh_from_db()
upload2.refresh_from_db()
@@ -69,7 +70,7 @@ def test_fix_uploads_mimetype(factories, mocker):
mimetype="audio/something",
)
c = fix_uploads.Command()
- c.fix_mimetypes(dry_run=False)
+ c.fix_mimetypes(dry_run=False, batch_size=100)
upload1.refresh_from_db()
upload2.refresh_from_db()
@@ -78,6 +79,25 @@ def test_fix_uploads_mimetype(factories, mocker):
assert upload2.mimetype == "audio/something"
+def test_fix_uploads_checksum(factories, mocker):
+ upload1 = factories["music.Upload"]()
+ upload2 = factories["music.Upload"]()
+ upload1.__class__.objects.filter(pk=upload1.pk).update(checksum="test")
+ upload2.__class__.objects.filter(pk=upload2.pk).update(checksum=None)
+ c = fix_uploads.Command()
+
+ c.fix_file_checksum(dry_run=False, batch_size=100)
+
+ upload1.refresh_from_db()
+ upload2.refresh_from_db()
+
+ # not updated
+ assert upload1.checksum == "test"
+
+ # updated
+ assert upload2.checksum == common_utils.get_file_hash(upload2.audio_file)
+
+
def test_prune_library_dry_run(factories):
prunable = factories["music.Track"]()
not_prunable = factories["music.Track"]()
diff --git a/api/tests/music/test_models.py b/api/tests/music/test_models.py
index 74d5e9604b0fc8f29badf517c4cd1dea2cb90826..d8e3bb444ffd610993f313ceb7bfe78f605adc6f 100644
--- a/api/tests/music/test_models.py
+++ b/api/tests/music/test_models.py
@@ -5,6 +5,7 @@ import pytest
from django.utils import timezone
from django.urls import reverse
+from funkwhale_api.common import utils as common_utils
from funkwhale_api.music import importers, models, tasks
from funkwhale_api.federation import utils as federation_utils
@@ -164,6 +165,17 @@ def test_audio_track_mime_type(extention, mimetype, factories):
assert upload.mimetype == mimetype
+@pytest.mark.parametrize("name", ["test.ogg", "test.mp3"])
+def test_audio_track_checksum(name, factories):
+
+ path = os.path.join(DATA_DIR, name)
+ upload = factories["music.Upload"](audio_file__from_path=path, mimetype=None)
+
+ with open(path, "rb") as f:
+ expected = common_utils.get_file_hash(f)
+ assert upload.checksum == expected
+
+
def test_upload_file_name(factories):
name = "test.mp3"
path = os.path.join(DATA_DIR, name)
diff --git a/api/tests/music/test_tasks.py b/api/tests/music/test_tasks.py
index 4fbe6b5132ffab2b58fb932b632310a77841b186..96ba451aae6b2eca4da5dd5c24d726effbdc21a6 100644
--- a/api/tests/music/test_tasks.py
+++ b/api/tests/music/test_tasks.py
@@ -1329,3 +1329,40 @@ def test_can_import_track_with_same_position_in_same_discs_skipped(factories, mo
new_upload.refresh_from_db()
assert new_upload.import_status == "skipped"
+
+
+def test_update_track_metadata(factories):
+ track = factories["music.Track"]()
+ data = {
+ "title": "Peer Gynt Suite no. 1, op. 46: I. Morning",
+ "artist": "Edvard Grieg",
+ "album_artist": "Edvard Grieg; Musopen Symphony Orchestra",
+ "album": "Peer Gynt Suite no. 1, op. 46",
+ "date": "2012-08-15",
+ "position": "4",
+ "disc_number": "2",
+ "musicbrainz_albumid": "a766da8b-8336-47aa-a3ee-371cc41ccc75",
+ "mbid": "bd21ac48-46d8-4e78-925f-d9cc2a294656",
+ "musicbrainz_artistid": "013c8e5b-d72a-4cd3-8dee-6c64d6125823",
+ "musicbrainz_albumartistid": "013c8e5b-d72a-4cd3-8dee-6c64d6125823;5b4d7d2d-36df-4b38-95e3-a964234f520f",
+ "license": "Dummy license: http://creativecommons.org/licenses/by-sa/4.0/",
+ "copyright": "Someone",
+ "comment": "hello there",
+ }
+ tasks.update_track_metadata(metadata.FakeMetadata(data), track)
+
+ track.refresh_from_db()
+
+ assert track.title == data["title"]
+ assert track.position == int(data["position"])
+ assert track.disc_number == int(data["disc_number"])
+ assert track.license.code == "cc-by-sa-4.0"
+ assert track.copyright == data["copyright"]
+ assert str(track.mbid) == data["mbid"]
+ assert track.album.title == data["album"]
+ assert track.album.release_date == datetime.date(2012, 8, 15)
+ assert str(track.album.mbid) == data["musicbrainz_albumid"]
+ assert track.artist.name == data["artist"]
+ assert str(track.artist.mbid) == data["musicbrainz_artistid"]
+ assert track.album.artist.name == "Edvard Grieg"
+ assert str(track.album.artist.mbid) == "013c8e5b-d72a-4cd3-8dee-6c64d6125823"
diff --git a/api/tests/test_import_audio_file.py b/api/tests/test_import_audio_file.py
index c7d5a976c753cd7bee6d40e7a3d36063af3e9116..04c06a8f462fbdd7db3dfe4e31139f7edc5585c6 100644
--- a/api/tests/test_import_audio_file.py
+++ b/api/tests/test_import_audio_file.py
@@ -4,6 +4,8 @@ import pytest
from django.core.management import call_command
from django.core.management.base import CommandError
+from funkwhale_api.common import utils as common_utils
+from funkwhale_api.music.management.commands import import_files
DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "files")
@@ -159,3 +161,194 @@ def test_import_files_in_place(factories, mocker, settings):
def test_storage_rename_utf_8_files(factories):
upload = factories["music.Upload"](audio_file__filename="été.ogg")
assert upload.audio_file.name.endswith("ete.ogg")
+
+
+@pytest.mark.parametrize("name", ["modified", "moved", "created", "deleted"])
+def test_handle_event(name, mocker):
+ handler = mocker.patch.object(import_files, "handle_{}".format(name))
+
+ event = {"type": name}
+ stdout = mocker.Mock()
+ kwargs = {"hello": "world"}
+ import_files.handle_event(event, stdout, **kwargs)
+
+ handler.assert_called_once_with(event=event, stdout=stdout, **kwargs)
+
+
+def test_handle_created(mocker):
+ handle_modified = mocker.patch.object(import_files, "handle_modified")
+
+ event = mocker.Mock()
+ stdout = mocker.Mock()
+ kwargs = {"hello": "world"}
+ import_files.handle_created(event, stdout, **kwargs)
+
+ handle_modified.assert_called_once_with(event, stdout, **kwargs)
+
+
+def test_handle_deleted(factories, mocker):
+ stdout = mocker.Mock()
+ event = {
+ "path": "/path.mp3",
+ }
+ library = factories["music.Library"]()
+ deleted = factories["music.Upload"](
+ library=library,
+ source="file://{}".format(event["path"]),
+ import_status="finished",
+ audio_file=None,
+ )
+ kept = [
+ factories["music.Upload"](
+ library=library,
+ source="file://{}".format(event["path"]),
+ import_status="finished",
+ ),
+ factories["music.Upload"](
+ source="file://{}".format(event["path"]),
+ import_status="finished",
+ audio_file=None,
+ ),
+ ]
+
+ import_files.handle_deleted(
+ event=event, stdout=stdout, library=library, in_place=True
+ )
+
+ with pytest.raises(deleted.DoesNotExist):
+ deleted.refresh_from_db()
+
+ for upload in kept:
+ upload.refresh_from_db()
+
+
+def test_handle_moved(factories, mocker):
+ stdout = mocker.Mock()
+ event = {
+ "src_path": "/path.mp3",
+ "dest_path": "/new_path.mp3",
+ }
+ library = factories["music.Library"]()
+ updated = factories["music.Upload"](
+ library=library,
+ source="file://{}".format(event["src_path"]),
+ import_status="finished",
+ audio_file=None,
+ )
+ untouched = [
+ factories["music.Upload"](
+ library=library,
+ source="file://{}".format(event["src_path"]),
+ import_status="finished",
+ ),
+ factories["music.Upload"](
+ source="file://{}".format(event["src_path"]),
+ import_status="finished",
+ audio_file=None,
+ ),
+ ]
+
+ import_files.handle_moved(
+ event=event, stdout=stdout, library=library, in_place=True
+ )
+
+ updated.refresh_from_db()
+ assert updated.source == "file://{}".format(event["dest_path"])
+ for upload in untouched:
+ source = upload.source
+ upload.refresh_from_db()
+ assert source == upload.source
+
+
+def test_handle_modified_creates_upload(tmpfile, factories, mocker):
+ stdout = mocker.Mock()
+ event = {
+ "path": tmpfile.name,
+ }
+ process_upload = mocker.patch("funkwhale_api.music.tasks.process_upload")
+ library = factories["music.Library"]()
+ import_files.handle_modified(
+ event=event,
+ stdout=stdout,
+ library=library,
+ in_place=True,
+ reference="hello",
+ replace=False,
+ dispatch_outbox=False,
+ broadcast=False,
+ )
+ upload = library.uploads.latest("id")
+ assert upload.source == "file://{}".format(event["path"])
+
+ process_upload.assert_called_once_with(upload_id=upload.pk)
+
+
+def test_handle_modified_skips_existing_checksum(tmpfile, factories, mocker):
+ stdout = mocker.Mock()
+ event = {
+ "path": tmpfile.name,
+ }
+ tmpfile.write(b"hello")
+
+ library = factories["music.Library"]()
+ factories["music.Upload"](
+ checksum=common_utils.get_file_hash(tmpfile),
+ library=library,
+ import_status="finished",
+ )
+ import_files.handle_modified(
+ event=event, stdout=stdout, library=library, in_place=True,
+ )
+ assert library.uploads.count() == 1
+
+
+def test_handle_modified_update_existing_path_if_found(tmpfile, factories, mocker):
+ stdout = mocker.Mock()
+ event = {
+ "path": tmpfile.name,
+ }
+ update_track_metadata = mocker.patch(
+ "funkwhale_api.music.tasks.update_track_metadata"
+ )
+ get_metadata = mocker.patch("funkwhale_api.music.models.Upload.get_metadata")
+ library = factories["music.Library"]()
+ track = factories["music.Track"](attributed_to=library.actor)
+ upload = factories["music.Upload"](
+ source="file://{}".format(event["path"]),
+ track=track,
+ checksum="old",
+ library=library,
+ import_status="finished",
+ audio_file=None,
+ )
+ import_files.handle_modified(
+ event=event, stdout=stdout, library=library, in_place=True,
+ )
+ update_track_metadata.assert_called_once_with(
+ get_metadata.return_value, upload.track,
+ )
+
+
+def test_handle_modified_update_existing_path_if_found_and_attributed_to(
+ tmpfile, factories, mocker
+):
+ stdout = mocker.Mock()
+ event = {
+ "path": tmpfile.name,
+ }
+ update_track_metadata = mocker.patch(
+ "funkwhale_api.music.tasks.update_track_metadata"
+ )
+ library = factories["music.Library"]()
+ factories["music.Upload"](
+ source="file://{}".format(event["path"]),
+ checksum="old",
+ library=library,
+ track__attributed_to=factories["federation.Actor"](),
+ import_status="finished",
+ audio_file=None,
+ )
+ import_files.handle_modified(
+ event=event, stdout=stdout, library=library, in_place=True,
+ )
+ update_track_metadata.assert_not_called()
diff --git a/changes/changelog.d/1093.bugfix b/changes/changelog.d/1093.bugfix
index 1d9a15e59171bbd412da829645095cb6228a9c64..0295841afbc6bf9e3eec87a27cede78ea104e055 100644
--- a/changes/changelog.d/1093.bugfix
+++ b/changes/changelog.d/1093.bugfix
@@ -1 +1 @@
-Fixed mimetype detection issue that broke transcoding on some tracks (#1093). Run ``python manage.py fix_uploads --mimetypes`` to set proper mimetypes on existing uploads.
+Fixed mimetype detection issue that broke transcoding on some tracks (#1093). Run ``python manage.py fix_uploads --mimetype`` to set proper mimetypes on existing uploads.
diff --git a/changes/changelog.d/721.feature b/changes/changelog.d/721.feature
new file mode 100644
index 0000000000000000000000000000000000000000..332f992eb920af097722f0613149590f1a918416
--- /dev/null
+++ b/changes/changelog.d/721.feature
@@ -0,0 +1 @@
+Support a --watch mode with ``import_files`` to automatically add, update and remove files when filesystem is updated (#721)
diff --git a/docs/admin/importing-music.rst b/docs/admin/importing-music.rst
index bc0a642cbf6d651ec869e9c38716e4cf8b86c664..bff23845bf7a7bae4fbf77febd741af10453798d 100644
--- a/docs/admin/importing-music.rst
+++ b/docs/admin/importing-music.rst
@@ -1,15 +1,21 @@
-Importing music
-================
+Importing music from the server
+===============================
-From music directory on the server
-----------------------------------
-
-You can import music files in Funkwhale assuming they are located on the server
-and readable by the Funkwhale application. Your music files should contain at
+Funkwhale can import music files that are located on the server assuming
+they readable by the Funkwhale application. Your music files should contain at
least an ``artist``, ``album`` and ``title`` tags, but we recommend you tag
it extensively using a proper tool, such as Beets or Musicbrainz Picard.
-You can import those tracks as follows, assuming they are located in
+Funkwhale supports two different import modes:
+
+- copy (the default): files are copied into Funkwhale's internal storage. This means importing a 1GB library will result in the same amount of space being used by Funkwhale.
+- :ref:`in-place <in-place-import>` (when the ``--in-place`` is provided): files are referenced in Funkwhale's DB but not copied or touched in anyway. This is useful if you have a huge library, or one that is updated by an external tool such as Beets..
+
+.. note::
+
+ In Funkwhale 1.0, **the default behaviour will change to in-place import**
+
+Regardless of the mode you're choosing, import works as described below, assuming your files are located in
``/srv/funkwhale/data/music``:
.. code-block:: bash
@@ -17,6 +23,17 @@ You can import those tracks as follows, assuming they are located in
export LIBRARY_ID="<your_libary_id>"
python api/manage.py import_files $LIBRARY_ID "/srv/funkwhale/data/music/" --recursive --noinput
+.. note::
+ You'll have to create a library in the Web UI before to get your library ID. Simply visit
+ https://yourdomain/content/libraries/ to create one.
+
+ Library IDs are available in library urls or sharing link. In this example:
+ https://funkwhale.instance/content/libraries/769a2ae3-eb3d-4aff-9f94-2c4d80d5c2d1,
+ the library ID is 769a2bc3-eb1d-4aff-9f84-2c4d80d5c2d1
+
+ You can use only the first characters of the ID when calling the command, like that:
+ ``export LIBRARY_ID="769a2bc3"``
+
When you use docker, the ``/srv/funkwhale/data/music`` is mounted from the host
to the ``/music`` directory on the container:
@@ -32,16 +49,6 @@ When you installed Funkwhale via ansible, you need to call a script instead of P
export LIBRARY_ID="<your_libary_id>"
/srv/funkwhale/manage import_files $LIBRARY_ID "/srv/funkwhale/data/music/" --recursive --noinput
-.. note::
- You'll have to create a library in the Web UI before to get your library ID. Simply visit
- https://yourdomain/content/libraries/ to create one.
-
- Library IDs are available in library urls or sharing link. In this example:
- https://funkwhale.instance/content/libraries/769a2ae3-eb3d-4aff-9f94-2c4d80d5c2d1,
- the library ID is 769a2bc3-eb1d-4aff-9f84-2c4d80d5c2d1
-
- You can use only the first characters of the ID when calling the command, like that:
- ``export LIBRARY_ID="769a2bc3"``
The import command supports several options, and you can check the help to
get details::
@@ -63,6 +70,7 @@ get details::
At the moment, only Flac, OGG/Vorbis and MP3 files with ID3 tags are supported
+
.. _in-place-import:
In-place import
@@ -88,14 +96,6 @@ configuration options to ensure the webserver can serve them properly:
- :ref:`setting-MUSIC_DIRECTORY_PATH`
- :ref:`setting-MUSIC_DIRECTORY_SERVE_PATH`
-.. warning::
-
- While in-place import is faster and less disk-space-hungry, it's also
- more fragile: if, for some reason, you move or rename the source files,
- Funkwhale will not be able to serve those files anymore.
-
- Thus, be especially careful when you manipulate the source files.
-
We recommend you symlink all your music directories into ``/srv/funkwhale/data/music``
and run the `import_files` command from that directory. This will make it possible
to use multiple music directories, without any additional configuration
@@ -134,6 +134,49 @@ If you want to go with symlinks, ensure each symlinked directory is mounted as a
# add your symlinked dirs here
- /media/nfsshare:/media/nfsshare:ro
+Metadata updates
+^^^^^^^^^^^^^^^^
+
+When doing an import with in ``in-place`` mode, the importer will also check and update existing entries
+found in the database. For instance, if a file was imported, the ID3 Title tag was updated, and you rerun a scan,
+Funkwhale will pick up the new title. The following fields can be updated this way:
+
+- Track mbid
+- Track title
+- Track position and disc number
+- Track license and copyright
+- Album cover
+- Album title
+- Album mbid
+- Album release date
+- Artist name
+- Artist mbid
+- Album artist name
+- Album artist mbid
+
+
+React to filesystem events with ``--watch``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If you have a really big library or one that is updated quite often, running the ``import_files`` command by hand
+may not be practical. To help with this use case, the ``import_files`` command supports a ``--watch`` flag that will observes filesystem events
+instead of performing a full import.
+
+File creation, move, update and removal are handled when ``--watch`` is provided:
+
+- Files created in the watched directory are imported immediatly
+- If using ``in-place`` mode, files updates trigger a metadata update on the corresponding entries
+- If using ``in-place`` mode, files that are moved and known by Funkwhale will see their path updated in Funkwhale's DB
+- If using ``in-place`` mode, files that are removed and known by Funkwhale will be removed from Funkwhale's DB
+
+Pruning dangling metadata with ``--prune``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Funkwhale is, by design, conservative with music metadata in its database. If you remove a file from Funkwhale's DB,
+the corresponding artist, album and track object won't be deleted by default.
+
+If you want to prune dangling metadata from the database once the ``import_files`` command is over, simply add the ``--prune`` flag.
+This also works in with ``--watch``.
Album covers
^^^^^^^^^^^^
@@ -159,9 +202,3 @@ under creative commons (courtesy of Jamendo):
./download-tracks.sh music.txt
This will download a bunch of zip archives (one per album) under the ``data/music`` directory and unzip their content.
-
-From other instances
---------------------
-
-Funkwhale also supports importing music from other instances. Please refer
-to :doc:`../federation/index` for more details.