Skip to content
Snippets Groups Projects
import_files.py 14.4 KiB
Newer Older
import itertools
import os
import time
import urllib.parse

from django.conf import settings
from django.core.files import File
from django.core.management.base import BaseCommand, CommandError
from django.utils import timezone

from funkwhale_api.music import models, tasks, utils


def crawl_dir(dir, extensions, recursive=True):
    """Yield the paths of files found at, or below, ``dir``.

    If ``dir`` is itself a regular file it is yielded unchanged, without any
    extension check. Otherwise the directory is scanned and every file whose
    name ends with ``.<extension>`` (compared case-insensitively) for one of
    the given ``extensions`` is yielded exactly once, even when several
    extensions match the same file.

    Args:
        dir: Path of the file or directory to inspect.
        extensions: Iterable of extensions without the leading dot,
            e.g. ``["mp3", "ogg"]``.
        recursive: When true, sub-directories are crawled as well.

    Yields:
        The path of each matching file, as reported by ``os.scandir``.
    """
    if os.path.isfile(dir):
        yield dir
        return

    # Materialise once so a one-shot iterable survives the recursive calls.
    extensions = tuple(extensions)
    # str.endswith accepts a tuple: one test per file, and a file can never be
    # yielded twice when several extensions match (e.g. "mp3" and "MP3").
    suffixes = tuple(".{}".format(e.lower()) for e in extensions)

    with os.scandir(dir) as scanner:
        for entry in scanner:
            if entry.is_file():
                if entry.name.lower().endswith(suffixes):
                    yield entry.path
            elif recursive and entry.is_dir():
                yield from crawl_dir(entry.path, extensions, recursive=recursive)


def batch(iterable, n=1):
    """Split ``iterable`` into consecutive lists of at most ``n`` items.

    The input is consumed lazily, one batch at a time, so it can be a
    generator over a very large collection. Only non-empty batches are
    yielded: an empty input yields nothing, and no trailing empty list is
    produced when the number of items is a multiple of ``n``.

    Args:
        iterable: Any iterable (an iterator/generator or a plain sequence).
        n: Maximum number of items per batch, must be at least 1.

    Yields:
        Lists holding up to ``n`` consecutive items.

    Raises:
        ValueError: If ``n`` is lower than 1 (this would otherwise yield
            empty batches forever).
    """
    if n < 1:
        raise ValueError("Batch size must be at least 1, got {}".format(n))

    # iter() is a no-op on iterators and lets plain sequences be batched too.
    iterator = iter(iterable)
    while True:
        current = list(itertools.islice(iterator, n))
        if not current:
            return
        yield current
Eliot Berriot's avatar
Eliot Berriot committed
    help = "Import audio files mathinc given glob pattern"
        parser.add_argument(
            "library_id",
            type=str,
            help=(
                "A local library identifier where the files should be imported. "
                "You can use the full uuid such as e29c5be9-6da3-4d92-b40b-4970edd3ee4b "
                "or only a small portion of it, starting from the beginning, such as "
                "e29c5be9"
            ),
        )
        parser.add_argument("path", nargs="+", type=str)
Eliot Berriot's avatar
Eliot Berriot committed
            "--recursive",
            action="store_true",
            dest="recursive",
Eliot Berriot's avatar
Eliot Berriot committed
            help="Will match the pattern recursively (including subdirectories)",
Eliot Berriot's avatar
Eliot Berriot committed
            "--username",
            dest="username",
            help="The username of the user you want to be bound to the import",
Eliot Berriot's avatar
Eliot Berriot committed
            "--async",
            action="store_true",
Eliot Berriot's avatar
Eliot Berriot committed
            help="Will launch celery tasks for each file to import instead of doing it synchronously and block the CLI",
        parser.add_argument(
Eliot Berriot's avatar
Eliot Berriot committed
            "--exit",
            "-x",
            action="store_true",
            dest="exit_on_failure",
Eliot Berriot's avatar
Eliot Berriot committed
            help="Use this flag to disable error catching",
        )
        parser.add_argument(
Eliot Berriot's avatar
Eliot Berriot committed
            "--in-place",
            "-i",
            action="store_true",
            dest="in_place",
            default=False,
            help=(
Eliot Berriot's avatar
Eliot Berriot committed
                "Import files without duplicating them into the media directory."
                "For in-place import to work, the music files must be readable"
                "by the web-server and funkwhale api and celeryworker processes."
                "You may want to use this if you have a big music library to "
                "import and not much disk space available."
            ),
        parser.add_argument(
            "--replace",
            action="store_true",
            dest="replace",
            default=False,
            help=(
                "Use this flag to replace duplicates (tracks with same "
                "musicbrainz mbid, or same artist, album and title) on import "
                "with their newest version."
Eliot Berriot's avatar
Eliot Berriot committed
        parser.add_argument(
            "--outbox",
            action="store_true",
            dest="outbox",
            default=False,
            help=(
                "Use this flag to notify library followers of newly imported files. "
                "You'll likely want to keep this disabled for CLI imports, especially if"
                "you plan to import hundreds or thousands of files, as it will cause a lot "
                "of overhead on your server and on servers you are federating with."
            ),
        )
        parser.add_argument("-e", "--extension", nargs="+")
Eliot Berriot's avatar
Eliot Berriot committed

        parser.add_argument(
            "--broadcast",
            action="store_true",
            dest="broadcast",
            default=False,
            help=(
                "Use this flag to enable realtime updates about the import in the UI. "
                "This causes some overhead, so it's disabled by default."
            ),
        )

        parser.add_argument(
            "--reference",
            action="store",
            dest="reference",
            default=None,
            help=(
                "A custom reference for the import. Leave this empty to have a random "
                "reference being generated for you."
            ),
        )
Eliot Berriot's avatar
Eliot Berriot committed
            "--noinput",
            "--no-input",
            action="store_false",
            dest="interactive",
            help="Do NOT prompt the user for input of any kind.",
        )

        parser.add_argument(
            "--batch-size",
            "-s",
            dest="batch_size",
            default=1000,
            type=int,
            help="Size of each batch, only used when crawling large collections",
        )
    def handle(self, *args, **options):
        """Crawl the requested paths and import what is found, batch by batch.

        The target library is looked up from ``options["library_id"]``, which
        may be a full uuid or only its beginning. When ``options["in_place"]``
        is set, every requested path must live under
        ``settings.MUSIC_DIRECTORY_PATH``. Files are then crawled lazily and
        handed to ``handle_batch`` in groups of ``options["batch_size"]``;
        a summary and the list of failed paths are printed at the end.

        Raises:
            CommandError: If the library id is unknown or ambiguous, if the
                library has no local user, or if an in-place import is
                requested with a missing/mismatching MUSIC_DIRECTORY_PATH.
        """
        # Read by handle_batch to decide whether the confirmation prompt
        # still has to be shown; reset for each run of the command.
        self.is_confirmed = False
        try:
            library = models.Library.objects.select_related("actor__user").get(
                uuid__startswith=options["library_id"]
            )
        except models.Library.DoesNotExist:
            raise CommandError("Invalid library id")
        except models.Library.MultipleObjectsReturned:
            # A short uuid prefix can match several libraries.
            raise CommandError(
                "Library id {} matches several libraries, please provide a "
                "longer identifier".format(options["library_id"])
            )

        if not library.actor.get_user():
            raise CommandError("Library {} is not a local library".format(library.uuid))

        if options["in_place"]:
            self.stdout.write(
                "Checking imported paths against settings.MUSIC_DIRECTORY_PATH"
            )
            music_dir = settings.MUSIC_DIRECTORY_PATH
            if not music_dir:
                raise CommandError(
                    "Importing in-place requires setting the "
                    "MUSIC_DIRECTORY_PATH variable"
                )
            # Compare normalised paths on a directory boundary: a plain
            # startswith() would accept "/music2" or "/music/../etc" when the
            # music directory is "/music".
            music_root = os.path.normpath(music_dir)
            root_prefix = os.path.join(music_root, "")
            for import_path in options["path"]:
                candidate = os.path.normpath(import_path)
                if candidate != music_root and not candidate.startswith(root_prefix):
                    raise CommandError(
                        "Importing in-place only works if importing "
                        "from {} (MUSIC_DIRECTORY_PATH), as this directory "
                        "needs to be accessible by the webserver. "
                        "Culprit: {}".format(music_dir, import_path)
                    )

        extensions = options.get("extension") or utils.SUPPORTED_EXTENSIONS
        # Chain one lazy crawler per requested path; nothing is read from the
        # filesystem until the batches below are consumed.
        crawler = itertools.chain(
            *[
                crawl_dir(p, extensions=extensions, recursive=options["recursive"])
                for p in options["path"]
            ]
        )
        errors = []
        total = 0
        start_time = time.time()
        reference = options["reference"] or "cli-{}".format(timezone.now().isoformat())

        import_url = "{}://{}/content/libraries/{}/upload?{}"
        import_url = import_url.format(
            settings.FUNKWHALE_PROTOCOL,
            settings.FUNKWHALE_HOSTNAME,
            str(library.uuid),
            urllib.parse.urlencode([("import", reference)]),
        )
        details_message = (
            "For details, please refer to import reference '{}' or URL {}".format(
                reference, import_url
            )
        )
        self.stdout.write(details_message)

        batch_start = None
        batch_duration = None
        for i, entries in enumerate(batch(crawler, options["batch_size"])):
            total += len(entries)
            batch_start = time.time()
            time_stats = ""
            if i > 0:
                # batch_duration was set at the end of the previous iteration.
                time_stats = " - running for {}s, previous batch took {}s".format(
                    int(time.time() - start_time), int(batch_duration),
                )
            if entries:
                # Report the real size: the last batch is usually smaller
                # than the configured batch size.
                self.stdout.write(
                    "Handling batch {} ({} items){}".format(
                        i + 1, len(entries), time_stats,
                    )
                )
                batch_errors = self.handle_batch(
                    library=library,
                    paths=entries,
                    batch=i + 1,
                    reference=reference,
                    options=options,
                )
                if batch_errors:
                    errors += batch_errors

            batch_duration = time.time() - batch_start

        message = "Successfully imported {} tracks in {}s"
        if options["async_"]:
            message = "Successfully launched import for {} tracks in {}s"
        self.stdout.write(
            message.format(total - len(errors), int(time.time() - start_time))
        )
        if errors:
            self.stderr.write("{} tracks could not be imported:".format(len(errors)))

            for path, error in errors:
                self.stderr.write("- {}: {}".format(path, error))

        self.stdout.write(details_message)

    def handle_batch(self, library, paths, batch, reference, options):
        for m in paths:
            # In some situations, the path is encoded incorrectly on the filesystem
            # so we filter out faulty paths and display a warning to the user.
Eliot Berriot's avatar
Eliot Berriot committed
            # see https://dev.funkwhale.audio/funkwhale/funkwhale/issues/138
            try:
                m.encode("utf-8")
                matching.append(m)
            except UnicodeEncodeError:
                try:
                    previous = matching[-1]
                except IndexError:
                    previous = None
                self.stderr.write(
                    self.style.WARNING(
                        "[warning] Ignoring undecodable path. Previous ok file was {}".format(
                            previous
                        )
                    )
                )

Eliot Berriot's avatar
Eliot Berriot committed
            raise CommandError("No file matching pattern, aborting")
        if options["replace"]:
            filtered = {"initial": matching, "skipped": [], "new": matching}
            message = "  - {} files to be replaced"
            import_paths = matching
        else:
            filtered = self.filter_matching(matching, library)
            message = "  - {} files already found in database"
            import_paths = filtered["new"]

        self.stdout.write("  Import summary:")
Eliot Berriot's avatar
Eliot Berriot committed
        self.stdout.write(
            "  - {} files found matching this pattern: {}".format(
Eliot Berriot's avatar
Eliot Berriot committed
                len(matching), options["path"]
            )
        )
        self.stdout.write(message.format(len(filtered["skipped"])))

        self.stdout.write("  - {} new files".format(len(filtered["new"])))
Eliot Berriot's avatar
Eliot Berriot committed

        if batch == 1:
            self.stdout.write(
                "  Selected options: {}".format(
                    ", ".join(
                        ["in place" if options["in_place"] else "copy music files"]
                    )
                )
Eliot Berriot's avatar
Eliot Berriot committed
            )
        if len(filtered["new"]) == 0:
            self.stdout.write("  Nothing new to import, exiting")
        if options["interactive"] and not self.is_confirmed:
Eliot Berriot's avatar
Eliot Berriot committed
                "Are you sure you want to do this?\n\n"
                "Type 'yes' to continue, or 'no' to cancel: "
            )
Eliot Berriot's avatar
Eliot Berriot committed
            if input("".join(message)) != "yes":
            self.is_confirmed = True
        errors = self.do_import(
            import_paths,
            library=library,
            reference=reference,
            batch=batch,
            options=options,
Eliot Berriot's avatar
Eliot Berriot committed
        )
        return errors
    def filter_matching(self, matching, library):
Eliot Berriot's avatar
Eliot Berriot committed
        sources = ["file://{}".format(p) for p in matching]
        # we skip reimport for path that are already found
Eliot Berriot's avatar
Eliot Berriot committed
        # as a Upload.source
        existing = library.uploads.filter(source__in=sources, import_status="finished")
Eliot Berriot's avatar
Eliot Berriot committed
        existing = existing.values_list("source", flat=True)
        existing = set([p.replace("file://", "", 1) for p in existing])
        skipped = set(matching) & existing
        result = {
Eliot Berriot's avatar
Eliot Berriot committed
            "initial": matching,
            "skipped": list(sorted(skipped)),
            "new": list(sorted(set(matching) - skipped)),
    def do_import(self, paths, library, reference, batch, options):
        message = "[batch {batch}] {i}/{total} Importing {path}..."
        if options["async_"]:
            message = "[batch {batch}] {i}/{total} Launching import for {path}..."
        # we create an upload binded to the library
        async_ = options["async_"]
        for i, path in list(enumerate(paths)):
            if options["verbosity"] > 1:
                self.stdout.write(
                    message.format(batch=batch, path=path, i=i + 1, total=len(paths))
                )
                self.create_upload(
                    path,
                    reference,
                    library,
                    async_,
                    options["replace"],
                    options["in_place"],
Eliot Berriot's avatar
Eliot Berriot committed
                    options["outbox"],
                    options["broadcast"],
Eliot Berriot's avatar
Eliot Berriot committed
                if options["exit_on_failure"]:
Eliot Berriot's avatar
Eliot Berriot committed
                m = "Error while importing {}: {} {}".format(
                    path, e.__class__.__name__, e
                )
Eliot Berriot's avatar
Eliot Berriot committed
                errors.append((path, "{} {}".format(e.__class__.__name__, e)))
Eliot Berriot's avatar
Eliot Berriot committed
    def create_upload(
        self,
        path,
        reference,
        library,
        async_,
        replace,
        in_place,
        dispatch_outbox,
        broadcast,
    ):
        """Create an ``Upload`` for ``path`` in ``library`` and start its processing.

        Args:
            path: Filesystem path of the audio file; stored on the upload as
                ``"file://" + path``.
            reference: Import reference stored on the upload.
            library: Library the upload is attached to.
            async_: When true, processing is dispatched through
                ``tasks.process_upload.delay``; otherwise
                ``tasks.process_upload`` is called directly.
            replace: Stored in the upload's import metadata ("replace").
            in_place: When false, the file content is also saved into
                ``upload.audio_file``; when true only the source path is
                recorded.
            dispatch_outbox: Stored in the import metadata ("dispatch_outbox").
            broadcast: Stored in the import metadata ("broadcast").
        """
        import_handler = tasks.process_upload.delay if async_ else tasks.process_upload
        upload = models.Upload(library=library, import_reference=reference)
        upload.source = "file://" + path
        upload.import_metadata = {
            "funkwhale": {
                "config": {
                    "replace": replace,
                    "dispatch_outbox": dispatch_outbox,
                    "broadcast": broadcast,
                }
            }
        }
        if not in_place:
            name = os.path.basename(path)
            with open(path, "rb") as f:
                # save=False: the model is persisted once, just below.
                upload.audio_file.save(name, File(f), save=False)

        upload.save()
        # The upload must be saved first, the handler only receives its pk.
        import_handler(upload_id=upload.pk)