Newer
Older
from django.core.files import File
Eliot Berriot
committed
from django.core.management.base import BaseCommand, CommandError
from django.utils import timezone
from funkwhale_api.music import models, tasks, utils
def crawl_dir(dir, extensions, recursive=True):
if os.path.isfile(dir):
yield dir
return
with os.scandir(dir) as scanner:
for entry in scanner:
if entry.is_file():
for e in extensions:
if entry.name.lower().endswith(".{}".format(e.lower())):
yield entry.path
elif recursive and entry.is_dir():
yield from crawl_dir(entry, extensions, recursive=recursive)
def batch(iterable, n=1):
has_entries = True
while has_entries:
current = []
for i in range(0, n):
try:
current.append(next(iterable))
except StopIteration:
has_entries = False
yield current
Eliot Berriot
committed
class Command(BaseCommand):
Eliot Berriot
committed
def add_arguments(self, parser):
parser.add_argument(
"library_id",
type=str,
help=(
"A local library identifier where the files should be imported. "
"You can use the full uuid such as e29c5be9-6da3-4d92-b40b-4970edd3ee4b "
"or only a small portion of it, starting from the beginning, such as "
"e29c5be9"
),
)
parser.add_argument("path", nargs="+", type=str)
Eliot Berriot
committed
parser.add_argument(
"--recursive",
action="store_true",
dest="recursive",
Eliot Berriot
committed
default=False,
help="Will match the pattern recursively (including subdirectories)",
Eliot Berriot
committed
)
parser.add_argument(
"--username",
dest="username",
help="The username of the user you want to be bound to the import",
Eliot Berriot
committed
parser.add_argument(
Eliot Berriot
committed
default=False,
help="Will launch celery tasks for each file to import instead of doing it synchronously and block the CLI",
Eliot Berriot
committed
)
"--exit",
"-x",
action="store_true",
dest="exit_on_failure",
"--in-place",
"-i",
action="store_true",
dest="in_place",
"Import files without duplicating them into the media directory."
"For in-place import to work, the music files must be readable"
"by the web-server and funkwhale api and celeryworker processes."
"You may want to use this if you have a big music library to "
"import and not much disk space available."
),
default=False,
help=(
"Use this flag to replace duplicates (tracks with same "
"musicbrainz mbid, or same artist, album and title) on import "
"with their newest version."
parser.add_argument(
"--outbox",
action="store_true",
dest="outbox",
default=False,
help=(
"Use this flag to notify library followers of newly imported files. "
"You'll likely want to keep this disabled for CLI imports, especially if"
"you plan to import hundreds or thousands of files, as it will cause a lot "
"of overhead on your server and on servers you are federating with."
),
)
parser.add_argument("-e", "--extension", nargs="+")
parser.add_argument(
"--broadcast",
action="store_true",
dest="broadcast",
default=False,
help=(
"Use this flag to enable realtime updates about the import in the UI. "
"This causes some overhead, so it's disabled by default."
),
)
parser.add_argument(
"--reference",
action="store",
dest="reference",
default=None,
help=(
"A custom reference for the import. Leave this empty to have a random "
"reference being generated for you."
),
)
Eliot Berriot
committed
parser.add_argument(
"--noinput",
"--no-input",
action="store_false",
dest="interactive",
Eliot Berriot
committed
help="Do NOT prompt the user for input of any kind.",
)
parser.add_argument(
"--batch-size",
"-s",
dest="batch_size",
default=1000,
type=int,
help="Size of each batch, only used when crawling large collections",
)
def handle(self, *args, **options):
self.is_confirmed = False
try:
library = models.Library.objects.select_related("actor__user").get(
uuid__startswith=options["library_id"]
)
except models.Library.DoesNotExist:
raise CommandError("Invalid library id")
raise CommandError("Library {} is not a local library".format(library.uuid))
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
if options["in_place"]:
self.stdout.write(
"Checking imported paths against settings.MUSIC_DIRECTORY_PATH"
)
for import_path in options["path"]:
p = settings.MUSIC_DIRECTORY_PATH
if not p:
raise CommandError(
"Importing in-place requires setting the "
"MUSIC_DIRECTORY_PATH variable"
)
if p and not import_path.startswith(p):
raise CommandError(
"Importing in-place only works if importing"
"from {} (MUSIC_DIRECTORY_PATH), as this directory"
"needs to be accessible by the webserver."
"Culprit: {}".format(p, import_path)
)
extensions = options.get("extension") or utils.SUPPORTED_EXTENSIONS
crawler = itertools.chain(
*[
crawl_dir(p, extensions=extensions, recursive=options["recursive"])
for p in options["path"]
]
)
errors = []
total = 0
start_time = time.time()
reference = options["reference"] or "cli-{}".format(timezone.now().isoformat())
import_url = "{}://{}/content/libraries/{}/upload?{}"
import_url = import_url.format(
settings.FUNKWHALE_PROTOCOL,
settings.FUNKWHALE_HOSTNAME,
str(library.uuid),
urllib.parse.urlencode([("import", reference)]),
)
self.stdout.write(
"For details, please refer to import reference '{}' or URL {}".format(
reference, import_url
)
)
batch_start = None
batch_duration = None
for i, entries in enumerate(batch(crawler, options["batch_size"])):
total += len(entries)
batch_start = time.time()
time_stats = ""
if i > 0:
time_stats = " - running for {}s, previous batch took {}s".format(
int(time.time() - start_time), int(batch_duration),
)
if entries:
self.stdout.write(
"Handling batch {} ({} items){}".format(
i + 1, options["batch_size"], time_stats,
)
)
batch_errors = self.handle_batch(
library=library,
paths=entries,
batch=i + 1,
reference=reference,
options=options,
)
if batch_errors:
errors += batch_errors
batch_duration = time.time() - batch_start
message = "Successfully imported {} tracks in {}s"
if options["async_"]:
message = "Successfully launched import for {} tracks in {}s"
self.stdout.write(
message.format(total - len(errors), int(time.time() - start_time))
)
if len(errors) > 0:
self.stderr.write("{} tracks could not be imported:".format(len(errors)))
for path, error in errors:
self.stderr.write("- {}: {}".format(path, error))
self.stdout.write(
"For details, please refer to import reference '{}' or URL {}".format(
reference, import_url
)
)
def handle_batch(self, library, paths, batch, reference, options):
Eliot Berriot
committed
matching = []
Eliot Berriot
committed
# In some situations, the path is encoded incorrectly on the filesystem
# so we filter out faulty paths and display a warning to the user.
# see https://dev.funkwhale.audio/funkwhale/funkwhale/issues/138
Eliot Berriot
committed
try:
m.encode("utf-8")
matching.append(m)
except UnicodeEncodeError:
try:
previous = matching[-1]
except IndexError:
previous = None
self.stderr.write(
self.style.WARNING(
"[warning] Ignoring undecodable path. Previous ok file was {}".format(
previous
)
)
)
Eliot Berriot
committed
if not matching:
raise CommandError("No file matching pattern, aborting")
Eliot Berriot
committed
if options["replace"]:
filtered = {"initial": matching, "skipped": [], "new": matching}
filtered = self.filter_matching(matching, library)
message = " - {} files already found in database"
" - {} files found matching this pattern: {}".format(
self.stdout.write(message.format(len(filtered["skipped"])))
self.stdout.write(" - {} new files".format(len(filtered["new"])))
if batch == 1:
self.stdout.write(
" Selected options: {}".format(
", ".join(
["in place" if options["in_place"] else "copy music files"]
)
)
self.stdout.write(" Nothing new to import, exiting")
return
if options["interactive"] and not self.is_confirmed:
Eliot Berriot
committed
message = (
Eliot Berriot
committed
"Type 'yes' to continue, or 'no' to cancel: "
)
Eliot Berriot
committed
raise CommandError("Import cancelled.")
Eliot Berriot
committed
import_paths,
library=library,
reference=reference,
batch=batch,
options=options,
def filter_matching(self, matching, library):
# we skip reimport for path that are already found
existing = library.uploads.filter(source__in=sources, import_status="finished")
existing = existing.values_list("source", flat=True)
existing = set([p.replace("file://", "", 1) for p in existing])
skipped = set(matching) & existing
result = {
"initial": matching,
"skipped": list(sorted(skipped)),
"new": list(sorted(set(matching) - skipped)),
}
return result
def do_import(self, paths, library, reference, batch, options):
message = "[batch {batch}] {i}/{total} Importing {path}..."
message = "[batch {batch}] {i}/{total} Launching import for {path}..."
Eliot Berriot
committed
# we create an upload binded to the library
async_ = options["async_"]
errors = []
for i, path in list(enumerate(paths)):
if options["verbosity"] > 1:
self.stdout.write(
message.format(batch=batch, path=path, i=i + 1, total=len(paths))
)
Eliot Berriot
committed
try:
self.create_upload(
path,
reference,
library,
async_,
options["replace"],
options["in_place"],
Eliot Berriot
committed
except Exception as e:
m = "Error while importing {}: {} {}".format(
path, e.__class__.__name__, e
)
errors.append((path, "{} {}".format(e.__class__.__name__, e)))
def create_upload(
self,
path,
reference,
library,
async_,
replace,
in_place,
dispatch_outbox,
broadcast,
):
import_handler = tasks.process_upload.delay if async_ else tasks.process_upload
upload = models.Upload(library=library, import_reference=reference)
upload.source = "file://" + path
upload.import_metadata = {
"funkwhale": {
"config": {
"replace": replace,
"dispatch_outbox": dispatch_outbox,
"broadcast": broadcast,
}
}
}
upload.audio_file.save(name, File(f), save=False)
upload.save()
import_handler(upload_id=upload.pk)