From 101ae278859ba1efc8e0559a147821a4338a83e2 Mon Sep 17 00:00:00 2001 From: Eliot Berriot <contact@eliotberriot.com> Date: Tue, 23 Apr 2019 18:00:00 +0200 Subject: [PATCH] Fix #565: store media files in S3 bucket --- api/config/settings/common.py | 22 +++++ api/funkwhale_api/federation/serializers.py | 2 +- api/funkwhale_api/music/models.py | 10 +++ api/funkwhale_api/music/views.py | 3 + api/requirements/base.txt | 2 + changes/changelog.d/565.feature | 1 + changes/notes.rst | 10 +++ deploy/docker.nginx.template | 13 ++- deploy/env.prod.sample | 16 ++++ deploy/nginx.template | 14 ++++ dev.yml | 19 +++++ docker/nginx/conf.dev | 14 +++- docs/admin/external-storages.rst | 92 +++++++++++++++++++++ docs/admin/index.rst | 1 + 14 files changed, 212 insertions(+), 7 deletions(-) create mode 100644 changes/changelog.d/565.feature create mode 100644 docs/admin/external-storages.rst diff --git a/api/config/settings/common.py b/api/config/settings/common.py index 4f97756b88..7ee5171a17 100644 --- a/api/config/settings/common.py +++ b/api/config/settings/common.py @@ -306,6 +306,28 @@ STATIC_ROOT = env("STATIC_ROOT", default=str(ROOT_DIR("staticfiles"))) STATIC_URL = env("STATIC_URL", default="/staticfiles/") DEFAULT_FILE_STORAGE = "funkwhale_api.common.storage.ASCIIFileSystemStorage" +AWS_DEFAULT_ACL = None +AWS_QUERYSTRING_AUTH = False +# MINIO_ACCESS_KEY_ID = env("MINIO_ACCESS_KEY_ID", default=None) + +# if MINIO_ACCESS_KEY_ID: +# AWS_ACCESS_KEY_ID = MINIO_ACCESS_KEY_ID +# AWS_SECRET_ACCESS_KEY = env("MINIO_SECRET_KEY") +# AWS_STORAGE_BUCKET_NAME = env("MINIO_STORAGE_BUCKET_NAME") +# AWS_S3_ENDPOINT_URL = env("MINIO_URL") +# AWS_LOCATION = env("MINIO_BUCKET_DIRECTORY", default="") +# DEFAULT_FILE_STORAGE = "storages.backends.s3boto3.S3Boto3Storage" + +AWS_ACCESS_KEY_ID = env("AWS_ACCESS_KEY_ID", default=None) + +if AWS_ACCESS_KEY_ID: + AWS_ACCESS_KEY_ID = AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY = env("AWS_SECRET_ACCESS_KEY") + AWS_STORAGE_BUCKET_NAME = 
env("AWS_STORAGE_BUCKET_NAME") + AWS_S3_ENDPOINT_URL = env("AWS_S3_ENDPOINT_URL", default=None) + AWS_LOCATION = env("AWS_LOCATION", default="") + DEFAULT_FILE_STORAGE = "storages.backends.s3boto3.S3Boto3Storage" + # See: https://docs.djangoproject.com/en/dev/ref/contrib/staticfiles/#std:setting-STATICFILES_DIRS STATICFILES_DIRS = (str(APPS_DIR.path("static")),) diff --git a/api/funkwhale_api/federation/serializers.py b/api/funkwhale_api/federation/serializers.py index 3e7618c9cf..8a2fcefeaa 100644 --- a/api/funkwhale_api/federation/serializers.py +++ b/api/funkwhale_api/federation/serializers.py @@ -838,7 +838,7 @@ class AlbumSerializer(MusicEntitySerializer): d["cover"] = { "type": "Link", "href": utils.full_url(instance.cover.url), - "mediaType": mimetypes.guess_type(instance.cover.path)[0] + "mediaType": mimetypes.guess_type(instance.cover_path)[0] or "image/jpeg", } if self.context.get("include_ap_context", self.parent is None): diff --git a/api/funkwhale_api/music/models.py b/api/funkwhale_api/music/models.py index db9b8cbb27..5f42ed7a3b 100644 --- a/api/funkwhale_api/music/models.py +++ b/api/funkwhale_api/music/models.py @@ -346,6 +346,16 @@ class Album(APIModelMixin): def __str__(self): return self.title + @property + def cover_path(self): + if not self.cover: + return None + try: + return self.cover.path + except NotImplementedError: + # external storage + return self.cover.name + @property def tags(self): t = [] diff --git a/api/funkwhale_api/music/views.py b/api/funkwhale_api/music/views.py index 6d5e154555..1723959906 100644 --- a/api/funkwhale_api/music/views.py +++ b/api/funkwhale_api/music/views.py @@ -240,6 +240,9 @@ def get_file_path(audio_file): "MUSIC_DIRECTORY_PATH to serve in-place imported files" ) path = "/music" + audio_file.replace(prefix, "", 1) + if path.startswith("http://") or path.startswith("https://"): + raise + return (settings.PROTECT_FILES_PATH + "/media/" + path).encode("utf-8") return (settings.PROTECT_FILES_PATH + 
path).encode("utf-8") if t == "apache2": try: diff --git a/api/requirements/base.txt b/api/requirements/base.txt index fe4efac47f..57617b103c 100644 --- a/api/requirements/base.txt +++ b/api/requirements/base.txt @@ -69,3 +69,5 @@ aiohttp==3.5.4 autobahn>=19.3.2 django-oauth-toolkit==1.2 +django-storages==1.7.1 +boto3 diff --git a/changes/changelog.d/565.feature b/changes/changelog.d/565.feature new file mode 100644 index 0000000000..9f2a92a719 --- /dev/null +++ b/changes/changelog.d/565.feature @@ -0,0 +1 @@ +Support S3-compatible storages for media files (#565) diff --git a/changes/notes.rst b/changes/notes.rst index 6299c7c8c5..40f7e3c59d 100644 --- a/changes/notes.rst +++ b/changes/notes.rst @@ -79,6 +79,16 @@ or invalid, and additional debug information to share in your support requests. This information is available in all pages that list uploads, when clicking on the button next to the upload status. +Support for S3-compatible storages to store media files +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Storing all media files on the Funkwhale server itself may not be possible or desirable +in all scenarios. You can now configure Funkwhale to store those files in an S3 +bucket instead. + +Check out https://docs.funkwhale.audio/admin/external-storages.html if you want to use +this feature.
+ Prune library command ^^^^^^^^^^^^^^^^^^^^^ diff --git a/deploy/docker.nginx.template b/deploy/docker.nginx.template index fd99c07050..4319756297 100644 --- a/deploy/docker.nginx.template +++ b/deploy/docker.nginx.template @@ -57,13 +57,20 @@ server { alias ${MEDIA_ROOT}/; } + # this is an internal location that is used to serve + # audio files once correct permission / authentication + # has been checked on API side location /_protected/media { - # this is an internal location that is used to serve - # audio files once correct permission / authentication - # has been checked on API side internal; alias ${MEDIA_ROOT}; + } + # Comment the previous location and uncomment this one if you're storing + # media files in a S3 bucket + # location ~ /_protected/media/(.+) { + # internal; + # proxy_pass $1; + # } location /_protected/music { # this is an internal location that is used to serve diff --git a/deploy/env.prod.sample b/deploy/env.prod.sample index a71af5310b..1f09046ce8 100644 --- a/deploy/env.prod.sample +++ b/deploy/env.prod.sample @@ -136,3 +136,19 @@ FUNKWHALE_FRONTEND_PATH=/srv/funkwhale/front/dist # Nginx related configuration NGINX_MAX_BODY_SIZE=100M + +## External storages configuration +# Funkwhale can store uploaded files on Amazon S3 and S3-compatible storages (such as Minio) +# Uncomment and fill the variables below + +AWS_ACCESS_KEY_ID= +AWS_SECRET_ACCESS_KEY= +AWS_STORAGE_BUCKET_NAME= +# An optional bucket subdirectory where you want to store the files. This is especially useful +# if you plan to share the bucket with other services +# AWS_LOCATION= + +# If you use an S3-compatible storage such as minio, set the following variable to +# the full URL to the storage server.
Example: +# AWS_S3_ENDPOINT_URL=https://minio.mydomain.com +# AWS_S3_ENDPOINT_URL= diff --git a/deploy/nginx.template b/deploy/nginx.template index 7cdee70f48..bde535d41b 100644 --- a/deploy/nginx.template +++ b/deploy/nginx.template @@ -109,8 +109,22 @@ server { # audio files once correct permission / authentication # has been checked on API side internal; + } + + # this is an internal location that is used to serve + # audio files once correct permission / authentication + # has been checked on API side + location /_protected/media { + internal; alias ${MEDIA_ROOT}; + } + # Comment the previous location and uncomment this one if you're storing + # media files in a S3 bucket + # location ~ /_protected/media/(.+) { + # internal; + # proxy_pass $1; + # } location /_protected/music { # this is an internal location that is used to serve diff --git a/dev.yml b/dev.yml index ddee631f50..7c58b91059 100644 --- a/dev.yml +++ b/dev.yml @@ -63,6 +63,7 @@ services: depends_on: - postgres + # - minio - redis networks: - internal @@ -76,6 +77,7 @@ services: build: *backend depends_on: - postgres + # - minio - redis command: celery -A funkwhale_api.taskapp worker -l debug -B environment: @@ -146,6 +148,23 @@ services: volumes: - "./docs/swagger.yml:/usr/share/nginx/html/swagger.yml" + # minio: + # image: minio/minio + # command: server /data + # volumes: + # - "./data/${COMPOSE_PROJECT_NAME-node1}/minio:/data" + # environment: + # - "MINIO_ACCESS_KEY=${AWS_ACCESS_KEY_ID-access_key}" + # - "MINIO_SECRET_KEY=${AWS_SECRET_ACCESS_KEY-secret_key}" + # - "MINIO_HTTP_TRACE: /dev/stdout" + # ports: + # - "9000:9000" + # networks: + # - federation + # - internal + + + networks: ? 
internal federation: diff --git a/docker/nginx/conf.dev b/docker/nginx/conf.dev index 50c3cbc2ef..a47bd477ac 100644 --- a/docker/nginx/conf.dev +++ b/docker/nginx/conf.dev @@ -93,13 +93,21 @@ http { alias /protected/media/; } + # this is an internal location that is used to serve + # audio files once correct permission / authentication + # has been checked on API side location /_protected/media { - # this is an internal location that is used to serve - # audio files once correct permission / authentication - # has been checked on API side internal; alias /protected/media; + } + # Comment the previous location and uncomment this one if you're storing + # media files in a S3 bucket + # location ~ /_protected/media/(.+) { + # internal; + # resolver 127.0.0.11; + # proxy_pass $1; + # } location /_protected/music { # this is an internal location that is used to serve diff --git a/docs/admin/external-storages.rst b/docs/admin/external-storages.rst new file mode 100644 index 0000000000..73b8889046 --- /dev/null +++ b/docs/admin/external-storages.rst @@ -0,0 +1,92 @@ +Using external storages to store Funkwhale content +================================================== + +By default, Funkwhale will store user-uploaded and related media such as audio files, +transcoded files, avatars and album covers on a server directory. + +However, for bigger instances or more complex deployment scenarios, you may want +to use distributed or external storages. + +S3 and S3-compatible servers +---------------------------- + +.. note:: + + This feature was released in Funkwhale 0.19 and is still considered experimental. + Please let us know if you see anything unusual while using it. + +Funkwhale supports storing media files on Amazon S3 and compatible implementations such as Minio or Wasabi. + +In this scenario, the content itself is stored in the S3 bucket. Non-sensitive media such as +album covers or user avatars are served directly from the bucket.
However, audio files +are still served by the reverse proxy, to enforce proper authentication. + +To enable S3 on Funkwhale, add the following environment variables:: + + AWS_ACCESS_KEY_ID= + AWS_SECRET_ACCESS_KEY= + AWS_STORAGE_BUCKET_NAME= + # An optional bucket subdirectory where you want to store the files. This is especially useful + # if you plan to share the bucket with other services + # AWS_LOCATION= + + # If you use an S3-compatible storage such as minio, set the following variable to + # the full URL to the storage server. Example: + # AWS_S3_ENDPOINT_URL=https://minio.mydomain.com + # AWS_S3_ENDPOINT_URL= + +Then, edit your nginx configuration. On docker setups, the file is located at ``/srv/funkwhale/nginx/funkwhale.template``, +and at ``/etc/nginx/sites-available/funkwhale.template`` on non-docker setups. + +Replace the ``location /_protected/media`` block with the following:: + + location ~ /_protected/media/(.+) { + internal; + proxy_pass $1; + } + +Then restart Funkwhale and nginx. + +From now on, media files will be stored on the S3 bucket you configured. If you already +had media files before configuring the S3 bucket, you also have to move those to the bucket +by hand (which is outside the scope of this guide). + +.. note:: + + At the moment, we do not support S3 when using Apache as a reverse proxy. + + +Securing your S3 bucket +*********************** + +It's important to ensure the root of your bucket doesn't list its content, +which is the default on many S3 servers. Otherwise, anyone could find out the true +URLs of your audio files and bypass authentication.
+ +To avoid that, you can set the following policy on your bucket:: + + { + "Version": "2012-10-17", + "Statement": [ + { + "Action": [ + "s3:GetObject" + ], + "Effect": "Allow", + "Principal": { + "AWS": [ + "*" + ] + }, + "Resource": [ + "arn:aws:s3:::<yourbucketname>/*" + ], + "Sid": "Public" + } + ] + } + +If you are using ``awscli``, you can store this policy in a ``/tmp/policy`` file, and +apply it using the following command:: + + aws s3api put-bucket-policy --bucket <yourbucketname> --policy file:///tmp/policy diff --git a/docs/admin/index.rst b/docs/admin/index.rst index 55f6bbf56c..8d80e3e0ad 100644 --- a/docs/admin/index.rst +++ b/docs/admin/index.rst @@ -14,6 +14,7 @@ Setup Guides ../installation/index configuration importing-music + external-storages Administration -------------- -- GitLab