From 101ae278859ba1efc8e0559a147821a4338a83e2 Mon Sep 17 00:00:00 2001
From: Eliot Berriot <contact@eliotberriot.com>
Date: Tue, 23 Apr 2019 18:00:00 +0200
Subject: [PATCH] Fix #565: store media files in S3 bucket

---
 api/config/settings/common.py               | 22 +++++
 api/funkwhale_api/federation/serializers.py |  2 +-
 api/funkwhale_api/music/models.py           | 10 +++
 api/funkwhale_api/music/views.py            |  3 +
 api/requirements/base.txt                   |  2 +
 changes/changelog.d/565.feature             |  1 +
 changes/notes.rst                           | 10 +++
 deploy/docker.nginx.template                | 13 ++-
 deploy/env.prod.sample                      | 16 ++++
 deploy/nginx.template                       | 14 ++++
 dev.yml                                     | 19 +++++
 docker/nginx/conf.dev                       | 14 +++-
 docs/admin/external-storages.rst            | 92 +++++++++++++++++++++
 docs/admin/index.rst                        |  1 +
 14 files changed, 212 insertions(+), 7 deletions(-)
 create mode 100644 changes/changelog.d/565.feature
 create mode 100644 docs/admin/external-storages.rst

diff --git a/api/config/settings/common.py b/api/config/settings/common.py
index 4f97756b88..7ee5171a17 100644
--- a/api/config/settings/common.py
+++ b/api/config/settings/common.py
@@ -306,6 +306,28 @@ STATIC_ROOT = env("STATIC_ROOT", default=str(ROOT_DIR("staticfiles")))
 STATIC_URL = env("STATIC_URL", default="/staticfiles/")
 DEFAULT_FILE_STORAGE = "funkwhale_api.common.storage.ASCIIFileSystemStorage"
 
+# S3-compatible media storage, handled by django-storages' boto3 backend.
+# The backend is only enabled when AWS_ACCESS_KEY_ID is set in the
+# environment; otherwise DEFAULT_FILE_STORAGE keeps the filesystem storage
+# configured above. S3-compatible servers such as Minio use the same AWS_*
+# variables, with AWS_S3_ENDPOINT_URL pointing at the server.
+
+# Do not force an ACL on uploaded objects
+AWS_DEFAULT_ACL = None
+# Generate plain object URLs, without signed query-string parameters
+AWS_QUERYSTRING_AUTH = False
+
+AWS_ACCESS_KEY_ID = env("AWS_ACCESS_KEY_ID", default=None)
+
+if AWS_ACCESS_KEY_ID:
+    # No default is provided for the secret key and the bucket name: they
+    # are expected in the environment as soon as an access key is set
+    AWS_SECRET_ACCESS_KEY = env("AWS_SECRET_ACCESS_KEY")
+    AWS_STORAGE_BUCKET_NAME = env("AWS_STORAGE_BUCKET_NAME")
+    AWS_S3_ENDPOINT_URL = env("AWS_S3_ENDPOINT_URL", default=None)
+    AWS_LOCATION = env("AWS_LOCATION", default="")
+    DEFAULT_FILE_STORAGE = "storages.backends.s3boto3.S3Boto3Storage"
+
 # See: https://docs.djangoproject.com/en/dev/ref/contrib/staticfiles/#std:setting-STATICFILES_DIRS
 STATICFILES_DIRS = (str(APPS_DIR.path("static")),)
 
diff --git a/api/funkwhale_api/federation/serializers.py b/api/funkwhale_api/federation/serializers.py
index 3e7618c9cf..8a2fcefeaa 100644
--- a/api/funkwhale_api/federation/serializers.py
+++ b/api/funkwhale_api/federation/serializers.py
@@ -838,7 +838,7 @@ class AlbumSerializer(MusicEntitySerializer):
             d["cover"] = {
                 "type": "Link",
                 "href": utils.full_url(instance.cover.url),
-                "mediaType": mimetypes.guess_type(instance.cover.path)[0]
+                "mediaType": mimetypes.guess_type(instance.cover_path)[0]
                 or "image/jpeg",
             }
         if self.context.get("include_ap_context", self.parent is None):
diff --git a/api/funkwhale_api/music/models.py b/api/funkwhale_api/music/models.py
index db9b8cbb27..5f42ed7a3b 100644
--- a/api/funkwhale_api/music/models.py
+++ b/api/funkwhale_api/music/models.py
@@ -346,6 +346,16 @@ class Album(APIModelMixin):
     def __str__(self):
         return self.title
 
+    @property
+    def cover_path(self):
+        """Return the cover's path (its name on external storages), or None."""
+        if not self.cover:  # no cover set
+            return None
+        try:
+            return self.cover.path
+        except NotImplementedError:  # external storage: fall back to the name
+            return self.cover.name
+
     @property
     def tags(self):
         t = []
diff --git a/api/funkwhale_api/music/views.py b/api/funkwhale_api/music/views.py
index 6d5e154555..1723959906 100644
--- a/api/funkwhale_api/music/views.py
+++ b/api/funkwhale_api/music/views.py
@@ -240,6 +240,9 @@ def get_file_path(audio_file):
                     "MUSIC_DIRECTORY_PATH to serve in-place imported files"
                 )
             path = "/music" + audio_file.replace(prefix, "", 1)
+        if path.startswith(("http://", "https://")):
+            # External storage URL: the reverse proxy's media location proxies it
+            return (settings.PROTECT_FILES_PATH + "/media/" + path).encode("utf-8")
         return (settings.PROTECT_FILES_PATH + path).encode("utf-8")
     if t == "apache2":
         try:
diff --git a/api/requirements/base.txt b/api/requirements/base.txt
index fe4efac47f..57617b103c 100644
--- a/api/requirements/base.txt
+++ b/api/requirements/base.txt
@@ -69,3 +69,5 @@ aiohttp==3.5.4
 autobahn>=19.3.2
 
 django-oauth-toolkit==1.2
+django-storages==1.7.1
+boto3
diff --git a/changes/changelog.d/565.feature b/changes/changelog.d/565.feature
new file mode 100644
index 0000000000..9f2a92a719
--- /dev/null
+++ b/changes/changelog.d/565.feature
@@ -0,0 +1 @@
+Support S3-compatible storages for media files (#565)
diff --git a/changes/notes.rst b/changes/notes.rst
index 6299c7c8c5..40f7e3c59d 100644
--- a/changes/notes.rst
+++ b/changes/notes.rst
@@ -79,6 +79,16 @@ or invalid, and additional debug information to share in your support requests.
 
 This information is available in all pages that list uploads, when clicking on the button next to the upload status.
 
+Support for S3-compatible storages to store media files
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Storing all media files on the Funkwhale server itself may not be possible or desirable
+in all scenarios. You can now configure Funkwhale to store those files in an S3
+bucket instead.
+
+Check out https://docs.funkwhale.audio/admin/external-storages.html if you want to use
+this feature.
+
 Prune library command
 ^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/deploy/docker.nginx.template b/deploy/docker.nginx.template
index fd99c07050..4319756297 100644
--- a/deploy/docker.nginx.template
+++ b/deploy/docker.nginx.template
@@ -57,13 +57,20 @@ server {
         alias ${MEDIA_ROOT}/;
     }
 
+    # this is an internal location that is used to serve
+    # audio files once correct permission / authentication
+    # has been checked on API side
     location /_protected/media {
-        # this is an internal location that is used to serve
-        # audio files once correct permission / authentication
-        # has been checked on API side
         internal;
         alias   ${MEDIA_ROOT};
+
     }
+    # Comment the previous location and uncomment this one if you're storing
+    # media files in a S3 bucket
+    # location ~ /_protected/media/(.+) {
+    #     internal;
+    #     proxy_pass $1;
+    # }
 
     location /_protected/music {
         # this is an internal location that is used to serve
diff --git a/deploy/env.prod.sample b/deploy/env.prod.sample
index a71af5310b..1f09046ce8 100644
--- a/deploy/env.prod.sample
+++ b/deploy/env.prod.sample
@@ -136,3 +136,19 @@ FUNKWHALE_FRONTEND_PATH=/srv/funkwhale/front/dist
 
 # Nginx related configuration
 NGINX_MAX_BODY_SIZE=100M
+
+## External storages configuration
+# Funkwhale can store uploaded files on Amazon S3 and S3-compatible storages (such as Minio)
+# Fill in the variables below, uncommenting the optional ones you need
+
+AWS_ACCESS_KEY_ID=
+AWS_SECRET_ACCESS_KEY=
+AWS_STORAGE_BUCKET_NAME=
+# An optional bucket subdirectory where you want to store the files. This is especially useful
+# if you plan to share the bucket with other services
+# AWS_LOCATION=
+
+# If you use an S3-compatible storage such as minio, set the following variable to
+# the full URL to the storage server. Example:
+#   AWS_S3_ENDPOINT_URL=https://minio.mydomain.com
+# AWS_S3_ENDPOINT_URL=
diff --git a/deploy/nginx.template b/deploy/nginx.template
index 7cdee70f48..bde535d41b 100644
--- a/deploy/nginx.template
+++ b/deploy/nginx.template
@@ -109,8 +109,22 @@ server {
         # audio files once correct permission / authentication
         # has been checked on API side
         internal;
+    }
+
+    # this is an internal location that is used to serve
+    # audio files once correct permission / authentication
+    # has been checked on API side
+    location /_protected/media {
+        internal;
         alias   ${MEDIA_ROOT};
+
     }
+    # Comment the previous location and uncomment this one if you're storing
+    # media files in a S3 bucket
+    # location ~ /_protected/media/(.+) {
+    #     internal;
+    #     proxy_pass $1;
+    # }
 
     location /_protected/music {
         # this is an internal location that is used to serve
diff --git a/dev.yml b/dev.yml
index ddee631f50..7c58b91059 100644
--- a/dev.yml
+++ b/dev.yml
@@ -63,6 +63,7 @@ services:
 
     depends_on:
       - postgres
+      # - minio
       - redis
     networks:
       - internal
@@ -76,6 +77,7 @@ services:
     build: *backend
     depends_on:
       - postgres
+      # - minio
       - redis
     command: celery -A funkwhale_api.taskapp worker -l debug -B
     environment:
@@ -146,6 +148,23 @@ services:
     volumes:
       - "./docs/swagger.yml:/usr/share/nginx/html/swagger.yml"
 
+  # minio:
+  #   image: minio/minio
+  #   command: server /data
+  #   volumes:
+  #     - "./data/${COMPOSE_PROJECT_NAME-node1}/minio:/data"
+  #   environment:
+  #     - "MINIO_ACCESS_KEY=${AWS_ACCESS_KEY_ID-access_key}"
+  #     - "MINIO_SECRET_KEY=${AWS_SECRET_ACCESS_KEY-secret_key}"
+  #     - "MINIO_HTTP_TRACE=/dev/stdout"
+  #   ports:
+  #     - "9000:9000"
+  #   networks:
+  #     - federation
+  #     - internal
+
+
+
 networks:
   ? internal
   federation:
diff --git a/docker/nginx/conf.dev b/docker/nginx/conf.dev
index 50c3cbc2ef..a47bd477ac 100644
--- a/docker/nginx/conf.dev
+++ b/docker/nginx/conf.dev
@@ -93,13 +93,21 @@ http {
             alias /protected/media/;
         }
 
+        # this is an internal location that is used to serve
+        # audio files once correct permission / authentication
+        # has been checked on API side
         location /_protected/media {
-            # this is an internal location that is used to serve
-            # audio files once correct permission / authentication
-            # has been checked on API side
             internal;
             alias   /protected/media;
+
         }
+        # Comment the previous location and uncomment this one if you're storing
+        # media files in a S3 bucket
+        # location ~ /_protected/media/(.+) {
+        #     internal;
+        #     resolver 127.0.0.11;
+        #     proxy_pass $1;
+        # }
 
         location /_protected/music {
             # this is an internal location that is used to serve
diff --git a/docs/admin/external-storages.rst b/docs/admin/external-storages.rst
new file mode 100644
index 0000000000..73b8889046
--- /dev/null
+++ b/docs/admin/external-storages.rst
@@ -0,0 +1,92 @@
+Using external storages to store Funkwhale content
+==================================================
+
+By default, Funkwhale will store user-uploaded and related media such as audio files,
+transcoded files, avatars and album covers on a server directory.
+
+However, for bigger instances or more complex deployment scenarios, you may want
+to use distributed or external storages.
+
+S3 and S3-compatible servers
+----------------------------
+
+.. note::
+
+    This feature was released in Funkwhale 0.19 and is still considered experimental.
+    Please let us know if you see anything unusual while using it.
+
+Funkwhale supports storing media files on Amazon S3 and compatible implementations such as Minio or Wasabi.
+
+In this scenario, the content itself is stored in the S3 bucket. Non-sensitive media such as
+album covers or user avatars are served directly from the bucket. However, audio files
+are still served by the reverse proxy, to enforce proper authentication.
+
+To enable S3 on Funkwhale, add the following environment variables::
+
+    AWS_ACCESS_KEY_ID=
+    AWS_SECRET_ACCESS_KEY=
+    AWS_STORAGE_BUCKET_NAME=
+    # An optional bucket subdirectory where you want to store the files. This is especially useful
+    # if you plan to share the bucket with other services
+    # AWS_LOCATION=
+
+    # If you use an S3-compatible storage such as minio, set the following variable to
+    # the full URL to the storage server. Example:
+    #   AWS_S3_ENDPOINT_URL=https://minio.mydomain.com
+    # AWS_S3_ENDPOINT_URL=
+
+Then, edit your nginx configuration. On docker setups, the file is located at ``/srv/funkwhale/nginx/funkwhale.template``,
+and at ``/etc/nginx/sites-available/funkwhale.template`` on non-docker setups.
+
+Replace the ``location /_protected/media`` block with the following::
+
+    location ~ /_protected/media/(.+) {
+        internal;
+        proxy_pass $1;
+    }
+
+Then restart Funkwhale and nginx.
+
+From now on, media files will be stored on the S3 bucket you configured. If you already
+had media files before configuring the S3 bucket, you also have to move those to the bucket
+by hand (which is outside the scope of this guide).
+
+.. note::
+
+    At the moment, we do not support S3 when using Apache as a reverse proxy.
+
+
+Securing your S3 bucket
+***********************
+
+It's important to ensure the root of your bucket doesn't list its content,
+which is the default on many S3 servers. Otherwise, anyone could find out the true
+URLs of your audio files and bypass authentication.
+
+To avoid that, you can set the following policy on your bucket::
+
+    {
+        "Version": "2012-10-17",
+        "Statement": [
+            {
+            "Action": [
+                "s3:GetObject"
+            ],
+            "Effect": "Allow",
+            "Principal": {
+                "AWS": [
+                "*"
+                ]
+            },
+            "Resource": [
+                "arn:aws:s3:::<yourbucketname>/*"
+            ],
+            "Sid": "Public"
+            }
+        ]
+    }
+
+If you are using ``awscli``, you can store this policy in a ``/tmp/policy`` file, and
+apply it using the following command::
+
+    aws s3api put-bucket-policy --bucket <yourbucketname> --policy file:///tmp/policy
diff --git a/docs/admin/index.rst b/docs/admin/index.rst
index 55f6bbf56c..8d80e3e0ad 100644
--- a/docs/admin/index.rst
+++ b/docs/admin/index.rst
@@ -14,6 +14,7 @@ Setup Guides
    ../installation/index
    configuration
    importing-music
+   external-storages
 
 Administration
 --------------
-- 
GitLab