From 4a277c17bb94cabeeeac992ad91146eaa6e7ab65 Mon Sep 17 00:00:00 2001
From: Eliot Berriot <contact@eliotberriot.com>
Date: Mon, 22 Jul 2019 12:12:57 +0200
Subject: [PATCH] Added "refresh=true" API param to artist, track and album
 detail to retrieve up-to-date data if needed

---
 api/config/settings/common.py               |  3 +
 api/funkwhale_api/music/models.py           | 15 ++++
 api/funkwhale_api/music/views.py            | 64 ++++++++++++++
 api/tests/music/test_views.py               | 94 +++++++++++++++++++++
 changes/changelog.d/refetch.enhancement     |  1 +
 docs/swagger.yml                            | 11 +++
 front/src/components/library/AlbumBase.vue  |  2 +-
 front/src/components/library/ArtistBase.vue |  2 +-
 front/src/components/library/TrackBase.vue  |  2 +-
 9 files changed, 191 insertions(+), 3 deletions(-)
 create mode 100644 changes/changelog.d/refetch.enhancement

diff --git a/api/config/settings/common.py b/api/config/settings/common.py
index 076177eaf..6d1f4a9f5 100644
--- a/api/config/settings/common.py
+++ b/api/config/settings/common.py
@@ -722,3 +722,6 @@ SUBSONIC_DEFAULT_TRANSCODING_FORMAT = (
 
 # extra tags will be ignored
 TAGS_MAX_BY_OBJ = env.int("TAGS_MAX_BY_OBJ", default=30)
+FEDERATION_OBJECT_FETCH_DELAY = env.int(
+    "FEDERATION_OBJECT_FETCH_DELAY", default=60 * 24 * 3
+)
diff --git a/api/funkwhale_api/music/models.py b/api/funkwhale_api/music/models.py
index 1c0531983..3e6d892ae 100644
--- a/api/funkwhale_api/music/models.py
+++ b/api/funkwhale_api/music/models.py
@@ -202,6 +202,11 @@ class Artist(APIModelMixin):
         related_name="attributed_artists",
     )
     tagged_items = GenericRelation(tags_models.TaggedItem)
+    fetches = GenericRelation(
+        "federation.Fetch",
+        content_type_field="object_content_type",
+        object_id_field="object_id",
+    )
 
     api = musicbrainz.api.artists
     objects = ArtistQuerySet.as_manager()
@@ -282,6 +287,11 @@ class Album(APIModelMixin):
         related_name="attributed_albums",
     )
     tagged_items = GenericRelation(tags_models.TaggedItem)
+    fetches = GenericRelation(
+        "federation.Fetch",
+        content_type_field="object_content_type",
+        object_id_field="object_id",
+    )
 
     api_includes = ["artist-credits", "recordings", "media", "release-groups"]
     api = musicbrainz.api.releases
@@ -463,6 +473,11 @@ class Track(APIModelMixin):
     import_hooks = [import_tags]
     objects = TrackQuerySet.as_manager()
     tagged_items = GenericRelation(tags_models.TaggedItem)
+    fetches = GenericRelation(
+        "federation.Fetch",
+        content_type_field="object_content_type",
+        object_id_field="object_id",
+    )
 
     class Meta:
         ordering = ["album", "disc_number", "position"]
diff --git a/api/funkwhale_api/music/views.py b/api/funkwhale_api/music/views.py
index 34ec02632..15a2559da 100644
--- a/api/funkwhale_api/music/views.py
+++ b/api/funkwhale_api/music/views.py
@@ -1,3 +1,4 @@
+import datetime
 import logging
 import urllib
 
@@ -21,7 +22,9 @@ from funkwhale_api.federation.authentication import SignatureAuthentication
 from funkwhale_api.federation import actors
 from funkwhale_api.federation import api_serializers as federation_api_serializers
 from funkwhale_api.federation import decorators as federation_decorators
+from funkwhale_api.federation import models as federation_models
 from funkwhale_api.federation import routes
+from funkwhale_api.federation import tasks as federation_tasks
 from funkwhale_api.tags.models import Tag, TaggedItem
 from funkwhale_api.tags.serializers import TagSerializer
 from funkwhale_api.users.oauth import permissions as oauth_permissions
@@ -59,6 +62,37 @@ def get_libraries(filter_uploads):
     return libraries
 
 
+def refetch_obj(obj, queryset):
+    """
+    Given an Artist/Album/Track instance, if the instance is from a remote pod,
+    will attempt to update local data with the latest ActivityPub representation.
+    """
+    if obj.is_local:
+        return obj
+
+    now = timezone.now()
+    limit = now - datetime.timedelta(minutes=settings.FEDERATION_OBJECT_FETCH_DELAY)
+    last_fetch = obj.fetches.order_by("-creation_date").first()
+    if last_fetch is not None and last_fetch.creation_date > limit:
+        # we fetched recently, no need to do it again
+        return obj
+
+    logger.info("Refetching %s:%s at %s…", obj._meta.label, obj.pk, obj.fid)
+    actor = actors.get_service_actor()
+    fetch = federation_models.Fetch.objects.create(actor=actor, url=obj.fid, object=obj)
+    try:
+        federation_tasks.fetch(fetch_id=fetch.pk)
+    except Exception:
+        logger.exception(
+            "Error while refetching %s:%s at %s…", obj._meta.label, obj.pk, obj.fid
+        )
+    else:
+        fetch.refresh_from_db()
+        if fetch.status == "finished":
+            obj = queryset.get(pk=obj.pk)
+    return obj
+
+
 class ArtistViewSet(common_views.SkipFilterForGetObject, viewsets.ReadOnlyModelViewSet):
     queryset = models.Artist.objects.all().select_related("attributed_to")
     serializer_class = serializers.ArtistWithAlbumsSerializer
@@ -71,6 +105,16 @@ class ArtistViewSet(common_views.SkipFilterForGetObject, viewsets.ReadOnlyModelV
     fetches = federation_decorators.fetches_route()
     mutations = common_decorators.mutations_route(types=["update"])
 
+    def get_object(self):
+        obj = super().get_object()
+
+        if (
+            self.action == "retrieve"
+            and self.request.GET.get("refresh", "").lower() == "true"
+        ):
+            obj = refetch_obj(obj, self.get_queryset())
+        return obj
+
     def get_queryset(self):
         queryset = super().get_queryset()
         albums = models.Album.objects.with_tracks_count()
@@ -106,6 +150,16 @@ class AlbumViewSet(common_views.SkipFilterForGetObject, viewsets.ReadOnlyModelVi
     fetches = federation_decorators.fetches_route()
     mutations = common_decorators.mutations_route(types=["update"])
 
+    def get_object(self):
+        obj = super().get_object()
+
+        if (
+            self.action == "retrieve"
+            and self.request.GET.get("refresh", "").lower() == "true"
+        ):
+            obj = refetch_obj(obj, self.get_queryset())
+        return obj
+
     def get_queryset(self):
         queryset = super().get_queryset()
         tracks = (
@@ -212,6 +266,16 @@ class TrackViewSet(common_views.SkipFilterForGetObject, viewsets.ReadOnlyModelVi
     fetches = federation_decorators.fetches_route()
     mutations = common_decorators.mutations_route(types=["update"])
 
+    def get_object(self):
+        obj = super().get_object()
+
+        if (
+            self.action == "retrieve"
+            and self.request.GET.get("refresh", "").lower() == "true"
+        ):
+            obj = refetch_obj(obj, self.get_queryset())
+        return obj
+
     def get_queryset(self):
         queryset = super().get_queryset()
         filter_favorites = self.request.GET.get("favorites", None)
diff --git a/api/tests/music/test_views.py b/api/tests/music/test_views.py
index a5ef808f7..7382e4f78 100644
--- a/api/tests/music/test_views.py
+++ b/api/tests/music/test_views.py
@@ -1,7 +1,9 @@
+import datetime
 import io
 import magic
 import os
 import urllib.parse
+import uuid
 
 import pytest
 from django.urls import reverse
@@ -10,6 +12,7 @@ from django.utils import timezone
 from funkwhale_api.common import utils
 from funkwhale_api.federation import api_serializers as federation_api_serializers
 from funkwhale_api.federation import utils as federation_utils
+from funkwhale_api.federation import tasks as federation_tasks
 from funkwhale_api.music import licenses, models, serializers, tasks, views
 
 DATA_DIR = os.path.dirname(os.path.abspath(__file__))
@@ -826,3 +829,94 @@ def test_oembed_artist(factories, no_api_auth, api_client, settings):
     response = api_client.get(url, {"url": artist_url, "format": "json"})
 
     assert response.data == expected
+
+
+@pytest.mark.parametrize(
+    "factory_name, url_name",
+    [
+        ("music.Artist", "api:v1:artists-detail"),
+        ("music.Album", "api:v1:albums-detail"),
+        ("music.Track", "api:v1:tracks-detail"),
+    ],
+)
+def test_refresh_remote_entity_when_param_is_true(
+    factories,
+    factory_name,
+    url_name,
+    mocker,
+    logged_in_api_client,
+    queryset_equal_queries,
+):
+    obj = factories[factory_name](mbid=None)
+
+    assert obj.is_local is False
+
+    new_mbid = uuid.uuid4()
+
+    def fake_refetch(obj, queryset):
+        obj.mbid = new_mbid
+        return obj
+
+    refetch_obj = mocker.patch.object(views, "refetch_obj", side_effect=fake_refetch)
+    url = reverse(url_name, kwargs={"pk": obj.pk})
+    response = logged_in_api_client.get(url, {"refresh": "true"})
+
+    assert response.status_code == 200
+    assert response.data["mbid"] == str(new_mbid)
+    assert refetch_obj.call_count == 1
+    assert refetch_obj.call_args[0][0] == obj
+
+
+@pytest.mark.parametrize("param", ["false", "0", ""])
+def test_refresh_remote_entity_no_param(
+    factories, param, mocker, logged_in_api_client, service_actor
+):
+    obj = factories["music.Artist"](mbid=None)
+
+    assert obj.is_local is False
+
+    fetch_task = mocker.patch.object(federation_tasks, "fetch")
+    url = reverse("api:v1:artists-detail", kwargs={"pk": obj.pk})
+    response = logged_in_api_client.get(url, {"refresh": param})
+
+    assert response.status_code == 200
+    fetch_task.assert_not_called()
+    assert service_actor.fetches.count() == 0
+
+
+def test_refetch_obj_not_local(mocker, factories, service_actor):
+    obj = factories["music.Artist"](local=True)
+    fetch_task = mocker.patch.object(federation_tasks, "fetch")
+    assert views.refetch_obj(obj, obj.__class__.objects.all()) == obj
+    fetch_task.assert_not_called()
+    assert service_actor.fetches.count() == 0
+
+
+def test_refetch_obj_last_fetch_date_too_close(
+    mocker, factories, settings, service_actor
+):
+    settings.FEDERATION_OBJECT_FETCH_DELAY = 300
+    obj = factories["music.Artist"]()
+    factories["federation.Fetch"](
+        object=obj,
+        creation_date=timezone.now()
+        - datetime.timedelta(minutes=settings.FEDERATION_OBJECT_FETCH_DELAY - 1),
+    )
+    fetch_task = mocker.patch.object(federation_tasks, "fetch")
+    assert views.refetch_obj(obj, obj.__class__.objects.all()) == obj
+    fetch_task.assert_not_called()
+    assert service_actor.fetches.count() == 0
+
+
+def test_refetch_obj(mocker, factories, settings, service_actor):
+    settings.FEDERATION_OBJECT_FETCH_DELAY = 300
+    obj = factories["music.Artist"]()
+    factories["federation.Fetch"](
+        object=obj,
+        creation_date=timezone.now()
+        - datetime.timedelta(minutes=settings.FEDERATION_OBJECT_FETCH_DELAY + 1),
+    )
+    fetch_task = mocker.patch.object(federation_tasks, "fetch")
+    views.refetch_obj(obj, obj.__class__.objects.all())
+    fetch = obj.fetches.filter(actor=service_actor).order_by("-creation_date").first()
+    fetch_task.assert_called_once_with(fetch_id=fetch.pk)
diff --git a/changes/changelog.d/refetch.enhancement b/changes/changelog.d/refetch.enhancement
new file mode 100644
index 000000000..f55facec8
--- /dev/null
+++ b/changes/changelog.d/refetch.enhancement
@@ -0,0 +1 @@
+Now refetch remote ActivityPub artists, albums and tracks to avoid local stale data
diff --git a/docs/swagger.yml b/docs/swagger.yml
index 2e3bf9245..8ea7d00a7 100644
--- a/docs/swagger.yml
+++ b/docs/swagger.yml
@@ -296,6 +296,7 @@ paths:
       summary: Retrieve a single artist
       parameters:
         - $ref: "#/parameters/ObjectId"
+        - $ref: "#/parameters/Refresh"
       security:
         - oauth2:
           - "read:libraries"
@@ -395,6 +396,7 @@ paths:
       summary: Retrieve a single album
       parameters:
         - $ref: "#/parameters/ObjectId"
+        - $ref: "#/parameters/Refresh"
 
       security:
         - oauth2:
@@ -518,6 +520,7 @@ paths:
     get:
       parameters:
         - $ref: "#/parameters/ObjectId"
+        - $ref: "#/parameters/Refresh"
       summary: Retrieve a single track
 
       security:
@@ -974,6 +977,14 @@ parameters:
     schema:
       required: false
       type: "boolean"
+  Refresh:
+    name: "refresh"
+    in: "query"
+    default: false
+    description: "Trigger an ActivityPub fetch to refresh local data"
+    schema:
+      required: false
+      type: "boolean"
 
 responses:
   200:
diff --git a/front/src/components/library/AlbumBase.vue b/front/src/components/library/AlbumBase.vue
index 083e5547b..9f050e286 100644
--- a/front/src/components/library/AlbumBase.vue
+++ b/front/src/components/library/AlbumBase.vue
@@ -146,7 +146,7 @@ export default {
       this.isLoading = true
       let url = FETCH_URL + this.id + "/"
       logger.default.debug('Fetching album "' + this.id + '"')
-      axios.get(url).then(response => {
+      axios.get(url, {params: {refresh: 'true'}}).then(response => {
         self.object = backend.Album.clean(response.data)
         self.discs = self.object.tracks.reduce(groupByDisc, [])
         self.isLoading = false
diff --git a/front/src/components/library/ArtistBase.vue b/front/src/components/library/ArtistBase.vue
index 5472ee307..602423f70 100644
--- a/front/src/components/library/ArtistBase.vue
+++ b/front/src/components/library/ArtistBase.vue
@@ -179,7 +179,7 @@ export default {
 
       })
 
-      let artistPromise = axios.get("artists/" + this.id + "/").then(response => {
+      let artistPromise = axios.get("artists/" + this.id + "/", {params: {refresh: 'true'}}).then(response => {
         self.object = response.data
       })
       await trackPromise
diff --git a/front/src/components/library/TrackBase.vue b/front/src/components/library/TrackBase.vue
index 32942958b..17a739d42 100644
--- a/front/src/components/library/TrackBase.vue
+++ b/front/src/components/library/TrackBase.vue
@@ -155,7 +155,7 @@ export default {
       this.isLoadingTrack = true
       let url = FETCH_URL + this.id + "/"
       logger.default.debug('Fetching track "' + this.id + '"')
-      axios.get(url).then(response => {
+      axios.get(url, {params: {refresh: 'true'}}).then(response => {
         self.track = response.data
         self.isLoadingTrack = false
       })
-- 
GitLab