Skip to content
Snippets Groups Projects
Verified Commit d9dd69f0 authored by Eliot Berriot's avatar Eliot Berriot
Browse files

Initial poc with musicbrainz

parent 2da29ea5
No related branches found
No related tags found
No related merge requests found
...@@ -219,6 +219,9 @@ ALLOWED_HOSTS = env.list("DJANGO_ALLOWED_HOSTS", default=["retribute.me"]) ...@@ -219,6 +219,9 @@ ALLOWED_HOSTS = env.list("DJANGO_ALLOWED_HOSTS", default=["retribute.me"])
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
CACHES = {"default": env.cache()} CACHES = {"default": env.cache()}
CACHE_DEFAULT_EXPIRATION = env.int("CACHE_DEFAULT_EXPIRATION", default=60 * 60 * 12) CACHE_DEFAULT_EXPIRATION = env.int("CACHE_DEFAULT_EXPIRATION", default=60 * 60 * 12)
# Expiry used when caching MusicBrainz responses. Read from the
# CACHE_MUSICBRAINZ_EXPIRATION environment variable, defaulting to
# 60 * 60 * 24 (presumably seconds, i.e. 24 hours -- TODO confirm the unit
# expected by the cache backend).
CACHE_MUSICBRAINZ_EXPIRATION = env.int(
"CACHE_MUSICBRAINZ_EXPIRATION", default=60 * 60 * 24
)
ASYNC_REDIS_PARAMS = {"address": CACHES["default"]["LOCATION"]} ASYNC_REDIS_PARAMS = {"address": CACHES["default"]["LOCATION"]}
CHANNEL_LAYERS = { CHANNEL_LAYERS = {
"default": { "default": {
...@@ -227,6 +230,10 @@ CHANNEL_LAYERS = { ...@@ -227,6 +230,10 @@ CHANNEL_LAYERS = {
} }
} }
# Read from the MUSICBRAINZ_MAX_CONCURRENT_REQUESTS environment variable,
# defaulting to 1.
# NOTE(review): presumably caps how many MusicBrainz requests may be in
# flight at once; no consumer of this setting is visible here -- confirm.
MUSICBRAINZ_MAX_CONCURRENT_REQUESTS = env.int(
"MUSICBRAINZ_MAX_CONCURRENT_REQUESTS", default=1
)
# SECURITY # SECURITY
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
# https://docs.djangoproject.com/en/dev/ref/settings/#secure-proxy-ssl-header # https://docs.djangoproject.com/en/dev/ref/settings/#secure-proxy-ssl-header
......
...@@ -12,7 +12,7 @@ class Backend: ...@@ -12,7 +12,7 @@ class Backend:
async def get(self, key): async def get(self, key):
raise NotImplementedError raise NotImplementedError
async def set(self, key): async def set(self, key, expire=None):
raise NotImplementedError raise NotImplementedError
async def close(self): async def close(self):
...@@ -29,7 +29,7 @@ class Dummy(Backend): ...@@ -29,7 +29,7 @@ class Dummy(Backend):
except KeyError: except KeyError:
raise self.NotFound(key) raise self.NotFound(key)
async def set(self, key, value): async def set(self, key, value, expire=None):
self._cache[key] = value self._cache[key] = value
......
import lxml
from django.conf import settings
async def lookup(name, session, cache):
    """Return the parsed MusicBrainz artist document for ``name``.

    The cache is consulted first under the key ``musicbrainz:artist:<name>``.
    On a miss, the artist resource is requested from the MusicBrainz web
    service with ``inc=url-rels``, the decoded response body is stored in the
    cache with ``settings.CACHE_MUSICBRAINZ_EXPIRATION`` as its expiry, and
    the parsed document is returned.

    Args:
        name: Value interpolated verbatim into both the artist URL and the
            cache key (presumably an artist MBID -- verify against callers).
        session: HTTP client whose ``get()`` returns an async context manager
            yielding a response with ``raise_for_status()`` and an awaitable
            ``read()``.
        cache: Cache backend exposing awaitable ``get``/``set`` and a
            ``NotFound`` exception attribute.

    Returns:
        The element returned by ``lxml.etree.fromstring`` for the cached or
        freshly downloaded document.

    Raises:
        Whatever ``response.raise_for_status()`` raises on an error status.
    """
    # A bare ``import lxml`` does not load the ``etree`` submodule, so
    # ``lxml.etree`` would only resolve if some other module had imported it
    # first. Import it explicitly so this function does not rely on that.
    import lxml.etree

    cache_key = "musicbrainz:artist:{}".format(name)

    try:
        cached = await cache.get(cache_key)
    except cache.NotFound:
        # Cache miss: fall through to the network request below.
        pass
    else:
        # Payloads are cached as text; they are re-encoded to bytes before
        # parsing.
        return lxml.etree.fromstring(cached.encode())

    async with session.get(
        "https://musicbrainz.org/ws/2/artist/{}".format(name),
        params={"inc": "url-rels"},
        headers={
            "User-Agent": "Retribute.me/0.1 (https://dev.funkwhale.audio/retribute.me/api)"
        },
    ) as response:
        response.raise_for_status()
        text = await response.read()

    await cache.set(
        cache_key,
        text.decode(),
        expire=settings.CACHE_MUSICBRAINZ_EXPIRATION,
    )
    return lxml.etree.fromstring(text)
def extract_urls_from_element(el):
    """Collect the URL relations found through ``el`` as a list of dicts.

    Every ``target`` element located under a ``relation-list`` whose
    ``target-type`` attribute is ``url`` yields one entry. Elements are
    matched by local name, so the XML namespace is ignored.

    Args:
        el: Object exposing ``xpath()``; the matched nodes must expose
            ``getparent()`` and ``text``.

    Returns:
        A list of ``{"summary": <type attribute of the target's parent>,
        "url": <text of the target>}`` dicts, in the order returned by
        ``xpath()``. Empty when nothing matches.
    """
    # NOTE(review): the expression starts with ``//``, which in XPath selects
    # from the document root rather than from ``el`` itself -- confirm this
    # is intended when ``el`` is not the root element.
    url_targets = el.xpath(
        "//*[local-name()='relation-list'][@target-type='url']"
        "//*[local-name()='target']"
    )
    return [
        {"summary": target.getparent().get("type"), "url": target.text}
        for target in url_targets
    ]
...@@ -6,6 +6,7 @@ from .. import providers ...@@ -6,6 +6,7 @@ from .. import providers
from . import activitypub from . import activitypub
from . import exceptions from . import exceptions
from . import means from . import means
from . import musicbrainz
from . import webfinger from . import webfinger
...@@ -81,6 +82,18 @@ class Webfinger(Source): ...@@ -81,6 +82,18 @@ class Webfinger(Source):
return found return found
@registry.register
class MusicBrainz(Source):
    """Source that turns a MusicBrainz artist lookup into a list of links."""

    id = "musicbrainz"

    async def get(self, lookup, session, cache):
        """Return ``{"links": [...]}`` for the artist identified by ``lookup``.

        The artist document is obtained through ``musicbrainz.lookup`` and
        its URL relations are extracted with
        ``musicbrainz.extract_urls_from_element``.
        """
        document = await musicbrainz.lookup(lookup, session, cache)
        links = musicbrainz.extract_urls_from_element(document)
        return {"links": links}
def result_to_retribute_profile(lookup_type, lookup, data): def result_to_retribute_profile(lookup_type, lookup, data):
path = settings.BASE_URL + "/compat" path = settings.BASE_URL + "/compat"
now = timezone.now() now = timezone.now()
......
import lxml
from retribute_api.search import musicbrainz
async def test_artist_lookup(settings, responses, session, dummycache, mocker):
    """``lookup`` fetches on a cache miss, caches the body and returns parsed XML.

    Checks that the cache is read exactly once under the artist key, that
    the response body is written back exactly once with
    ``settings.CACHE_MUSICBRAINZ_EXPIRATION`` as expiry, and that the
    returned element serializes identically to the parsed fixture.
    """
    # A bare ``import lxml`` does not load the ``etree`` submodule; import it
    # explicitly so the assertions below do not depend on another module
    # having imported it already.
    import lxml.etree

    name = "7f034c95-968a-4dd8-9869-30efeee20d32"
    cache_key = "musicbrainz:artist:{}".format(name)
    musicbrainz_response = """<?xml version="1.0" encoding="UTF-8"?>
<metadata xmlns="http://musicbrainz.org/ns/mmd-2.0#">
<artist type-id="b6e035f4-3ce9-331c-97df-83397230b0df" id="7f034c95-968a-4dd8-9869-30efeee20d32" type="Person">
<name>3D63</name>
<sort-name>3D63</sort-name>
<area id="6c900f36-1d2c-4194-ba96-8e9baae30af3">
<name>Valenciennes</name>
<sort-name>Valenciennes</sort-name>
</area>
<relation-list target-type="url">
<relation type="bandcamp" type-id="c550166e-0548-4a18-b1d4-e2ae423a3e88">
<target id="dcf566a0-9bac-4d75-8fee-565078a94310">https://3d63.bandcamp.com/</target>
</relation>
<relation type="official homepage" type-id="fe33d22f-c3b0-4d68-bd53-a856badf2b15">
<target id="b1a4fad6-289f-43ed-aea6-82d9b508f52e">https://3d63.net/</target>
</relation>
<relation type="soundcloud" type-id="89e4a949-0976-440d-bda1-5f772c1e5710">
<target id="9bd050c4-404b-4fcf-bb30-745954b74333">https://soundcloud.com/3d63</target>
</relation>
<relation type="social network" type-id="99429741-f3f6-484b-84f8-23af51991770">
<target id="217419e3-f62b-4b1e-9fe1-46b816c3407b">https://twitter.com/0x3D63</target>
</relation>
<relation type="social network" type-id="99429741-f3f6-484b-84f8-23af51991770">
<target id="ce3d16ce-2062-4d09-b506-88ec249e8ee4">https://www.facebook.com/0x3D63</target>
</relation>
</relation-list>
</artist>
</metadata>
"""
    # Register the canned HTTP response for the exact URL ``lookup`` requests.
    responses.get(
        "https://musicbrainz.org/ws/2/artist/{}?inc=url-rels".format(name),
        body=musicbrainz_response,
    )
    cache_get = mocker.spy(dummycache, "get")
    cache_set = mocker.spy(dummycache, "set")

    response = await musicbrainz.lookup(name, session, cache=dummycache)

    cache_get.assert_called_once_with(cache_key)
    cache_set.assert_called_once_with(
        cache_key,
        musicbrainz_response,
        expire=settings.CACHE_MUSICBRAINZ_EXPIRATION,
    )
    # Compare serialized forms of the returned and the expected element.
    assert lxml.etree.tostring(response) == lxml.etree.tostring(
        lxml.etree.fromstring(musicbrainz_response.encode())
    )
def test_extract_urls_from_element():
    """``extract_urls_from_element`` returns one dict per URL relation, in order."""
    # A bare ``import lxml`` does not load the ``etree`` submodule; import it
    # explicitly so parsing the fixture does not depend on another module
    # having imported it already.
    import lxml.etree

    musicbrainz_response = """<?xml version="1.0" encoding="UTF-8"?>
<metadata xmlns="http://musicbrainz.org/ns/mmd-2.0#">
<artist type-id="b6e035f4-3ce9-331c-97df-83397230b0df" id="7f034c95-968a-4dd8-9869-30efeee20d32" type="Person">
<name>3D63</name>
<sort-name>3D63</sort-name>
<area id="6c900f36-1d2c-4194-ba96-8e9baae30af3">
<name>Valenciennes</name>
<sort-name>Valenciennes</sort-name>
</area>
<relation-list target-type="url">
<relation type="bandcamp" type-id="c550166e-0548-4a18-b1d4-e2ae423a3e88">
<target id="dcf566a0-9bac-4d75-8fee-565078a94310">https://3d63.bandcamp.com/</target>
</relation>
<relation type="official homepage" type-id="fe33d22f-c3b0-4d68-bd53-a856badf2b15">
<target id="b1a4fad6-289f-43ed-aea6-82d9b508f52e">https://3d63.net/</target>
</relation>
<relation type="soundcloud" type-id="89e4a949-0976-440d-bda1-5f772c1e5710">
<target id="9bd050c4-404b-4fcf-bb30-745954b74333">https://soundcloud.com/3d63</target>
</relation>
<relation type="social network" type-id="99429741-f3f6-484b-84f8-23af51991770">
<target id="217419e3-f62b-4b1e-9fe1-46b816c3407b">https://twitter.com/0x3D63</target>
</relation>
<relation type="social network" type-id="99429741-f3f6-484b-84f8-23af51991770">
<target id="ce3d16ce-2062-4d09-b506-88ec249e8ee4">https://www.facebook.com/0x3D63</target>
</relation>
</relation-list>
</artist>
</metadata>
"""
    # The document carries an encoding declaration, so it is parsed from bytes.
    el = lxml.etree.fromstring(musicbrainz_response.encode())
    expected = [
        {"summary": "bandcamp", "url": "https://3d63.bandcamp.com/"},
        {"summary": "official homepage", "url": "https://3d63.net/"},
        {"summary": "soundcloud", "url": "https://soundcloud.com/3d63"},
        {"summary": "social network", "url": "https://twitter.com/0x3D63"},
        {"summary": "social network", "url": "https://www.facebook.com/0x3D63"},
    ]

    assert musicbrainz.extract_urls_from_element(el) == expected
...@@ -65,6 +65,66 @@ async def test_webfinger_source(mocker, session, responses, dummycache): ...@@ -65,6 +65,66 @@ async def test_webfinger_source(mocker, session, responses, dummycache):
assert result == expected assert result == expected
async def test_musicbrainz_source(mocker, session, responses, dummycache, settings):
    """The MusicBrainz source returns the artist's URL relations under ``links``.

    Also checks that the lookup went through the cache: one read under the
    artist key and one write of the response body with
    ``settings.CACHE_MUSICBRAINZ_EXPIRATION`` as expiry.
    """
    name = "7f034c95-968a-4dd8-9869-30efeee20d32"
    cache_key = "musicbrainz:artist:{}".format(name)
    artist_url = "https://musicbrainz.org/ws/2/artist/{}?inc=url-rels".format(name)
    musicbrainz_response = """<?xml version="1.0" encoding="UTF-8"?>
<metadata xmlns="http://musicbrainz.org/ns/mmd-2.0#">
<artist type-id="b6e035f4-3ce9-331c-97df-83397230b0df" id="7f034c95-968a-4dd8-9869-30efeee20d32" type="Person">
<name>3D63</name>
<sort-name>3D63</sort-name>
<area id="6c900f36-1d2c-4194-ba96-8e9baae30af3">
<name>Valenciennes</name>
<sort-name>Valenciennes</sort-name>
</area>
<relation-list target-type="url">
<relation type="bandcamp" type-id="c550166e-0548-4a18-b1d4-e2ae423a3e88">
<target id="dcf566a0-9bac-4d75-8fee-565078a94310">https://3d63.bandcamp.com/</target>
</relation>
<relation type="official homepage" type-id="fe33d22f-c3b0-4d68-bd53-a856badf2b15">
<target id="b1a4fad6-289f-43ed-aea6-82d9b508f52e">https://3d63.net/</target>
</relation>
<relation type="soundcloud" type-id="89e4a949-0976-440d-bda1-5f772c1e5710">
<target id="9bd050c4-404b-4fcf-bb30-745954b74333">https://soundcloud.com/3d63</target>
</relation>
<relation type="social network" type-id="99429741-f3f6-484b-84f8-23af51991770">
<target id="217419e3-f62b-4b1e-9fe1-46b816c3407b">https://twitter.com/0x3D63</target>
</relation>
<relation type="social network" type-id="99429741-f3f6-484b-84f8-23af51991770">
<target id="ce3d16ce-2062-4d09-b506-88ec249e8ee4">https://www.facebook.com/0x3D63</target>
</relation>
</relation-list>
</artist>
</metadata>
"""
    # Serve the fixture for the URL the source is expected to request.
    responses.get(artist_url, body=musicbrainz_response)

    expected_links = [
        {"summary": "bandcamp", "url": "https://3d63.bandcamp.com/"},
        {"summary": "official homepage", "url": "https://3d63.net/"},
        {"summary": "soundcloud", "url": "https://soundcloud.com/3d63"},
        {"summary": "social network", "url": "https://twitter.com/0x3D63"},
        {"summary": "social network", "url": "https://www.facebook.com/0x3D63"},
    ]
    expected = {"links": expected_links}

    source = sources.MusicBrainz()
    get_spy = mocker.spy(dummycache, "get")
    set_spy = mocker.spy(dummycache, "set")

    result = await source.get(name, session, cache=dummycache)

    get_spy.assert_any_call(cache_key)
    set_spy.assert_any_call(
        cache_key,
        musicbrainz_response,
        expire=settings.CACHE_MUSICBRAINZ_EXPIRATION,
    )
    assert result == expected
def test_source_result_to_retribute_profile(settings, now): def test_source_result_to_retribute_profile(settings, now):
settings.BASE_URL = "https://retribute.me.test" settings.BASE_URL = "https://retribute.me.test"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment