Verified Commit d9dd69f0 authored by Eliot Berriot
Browse files

Initial poc with musicbrainz

parent 2da29ea5
......@@ -219,6 +219,9 @@ ALLOWED_HOSTS = env.list("DJANGO_ALLOWED_HOSTS", default=["retribute.me"])
# ------------------------------------------------------------------------------
# Cache configuration; the "default" alias is whatever ``env.cache()`` returns
# (presumably parsed from an environment variable -- confirm against the
# ``env`` helper used by this settings module).
CACHES = {"default": env.cache()}
# Fallback cache lifetime, overridable through the CACHE_DEFAULT_EXPIRATION
# environment variable. The default evaluates to 60 * 60 * 12 = 43200
# (presumably seconds, i.e. 12 hours -- TODO confirm the unit).
CACHE_DEFAULT_EXPIRATION = env.int("CACHE_DEFAULT_EXPIRATION", default=60 * 60 * 12)
# Lifetime handed to ``cache.set(..., expire=...)`` when storing MusicBrainz
# responses. The default evaluates to 60 * 60 * 24 = 86400 (presumably
# seconds, i.e. 24 hours -- TODO confirm the unit).
CACHE_MUSICBRAINZ_EXPIRATION = env.int(
"CACHE_MUSICBRAINZ_EXPIRATION", default=60 * 60 * 24
)
# Reuses the LOCATION of the default cache as the "address" parameter, so this
# must stay below the CACHES definition.
ASYNC_REDIS_PARAMS = {"address": CACHES["default"]["LOCATION"]}
CHANNEL_LAYERS = {
"default": {
......@@ -227,6 +230,10 @@ CHANNEL_LAYERS = {
}
}
# Integer setting read from the MUSICBRAINZ_MAX_CONCURRENT_REQUESTS environment
# variable, defaulting to 1.
# NOTE(review): the name suggests a cap on parallel MusicBrainz requests, but
# no consumer of this setting is visible here -- confirm where it is enforced.
MUSICBRAINZ_MAX_CONCURRENT_REQUESTS = env.int(
"MUSICBRAINZ_MAX_CONCURRENT_REQUESTS", default=1
)
# SECURITY
# ------------------------------------------------------------------------------
# https://docs.djangoproject.com/en/dev/ref/settings/#secure-proxy-ssl-header
......
......@@ -12,7 +12,7 @@ class Backend:
async def get(self, key):
    """Abstract cache read for ``key``.

    The base implementation only raises ``NotImplementedError``; concrete
    backends must override it.
    """
    raise NotImplementedError
async def set(self, key):
async def set(self, key, value, expire=None):
    """Abstract cache write of ``value`` under ``key``.

    The signature mirrors the concrete backends and the call sites, which
    pass ``(key, value, expire=...)``. Without the ``value`` parameter such a
    call on the base class fails with ``TypeError`` (two values for
    ``expire``) instead of the intended ``NotImplementedError``.

    Args:
        key: Cache key to write.
        value: Value to store.
        expire: Optional lifetime for the entry; ``None`` means the caller
            did not request one.

    Raises:
        NotImplementedError: Always; concrete backends must override.
    """
    raise NotImplementedError
async def close(self):
......@@ -29,7 +29,7 @@ class Dummy(Backend):
except KeyError:
raise self.NotFound(key)
async def set(self, key, value):
async def set(self, key, value, expire=None):
    """Store ``value`` under ``key`` in the in-memory ``_cache`` mapping.

    ``expire`` is accepted so the signature matches callers that pass it,
    but it is not used: entries written here are never expired by this
    method.
    """
    self._cache[key] = value
......
import lxml
from django.conf import settings
async def lookup(name, session, cache):
    """Return the parsed MusicBrainz artist document for ``name``.

    The cache is consulted first under the key ``musicbrainz:artist:<name>``.
    On a hit the cached text is parsed and returned without any HTTP request.
    On a miss the artist is requested from the MusicBrainz web service with
    ``inc=url-rels``, the decoded response body is written to the cache with
    ``expire=settings.CACHE_MUSICBRAINZ_EXPIRATION``, and the parsed document
    is returned.

    Args:
        name: Artist identifier (e.g. a MusicBrainz artist UUID). It is
            interpolated as-is into both the cache key and the request path;
            no URL escaping is applied here.
        session: HTTP client whose ``get(...)`` result is an async context
            manager yielding a response with ``raise_for_status()`` and an
            awaitable ``read()`` returning bytes.
        cache: Cache backend with awaitable ``get(key)`` and
            ``set(key, value, expire=...)`` and a ``NotFound`` exception
            attribute raised on a miss.

    Returns:
        The element returned by ``lxml.etree.fromstring``.

    Raises:
        Whatever ``response.raise_for_status()`` raises for an error status,
        and whatever ``lxml.etree.fromstring`` raises on malformed XML.
    """
    # A bare ``import lxml`` does not load the ``lxml.etree`` submodule, so
    # import it explicitly rather than relying on another module having
    # imported it first.
    from lxml import etree

    cache_key = "musicbrainz:artist:{}".format(name)

    try:
        cached = await cache.get(cache_key)
    except cache.NotFound:
        pass
    else:
        # Cached payloads are stored as text; parse from bytes, as the
        # network path below does.
        return etree.fromstring(cached.encode())

    async with session.get(
        "https://musicbrainz.org/ws/2/artist/{}".format(name),
        params={"inc": "url-rels"},
        headers={
            "User-Agent": "Retribute.me/0.1 (https://dev.funkwhale.audio/retribute.me/api)"
        },
    ) as response:
        response.raise_for_status()
        body = await response.read()

    await cache.set(
        cache_key,
        body.decode(),
        expire=settings.CACHE_MUSICBRAINZ_EXPIRATION,
    )
    return etree.fromstring(body)
def extract_urls_from_element(el):
    """List the URL relationships found through a MusicBrainz XML element.

    Every ``target`` element located under a ``relation-list`` whose
    ``target-type`` attribute is ``url`` yields one entry. Elements are
    matched on their local name, so the XML namespace is ignored. Because the
    expression starts with ``//``, it is evaluated against the whole document
    that ``el`` belongs to rather than only the subtree below ``el``.

    Args:
        el: Object exposing an lxml-style ``xpath(expression)`` method.

    Returns:
        A list of ``{"summary": <type attribute of the target's parent>,
        "url": <text of the target>}`` dicts, in the order the targets are
        returned by ``xpath``. Empty when nothing matches.
    """
    xpath_expr = "//*[local-name()='relation-list'][@target-type='url']//*[local-name()='target']"
    return [
        {"summary": target.getparent().get("type"), "url": target.text}
        for target in el.xpath(xpath_expr)
    ]
......@@ -6,6 +6,7 @@ from .. import providers
from . import activitypub
from . import exceptions
from . import means
from . import musicbrainz
from . import webfinger
......@@ -81,6 +82,18 @@ class Webfinger(Source):
return found
@registry.register
class MusicBrainz(Source):
    """Source registered under the id ``musicbrainz``."""

    id = "musicbrainz"

    async def get(self, lookup, session, cache):
        """Fetch the artist document for ``lookup`` and return its links.

        The document comes from ``musicbrainz.lookup`` and the links from
        ``musicbrainz.extract_urls_from_element``; the result is a dict with
        the single key ``"links"``.
        """
        document = await musicbrainz.lookup(lookup, session, cache)
        links = musicbrainz.extract_urls_from_element(document)
        return {"links": links}
def result_to_retribute_profile(lookup_type, lookup, data):
path = settings.BASE_URL + "/compat"
now = timezone.now()
......
import lxml
from retribute_api.search import musicbrainz
async def test_artist_lookup(settings, responses, session, dummycache, mocker):
    """``lookup`` reads the cache once, stores the fetched body and parses it.

    Checks that the cache is queried and written exactly once under the
    artist key, that the write carries the MusicBrainz expiration setting,
    and that the returned element serializes like the mocked response.
    """
    name = "7f034c95-968a-4dd8-9869-30efeee20d32"
    cache_key = "musicbrainz:artist:{}".format(name)
    musicbrainz_response = """<?xml version="1.0" encoding="UTF-8"?>
<metadata xmlns="http://musicbrainz.org/ns/mmd-2.0#">
<artist type-id="b6e035f4-3ce9-331c-97df-83397230b0df" id="7f034c95-968a-4dd8-9869-30efeee20d32" type="Person">
<name>3D63</name>
<sort-name>3D63</sort-name>
<area id="6c900f36-1d2c-4194-ba96-8e9baae30af3">
<name>Valenciennes</name>
<sort-name>Valenciennes</sort-name>
</area>
<relation-list target-type="url">
<relation type="bandcamp" type-id="c550166e-0548-4a18-b1d4-e2ae423a3e88">
<target id="dcf566a0-9bac-4d75-8fee-565078a94310">https://3d63.bandcamp.com/</target>
</relation>
<relation type="official homepage" type-id="fe33d22f-c3b0-4d68-bd53-a856badf2b15">
<target id="b1a4fad6-289f-43ed-aea6-82d9b508f52e">https://3d63.net/</target>
</relation>
<relation type="soundcloud" type-id="89e4a949-0976-440d-bda1-5f772c1e5710">
<target id="9bd050c4-404b-4fcf-bb30-745954b74333">https://soundcloud.com/3d63</target>
</relation>
<relation type="social network" type-id="99429741-f3f6-484b-84f8-23af51991770">
<target id="217419e3-f62b-4b1e-9fe1-46b816c3407b">https://twitter.com/0x3D63</target>
</relation>
<relation type="social network" type-id="99429741-f3f6-484b-84f8-23af51991770">
<target id="ce3d16ce-2062-4d09-b506-88ec249e8ee4">https://www.facebook.com/0x3D63</target>
</relation>
</relation-list>
</artist>
</metadata>
"""
    # Serve the canned document for the exact URL ``lookup`` is expected to hit.
    responses.get(
        "https://musicbrainz.org/ws/2/artist/{}?inc=url-rels".format(name),
        body=musicbrainz_response,
    )
    get_spy = mocker.spy(dummycache, "get")
    set_spy = mocker.spy(dummycache, "set")

    element = await musicbrainz.lookup(name, session, cache=dummycache)

    get_spy.assert_called_once_with(cache_key)
    set_spy.assert_called_once_with(
        cache_key,
        musicbrainz_response,
        expire=settings.CACHE_MUSICBRAINZ_EXPIRATION,
    )
    expected_xml = lxml.etree.tostring(
        lxml.etree.fromstring(musicbrainz_response.encode())
    )
    assert lxml.etree.tostring(element) == expected_xml
def test_extract_urls_from_element():
    """Each url relation of the sample artist becomes a summary/url dict."""
    musicbrainz_response = """<?xml version="1.0" encoding="UTF-8"?>
<metadata xmlns="http://musicbrainz.org/ns/mmd-2.0#">
<artist type-id="b6e035f4-3ce9-331c-97df-83397230b0df" id="7f034c95-968a-4dd8-9869-30efeee20d32" type="Person">
<name>3D63</name>
<sort-name>3D63</sort-name>
<area id="6c900f36-1d2c-4194-ba96-8e9baae30af3">
<name>Valenciennes</name>
<sort-name>Valenciennes</sort-name>
</area>
<relation-list target-type="url">
<relation type="bandcamp" type-id="c550166e-0548-4a18-b1d4-e2ae423a3e88">
<target id="dcf566a0-9bac-4d75-8fee-565078a94310">https://3d63.bandcamp.com/</target>
</relation>
<relation type="official homepage" type-id="fe33d22f-c3b0-4d68-bd53-a856badf2b15">
<target id="b1a4fad6-289f-43ed-aea6-82d9b508f52e">https://3d63.net/</target>
</relation>
<relation type="soundcloud" type-id="89e4a949-0976-440d-bda1-5f772c1e5710">
<target id="9bd050c4-404b-4fcf-bb30-745954b74333">https://soundcloud.com/3d63</target>
</relation>
<relation type="social network" type-id="99429741-f3f6-484b-84f8-23af51991770">
<target id="217419e3-f62b-4b1e-9fe1-46b816c3407b">https://twitter.com/0x3D63</target>
</relation>
<relation type="social network" type-id="99429741-f3f6-484b-84f8-23af51991770">
<target id="ce3d16ce-2062-4d09-b506-88ec249e8ee4">https://www.facebook.com/0x3D63</target>
</relation>
</relation-list>
</artist>
</metadata>
"""
    # (relation type, target URL) pairs in document order.
    link_pairs = [
        ("bandcamp", "https://3d63.bandcamp.com/"),
        ("official homepage", "https://3d63.net/"),
        ("soundcloud", "https://soundcloud.com/3d63"),
        ("social network", "https://twitter.com/0x3D63"),
        ("social network", "https://www.facebook.com/0x3D63"),
    ]
    expected = [{"summary": summary, "url": url} for summary, url in link_pairs]

    root = lxml.etree.fromstring(musicbrainz_response.encode())

    assert musicbrainz.extract_urls_from_element(root) == expected
......@@ -65,6 +65,66 @@ async def test_webfinger_source(mocker, session, responses, dummycache):
assert result == expected
async def test_musicbrainz_source(mocker, session, responses, dummycache, settings):
    """The MusicBrainz source returns the artist links and goes through the cache.

    Checks that the cache was read and written under the artist key (with the
    MusicBrainz expiration setting) and that the result is the expected
    ``{"links": [...]}`` payload.
    """
    name = "7f034c95-968a-4dd8-9869-30efeee20d32"
    cache_key = "musicbrainz:artist:{}".format(name)
    musicbrainz_response = """<?xml version="1.0" encoding="UTF-8"?>
<metadata xmlns="http://musicbrainz.org/ns/mmd-2.0#">
<artist type-id="b6e035f4-3ce9-331c-97df-83397230b0df" id="7f034c95-968a-4dd8-9869-30efeee20d32" type="Person">
<name>3D63</name>
<sort-name>3D63</sort-name>
<area id="6c900f36-1d2c-4194-ba96-8e9baae30af3">
<name>Valenciennes</name>
<sort-name>Valenciennes</sort-name>
</area>
<relation-list target-type="url">
<relation type="bandcamp" type-id="c550166e-0548-4a18-b1d4-e2ae423a3e88">
<target id="dcf566a0-9bac-4d75-8fee-565078a94310">https://3d63.bandcamp.com/</target>
</relation>
<relation type="official homepage" type-id="fe33d22f-c3b0-4d68-bd53-a856badf2b15">
<target id="b1a4fad6-289f-43ed-aea6-82d9b508f52e">https://3d63.net/</target>
</relation>
<relation type="soundcloud" type-id="89e4a949-0976-440d-bda1-5f772c1e5710">
<target id="9bd050c4-404b-4fcf-bb30-745954b74333">https://soundcloud.com/3d63</target>
</relation>
<relation type="social network" type-id="99429741-f3f6-484b-84f8-23af51991770">
<target id="217419e3-f62b-4b1e-9fe1-46b816c3407b">https://twitter.com/0x3D63</target>
</relation>
<relation type="social network" type-id="99429741-f3f6-484b-84f8-23af51991770">
<target id="ce3d16ce-2062-4d09-b506-88ec249e8ee4">https://www.facebook.com/0x3D63</target>
</relation>
</relation-list>
</artist>
</metadata>
"""
    # Serve the canned document for the URL the source is expected to request.
    responses.get(
        "https://musicbrainz.org/ws/2/artist/{}?inc=url-rels".format(name),
        body=musicbrainz_response,
    )
    # (relation type, target URL) pairs in document order.
    link_pairs = [
        ("bandcamp", "https://3d63.bandcamp.com/"),
        ("official homepage", "https://3d63.net/"),
        ("soundcloud", "https://soundcloud.com/3d63"),
        ("social network", "https://twitter.com/0x3D63"),
        ("social network", "https://www.facebook.com/0x3D63"),
    ]
    expected = {
        "links": [{"summary": summary, "url": url} for summary, url in link_pairs]
    }
    source = sources.MusicBrainz()
    get_spy = mocker.spy(dummycache, "get")
    set_spy = mocker.spy(dummycache, "set")

    result = await source.get(name, session, cache=dummycache)

    get_spy.assert_any_call(cache_key)
    set_spy.assert_any_call(
        cache_key,
        musicbrainz_response,
        expire=settings.CACHE_MUSICBRAINZ_EXPIRATION,
    )
    assert result == expected
def test_source_result_to_retribute_profile(settings, now):
settings.BASE_URL = "https://retribute.me.test"
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment