diff --git a/funkwhale_network/crawler.py b/funkwhale_network/crawler.py
new file mode 100644
index 0000000000000000000000000000000000000000..cb9a1a1fb2b8353dffaec6e11cebdcd158a95135
--- /dev/null
+++ b/funkwhale_network/crawler.py
@@ -0,0 +1,37 @@
+from . import schemas
+
+
+async def fetch(session, domain):
+    """Return the raw NodeInfo 2.0 payload published by ``domain``."""
+    nodeinfo = await get_well_known_data(session, domain)
+    data = await get_nodeinfo(session, nodeinfo)
+    return data
+
+
+async def get_well_known_data(session, domain, protocol="https"):
+    """Return the JSON served at ``/.well-known/nodeinfo`` on ``domain``."""
+    # Build the URL from ``protocol`` instead of hard-coding the scheme, so the
+    # parameter is actually honoured (the default keeps the previous behaviour).
+    url = f"{protocol}://{domain}/.well-known/nodeinfo"
+    response = await session.get(url)
+    return await response.json()
+
+
+async def get_nodeinfo(session, nodeinfo):
+    """Follow the NodeInfo 2.0 link of a well-known document and return its JSON.
+
+    Raises ``RuntimeError`` when the document advertises no NodeInfo 2.0 link.
+    """
+    for link in nodeinfo.get("links", []):
+        if link.get("rel") == "http://nodeinfo.diaspora.software/ns/schema/2.0":
+            response = await session.get(link["href"])
+            return await response.json()
+    # A bare ``raise`` has no active exception here and only produced an opaque
+    # "No active exception to reraise" RuntimeError; keep the type, add a message.
+    raise RuntimeError("No NodeInfo 2.0 link found in well-known document")
+
+
+def clean_nodeinfo(data):
+    """Validate ``data`` against ``NodeInfo2Schema`` and return the load result."""
+    schema = schemas.NodeInfo2Schema()
+    return schema.load(data)
diff --git a/funkwhale_network/schemas.py b/funkwhale_network/schemas.py
new file mode 100644
index 0000000000000000000000000000000000000000..01db987db5b6421d260a074c12e9b736ff35841d
--- /dev/null
+++ b/funkwhale_network/schemas.py
@@ -0,0 +1,105 @@
+import re
+
+import marshmallow
+import semver
+
+
+class VersionField(marshmallow.fields.Str):
+    """String field deserialized into the dict returned by ``semver.parse``."""
+
+    def deserialize(self, value, *args, **kwargs):
+        """Parse ``value`` as semver, tolerating a missing patch number.
+
+        Raises ``marshmallow.ValidationError`` when no version can be parsed.
+        """
+        value = super().deserialize(value, *args, **kwargs)
+        try:
+            return semver.parse(value)
+        except ValueError:
+            # funkwhale does not always include the patch version, so we add
+            # the 0 ourselves and try again
+            try:
+                v_regex = r"(\d+\.\d+)"
+                match = re.match(v_regex, value)
+                if match and match[0]:
+                    new_version = f"{match[0]}.0"
+                    return semver.parse(value.replace(match[0], new_version, 1))
+                raise ValueError()
+            except (ValueError, IndexError):
+                raise marshmallow.ValidationError(
+                    f"{value} is not a semver version number"
+                )
+
+
+class SoftwareSchema(marshmallow.Schema):
+    """``software`` section: only Funkwhale nodes with a parseable version pass."""
+
+    name = marshmallow.fields.String(
+        required=True, validate=[marshmallow.validate.OneOf(["funkwhale", "Funkwhale"])]
+    )
+    version = VersionField(required=True)
+
+
+# Excerpt of a sample NodeInfo payload, kept for reference next to the schemas.
+"""
+"openRegistrations": False,
+    "usage": {"users": {"total": 78}},
+    "metadata": {
+        "private": False,
+        "nodeName": "Funkwhale 101",
+        "library": {
+            "federationEnabled": True,
+            "federationNeedsApproval": True,
+            "anonymousCanListen": True,
+            "tracks": {"total": 98552},
+            "artists": {"total": 9831},
+            "albums": {"total": 10872},
+            "music": {"hours": 7650.678055555555},
+        },
+        "usage": {
+            "favorites": {"tracks": {"total": 1683}},
+            "listenings": {"total": 50294},
+        },
+    },
+}
+"""
+
+
+class StatisticsSchema(marshmallow.Schema):
+    """A counter object such as ``{"total": 78}``."""
+
+    total = marshmallow.fields.Integer(required=False)
+
+
+class UsageSchema(marshmallow.Schema):
+    """Top-level ``usage`` section of a NodeInfo document."""
+
+    users = marshmallow.fields.Nested(StatisticsSchema, required=False)
+
+
+class LibraryMetadataSchema(marshmallow.Schema):
+    """``metadata.library`` section; only the two flags below are validated."""
+
+    anonymousCanListen = marshmallow.fields.Boolean(required=True)
+    federationEnabled = marshmallow.fields.Boolean(required=True)
+
+
+class MetadataSchema(marshmallow.Schema):
+    """``metadata`` section of a NodeInfo document."""
+
+    nodeName = marshmallow.fields.String(required=True)
+    private = marshmallow.fields.Boolean(required=True)
+    # ``metadata.usage`` (favorites/listenings) has a different shape than
+    # ``metadata.library``, so it must not share LibraryMetadataSchema: the
+    # former ``library = usage = ...`` alias made every such payload fail
+    # validation on the library-only required flags.
+    library = marshmallow.fields.Nested(LibraryMetadataSchema, required=False)
+
+
+class NodeInfo2Schema(marshmallow.Schema):
+    """Root schema for a NodeInfo 2.0 document."""
+
+    software = marshmallow.fields.Nested(SoftwareSchema)
+    openRegistrations = marshmallow.fields.Boolean(required=True)
+    usage = marshmallow.fields.Nested(UsageSchema, required=False)
+    metadata = marshmallow.fields.Nested(MetadataSchema, required=False)
diff --git a/setup.cfg b/setup.cfg
index c939b30a154a41477faa3b4d26e8cf4ab6f139c7..248fc8c5d387ad0030a78ba950d51c2e9d94d545 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -21,6 +21,8 @@ install_requires =
     aiopg
     aiohttp
     arq
+    marshmallow
+    semver
 
 [options.entry_points]
 console_scripts =
@@ -30,6 +32,7 @@ console_scripts =
 dev =
     ipdb
     pytest
     pytest-mock
+    aioresponses
 [options.packages.find]
 exclude =
diff --git a/tests/conftest.py b/tests/conftest.py
index c29543adfa4516e1d3c3272143e228b03f8a8677..f6600d7cb096ab6f5e2ded95c067d18281c4e7df 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,15 +1,19 @@
+import os
+
 import pytest
+import aiohttp
 from aiohttp import web
+from aioresponses import aioresponses
+
 import funkwhale_network
 from funkwhale_network import db
-import os
 
 pytest_plugins = "aiohttp.pytest_plugin"
 
 
 @pytest.fixture
 def client(loop, aiohttp_client, populated_db, db_pool):
     app = web.Application(middlewares=funkwhale_network.MIDDLEWARES)
     funkwhale_network.prepare_app(app, pool=db_pool)
     yield loop.run_until_complete(aiohttp_client(app))
 
@@ -38,3 +42,17 @@ async def populated_db(db_pool):
         await db.create(conn)
         yield conn
         await db.clear(conn)
+
+
+@pytest.fixture
+def responses():
+    """Mock aiohttp requests through ``aioresponses`` for the test's duration."""
+    with aioresponses() as m:
+        yield m
+
+
+@pytest.fixture
+async def session(loop):
+    """Yield an ``aiohttp.ClientSession`` that is closed at teardown."""
+    async with aiohttp.ClientSession() as session:
+        yield session
diff --git a/tests/test_crawler.py b/tests/test_crawler.py
new file mode 100644
index 0000000000000000000000000000000000000000..ddcc226b630c81a59d25ce8e6e9bc1408299a1ec
--- /dev/null
+++ b/tests/test_crawler.py
@@ -0,0 +1,78 @@
+import pytest
+
+from funkwhale_network import crawler
+
+
+async def test_fetch(session, responses):
+    """``fetch`` follows the well-known document to the NodeInfo 2.0 payload."""
+    domain = "test.domain"
+    well_known_payload = {
+        "links": [
+            {
+                "rel": "http://nodeinfo.diaspora.software/ns/schema/2.0",
+                "href": "https://test.domain/nodeinfo/2.0/",
+            }
+        ]
+    }
+    payload = {"hello": "world"}
+    responses.get(
+        "https://test.domain/.well-known/nodeinfo", payload=well_known_payload
+    )
+    responses.get("https://test.domain/nodeinfo/2.0/", payload=payload)
+    result = await crawler.fetch(session, domain)
+    assert result == payload
+
+
+async def test_get_nodeinfo_without_matching_link(session):
+    """A well-known document without a NodeInfo 2.0 link is reported explicitly."""
+    with pytest.raises(RuntimeError):
+        await crawler.get_nodeinfo(session, {"links": []})
+
+
+def test_validate_data():
+    """``clean_nodeinfo`` keeps only the validated subset of a real payload."""
+    payload = {
+        "version": "2.0",
+        "software": {"name": "funkwhale", "version": "0.18-dev+git.b575999e"},
+        "openRegistrations": False,
+        "usage": {"users": {"total": 78}},
+        "metadata": {
+            "private": False,
+            "nodeName": "Funkwhale 101",
+            "library": {
+                "federationEnabled": True,
+                "federationNeedsApproval": True,
+                "anonymousCanListen": True,
+                "tracks": {"total": 98552},
+                "artists": {"total": 9831},
+                "albums": {"total": 10872},
+                "music": {"hours": 7650.678055555555},
+            },
+            "usage": {
+                "favorites": {"tracks": {"total": 1683}},
+                "listenings": {"total": 50294},
+            },
+        },
+    }
+    expected = {
+        "software": {
+            "name": "funkwhale",
+            "version": {
+                "major": 0,
+                "minor": 18,
+                "patch": 0,
+                "prerelease": "dev",
+                "build": "git.b575999e",
+            },
+        },
+        "openRegistrations": False,
+        "usage": {"users": {"total": 78}},
+        "metadata": {
+            "private": False,
+            "nodeName": "Funkwhale 101",
+            "library": {"federationEnabled": True, "anonymousCanListen": True},
+        },
+    }
+    result = crawler.clean_nodeinfo(payload)
+    assert result.data == expected
+    assert result.errors == {}
diff --git a/tests/test_schemas.py b/tests/test_schemas.py
new file mode 100644
index 0000000000000000000000000000000000000000..c3f21d888940a47b45b7d0d8aff6bba1f35c80ef
--- /dev/null
+++ b/tests/test_schemas.py
@@ -0,0 +1,47 @@
+import pytest
+
+from funkwhale_network import schemas
+
+
+@pytest.mark.parametrize(
+    "value, expected",
+    [
+        (
+            "1.2.3-dev+build-1",
+            {
+                "major": 1,
+                "minor": 2,
+                "patch": 3,
+                "prerelease": "dev",
+                "build": "build-1",
+            },
+        ),
+        (
+            "1.2-dev+build-1",
+            {
+                "major": 1,
+                "minor": 2,
+                "patch": 0,
+                "prerelease": "dev",
+                "build": "build-1",
+            },
+        ),
+        (
+            "1.2+build-1",
+            {
+                "major": 1,
+                "minor": 2,
+                "patch": 0,
+                "prerelease": None,
+                "build": "build-1",
+            },
+        ),
+        (
+            "1.2",
+            {"major": 1, "minor": 2, "patch": 0, "prerelease": None, "build": None},
+        ),
+    ],
+)
+def test_validate_version_number(value, expected):
+    """Versions with or without a patch number parse to semver dicts."""
+    assert schemas.VersionField().deserialize(value) == expected