diff --git a/funkwhale_network/crawler.py b/funkwhale_network/crawler.py
new file mode 100644
index 0000000000000000000000000000000000000000..cb9a1a1fb2b8353dffaec6e11cebdcd158a95135
--- /dev/null
+++ b/funkwhale_network/crawler.py
@@ -0,0 +1,26 @@
+from . import schemas
+
+
+async def fetch(session, domain):
+    # Two hops: the well-known document first, then the nodeinfo link it lists.
+    well_known = await get_well_known_data(session, domain)
+    return await get_nodeinfo(session, well_known)
+
+
+async def get_well_known_data(session, domain, protocol="https"):
+    # GET <protocol>://<domain>/.well-known/nodeinfo and return its decoded JSON.
+    response = await session.get(f"{protocol}://{domain}/.well-known/nodeinfo")
+    return await response.json()
+
+
+async def get_nodeinfo(session, nodeinfo):
+    # Follow the first nodeinfo 2.0 link of the well-known document; fail loudly if none.
+    for link in nodeinfo.get("links", []):
+        if link.get("rel") == "http://nodeinfo.diaspora.software/ns/schema/2.0":
+            return await (await session.get(link["href"])).json()
+    raise RuntimeError("no nodeinfo 2.0 link found in well-known document")
+
+
+def clean_nodeinfo(data):
+    # Run the raw nodeinfo payload through NodeInfo2Schema and return the load result.
+    return schemas.NodeInfo2Schema().load(data)
diff --git a/funkwhale_network/schemas.py b/funkwhale_network/schemas.py
new file mode 100644
index 0000000000000000000000000000000000000000..01db987db5b6421d260a074c12e9b736ff35841d
--- /dev/null
+++ b/funkwhale_network/schemas.py
@@ -0,0 +1,82 @@
+import marshmallow
+import semver
+import re
+
+
+class VersionField(marshmallow.fields.Str):
+    """String field deserialized into the value returned by ``semver.parse``."""
+
+    def deserialize(self, value, *args, **kwargs):
+        value = super().deserialize(value, *args, **kwargs)
+        if not isinstance(value, str):
+            return value  # e.g. ``missing``/``None``: nothing to parse
+        try:
+            return semver.parse(value)
+        except ValueError:
+            pass
+        # funkwhale does not always include the patch version, so we add the 0
+        # ourselves after the leading "major.minor" pair and try again
+        match = re.match(r"(\d+\.\d+)", value)
+        try:
+            if match is None:
+                raise ValueError()
+            return semver.parse(value.replace(match[0], f"{match[0]}.0", 1))
+        except ValueError:
+            raise marshmallow.ValidationError(f"{value} is not a semver version number")
+
+
+class SoftwareSchema(marshmallow.Schema):
+    name = marshmallow.fields.String(
+        required=True, validate=[marshmallow.validate.OneOf(["funkwhale", "Funkwhale"])]
+    )  # only these two exact spellings pass validation
+    version = VersionField(required=True)  # deserialized through semver.parse
+
+
+"""
+"openRegistrations": False,
+        "usage": {"users": {"total": 78}},
+        "metadata": {
+            "private": False,
+            "nodeName": "Funkwhale 101",
+            "library": {
+                "federationEnabled": True,
+                "federationNeedsApproval": True,
+                "anonymousCanListen": True,
+                "tracks": {"total": 98552},
+                "artists": {"total": 9831},
+                "albums": {"total": 10872},
+                "music": {"hours": 7650.678055555555},
+            },
+            "usage": {
+                "favorites": {"tracks": {"total": 1683}},
+                "listenings": {"total": 50294},
+            },
+        },
+    }
+"""
+
+
+class StatisticsSchema(marshmallow.Schema):  # a single optional integer counter
+    total = marshmallow.fields.Integer(required=False)
+
+
+class UsageSchema(marshmallow.Schema):  # shape of the top-level "usage" object
+    users = marshmallow.fields.Nested(StatisticsSchema, required=False)
+
+
+class LibraryMetadataSchema(marshmallow.Schema):  # shape of the "library" object
+    anonymousCanListen = marshmallow.fields.Boolean(required=True)
+    federationEnabled = marshmallow.fields.Boolean(required=True)
+
+
+class MetadataSchema(marshmallow.Schema):  # shape of the "metadata" object
+    nodeName = marshmallow.fields.String(required=True)
+    private = marshmallow.fields.Boolean(required=True)
+    library = marshmallow.fields.Nested(LibraryMetadataSchema, required=False)
+
+
+class NodeInfo2Schema(marshmallow.Schema):  # root schema loaded by crawler.clean_nodeinfo
+    software = marshmallow.fields.Nested(SoftwareSchema)  # NOTE(review): not required — confirm
+    openRegistrations = marshmallow.fields.Boolean(required=True)
+    usage = marshmallow.fields.Nested(UsageSchema, required=False)
+    metadata = marshmallow.fields.Nested(MetadataSchema, required=False)
diff --git a/setup.cfg b/setup.cfg
index c939b30a154a41477faa3b4d26e8cf4ab6f139c7..248fc8c5d387ad0030a78ba950d51c2e9d94d545 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -21,6 +21,8 @@ install_requires =
     aiopg
     aiohttp
     arq
+    marshmallow
+    semver
 
 [options.entry_points]
 console_scripts =
@@ -30,6 +32,7 @@ console_scripts =
 dev = ipdb
     pytest
     pytest-mock
+    aioresponses
 
 [options.packages.find]
 exclude =
diff --git a/tests/conftest.py b/tests/conftest.py
index c29543adfa4516e1d3c3272143e228b03f8a8677..f6600d7cb096ab6f5e2ded95c067d18281c4e7df 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,15 +1,18 @@
+import os
+
 import pytest
-from aiohttp import web
+import aiohttp
+from aioresponses import aioresponses
+
 import funkwhale_network
 from funkwhale_network import db
-import os
 
 pytest_plugins = "aiohttp.pytest_plugin"
 
 
 @pytest.fixture
 def client(loop, aiohttp_client, populated_db, db_pool):
-    app = web.Application(middlewares=funkwhale_network.MIDDLEWARES)
+    app = aiohttp.web.Application(middlewares=funkwhale_network.MIDDLEWARES)
     funkwhale_network.prepare_app(app, pool=db_pool)
     yield loop.run_until_complete(aiohttp_client(app))
 
@@ -38,3 +41,15 @@ async def populated_db(db_pool):
         await db.create(conn)
         yield conn
         await db.clear(conn)
+
+
+@pytest.fixture
+def responses():
+    with aioresponses() as mocked:  # context stays open for the duration of the test
+        yield mocked
+
+
+@pytest.fixture
+async def session(loop):
+    async with aiohttp.ClientSession() as client_session:  # closed on fixture teardown
+        yield client_session
diff --git a/tests/test_crawler.py b/tests/test_crawler.py
new file mode 100644
index 0000000000000000000000000000000000000000..ddcc226b630c81a59d25ce8e6e9bc1408299a1ec
--- /dev/null
+++ b/tests/test_crawler.py
@@ -0,0 +1,67 @@
+from funkwhale_network import crawler
+
+
+async def test_fetch(session, responses):
+    # Both hops are mocked: the well-known document and the nodeinfo it links to.
+    nodeinfo_url = "https://test.domain/nodeinfo/2.0/"
+    nodeinfo_payload = {"hello": "world"}
+    responses.get(
+        "https://test.domain/.well-known/nodeinfo",
+        payload={
+            "links": [
+                {
+                    "rel": "http://nodeinfo.diaspora.software/ns/schema/2.0",
+                    "href": nodeinfo_url,
+                }
+            ]
+        },
+    )
+    responses.get(nodeinfo_url, payload=nodeinfo_payload)
+    assert await crawler.fetch(session, "test.domain") == nodeinfo_payload
+
+
+def test_validate_data():  # pure schema validation: no database fixture needed
+    payload = {
+        "version": "2.0",
+        "software": {"name": "funkwhale", "version": "0.18-dev+git.b575999e"},
+        "openRegistrations": False,
+        "usage": {"users": {"total": 78}},
+        "metadata": {
+            "private": False,
+            "nodeName": "Funkwhale 101",
+            "library": {
+                "federationEnabled": True,
+                "federationNeedsApproval": True,
+                "anonymousCanListen": True,
+                "tracks": {"total": 98552},
+                "artists": {"total": 9831},
+                "albums": {"total": 10872},
+                "music": {"hours": 7650.678055555555},
+            },
+            "usage": {
+                "favorites": {"tracks": {"total": 1683}},
+                "listenings": {"total": 50294},
+            },
+        },
+    }
+    expected = {
+        "software": {
+            "name": "funkwhale",
+            "version": {
+                "major": 0,
+                "minor": 18,
+                "patch": 0,
+                "prerelease": "dev",
+                "build": "git.b575999e",
+            },
+        },
+        "openRegistrations": False,
+        "usage": {"users": {"total": 78}},
+        "metadata": {
+            "private": False,
+            "nodeName": "Funkwhale 101",
+            "library": {"federationEnabled": True, "anonymousCanListen": True},
+        },
+    }
+    result = crawler.clean_nodeinfo(payload)
+    assert result.data == expected
diff --git a/tests/test_schemas.py b/tests/test_schemas.py
new file mode 100644
index 0000000000000000000000000000000000000000..c3f21d888940a47b45b7d0d8aff6bba1f35c80ef
--- /dev/null
+++ b/tests/test_schemas.py
@@ -0,0 +1,46 @@
+import pytest
+
+from funkwhale_network import schemas
+
+
+@pytest.mark.parametrize(
+    "value, expected",
+    [
+        (
+            "1.2.3-dev+build-1",
+            {
+                "major": 1,
+                "minor": 2,
+                "patch": 3,
+                "prerelease": "dev",
+                "build": "build-1",
+            },
+        ),
+        (
+            "1.2-dev+build-1",
+            {
+                "major": 1,
+                "minor": 2,
+                "patch": 0,
+                "prerelease": "dev",
+                "build": "build-1",
+            },
+        ),
+        (
+            "1.2+build-1",
+            {
+                "major": 1,
+                "minor": 2,
+                "patch": 0,
+                "prerelease": None,
+                "build": "build-1",
+            },
+        ),
+        (
+            "1.2",
+            {"major": 1, "minor": 2, "patch": 0, "prerelease": None, "build": None},
+        ),
+    ],
+)
+def test_validate_version_number(value, expected):  # a missing patch component becomes 0
+    assert schemas.VersionField().deserialize(value) == expected