From 3b1e50c683d109a7dbb0c59eb44e5497ecd04f4d Mon Sep 17 00:00:00 2001
From: Petitminion <petitminion@riseup.net>
Date: Wed, 5 Mar 2025 17:49:13 +0100
Subject: [PATCH] delete domains that are not funkwhale ones

---
Review notes (text in this section is ignored when the patch is applied):
* db.py: the query parameter is now passed as a one-element list.
* crawler.py: FIXME added, DB.delete_domain() is a coroutine that is never
  awaited from clean_nodeinfo().
* The hunks were edited by hand, so the "index" lines still name the
  original blobs.

 funkwhale_network/crawler.py | 23 ++++++++++++++++++++---
 funkwhale_network/db.py      |  9 +++++++++
 2 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/funkwhale_network/crawler.py b/funkwhale_network/crawler.py
index b559952..b0fd305 100644
--- a/funkwhale_network/crawler.py
+++ b/funkwhale_network/crawler.py
@@ -4,6 +4,7 @@ import sys
 
 import aiohttp
 import aiopg
+import marshmallow
 import psycopg2
 
 from funkwhale_network import exceptions, schemas, settings
@@ -46,7 +47,7 @@ async def check(session, domain, stdout=sys.stdout):
     check_data = {"up": True, "domain": domain}
     try:
         nodeinfo = await fetch_nodeinfo(session, domain)
-        cleaned_nodeinfo = clean_nodeinfo(nodeinfo)
+        cleaned_nodeinfo = clean_nodeinfo(nodeinfo, domain_name=domain)
         cleaned_check = clean_check(check_data, cleaned_nodeinfo)
     except (aiohttp.client_exceptions.ClientError, exceptions.CrawlerError) as e:
         stdout.write(
@@ -127,9 +128,25 @@ async def gather_known_nodes(session, url, data):
     data["pending_domains"] |= known_domains - data["handled_domains"]
 
 
-def clean_nodeinfo(data):
+def clean_nodeinfo(data, domain_name):
     schema = schemas.NodeInfo2Schema()
-    result = schema.load(data)
+    try:
+        result = schema.load(data)
+    except marshmallow.exceptions.ValidationError as e:
+        if "Must be one of: funkwhale, Funkwhale" in e.messages.get(
+            "software.name", []
+        ):
+            # FIXME(review): delete_domain is declared ``async def`` in db.py, but
+            # this synchronous function calls it without ``await``, so the DELETE
+            # is never executed. Move the deletion into an async caller.
+            DB.delete_domain(domain_name)
+            print(
+                f"Deleted {domain_name} from database since it's not a Funkwhale instance",
+                flush=True,
+            )
+            return
+        else:
+            raise e
     return result
 
 
diff --git a/funkwhale_network/db.py b/funkwhale_network/db.py
index afd2260..4b3a99c 100644
--- a/funkwhale_network/db.py
+++ b/funkwhale_network/db.py
@@ -281,3 +281,12 @@ class DB:
             await cursor.execute(sql, [data["name"]])
             domain = await cursor.fetchone()
             return domain
+
+    async def delete_domain(self, name):
+        """Delete the rows of the ``checks`` table whose domain equals *name*."""
+        with await self.pool.cursor(
+            cursor_factory=psycopg2.extras.RealDictCursor
+        ) as cursor:
+            sql = "DELETE FROM checks WHERE domain = %s"
+            # Query parameters must be a sequence, even for a single value.
+            await cursor.execute(sql, [name])
-- 
GitLab