From 3b1e50c683d109a7dbb0c59eb44e5497ecd04f4d Mon Sep 17 00:00:00 2001
From: Petitminion <petitminion@riseup.net>
Date: Wed, 5 Mar 2025 17:49:13 +0100
Subject: [PATCH] delete domains that are not funkwhale ones

---
 funkwhale_network/crawler.py | 20 +++++++++++++++++---
 funkwhale_network/db.py      |  7 +++++++
 2 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/funkwhale_network/crawler.py b/funkwhale_network/crawler.py
index b559952..b0fd305 100644
--- a/funkwhale_network/crawler.py
+++ b/funkwhale_network/crawler.py
@@ -4,6 +4,7 @@ import sys
 
 import aiohttp
 import aiopg
+import marshmallow
 import psycopg2
 
 from funkwhale_network import exceptions, schemas, settings
@@ -46,7 +47,7 @@ async def check(session, domain, stdout=sys.stdout):
         check_data = {"up": True, "domain": domain}
         try:
             nodeinfo = await fetch_nodeinfo(session, domain)
-            cleaned_nodeinfo = clean_nodeinfo(nodeinfo)
+            cleaned_nodeinfo = clean_nodeinfo(nodeinfo, domain_name=domain)
             cleaned_check = clean_check(check_data, cleaned_nodeinfo)
         except (aiohttp.client_exceptions.ClientError, exceptions.CrawlerError) as e:
             stdout.write(
@@ -127,9 +128,22 @@ async def gather_known_nodes(session, url, data):
         data["pending_domains"] |= known_domains - data["handled_domains"]
 
 
-def clean_nodeinfo(data):
+def clean_nodeinfo(data, domain_name):  # load nodeinfo through NodeInfo2Schema; None after the delete branch
     schema = schemas.NodeInfo2Schema()
-    result = schema.load(data)
+    try:
+        result = schema.load(data)
+    except marshmallow.exceptions.ValidationError as e:
+        if "Must be one of: funkwhale, Funkwhale" in e.messages.get(
+            "software.name", []  # NOTE(review): flat dotted key + exact element match; confirm against real e.messages
+        ):
+            DB.delete_domain(domain_name)  # NOTE(review): declared `async def delete_domain(self, name)`; not awaited, no instance
+            print(
+                f"Deleted {domain_name} from database since it's not a Funkwhale instance",
+                flush=True,
+            )
+            return  # NOTE(review): check() hands this None to clean_check(); confirm that is handled
+        else:
+            raise e  # any other validation failure propagates to the caller unchanged
     return result
 
 
diff --git a/funkwhale_network/db.py b/funkwhale_network/db.py
index afd2260..4b3a99c 100644
--- a/funkwhale_network/db.py
+++ b/funkwhale_network/db.py
@@ -281,3 +281,10 @@ class DB:
             await cursor.execute(sql, [data["name"]])
             domain = await cursor.fetchone()
             return domain
+
+    async def delete_domain(self, name):  # delete every `checks` row whose domain equals `name`
+        with await self.pool.cursor(
+            cursor_factory=psycopg2.extras.RealDictCursor
+        ) as cursor:
+            sql = "DELETE FROM checks WHERE domain = %s"  # NOTE(review): only `checks` is touched; confirm no domains row must go too
+            await cursor.execute(sql, [name])  # parameters must be a sequence, not a bare str
-- 
GitLab