diff --git a/funkwhale_network/crawler.py b/funkwhale_network/crawler.py
index b559952e626bc54d0836f148bc66b9d3886f930f..b0fd30591c2f87036112e6ae0518235f9f2c8770 100644
--- a/funkwhale_network/crawler.py
+++ b/funkwhale_network/crawler.py
@@ -4,6 +4,7 @@ import sys
 
 import aiohttp
 import aiopg
+import marshmallow
 import psycopg2
 
 from funkwhale_network import exceptions, schemas, settings
@@ -46,7 +47,7 @@ async def check(session, domain, stdout=sys.stdout):
     check_data = {"up": True, "domain": domain}
     try:
         nodeinfo = await fetch_nodeinfo(session, domain)
-        cleaned_nodeinfo = clean_nodeinfo(nodeinfo)
+        cleaned_nodeinfo = clean_nodeinfo(nodeinfo, domain_name=domain)
         cleaned_check = clean_check(check_data, cleaned_nodeinfo)
     except (aiohttp.client_exceptions.ClientError, exceptions.CrawlerError) as e:
         stdout.write(
@@ -127,9 +128,31 @@ async def gather_known_nodes(session, url, data):
     data["pending_domains"] |= known_domains - data["handled_domains"]
 
 
-def clean_nodeinfo(data):
+def clean_nodeinfo(data, domain_name):
+    """Validate raw nodeinfo ``data`` with ``NodeInfo2Schema``.
+
+    Returns the loaded result. If validation fails because the software name
+    is not Funkwhale, calls ``DB.delete_domain(domain_name)``, prints a notice
+    and returns ``None``; any other validation error is re-raised.
+    """
     schema = schemas.NodeInfo2Schema()
-    result = schema.load(data)
+    try:
+        result = schema.load(data)
+    except marshmallow.exceptions.ValidationError as e:
+        # NOTE(review): marshmallow normally nests errors of nested fields
+        # ({"software": {"name": [...]}}) -- confirm this flat key can match.
+        if "Must be one of: funkwhale, Funkwhale" in e.messages.get(
+            "software.name", []
+        ):
+            # NOTE(review): delete_domain is declared ``async def``; called
+            # here without ``await`` its body never runs -- TODO confirm/fix.
+            DB.delete_domain(domain_name)
+            print(
+                f"Deleted {domain_name} from database since it's not a Funkwhale instance",
+                flush=True,
+            )
+            return None
+        raise
     return result
 
 
diff --git a/funkwhale_network/db.py b/funkwhale_network/db.py
index afd226043496e7ffd74465a503ddc01f8fd7a269..4b3a99ce714d151a246b60b73196ff8657af68f2 100644
--- a/funkwhale_network/db.py
+++ b/funkwhale_network/db.py
@@ -281,3 +281,12 @@ class DB:
             await cursor.execute(sql, [data["name"]])
             domain = await cursor.fetchone()
             return domain
+
+    async def delete_domain(self, name):
+        """Delete every row of ``checks`` whose ``domain`` equals ``name``."""
+        with await self.pool.cursor(
+            cursor_factory=psycopg2.extras.RealDictCursor
+        ) as cursor:
+            sql = "DELETE FROM checks WHERE domain = %s"
+            # Wrap in a list so the name is bound as one parameter.
+            await cursor.execute(sql, [name])