Extract links from both the `summary` and `support` text fields of an
ActivityPub actor. Each non-empty field is passed through markdown.markdown()
before link extraction, so Markdown-style links are picked up as well as HTML
anchors. Adds `markdown` to install_requires and parametrizes the test.

diff --git a/retribute_api/search/activitypub.py b/retribute_api/search/activitypub.py
index 00f5246e0ab441cfc512915d4bed50c5d01dcd3b..0f0792b9eefb5ffd7d787ef20d5272f549646efb 100644
--- a/retribute_api/search/activitypub.py
+++ b/retribute_api/search/activitypub.py
@@ -1,4 +1,6 @@
 import lxml.html
+import markdown
+
 from rest_framework import serializers
 
 
@@ -27,15 +29,18 @@ def extract_urls_from_attachments(attachments):
     return data
 
 
-def extract_urls_from_summary(summary):
+def extract_urls_from_text_fields(*fields):
     data = []
-    if not summary:
-        return []
 
-    links = get_links(summary)
-    for link in links:
-        row = {"summary": link.text, "url": link.get("href")}
-        data.append(row)
+    for field in fields:
+        if not field:
+            continue
+        # ugly but only way to support PeerTube and Mastodon right now
+        html = markdown.markdown(field)
+        links = get_links(html)
+        for link in links:
+            row = {"summary": link.text, "url": link.get("href")}
+            data.append(row)
     return data
 
 
@@ -54,6 +59,7 @@ class AttachmentSerializer(serializers.Serializer):
 class ActorSerializer(serializers.Serializer):
     id = serializers.URLField()
     summary = serializers.CharField(required=False)
+    support = serializers.CharField(required=False)
     url = serializers.URLField(required=False)
     attachment = serializers.ListField(
         child=AttachmentSerializer(), min_length=0, required=False
diff --git a/retribute_api/search/sources.py b/retribute_api/search/sources.py
index 71097d0ae4e791e5a6995e59fa89485db656eff0..98bb5ec9fc1d4ad3d7dee144a4b4e78ce3b6198b 100644
--- a/retribute_api/search/sources.py
+++ b/retribute_api/search/sources.py
@@ -61,8 +61,9 @@ class Activitypub(Source):
             "links": activitypub.extract_urls_from_attachments(
                 serializer.validated_data.get("attachment", [])
             )
-            + activitypub.extract_urls_from_summary(
-                serializer.validated_data["summary"]
+            + activitypub.extract_urls_from_text_fields(
+                serializer.validated_data.get("summary"),
+                serializer.validated_data.get("support"),
             )
         }
 
diff --git a/setup.cfg b/setup.cfg
index 392b6eee9f2d27ac4c2434856402e57777230311..d045583d0f63bfd53713bc43878fb60a71680593 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -34,6 +34,7 @@ install_requires =
     django-environ
     django-redis
     djangorestframework
+    markdown
     psycopg2-binary
     lxml
 
diff --git a/tests/search/test_activitypub.py b/tests/search/test_activitypub.py
index 382c7d1571facff78050636501cda577a6738ee6..3974e4ebe542fae6ea3f76698c5a739fe7ba1ce3 100644
--- a/tests/search/test_activitypub.py
+++ b/tests/search/test_activitypub.py
@@ -1,3 +1,5 @@
+import pytest
+
 from retribute_api.search import activitypub
 
 
@@ -43,8 +45,26 @@ def test_extract_urls_from_attachments():
     assert activitypub.extract_urls_from_attachments(attachments) == expected
 
 
-def test_extract_urls_from_summary():
-    summary = '<p>This is a link to my Paypal account too: <a href="https://paypal.me/username">Test</a>'
-    expected = [{"summary": "Test", "url": "https://paypal.me/username"}]
-
-    assert activitypub.extract_urls_from_summary(summary) == expected
+@pytest.mark.parametrize(
+    "input, expected",
+    [
+        (
+            [
+                '<p>This is a link to my Paypal account too: <a href="https://paypal.me/username">Test</a>'
+            ],
+            [{"summary": "Test", "url": "https://paypal.me/username"}],
+        ),
+        (
+            [
+                '<p>This is a link to my Paypal account too: <a href="https://paypal.me/username">Test</a>',
+                "Markdown test [patreon](https://patreon.com/username)",
+            ],
+            [
+                {"summary": "Test", "url": "https://paypal.me/username"},
+                {"summary": "patreon", "url": "https://patreon.com/username"},
+            ],
+        ),
+    ],
+)
+def test_extract_urls_from_text_fields(input, expected):
+    assert activitypub.extract_urls_from_text_fields(*input) == expected