From 672a194002583b04386ea96d17386b3df8f85975 Mon Sep 17 00:00:00 2001 From: Eliot Berriot <contact@eliotberriot.com> Date: Sun, 9 Jun 2019 17:59:29 +0200 Subject: [PATCH] Support parsing of markdown in activity pub fields --- retribute_api/search/activitypub.py | 20 ++++++++++++------- retribute_api/search/sources.py | 5 +++-- setup.cfg | 1 + tests/search/test_activitypub.py | 30 ++++++++++++++++++++++++----- 4 files changed, 42 insertions(+), 14 deletions(-) diff --git a/retribute_api/search/activitypub.py b/retribute_api/search/activitypub.py index 00f5246..0f0792b 100644 --- a/retribute_api/search/activitypub.py +++ b/retribute_api/search/activitypub.py @@ -1,4 +1,6 @@ import lxml.html +import markdown + from rest_framework import serializers @@ -27,15 +29,18 @@ def extract_urls_from_attachments(attachments): return data -def extract_urls_from_summary(summary): +def extract_urls_from_text_fields(*fields): data = [] - if not summary: - return [] - links = get_links(summary) - for link in links: - row = {"summary": link.text, "url": link.get("href")} - data.append(row) + for field in fields: + if not field: + continue + # ugly but only way to support PeerTube and Mastodon right now + html = markdown.markdown(field) + links = get_links(html) + for link in links: + row = {"summary": link.text, "url": link.get("href")} + data.append(row) return data @@ -54,6 +59,7 @@ class AttachmentSerializer(serializers.Serializer): class ActorSerializer(serializers.Serializer): id = serializers.URLField() summary = serializers.CharField(required=False) + support = serializers.CharField(required=False) url = serializers.URLField(required=False) attachment = serializers.ListField( child=AttachmentSerializer(), min_length=0, required=False diff --git a/retribute_api/search/sources.py b/retribute_api/search/sources.py index 71097d0..98bb5ec 100644 --- a/retribute_api/search/sources.py +++ b/retribute_api/search/sources.py @@ -61,8 +61,9 @@ class Activitypub(Source): "links": activitypub.extract_urls_from_attachments( serializer.validated_data.get("attachment", []) ) - + activitypub.extract_urls_from_summary( - serializer.validated_data["summary"] + + activitypub.extract_urls_from_text_fields( + serializer.validated_data.get("summary"), + serializer.validated_data.get("support"), ) } diff --git a/setup.cfg b/setup.cfg index 392b6ee..d045583 100644 --- a/setup.cfg +++ b/setup.cfg @@ -34,6 +34,7 @@ install_requires = django-environ django-redis djangorestframework + markdown psycopg2-binary lxml diff --git a/tests/search/test_activitypub.py b/tests/search/test_activitypub.py index 382c7d1..3974e4e 100644 --- a/tests/search/test_activitypub.py +++ b/tests/search/test_activitypub.py @@ -1,3 +1,5 @@ +import pytest + from retribute_api.search import activitypub @@ -43,8 +45,26 @@ def test_extract_urls_from_attachments(): assert activitypub.extract_urls_from_attachments(attachments) == expected -def test_extract_urls_from_summary(): - summary = '<p>This is a link to my Paypal account too: <a href="https://paypal.me/username">Test</a>' - expected = [{"summary": "Test", "url": "https://paypal.me/username"}] - - assert activitypub.extract_urls_from_summary(summary) == expected +@pytest.mark.parametrize( + "input, expected", + [ + ( + [ + '<p>This is a link to my Paypal account too: <a href="https://paypal.me/username">Test</a>' + ], + [{"summary": "Test", "url": "https://paypal.me/username"}], + ), + ( + [ + '<p>This is a link to my Paypal account too: <a href="https://paypal.me/username">Test</a>', + "Markdown test [patreon](https://patreon.com/username)", + ], + [ + {"summary": "Test", "url": "https://paypal.me/username"}, + {"summary": "patreon", "url": "https://patreon.com/username"}, + ], + ), + ], +) +def test_extract_urls_from_text_fields(input, expected): + assert activitypub.extract_urls_from_text_fields(*input) == expected -- GitLab