Support parsing of Markdown in ActivityPub fields

672a1940 · Eliot Berriot · 44a56cd5
Verified Commit 672a1940 authored 6 years ago by Eliot Berriot
--- a/retribute_api/search/activitypub.py
+++ b/retribute_api/search/activitypub.py
 import lxml.html
+import markdown
 from rest_framework import serializers
@@ -27,12 +29,15 @@ def extract_urls_from_attachments(attachments):
    return data
-def extract_urls_from_summary(summary):
+def extract_urls_from_text_fields(*fields):
    data = []
-    if not summary:
-        return []
-    links = get_links(summary)
+    for field in fields:
+        if not field:
+            continue
+        # Ugly, but running every field through Markdown is the only way to support both PeerTube and Mastodon right now
+        html = markdown.markdown(field)
+        links = get_links(html)
        for link in links:
            row = {"summary": link.text, "url": link.get("href")}
            data.append(row)
@@ -54,6 +59,7 @@ class AttachmentSerializer(serializers.Serializer):
 class ActorSerializer(serializers.Serializer):
    id = serializers.URLField()
    summary = serializers.CharField(required=False)
+    support = serializers.CharField(required=False)
    url = serializers.URLField(required=False)
    attachment = serializers.ListField(
        child=AttachmentSerializer(), min_length=0, required=False

--- a/retribute_api/search/sources.py
+++ b/retribute_api/search/sources.py
@@ -61,8 +61,9 @@ class Activitypub(Source):
            "links": activitypub.extract_urls_from_attachments(
                serializer.validated_data.get("attachment", [])
            )
-            + activitypub.extract_urls_from_summary(
+            + activitypub.extract_urls_from_text_fields(
-                serializer.validated_data["summary"]
+                serializer.validated_data.get("summary"),
+                serializer.validated_data.get("support"),
            )
        }

--- a/setup.cfg
+++ b/setup.cfg
@@ -34,6 +34,7 @@ install_requires =
    django-environ
    django-redis
    djangorestframework
+    markdown
    psycopg2-binary
    lxml

--- a/tests/search/test_activitypub.py
+++ b/tests/search/test_activitypub.py
+import pytest
 from retribute_api.search import activitypub
@@ -43,8 +45,26 @@ def test_extract_urls_from_attachments():
    assert activitypub.extract_urls_from_attachments(attachments) == expected
-def test_extract_urls_from_summary():
+@pytest.mark.parametrize(
-    summary = '<p>This is a link to my Paypal account too: <a href="https://paypal.me/username">Test</a>'
+    "input, expected",
-    expected = [{"summary": "Test", "url": "https://paypal.me/username"}]
+    [
+        (
-    assert activitypub.extract_urls_from_summary(summary) == expected
+            [
+                '<p>This is a link to my Paypal account too: <a href="https://paypal.me/username">Test</a>'
+            ],
+            [{"summary": "Test", "url": "https://paypal.me/username"}],
+        ),
+        (
+            [
+                '<p>This is a link to my Paypal account too: <a href="https://paypal.me/username">Test</a>',
+                "Markdown test [patreon](https://patreon.com/username)",
+            ],
+            [
+                {"summary": "Test", "url": "https://paypal.me/username"},
+                {"summary": "patreon", "url": "https://patreon.com/username"},
+            ],
+        ),
+    ],
+)
+def test_extract_urls_from_text_fields(input, expected):
+    assert activitypub.extract_urls_from_text_fields(*input) == expected