From 672a194002583b04386ea96d17386b3df8f85975 Mon Sep 17 00:00:00 2001
From: Eliot Berriot <contact@eliotberriot.com>
Date: Sun, 9 Jun 2019 17:59:29 +0200
Subject: [PATCH] Support parsing of markdown in activity pub fields

---
 retribute_api/search/activitypub.py | 20 ++++++++++++-------
 retribute_api/search/sources.py     |  5 +++--
 setup.cfg                           |  1 +
 tests/search/test_activitypub.py    | 30 ++++++++++++++++++++++++-----
 4 files changed, 42 insertions(+), 14 deletions(-)

diff --git a/retribute_api/search/activitypub.py b/retribute_api/search/activitypub.py
index 00f5246..0f0792b 100644
--- a/retribute_api/search/activitypub.py
+++ b/retribute_api/search/activitypub.py
@@ -1,4 +1,6 @@
 import lxml.html
+import markdown
+
 from rest_framework import serializers
 
 
@@ -27,15 +29,18 @@ def extract_urls_from_attachments(attachments):
     return data
 
 
-def extract_urls_from_summary(summary):
+def extract_urls_from_text_fields(*fields):
     data = []
-    if not summary:
-        return []
 
-    links = get_links(summary)
-    for link in links:
-        row = {"summary": link.text, "url": link.get("href")}
-        data.append(row)
+    for field in fields:
+        if not field:
+            continue
+        # HACK: Markdown rendering keeps raw HTML, so this covers PeerTube and Mastodon
+        html = markdown.markdown(field)
+        links = get_links(html)
+        for link in links:
+            row = {"summary": link.text, "url": link.get("href")}
+            data.append(row)
 
     return data
 
@@ -54,6 +59,7 @@ class AttachmentSerializer(serializers.Serializer):
 class ActorSerializer(serializers.Serializer):
     id = serializers.URLField()
     summary = serializers.CharField(required=False)
+    support = serializers.CharField(required=False)
     url = serializers.URLField(required=False)
     attachment = serializers.ListField(
         child=AttachmentSerializer(), min_length=0, required=False
diff --git a/retribute_api/search/sources.py b/retribute_api/search/sources.py
index 71097d0..98bb5ec 100644
--- a/retribute_api/search/sources.py
+++ b/retribute_api/search/sources.py
@@ -61,8 +61,9 @@ class Activitypub(Source):
             "links": activitypub.extract_urls_from_attachments(
                 serializer.validated_data.get("attachment", [])
             )
-            + activitypub.extract_urls_from_summary(
-                serializer.validated_data["summary"]
+            + activitypub.extract_urls_from_text_fields(
+                serializer.validated_data.get("summary"),
+                serializer.validated_data.get("support"),
             )
         }
 
diff --git a/setup.cfg b/setup.cfg
index 392b6ee..d045583 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -34,6 +34,7 @@ install_requires =
     django-environ
     django-redis
     djangorestframework
+    markdown
     psycopg2-binary
     lxml
 
diff --git a/tests/search/test_activitypub.py b/tests/search/test_activitypub.py
index 382c7d1..3974e4e 100644
--- a/tests/search/test_activitypub.py
+++ b/tests/search/test_activitypub.py
@@ -1,3 +1,5 @@
+import pytest
+
 from retribute_api.search import activitypub
 
 
@@ -43,8 +45,26 @@ def test_extract_urls_from_attachments():
     assert activitypub.extract_urls_from_attachments(attachments) == expected
 
 
-def test_extract_urls_from_summary():
-    summary = '<p>This is a link to my Paypal account too: <a href="https://paypal.me/username">Test</a>'
-    expected = [{"summary": "Test", "url": "https://paypal.me/username"}]
-
-    assert activitypub.extract_urls_from_summary(summary) == expected
+@pytest.mark.parametrize(
+    "fields, expected",
+    [
+        (
+            [
+                '<p>This is a link to my Paypal account too: <a href="https://paypal.me/username">Test</a>'
+            ],
+            [{"summary": "Test", "url": "https://paypal.me/username"}],
+        ),
+        (
+            [
+                '<p>This is a link to my Paypal account too: <a href="https://paypal.me/username">Test</a>',
+                "Markdown test [patreon](https://patreon.com/username)",
+            ],
+            [
+                {"summary": "Test", "url": "https://paypal.me/username"},
+                {"summary": "patreon", "url": "https://patreon.com/username"},
+            ],
+        ),
+    ],
+)
+def test_extract_urls_from_text_fields(fields, expected):
+    assert activitypub.extract_urls_from_text_fields(*fields) == expected
-- 
GitLab