Verified Commit 672a1940 authored by Eliot Berriot
Browse files

Support parsing of markdown in activity pub fields

parent 44a56cd5
import lxml.html
import markdown
from rest_framework import serializers
......@@ -27,15 +29,18 @@ def extract_urls_from_attachments(attachments):
return data
def extract_urls_from_summary(summary):
def extract_urls_from_text_fields(*fields):
data = []
if not summary:
return []
links = get_links(summary)
for link in links:
row = {"summary": link.text, "url": link.get("href")}
data.append(row)
for field in fields:
if not field:
continue
# ugly but only way to support PeerTube and Mastodon right now
html = markdown.markdown(field)
links = get_links(html)
for link in links:
row = {"summary": link.text, "url": link.get("href")}
data.append(row)
return data
......@@ -54,6 +59,7 @@ class AttachmentSerializer(serializers.Serializer):
class ActorSerializer(serializers.Serializer):
id = serializers.URLField()
summary = serializers.CharField(required=False)
support = serializers.CharField(required=False)
url = serializers.URLField(required=False)
attachment = serializers.ListField(
child=AttachmentSerializer(), min_length=0, required=False
......
......@@ -61,8 +61,9 @@ class Activitypub(Source):
"links": activitypub.extract_urls_from_attachments(
serializer.validated_data.get("attachment", [])
)
+ activitypub.extract_urls_from_summary(
serializer.validated_data["summary"]
+ activitypub.extract_urls_from_text_fields(
serializer.validated_data.get("summary"),
serializer.validated_data.get("support"),
)
}
......
......@@ -34,6 +34,7 @@ install_requires =
django-environ
django-redis
djangorestframework
markdown
psycopg2-binary
lxml
......
import pytest
from retribute_api.search import activitypub
......@@ -43,8 +45,26 @@ def test_extract_urls_from_attachments():
assert activitypub.extract_urls_from_attachments(attachments) == expected
def test_extract_urls_from_summary():
summary = '<p>This is a link to my Paypal account too: <a href="https://paypal.me/username">Test</a>'
expected = [{"summary": "Test", "url": "https://paypal.me/username"}]
assert activitypub.extract_urls_from_summary(summary) == expected
@pytest.mark.parametrize(
"input, expected",
[
(
[
'<p>This is a link to my Paypal account too: <a href="https://paypal.me/username">Test</a>'
],
[{"summary": "Test", "url": "https://paypal.me/username"}],
),
(
[
'<p>This is a link to my Paypal account too: <a href="https://paypal.me/username">Test</a>',
"Markdown test [patreon](https://patreon.com/username)",
],
[
{"summary": "Test", "url": "https://paypal.me/username"},
{"summary": "patreon", "url": "https://patreon.com/username"},
],
),
],
)
def test_extract_urls_from_text_fields(input, expected):
assert activitypub.extract_urls_from_text_fields(*input) == expected
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment