Skip to content
Snippets Groups Projects
Verified Commit 672a1940 authored by Eliot Berriot
Browse files

Support parsing of markdown in activity pub fields

parent 44a56cd5
Branches
No related tags found
No related merge requests found
import lxml.html
import markdown
from rest_framework import serializers
......@@ -27,12 +29,15 @@ def extract_urls_from_attachments(attachments):
return data
def extract_urls_from_summary(summary):
def extract_urls_from_text_fields(*fields):
data = []
if not summary:
return []
links = get_links(summary)
for field in fields:
if not field:
continue
# ugly but only way to support PeerTube and Mastodon right now
html = markdown.markdown(field)
links = get_links(html)
for link in links:
row = {"summary": link.text, "url": link.get("href")}
data.append(row)
......@@ -54,6 +59,7 @@ class AttachmentSerializer(serializers.Serializer):
class ActorSerializer(serializers.Serializer):
id = serializers.URLField()
summary = serializers.CharField(required=False)
support = serializers.CharField(required=False)
url = serializers.URLField(required=False)
attachment = serializers.ListField(
child=AttachmentSerializer(), min_length=0, required=False
......
......@@ -61,8 +61,9 @@ class Activitypub(Source):
"links": activitypub.extract_urls_from_attachments(
serializer.validated_data.get("attachment", [])
)
+ activitypub.extract_urls_from_summary(
serializer.validated_data["summary"]
+ activitypub.extract_urls_from_text_fields(
serializer.validated_data.get("summary"),
serializer.validated_data.get("support"),
)
}
......
......@@ -34,6 +34,7 @@ install_requires =
django-environ
django-redis
djangorestframework
markdown
psycopg2-binary
lxml
......
import pytest
from retribute_api.search import activitypub
......@@ -43,8 +45,26 @@ def test_extract_urls_from_attachments():
assert activitypub.extract_urls_from_attachments(attachments) == expected
def test_extract_urls_from_summary():
summary = '<p>This is a link to my Paypal account too: <a href="https://paypal.me/username">Test</a>'
expected = [{"summary": "Test", "url": "https://paypal.me/username"}]
assert activitypub.extract_urls_from_summary(summary) == expected
@pytest.mark.parametrize(
    "fields, expected",
    [
        # A single HTML field containing one anchor.
        (
            [
                '<p>This is a link to my Paypal account too: <a href="https://paypal.me/username">Test</a>'
            ],
            [{"summary": "Test", "url": "https://paypal.me/username"}],
        ),
        # An HTML field followed by a Markdown field: links from both are
        # expected, in the same order as the fields were passed.
        (
            [
                '<p>This is a link to my Paypal account too: <a href="https://paypal.me/username">Test</a>',
                "Markdown test [patreon](https://patreon.com/username)",
            ],
            [
                {"summary": "Test", "url": "https://paypal.me/username"},
                {"summary": "patreon", "url": "https://patreon.com/username"},
            ],
        ),
    ],
)
def test_extract_urls_from_text_fields(fields, expected):
    """Check the links extracted from one or more text fields.

    ``fields`` is the list of raw text values (HTML or Markdown) that is
    unpacked into positional arguments; ``expected`` is the list of
    ``{"summary": ..., "url": ...}`` rows the extractor must return.
    """
    # The parameter is named ``fields`` rather than ``input`` so the
    # ``input`` builtin is not shadowed inside the test.
    assert activitypub.extract_urls_from_text_fields(*fields) == expected
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment