Skip to content
Snippets Groups Projects
Verified Commit 672a1940 authored by Eliot Berriot
Browse files

Support parsing of markdown in activity pub fields

parent 44a56cd5
Branches
No related tags found
No related merge requests found
import lxml.html import lxml.html
import markdown
from rest_framework import serializers from rest_framework import serializers
...@@ -27,15 +29,18 @@ def extract_urls_from_attachments(attachments): ...@@ -27,15 +29,18 @@ def extract_urls_from_attachments(attachments):
return data return data
def extract_urls_from_summary(summary): def extract_urls_from_text_fields(*fields):
data = [] data = []
if not summary:
return []
links = get_links(summary) for field in fields:
for link in links: if not field:
row = {"summary": link.text, "url": link.get("href")} continue
data.append(row) # ugly but only way to support PeerTube and Mastodon right now
html = markdown.markdown(field)
links = get_links(html)
for link in links:
row = {"summary": link.text, "url": link.get("href")}
data.append(row)
return data return data
...@@ -54,6 +59,7 @@ class AttachmentSerializer(serializers.Serializer): ...@@ -54,6 +59,7 @@ class AttachmentSerializer(serializers.Serializer):
class ActorSerializer(serializers.Serializer): class ActorSerializer(serializers.Serializer):
id = serializers.URLField() id = serializers.URLField()
summary = serializers.CharField(required=False) summary = serializers.CharField(required=False)
support = serializers.CharField(required=False)
url = serializers.URLField(required=False) url = serializers.URLField(required=False)
attachment = serializers.ListField( attachment = serializers.ListField(
child=AttachmentSerializer(), min_length=0, required=False child=AttachmentSerializer(), min_length=0, required=False
......
...@@ -61,8 +61,9 @@ class Activitypub(Source): ...@@ -61,8 +61,9 @@ class Activitypub(Source):
"links": activitypub.extract_urls_from_attachments( "links": activitypub.extract_urls_from_attachments(
serializer.validated_data.get("attachment", []) serializer.validated_data.get("attachment", [])
) )
+ activitypub.extract_urls_from_summary( + activitypub.extract_urls_from_text_fields(
serializer.validated_data["summary"] serializer.validated_data.get("summary"),
serializer.validated_data.get("support"),
) )
} }
......
...@@ -34,6 +34,7 @@ install_requires = ...@@ -34,6 +34,7 @@ install_requires =
django-environ django-environ
django-redis django-redis
djangorestframework djangorestframework
markdown
psycopg2-binary psycopg2-binary
lxml lxml
......
import pytest
from retribute_api.search import activitypub from retribute_api.search import activitypub
...@@ -43,8 +45,26 @@ def test_extract_urls_from_attachments(): ...@@ -43,8 +45,26 @@ def test_extract_urls_from_attachments():
assert activitypub.extract_urls_from_attachments(attachments) == expected assert activitypub.extract_urls_from_attachments(attachments) == expected
def test_extract_urls_from_summary(): @pytest.mark.parametrize(
summary = '<p>This is a link to my Paypal account too: <a href="https://paypal.me/username">Test</a>' "input, expected",
expected = [{"summary": "Test", "url": "https://paypal.me/username"}] [
(
assert activitypub.extract_urls_from_summary(summary) == expected [
'<p>This is a link to my Paypal account too: <a href="https://paypal.me/username">Test</a>'
],
[{"summary": "Test", "url": "https://paypal.me/username"}],
),
(
[
'<p>This is a link to my Paypal account too: <a href="https://paypal.me/username">Test</a>',
"Markdown test [patreon](https://patreon.com/username)",
],
[
{"summary": "Test", "url": "https://paypal.me/username"},
{"summary": "patreon", "url": "https://patreon.com/username"},
],
),
],
)
def test_extract_urls_from_text_fields(input, expected):
assert activitypub.extract_urls_from_text_fields(*input) == expected
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment