Verified commit 76e08567, authored by Eliot Berriot
Browse files

Added url extraction from activitypub profiles

parent cb55f756
import lxml.html
from rest_framework import serializers
def get_urls(content):
    """Return the ``href`` value of every ``<a>`` element in an HTML fragment.

    Args:
        content: HTML markup (or plain text) to scan. ``None``, empty and
            whitespace-only values are accepted and yield no links.

    Returns:
        A list of the ``href`` attribute values, in document order. The list
        is empty when the content has no links or cannot be parsed into a
        document.
    """
    # lxml rejects empty input with ParserError("Document is empty"); a blank
    # profile field simply has no links, so short-circuit before parsing.
    if not content or not content.strip():
        return []
    try:
        dom = lxml.html.fromstring(content)
    except lxml.etree.ParserError:
        # Markup that lxml still considers an empty document: no links either.
        return []
    return list(dom.xpath("//a/@href"))
def extract_urls_from_attachments(attachments):
    """Build ``{"summary", "url"}`` rows from ``PropertyValue`` attachments.

    Only attachments whose ``type`` is ``"PropertyValue"`` are considered.
    For each of them the first link found in ``value`` becomes ``url`` and
    the attachment ``name`` becomes ``summary``.

    Args:
        attachments: Iterable of attachment dicts (``type``, ``name``,
            ``value``). ``None`` is treated as an empty list.

    Returns:
        A list of ``{"summary": ..., "url": ...}`` dicts, in input order.
        Attachments that are malformed, of another type, or contain no link
        are skipped.
    """
    data = []
    for attachment in attachments or []:
        # Profile data may be incomplete or oddly shaped; skip entries that
        # cannot be read instead of failing the whole extraction.
        if not isinstance(attachment, dict):
            continue
        if attachment.get("type") != "PropertyValue":
            continue
        value = attachment.get("value")
        # A missing or blank value cannot contain a link, and blank markup
        # would make the HTML parser raise.
        if not value or not value.strip():
            continue
        urls = get_urls(value)
        if not urls:
            continue
        # Only the first link of a field is kept.
        data.append({"summary": attachment.get("name", ""), "url": urls[0]})
    return data
class TagSerializer(serializers.Serializer):
    """Validate a single tag entry: a ``name`` and a ``type``, both strings."""

    # Both fields are required character fields (DRF defaults).
    name = serializers.CharField()
    type = serializers.CharField()
class AttachmentSerializer(serializers.Serializer):
    """Validate a single attachment entry made of three string fields.

    ``type``, ``name`` and ``value`` are all required character fields.
    """

    type = serializers.CharField()
    name = serializers.CharField()
    value = serializers.CharField()
class ActorSerializer(serializers.Serializer):
    """Validate the ``attachment`` and ``tag`` lists of an actor payload."""

    # At least one attachment is required for the payload to be valid.
    attachment = serializers.ListField(
        child=AttachmentSerializer(),
        min_length=1,
    )
    # The tag list may be empty.
    tag = serializers.ListField(
        child=TagSerializer(),
        min_length=0,
    )
......@@ -31,6 +31,8 @@ install_requires =
django-redis
djangorestframework
psycopg2-binary
lxml
[options.entry_points]
......
from retribute_api.search import activitypub
def test_profile_serializer():
    """A payload with one tag and one attachment validates and is returned unchanged."""
    nobot_tag = {"type": "Hashtag", "name": "#nobot"}
    patreon_field = {
        "type": "PropertyValue",
        "name": "patreon",
        "value": '<a href="https://patreon.com/username" rel="me nofollow noopener">Test</a>',
    }
    payload = {"tag": [nobot_tag], "attachment": [patreon_field]}

    serializer = activitypub.ActorSerializer(data=payload)

    # raise_exception=True surfaces the validation errors if the payload is rejected.
    assert serializer.is_valid(raise_exception=True) is True
    assert serializer.validated_data == payload
def test_extract_urls_from_attachments():
    """Fields containing a link yield a row; a field without any link is dropped."""
    patreon = {
        "type": "PropertyValue",
        "name": "Support me on Patreon",
        "value": '<a href="https://patreon.com/username" rel="me nofollow noopener" target="_blank">This is my Patreon</a>',
    }
    kofi = {
        "type": "PropertyValue",
        "name": "Support me on Ko-Fi",
        "value": '<a href="https://ko-fi.com/username" rel="me nofollow noopener" target="_blank">This is my Ko-Fi</a>',
    }
    no_link = {"type": "PropertyValue", "name": "Irrelevant text", "value": "No link"}

    result = activitypub.extract_urls_from_attachments([patreon, kofi, no_link])

    assert result == [
        {"summary": "Support me on Patreon", "url": "https://patreon.com/username"},
        {"summary": "Support me on Ko-Fi", "url": "https://ko-fi.com/username"},
    ]
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment