Verified commit 76e08567, authored by Eliot Berriot
Browse files

Added url extraction from activitypub profiles

parent cb55f756
import lxml.html
from rest_framework import serializers
def get_urls(content):
    """Return the ``href`` of every ``<a>`` element found in *content*.

    Args:
        content: An HTML fragment, e.g. the ``value`` of a profile
            attachment.

    Returns:
        A list of href values in document order. The list is empty when
        the fragment contains no link or cannot be parsed at all.
    """
    try:
        dom = lxml.html.fromstring(content)
    except lxml.etree.ParserError:
        # lxml raises instead of returning a tree when the document is empty;
        # from the caller's point of view that simply means "no links".
        return []
    # The XPath selects the href attribute of every anchor, wherever it sits.
    return list(dom.xpath("//a/@href"))
def extract_urls_from_attachments(attachments):
    """Build a list of labelled URLs from actor profile attachments.

    Only attachments whose ``type`` is ``"PropertyValue"`` are considered.
    For each of them, the first link found in the HTML ``value`` is kept
    and labelled with the attachment ``name``.

    Args:
        attachments: An iterable of dicts with ``type``, ``name`` and
            ``value`` keys.

    Returns:
        A list of ``{"summary": <name>, "url": <first href>}`` dicts, in
        the same order as the input. Attachments of another type, or whose
        value holds no link, are left out.
    """
    data = []
    for attachment in attachments:
        if attachment["type"] != "PropertyValue":
            # Not a profile metadata field: nothing to extract.
            continue
        urls = get_urls(attachment["value"])
        if not urls:
            # Plain-text value without any anchor.
            continue
        # Only the first link of the value is exposed.
        data.append({"summary": attachment["name"], "url": urls[0]})
    return data
class TagSerializer(serializers.Serializer):
    """Validate a single entry of an actor's ``tag`` list."""

    # Both fields are required, free-form strings
    # (e.g. name "#nobot", type "Hashtag").
    name = serializers.CharField()
    type = serializers.CharField()
class AttachmentSerializer(serializers.Serializer):
    """Validate a single entry of an actor's ``attachment`` list."""

    # Kind of attachment, e.g. "PropertyValue".
    type = serializers.CharField()
    # Human-readable label of the attachment.
    name = serializers.CharField()
    # Attachment content; for "PropertyValue" entries this is an HTML fragment.
    value = serializers.CharField()
class ActorSerializer(serializers.Serializer):
    """Validate the ``attachment`` and ``tag`` lists of an actor payload."""

    # NOTE(review): min_length=1 means a payload with an empty attachment list
    # fails validation, while an empty tag list is accepted -- confirm this
    # asymmetry is intended.
    attachment = serializers.ListField(child=AttachmentSerializer(), min_length=1)
    tag = serializers.ListField(child=TagSerializer(), min_length=0)
......@@ -31,6 +31,8 @@ install_requires =
from import activitypub
def test_profile_serializer():
    """A payload with one tag and one attachment validates unchanged."""
    payload = {
        "tag": [{"type": "Hashtag", "name": "#nobot"}],
        "attachment": [
            {
                "type": "PropertyValue",
                "name": "patreon",
                "value": '<a href="" rel="me nofollow noopener">Test</a>',
            }
        ],
    }
    serializer = activitypub.ActorSerializer(data=payload)
    assert serializer.is_valid(raise_exception=True) is True
    # Validation must neither drop nor rewrite any of the submitted fields.
    assert serializer.validated_data == payload
def test_extract_urls_from_attachments():
    """Linked attachments yield one row each; link-less ones are skipped."""
    attachments = [
        {
            "type": "PropertyValue",
            "name": "Support me on Patreon",
            "value": '<a href="" rel="me nofollow noopener" target="_blank">This is my Patreon</a>',
        },
        {
            "type": "PropertyValue",
            "name": "Support me on Ko-Fi",
            "value": '<a href="" rel="me nofollow noopener" target="_blank">This is my Ko-Fi</a>',
        },
        # No anchor in the value: must not appear in the result.
        {"type": "PropertyValue", "name": "Irrelevant text", "value": "No link"},
    ]
    expected = [
        {"summary": "Support me on Patreon", "url": ""},
        {"summary": "Support me on Ko-Fi", "url": ""},
    ]
    assert activitypub.extract_urls_from_attachments(attachments) == expected
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment