Skip to content
Snippets Groups Projects
Verified Commit 76e08567 authored by Eliot Berriot
Browse files

Added url extraction from activitypub profiles

parent cb55f756
No related branches found
No related tags found
No related merge requests found
import lxml.html
from rest_framework import serializers
def get_urls(content):
    """Return the ``href`` value of every ``<a>`` element in an HTML fragment.

    Args:
        content: HTML markup to scan. May be empty or ``None``.

    Returns:
        A list of ``href`` attribute values, in the order returned by the
        XPath query. The list is empty when ``content`` is empty, contains
        only whitespace, or has no ``<a>`` element carrying an ``href``.
    """
    # lxml.html.fromstring() raises ParserError ("Document is empty") on an
    # empty or whitespace-only string, so bail out before parsing.
    if not content or not content.strip():
        return []
    dom = lxml.html.fromstring(content)
    return list(dom.xpath("//a/@href"))
def extract_urls_from_attachments(attachments):
    """Build ``{"summary", "url"}`` rows from a profile's attachments.

    Only attachments whose ``type`` is ``"PropertyValue"`` are considered.
    For each of them, the first link found in the HTML ``value`` becomes the
    row's ``url`` and the attachment ``name`` becomes its ``summary``.

    Attachments are skipped (never raising) when they have no ``type``, a
    different ``type``, a missing / non-string / blank ``value``, or a
    ``value`` that contains no link. A missing ``name`` yields an empty
    ``summary``.

    Args:
        attachments: Iterable of dict-like attachment objects.

    Returns:
        A list of ``{"summary": ..., "url": ...}`` dicts, in input order.
    """
    data = []
    for attachment in attachments:
        # Attachment payloads may be malformed, so read keys with .get()
        # instead of indexing to avoid KeyError.
        if attachment.get("type") != "PropertyValue":
            continue
        value = attachment.get("value")
        # A blank value cannot contain a link and would make the HTML parser
        # fail, so skip it before calling get_urls().
        if not isinstance(value, str) or not value.strip():
            continue
        urls = get_urls(value)
        if not urls:
            continue
        data.append({"summary": attachment.get("name", ""), "url": urls[0]})
    return data
class TagSerializer(serializers.Serializer):
    """Validate a single tag entry made of a ``name`` and a ``type`` string."""

    # Both fields are plain, required character fields.
    name = serializers.CharField()
    type = serializers.CharField()
class AttachmentSerializer(serializers.Serializer):
    """Validate a single attachment entry: ``type``, ``name`` and ``value``."""

    # All three fields are plain, required character fields.
    type = serializers.CharField()
    name = serializers.CharField()
    value = serializers.CharField()
class ActorSerializer(serializers.Serializer):
    """Validate the ``attachment`` and ``tag`` lists of an actor payload."""

    # At least one attachment is required for the payload to be valid.
    attachment = serializers.ListField(
        child=AttachmentSerializer(),
        min_length=1,
    )
    # The tag list may be empty.
    tag = serializers.ListField(
        child=TagSerializer(),
        min_length=0,
    )
...@@ -31,6 +31,8 @@ install_requires = ...@@ -31,6 +31,8 @@ install_requires =
django-redis django-redis
djangorestframework djangorestframework
psycopg2-binary psycopg2-binary
lxml
[options.entry_points] [options.entry_points]
......
from retribute_api.search import activitypub
def test_profile_serializer():
    """ActorSerializer accepts a minimal payload and returns it unchanged."""
    hashtag = {"type": "Hashtag", "name": "#nobot"}
    patreon_field = {
        "type": "PropertyValue",
        "name": "patreon",
        "value": '<a href="https://patreon.com/username" rel="me nofollow noopener">Test</a>',
    }
    payload = {"tag": [hashtag], "attachment": [patreon_field]}

    serializer = activitypub.ActorSerializer(data=payload)

    # is_valid() raises on failure, so a validation error surfaces directly.
    assert serializer.is_valid(raise_exception=True) is True
    assert serializer.validated_data == payload
def test_extract_urls_from_attachments():
    """Attachments with a link yield a row; link-less attachments are dropped."""
    patreon = {
        "type": "PropertyValue",
        "name": "Support me on Patreon",
        "value": '<a href="https://patreon.com/username" rel="me nofollow noopener" target="_blank">This is my Patreon</a>',
    }
    ko_fi = {
        "type": "PropertyValue",
        "name": "Support me on Ko-Fi",
        "value": '<a href="https://ko-fi.com/username" rel="me nofollow noopener" target="_blank">This is my Ko-Fi</a>',
    }
    # Plain text without any anchor: expected to be absent from the result.
    no_link = {"type": "PropertyValue", "name": "Irrelevant text", "value": "No link"}

    result = activitypub.extract_urls_from_attachments([patreon, ko_fi, no_link])

    assert result == [
        {"summary": "Support me on Patreon", "url": "https://patreon.com/username"},
        {"summary": "Support me on Ko-Fi", "url": "https://ko-fi.com/username"},
    ]
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment