Skip to content
Snippets Groups Projects
Verified Commit 2dbdcf33 authored by Eliot Berriot
Browse files

Added OpenCollective, Bandcamp, and more permissive matching

parent 56222dcb
No related branches found
No related tags found
No related merge requests found
......@@ -20,11 +20,16 @@ registry = Registry()
class Provider(object):
    """Base class describing a platform that links can be matched against.

    Subclasses override the class attributes below and are added to the
    module-level ``registry``.
    """

    # Canonical identifier of the provider, compared against lowercased input.
    id = None
    # Extra names accepted by ``match_by_name``. Subclasses should override
    # this attribute rather than mutate it: the default list is shared.
    additional_ids = []
    # Pattern describing the provider's domain.
    # NOTE(review): presumably applied to the URL hostname by the caller that
    # walks the registry -- confirm, that code is not part of this class.
    domain_regex = None
    # Pattern whose first capture group is the username; consumed by the
    # ``match_from_url`` implementations of subclasses.
    username_regex = None

    def match_by_name(self, value):
        """Tell whether *value* is one of this provider's names.

        The comparison ignores case and surrounding whitespace.

        Args:
            value: Free-form label to test. Non-string values (for example
                ``None`` for a link without text) never match.

        Returns:
            ``True`` if the normalised value equals ``id`` or is listed in
            ``additional_ids``, ``False`` otherwise.
        """
        # Labels may come from HTML link text, which can be missing; treat
        # anything that is not a string as "no match" instead of crashing.
        if not isinstance(value, str):
            return False
        v = value.lower().strip()
        return self.id == v or v in self.additional_ids
class BasicUsernameProvider(Provider):
class BasicUsernameInPathProvider(Provider):
username_regex = r"^\/([\w\.]+)/?$"
def match_from_url(self, parsed_url):
......@@ -40,31 +45,60 @@ class BasicUsernameProvider(Provider):
return {"provider": self.id, "id": username}
class BasicUsernameInDomainProvider(Provider):
    """Provider whose username is encoded in the URL hostname.

    Subclasses are expected to override ``username_regex`` with a pattern
    that is matched against the hostname and whose first capture group is
    the username.
    """

    # NOTE(review): this default is a path-style pattern (it requires a
    # leading "/"), so it cannot match a hostname; it only acts as a
    # placeholder until a subclass overrides it. Left unchanged on purpose.
    username_regex = r"^\/([\w\.]+)/?$"

    def match_from_url(self, parsed_url):
        """Extract the username from the hostname of *parsed_url*.

        Args:
            parsed_url: Object exposing a ``hostname`` attribute
                (presumably a ``urllib.parse`` result -- confirm with caller).

        Returns:
            ``{"provider": <id>, "id": <username>}`` when the hostname matches
            ``username_regex``; ``None`` when no regex is configured, the URL
            has no hostname, or the hostname does not match.
        """
        hostname = parsed_url.hostname
        # URLs without a network location (relative links, mailto:, ...)
        # have no hostname; re.match would raise TypeError on None.
        if not self.username_regex or not hostname:
            return None

        result = re.match(self.username_regex, hostname)
        if not result:
            return None

        return {"provider": self.id, "id": result.group(1)}
@registry.register
class Patreon(BasicUsernameProvider):
class Patreon(BasicUsernameInPathProvider):
id = "patreon"
domain_regex = r"^(\w+\.)*patreon\.com"
@registry.register
class Paypal(BasicUsernameProvider):
class Paypal(BasicUsernameInPathProvider):
id = "paypal"
domain_regex = r"^(\w+\.)*paypal\.me"
@registry.register
class Tipeee(BasicUsernameProvider):
class Tipeee(BasicUsernameInPathProvider):
id = "tipeee"
domain_regex = r"^(\w+\.)*tipeee\.com"
@registry.register
class Liberapay(BasicUsernameProvider):
class Liberapay(BasicUsernameInPathProvider):
id = "liberapay"
domain_regex = r"^(\w+\.)*liberapay\.com"
@registry.register
class KoFi(BasicUsernameProvider):
class KoFi(BasicUsernameInPathProvider):
id = "ko-fi"
additional_ids = ["kofi"]
domain_regex = r"^(\w+\.)*ko-fi\.com"
@registry.register
class OpenCollective(BasicUsernameInPathProvider):
    """Open Collective provider.

    URL matching is inherited from ``BasicUsernameInPathProvider``; this
    class only supplies the identifiers and the domain pattern.
    """

    # Accepts opencollective.com with any number of leading subdomains.
    domain_regex = r"^(\w+\.)*opencollective\.com"
    # Spelled-out variant also accepted when matching by name.
    additional_ids = ["open collective"]
    id = "opencollective"
@registry.register
class BandCamp(BasicUsernameInDomainProvider):
    """Bandcamp provider: the username is the subdomain of bandcamp.com."""

    id = "bandcamp"
    # Spelled-out variant also accepted when matching by name.
    additional_ids = ["band camp"]
    # Matched against the URL hostname by the parent's ``match_from_url``.
    # - "[\w-]+" also accepts hyphenated subdomains, which "\w+" rejected.
    # - The trailing "$" rejects look-alike hosts such as
    #   "name.bandcamp.com.example.org".
    username_regex = r"^([\w-]+)\.bandcamp\.com$"
......@@ -2,10 +2,12 @@ import lxml.html
from rest_framework import serializers
def get_urls(content):
def get_links(content):
links = []
dom = lxml.html.fromstring(content)
for link in dom.xpath("//a/@href"):
for link in dom.xpath("//a"):
if not link.get("href"):
continue
links.append(link)
return links
......@@ -16,10 +18,23 @@ def extract_urls_from_attachments(attachments):
if attachment["type"] != "PropertyValue":
continue
urls = get_urls(attachment["value"])
if not urls:
links = get_links(attachment["value"])
if not links:
continue
row = {"summary": attachment["name"], "url": urls[0]}
row = {"summary": attachment["name"], "url": links[0].get("href")}
data.append(row)
return data
def extract_urls_from_summary(summary):
    """Collect the hyperlinks found in an HTML summary.

    Args:
        summary: HTML markup to scan; may be empty or ``None``.

    Returns:
        A list of ``{"summary": <link text>, "url": <href>}`` dicts, one per
        element returned by ``get_links``. Empty when *summary* is falsy.
    """
    if not summary:
        return []
    # ``text_content()`` gathers the text of the anchor and all of its
    # descendants. The bare ``.text`` attribute is None (or only a prefix)
    # as soon as the anchor wraps child elements such as <span>, which would
    # hand a None summary to downstream name matching.
    return [
        {"summary": link.text_content(), "url": link.get("href")}
        for link in get_links(summary)
    ]
......@@ -38,6 +53,7 @@ class AttachmentSerializer(serializers.Serializer):
class ActorSerializer(serializers.Serializer):
id = serializers.URLField()
summary = serializers.CharField(required=False)
url = serializers.URLField(required=False)
attachment = serializers.ListField(child=AttachmentSerializer(), min_length=1)
tag = serializers.ListField(child=TagSerializer(), min_length=0)
......@@ -57,6 +57,9 @@ class Activitypub(Source):
"links": activitypub.extract_urls_from_attachments(
serializer.validated_data["attachment"]
)
+ activitypub.extract_urls_from_summary(
serializer.validated_data["summary"]
)
}
return data
......@@ -78,9 +81,20 @@ class Webfinger(Source):
def result_to_retribute_profile(lookup_type, lookup, data):
path = settings.BASE_URL + "/compat"
now = timezone.now()
valid_means = [
(link, means.extract_from_url(link["url"])) for link in data["links"]
]
valid_means = []
for link in data["links"]:
link_data, mean_data = (link, means.extract_from_url(link["url"]))
if mean_data:
valid_means.append((link_data, mean_data))
continue
# we try to match by "guessing" the provider based on the link
# summary
for id, m in providers.registry:
if m.match_by_name(link_data["summary"]):
mean_data = {"provider": id, "id": link_data["url"]}
valid_means.append((link_data, mean_data))
break
valid_means = list(filter(lambda v: v[1] is not None, valid_means))
if not valid_means:
raise exceptions.MeansNotFound()
......
......@@ -41,3 +41,10 @@ def test_extract_urls_from_attachments():
]
assert activitypub.extract_urls_from_attachments(attachments) == expected
def test_extract_urls_from_summary():
    """A single anchor in the summary yields one row with its text and href."""
    html = '<p>This is a link to my Paypal account too: <a href="https://paypal.me/username">Test</a>'

    rows = activitypub.extract_urls_from_summary(html)

    assert rows == [{"summary": "Test", "url": "https://paypal.me/username"}]
......@@ -14,6 +14,16 @@ from retribute_api.search import means
("https://www.paypal.me/username", {"provider": "paypal", "id": "username"}),
("https://fr.tipeee.com/username", {"provider": "tipeee", "id": "username"}),
("https://tipeee.com/username", {"provider": "tipeee", "id": "username"}),
(
"https://opencollective.com/username",
{"provider": "opencollective", "id": "username"},
),
("https://username.bandcamp.com", {"provider": "bandcamp", "id": "username"}),
("https://username.bandcamp.com", {"provider": "bandcamp", "id": "username"}),
(
"https://username.bandcamp.com/noop",
{"provider": "bandcamp", "id": "username"},
),
("https://patreon.com/username/nope", None),
],
)
......
......@@ -23,6 +23,7 @@ async def test_webfinger_source(mocker, session, responses, dummycache):
actor_response = {
"id": "https://test.domain",
"tag": [{"type": "Hashtag", "name": "#helo"}],
"summary": '<p>This is a link to my Paypal account too: <a href="https://paypal.me/username">Test</a>',
"attachment": [
{
"type": "PropertyValue",
......@@ -34,6 +35,11 @@ async def test_webfinger_source(mocker, session, responses, dummycache):
"name": "Tip me on Ko-Fi",
"value": '<a href="https://ko-fi.com/username" rel="me nofollow noopener">Test</a>',
},
{
"type": "PropertyValue",
"name": "Bandcamp",
"value": '<a href="https://custom.domain" rel="me nofollow noopener">Test</a>',
},
],
}
responses.get("https://domain.test/users/user", payload=actor_response)
......@@ -42,6 +48,8 @@ async def test_webfinger_source(mocker, session, responses, dummycache):
"links": [
{"summary": "Support me on Patreon", "url": "https://patreon.com/username"},
{"summary": "Tip me on Ko-Fi", "url": "https://ko-fi.com/username"},
{"summary": "Bandcamp", "url": "https://custom.domain"},
{"summary": "Test", "url": "https://paypal.me/username"},
]
}
......@@ -67,6 +75,7 @@ def test_source_result_to_retribute_profile(settings, now):
"links": [
{"summary": "Support me on Patreon", "url": "https://patreon.com/username"},
{"summary": "Tip me on Ko-Fi", "url": "https://ko-fi.com/username"},
{"summary": "Bandcamp", "url": "https://custom.domain/username"},
]
}
......@@ -81,16 +90,23 @@ def test_source_result_to_retribute_profile(settings, now):
"provider": "patreon",
"id": "username",
"summary": "Support me on Patreon",
"weight": 1,
"weight": 2,
"url": "https://patreon.com/username",
},
{
"provider": "ko-fi",
"summary": "Tip me on Ko-Fi",
"id": "username",
"weight": 0,
"weight": 1,
"url": "https://ko-fi.com/username",
},
{
"provider": "bandcamp",
"id": "https://custom.domain/username",
"summary": "Bandcamp",
"weight": 0,
"url": "https://custom.domain/username",
},
],
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment