Verified commit 2dbdcf33, authored by Eliot Berriot
Browse files

Added OpenCollective, Bandcamp, and more permissive matching

parent 56222dcb
......@@ -20,11 +20,16 @@ registry = Registry()
class Provider(object):
    """Base class describing a platform that a profile link can be matched to.

    Subclasses set ``id`` (and optionally ``additional_ids``) so that a
    free-text label can be recognised through :meth:`match_by_name`.
    """

    # Canonical identifier. It is compared against lowercased input, so
    # subclasses should use a lowercase string.
    id = None
    # Extra lowercase names accepted as aliases of ``id``.
    additional_ids = []
    # Regex patterns for URL-based matching; left unset on the base class.
    # NOTE(review): presumably consumed by ``match_from_url`` implementations
    # in subclasses -- only the username pattern's use is visible here.
    domain_regex = None
    username_regex = None

    def match_by_name(self, value):
        """Return True when ``value`` names this provider.

        The comparison ignores case and surrounding whitespace.

        Args:
            value: Candidate label, e.g. the text of a link. Non-string
                values (such as ``None`` for a link without text) never
                match instead of raising ``AttributeError``.

        Returns:
            bool: True if the normalised value equals ``id`` or is one of
            ``additional_ids``.
        """
        if not isinstance(value, str):
            # Link summaries may be missing; treat that as "no match".
            return False
        v = value.lower().strip()
        return self.id == v or v in self.additional_ids
class BasicUsernameProvider(Provider):
class BasicUsernameInPathProvider(Provider):
username_regex = r"^\/([\w\.]+)/?$"
def match_from_url(self, parsed_url):
......@@ -40,31 +45,60 @@ class BasicUsernameProvider(Provider):
return {"provider": self.id, "id": username}
class BasicUsernameInDomainProvider(Provider):
    """Provider whose username is read from the URL's hostname.

    Subclasses supply a ``username_regex`` whose first capture group is the
    username; it is applied to ``parsed_url.hostname`` with ``re.match``.
    """

    # NOTE(review): this default is a path-shaped pattern (it starts with a
    # slash), so it cannot match a hostname; subclasses are expected to
    # override it. Kept unchanged for backward compatibility.
    username_regex = r"^\/([\w\.]+)/?$"

    def match_from_url(self, parsed_url):
        """Extract the provider/username pair from a parsed URL.

        Args:
            parsed_url: Object exposing a ``hostname`` attribute
                (presumably the result of ``urllib.parse.urlparse`` --
                confirm against the caller).

        Returns:
            dict | None: ``{"provider": <id>, "id": <username>}`` when the
            hostname matches ``username_regex``, otherwise ``None``.
        """
        hostname = parsed_url.hostname
        # ``hostname`` is None for host-less URLs (relative links, mailto:);
        # re.match would raise TypeError on it, so bail out early.
        if not self.username_regex or not hostname:
            return None

        result = re.match(self.username_regex, hostname)
        if result is None:
            return None

        return {"provider": self.id, "id": result.group(1)}
@registry.register
class Patreon(BasicUsernameProvider):
class Patreon(BasicUsernameInPathProvider):
id = "patreon"
domain_regex = r"^(\w+\.)*patreon\.com"
@registry.register
class Paypal(BasicUsernameProvider):
class Paypal(BasicUsernameInPathProvider):
id = "paypal"
domain_regex = r"^(\w+\.)*paypal\.me"
@registry.register
class Tipeee(BasicUsernameProvider):
class Tipeee(BasicUsernameInPathProvider):
id = "tipeee"
domain_regex = r"^(\w+\.)*tipeee\.com"
@registry.register
class Liberapay(BasicUsernameProvider):
class Liberapay(BasicUsernameInPathProvider):
id = "liberapay"
domain_regex = r"^(\w+\.)*liberapay\.com"
@registry.register
class KoFi(BasicUsernameProvider):
class KoFi(BasicUsernameInPathProvider):
id = "ko-fi"
additional_ids = ["kofi"]
domain_regex = r"^(\w+\.)*ko-fi\.com"
@registry.register
class OpenCollective(BasicUsernameInPathProvider):
    """Provider entry for Open Collective, added to the provider registry."""

    id = "opencollective"
    # Hostname pattern: opencollective.com, optionally preceded by subdomains.
    domain_regex = r"^(\w+\.)*opencollective\.com"
    # Free-text alias accepted by name matching, alongside the canonical id.
    additional_ids = ["open collective"]
@registry.register
class BandCamp(BasicUsernameInDomainProvider):
    """Provider entry for Bandcamp, where the username is the subdomain.

    For example ``https://username.bandcamp.com`` yields ``username``.
    """

    id = "bandcamp"
    # Free-text alias accepted by name matching, alongside the canonical id.
    additional_ids = ["band camp"]
    # The first capture group is the username. Hyphens are valid in DNS
    # labels, so they are accepted in addition to word characters; the
    # trailing anchor rejects look-alike hosts such as
    # "name.bandcamp.com.example.org".
    username_regex = r"^([\w-]+)\.bandcamp\.com$"
......@@ -2,10 +2,12 @@ import lxml.html
from rest_framework import serializers
def get_urls(content):
def get_links(content):
links = []
dom = lxml.html.fromstring(content)
for link in dom.xpath("//a/@href"):
for link in dom.xpath("//a"):
if not link.get("href"):
continue
links.append(link)
return links
......@@ -16,10 +18,23 @@ def extract_urls_from_attachments(attachments):
if attachment["type"] != "PropertyValue":
continue
urls = get_urls(attachment["value"])
if not urls:
links = get_links(attachment["value"])
if not links:
continue
row = {"summary": attachment["name"], "url": urls[0]}
row = {"summary": attachment["name"], "url": links[0].get("href")}
data.append(row)
return data
def extract_urls_from_summary(summary):
    """Collect every link found in an HTML profile summary.

    Args:
        summary: HTML fragment (may be empty or ``None``).

    Returns:
        list[dict]: One ``{"summary": <link text>, "url": <href>}`` row per
        anchor returned by ``get_links``, in document order. ``summary`` is
        ``None`` for anchors without any text. An empty list is returned
        when ``summary`` is falsy.
    """
    if not summary:
        return []

    # ``link.text`` only holds the text before the first child element, so
    # anchors whose label is wrapped in markup (e.g. <a><span>..</span></a>)
    # would yield None or a truncated label. ``text_content()`` gathers the
    # text of the whole subtree; ``or None`` keeps None for empty anchors.
    return [
        {"summary": link.text_content() or None, "url": link.get("href")}
        for link in get_links(summary)
    ]
......@@ -38,6 +53,7 @@ class AttachmentSerializer(serializers.Serializer):
class ActorSerializer(serializers.Serializer):
id = serializers.URLField()
summary = serializers.CharField(required=False)
url = serializers.URLField(required=False)
attachment = serializers.ListField(child=AttachmentSerializer(), min_length=1)
tag = serializers.ListField(child=TagSerializer(), min_length=0)
......@@ -57,6 +57,9 @@ class Activitypub(Source):
"links": activitypub.extract_urls_from_attachments(
serializer.validated_data["attachment"]
)
+ activitypub.extract_urls_from_summary(
serializer.validated_data["summary"]
)
}
return data
......@@ -78,9 +81,20 @@ class Webfinger(Source):
def result_to_retribute_profile(lookup_type, lookup, data):
path = settings.BASE_URL + "/compat"
now = timezone.now()
valid_means = [
(link, means.extract_from_url(link["url"])) for link in data["links"]
]
valid_means = []
for link in data["links"]:
link_data, mean_data = (link, means.extract_from_url(link["url"]))
if mean_data:
valid_means.append((link_data, mean_data))
continue
# we try to match by "guessing" the provider based on the link
# summary
for id, m in providers.registry:
if m.match_by_name(link_data["summary"]):
mean_data = {"provider": id, "id": link_data["url"]}
valid_means.append((link_data, mean_data))
break
valid_means = list(filter(lambda v: v[1] is not None, valid_means))
if not valid_means:
raise exceptions.MeansNotFound()
......
......@@ -41,3 +41,10 @@ def test_extract_urls_from_attachments():
]
assert activitypub.extract_urls_from_attachments(attachments) == expected
def test_extract_urls_from_summary():
    """A single anchor in the summary becomes one summary/url row."""
    html = '<p>This is a link to my Paypal account too: <a href="https://paypal.me/username">Test</a>'

    rows = activitypub.extract_urls_from_summary(html)

    assert rows == [{"summary": "Test", "url": "https://paypal.me/username"}]
......@@ -14,6 +14,16 @@ from retribute_api.search import means
("https://www.paypal.me/username", {"provider": "paypal", "id": "username"}),
("https://fr.tipeee.com/username", {"provider": "tipeee", "id": "username"}),
("https://tipeee.com/username", {"provider": "tipeee", "id": "username"}),
(
"https://opencollective.com/username",
{"provider": "opencollective", "id": "username"},
),
("https://username.bandcamp.com", {"provider": "bandcamp", "id": "username"}),
("https://username.bandcamp.com", {"provider": "bandcamp", "id": "username"}),
(
"https://username.bandcamp.com/noop",
{"provider": "bandcamp", "id": "username"},
),
("https://patreon.com/username/nope", None),
],
)
......
......@@ -23,6 +23,7 @@ async def test_webfinger_source(mocker, session, responses, dummycache):
actor_response = {
"id": "https://test.domain",
"tag": [{"type": "Hashtag", "name": "#helo"}],
"summary": '<p>This is a link to my Paypal account too: <a href="https://paypal.me/username">Test</a>',
"attachment": [
{
"type": "PropertyValue",
......@@ -34,6 +35,11 @@ async def test_webfinger_source(mocker, session, responses, dummycache):
"name": "Tip me on Ko-Fi",
"value": '<a href="https://ko-fi.com/username" rel="me nofollow noopener">Test</a>',
},
{
"type": "PropertyValue",
"name": "Bandcamp",
"value": '<a href="https://custom.domain" rel="me nofollow noopener">Test</a>',
},
],
}
responses.get("https://domain.test/users/user", payload=actor_response)
......@@ -42,6 +48,8 @@ async def test_webfinger_source(mocker, session, responses, dummycache):
"links": [
{"summary": "Support me on Patreon", "url": "https://patreon.com/username"},
{"summary": "Tip me on Ko-Fi", "url": "https://ko-fi.com/username"},
{"summary": "Bandcamp", "url": "https://custom.domain"},
{"summary": "Test", "url": "https://paypal.me/username"},
]
}
......@@ -67,6 +75,7 @@ def test_source_result_to_retribute_profile(settings, now):
"links": [
{"summary": "Support me on Patreon", "url": "https://patreon.com/username"},
{"summary": "Tip me on Ko-Fi", "url": "https://ko-fi.com/username"},
{"summary": "Bandcamp", "url": "https://custom.domain/username"},
]
}
......@@ -81,16 +90,23 @@ def test_source_result_to_retribute_profile(settings, now):
"provider": "patreon",
"id": "username",
"summary": "Support me on Patreon",
"weight": 1,
"weight": 2,
"url": "https://patreon.com/username",
},
{
"provider": "ko-fi",
"summary": "Tip me on Ko-Fi",
"id": "username",
"weight": 0,
"weight": 1,
"url": "https://ko-fi.com/username",
},
{
"provider": "bandcamp",
"id": "https://custom.domain/username",
"summary": "Bandcamp",
"weight": 0,
"url": "https://custom.domain/username",
},
],
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment