Commit 78b68737 authored by Eliot Berriot

Use new nodeinfo endpoint and publish result using pages

parent 279a56c9
Pipeline #952 failed with stages
in 19 seconds
instances:
pages:
# Simply publish the json file as an artifact
stage: build
image: python:3
before_script:
- mkdir artifacts
artifacts:
name: "instances"
paths:
- data.json
script: python collect.py instances.txt data.json
- artifacts
script: python collect.py instances.txt artifacts/
only:
- master@funkwhale/instances
......@@ -17,13 +17,22 @@ class UnreachableInstance(Exception):
def main():
    """Collect data for every listed instance and write two JSON reports.

    Command-line arguments:
        list_file: passed to ``get_instances`` to load the instance list.
        output_prefix: string prepended verbatim to the output file names
            (so a directory prefix needs its trailing slash).

    Writes ``<output_prefix>all.json`` with everything ``get_data`` returned
    and ``<output_prefix>open-signup.json`` restricted to the instances whose
    ``info`` has a truthy ``openRegistrations`` value.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('list_file')
    parser.add_argument('output_prefix')
    args = parser.parse_args()

    instances = get_instances(args.list_file)
    print('Loaded {} instances from {}'.format(len(instances), args.list_file))
    data = get_data(instances)

    all_file = '{}all.json'.format(args.output_prefix)
    open_signup_file = '{}open-signup.json'.format(args.output_prefix)

    with open(all_file, 'w') as f:
        f.write(json.dumps(data, sort_keys=True, indent=2))

    # Build the filtered report before opening its file so that a failure
    # while filtering does not leave an empty file behind.  A shallow copy is
    # enough: only the top-level 'instances' and 'count' keys are rebound.
    open_signup = data.copy()
    open_signup['instances'] = [
        i for i in open_signup['instances']
        if i['info'].get('openRegistrations')
    ]
    open_signup['count'] = len(open_signup['instances'])
    with open(open_signup_file, 'w') as f:
        f.write(json.dumps(open_signup, sort_keys=True, indent=2))
def get_instances(list_file):
......@@ -37,27 +46,53 @@ def get_instances(list_file):
def get_data(instances):
    """Fetch data for each instance URL and bundle the results.

    Args:
        instances: sequence of instance URLs.

    Returns:
        A dict with ``fetchDate`` (ISO-formatted current local time),
        ``instances`` (the truthy results of ``get_instance_data``) and
        ``count`` (the number of collected instances).
    """
    data = {
        'fetchDate': datetime.datetime.now().isoformat(),
        'instances': []
    }
    for i, url in enumerate(instances):
        print('[{}/{}] fetching {}'.format(i+1, len(instances), url))
        # Fetch exactly once per instance; get_instance_data may return
        # nothing, in which case the instance is left out of the report.
        d = get_instance_data(url)
        if d:
            data['instances'].append(d)
    data['count'] = len(data['instances'])
    return data
def get_nodeinfo_url(url):
    """Discover the NodeInfo 2.0 document URL advertised by an instance.

    Requests ``<url>/.well-known/nodeinfo`` and returns the ``href`` of the
    first link whose ``rel`` is the NodeInfo 2.0 schema identifier.

    Args:
        url: base URL of the instance; the well-known path is appended to it.

    Returns:
        The ``href`` value of the first matching link.

    Raises:
        InvalidPayload: if the payload has no usable ``links`` list, or if no
            link with a non-empty ``href`` references the 2.0 schema.
        Exceptions raised by ``request`` propagate unchanged.
    """
    well_known_url = '{}/.well-known/nodeinfo'.format(url)
    data = request(
        well_known_url,
        headers={'Accept': 'application/jrd+json,application/json'})
    try:
        links = data['links']
    except (KeyError, TypeError, AttributeError):
        # KeyError: JSON object without "links"; TypeError: payload is not a
        # mapping at all (list, number, null, ...).
        raise InvalidPayload(data)
    if not isinstance(links, (list, tuple)):
        raise InvalidPayload(data)
    for link in links:
        if not isinstance(link, dict):
            # Ignore malformed entries instead of crashing on ``.get``.
            continue
        rel = link.get('rel')
        href = link.get('href')
        if href and rel == 'http://nodeinfo.diaspora.software/ns/schema/2.0':
            return href
    raise InvalidPayload('No nodeinfo url')
def get_instance_data(url):
stats_url = '{}/api/v1/instance/stats/'.format(url)
settings_url = '{}/api/v1/instance/settings/'.format(url)
try:
nodeinfo_url = get_nodeinfo_url(url)
except (InvalidPayload, UnreachableInstance) as e:
print('Error while fetching nodeinfo for {}: {}'.format(url, e))
return
parsed = urllib.parse.urlparse(url)
data = {
'name': parsed.netloc,
'fetchDate': datetime.datetime.now().isoformat(),
'nodeinfoUrl': nodeinfo_url,
'info': {}
}
try:
settings = get_settings_data(settings_url)
stats = get_stats_data(stats_url)
nodeinfo = get_nodeinfo_data(nodeinfo_url)
if not nodeinfo:
return
except (InvalidPayload, UnreachableInstance) as e:
print('Error while fetching {}: {}'.format(url, e))
data['fetchSuccess'] = False
......@@ -65,49 +100,182 @@ def get_instance_data(url):
else:
data['fetchSuccess'] = True
data['up'] = True
data.update(stats)
data['openRegistrations'] = settings['openRegistrations']
data['info']['shortDescription'] = settings['shortDescription']
data['info']['fullDescription'] = settings['fullDescription']
data['info'] = nodeinfo
return data
def get_settings_data(url):
data = request(url)
by_key = {s['identifier']: s for s in data}
# Field table consumed by clean_data() / handle_field().
#   field          -- dotted path under which the cleaned value is stored
#   nodeinfo_field -- dotted path read from the fetched payload
#                     (handle_field falls back to ``field`` when absent)
#   to_python      -- callable applied to the raw value
#   required       -- handle_field treats a missing entry as True
nodeinfo_config = [
    {'field': 'software.name', 'to_python': str},
    {'field': 'software.version', 'to_python': str},
    {'field': 'openRegistrations', 'to_python': bool},
    {'field': 'users', 'nodeinfo_field': 'usage.users.total', 'to_python': int},
    {'field': 'shortDescription', 'nodeinfo_field': 'metadata.shortDescription', 'to_python': str},
    {'field': 'longDescription', 'nodeinfo_field': 'metadata.longDescription', 'to_python': str},
    {'field': 'name', 'nodeinfo_field': 'metadata.nodeName', 'to_python': str},
    {'field': 'library.artists', 'nodeinfo_field': 'metadata.library.artists.total', 'to_python': int, 'required': False},
    {'field': 'library.albums', 'nodeinfo_field': 'metadata.library.albums.total', 'to_python': int, 'required': False},
    {'field': 'library.tracks', 'nodeinfo_field': 'metadata.library.tracks.total', 'to_python': int, 'required': False},
    {'field': 'library.music.hours', 'nodeinfo_field': 'metadata.library.music.hours', 'to_python': int, 'required': False},
    {'field': 'library.federationNeedsApproval', 'nodeinfo_field': 'metadata.library.federationNeedsApproval', 'to_python': bool},
    {'field': 'library.federationEnabled', 'nodeinfo_field': 'metadata.library.federationEnabled', 'to_python': bool},
    {'field': 'library.anonymousCanListen', 'nodeinfo_field': 'metadata.library.anonymousCanListen', 'to_python': bool},
    {'field': 'usage.favorites.tracks', 'nodeinfo_field': 'metadata.usage.favorites.tracks.total', 'to_python': int, 'required': False},
    {'field': 'usage.favorites.listenings', 'nodeinfo_field': 'metadata.usage.listenings.total', 'to_python': int, 'required': False},
]
class ValidationError(Exception):
    """Error raised when a single field fails validation.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, field, message):
        # Same positional args that BaseException records by default.
        super().__init__(field, message)
        # Name of the field that failed validation.
        self.field = field
        # Human-readable description of the failure.
        self.message = message
def recursive_set(data, key, value):
    """Store ``value`` in nested dict ``data`` at the dotted path ``key``.

    Intermediate dictionaries are created on demand (existing ones are
    reused); the last path component receives ``value``.
    """
    *parents, leaf = key.split('.')
    node = data
    for name in parents:
        node = node.setdefault(name, {})
    node[leaf] = value
def recursive_get(data, key):
    """Return the value found in ``data`` at the dotted path ``key``.

    Each dot-separated component is used as a subscript on the value reached
    so far, so whatever that subscript raises (e.g. ``KeyError`` for a
    missing dict key, ``TypeError`` for a non-subscriptable value) is
    propagated to the caller.
    """
    value = data
    for part in key.split('.'):
        value = value[part]
    return value
def handle_field(data, conf):
    """Extract and convert one configured field from a fetched payload.

    Args:
        data: payload to read from (looked up with ``recursive_get``).
        conf: field description. ``field`` names the field in error reports;
            ``nodeinfo_field`` is the dotted path to read and defaults to
            ``field``; ``required`` defaults to True; ``allow_null`` defaults
            to False; ``to_python`` is the converter and defaults to the
            identity function.

    Returns:
        The converted value, or ``None`` when an optional field is missing
        or when the value is null and ``allow_null`` is set.

    Raises:
        ValidationError: if a required field is missing, if the value is
            null and nulls are not allowed, or if the converter raises
            ``TypeError`` / ``ValueError``.
    """
    key = conf.get('nodeinfo_field', conf['field'])
    try:
        raw_value = recursive_get(data, key)
    except (KeyError, TypeError, AttributeError):
        # The path could not be followed: the field is absent.
        if conf.get('required', True):
            raise ValidationError(conf['field'], 'This field is required')
        return None

    if raw_value is None:
        if conf.get('allow_null', False):
            return None
        raise ValidationError(conf['field'], 'Null values not allowed')

    cleaner = conf.get('to_python', lambda v: v)
    try:
        python_value = cleaner(raw_value)
    except (TypeError, ValueError):
        raise ValidationError(conf['field'], 'Invalid format for {}'.format(raw_value))
    return python_value
class ValidationErrorGroup(ValidationError):
    """Several validation errors reported together.

    Being a ``ValidationError`` subclass, it also exposes ``field`` and
    ``message`` so handlers written for single errors do not hit an
    ``AttributeError`` when they receive a group.
    """

    def __init__(self, errors):
        # Collected errors, as passed by the caller (clean_data passes a
        # dict keyed by field name).
        self.errors = errors
        # A group is not tied to a single field.
        self.field = None
        self.message = 'Validation failed for: {}'.format(
            ', '.join(str(name) for name in errors))
def clean_data(data, fields):
    """Validate ``data`` against ``fields`` and build the cleaned structure.

    Every field description is run through ``handle_field``; successful
    values are stored in the result with ``recursive_set`` under the
    description's ``field`` path.

    Args:
        data: raw payload to validate.
        fields: iterable of field descriptions (dicts with at least a
            ``field`` key).

    Returns:
        A nested dict holding the cleaned values.

    Raises:
        ValidationErrorGroup: if one or more fields fail validation; its
            ``errors`` dict maps each failing field name to its
            ``ValidationError``.
    """
    errors = {}
    cleaned_data = {}
    for field in fields:
        try:
            value = handle_field(data, field)
        except ValidationError as e:
            # Keep going so that every invalid field is reported at once;
            # re-raising here would make the grouped report unreachable.
            errors[e.field] = e
        else:
            recursive_set(cleaned_data, field['field'], value)
    if errors:
        raise ValidationErrorGroup(errors)
    return cleaned_data
def get_nodeinfo_data(url):
    """Fetch the document at ``url`` and clean it using ``nodeinfo_config``."""
    return clean_data(request(url), nodeinfo_config)
def request(url, headers={}):
headers.update({
'User-Agent': 'funkwhale/instances bot <https://code.eliotberriot.com/funkwhale/instances>'
})
try:
req = urllib.request.Request(
url,
headers={
'User-Agent': 'funkwhale/instances bot <https://code.eliotberriot.com/funkwhale/instances>'})
headers=headers)
with urllib.request.urlopen(req) as response:
assert response.status == 200, 'Wrong backend response ({})'.format(reponse.status)
return json.loads(response.read().decode('utf-8'))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment