Skip to content
Snippets Groups Projects
Commit e5b46402 authored by Eliot Berriot's avatar Eliot Berriot
Browse files

Fix #308: Licenses

parent e97f1f0e
Branches
Tags
No related merge requests found
Showing
with 5163 additions and 7 deletions
......@@ -94,3 +94,4 @@ docs/swagger
_build
front/src/translations.json
front/locales/en_US/LC_MESSAGES/app.po
*.prof
......@@ -19,6 +19,7 @@ router.register(r"libraries", views.LibraryViewSet, "libraries")
router.register(r"listen", views.ListenViewSet, "listen")
router.register(r"artists", views.ArtistViewSet, "artists")
router.register(r"albums", views.AlbumViewSet, "albums")
router.register(r"licenses", views.LicenseViewSet, "licenses")
router.register(r"playlists", playlists_views.PlaylistViewSet, "playlists")
router.register(
r"playlist-tracks", playlists_views.PlaylistTrackViewSet, "playlist-tracks"
......
......@@ -731,6 +731,8 @@ class TrackSerializer(MusicEntitySerializer):
position = serializers.IntegerField(min_value=0, allow_null=True, required=False)
artists = serializers.ListField(child=ArtistSerializer(), min_length=1)
album = AlbumSerializer()
license = serializers.URLField(allow_null=True, required=False)
copyright = serializers.CharField(allow_null=True, required=False)
def to_representation(self, instance):
d = {
......@@ -740,6 +742,10 @@ class TrackSerializer(MusicEntitySerializer):
"published": instance.creation_date.isoformat(),
"musicbrainzId": str(instance.mbid) if instance.mbid else None,
"position": instance.position,
"license": instance.local_license["identifiers"][0]
if instance.local_license
else None,
"copyright": instance.copyright if instance.copyright else None,
"artists": [
ArtistSerializer(
instance.artist, context={"include_ap_context": False}
......
......@@ -4,9 +4,9 @@ import factory
from funkwhale_api.factories import ManyToManyFromList, registry
from funkwhale_api.federation import factories as federation_factories
from funkwhale_api.music import licenses
from funkwhale_api.users import factories as users_factories
SAMPLES_PATH = os.path.join(
os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
"tests",
......@@ -30,6 +30,29 @@ def playable_factory(field):
return inner
def deduce_from_conf(field):
    """
    Build a lazy factory attribute whose value is read from the hardcoded
    license configuration.

    The returned declaration looks up the row matching the generated
    object's ``code`` in ``licenses.LICENSES_BY_ID`` and returns its
    ``field`` entry.
    """
    return factory.LazyAttribute(
        lambda obj: licenses.LICENSES_BY_ID[obj.code][field]
    )
# Factory for music.License rows: only ``code`` is set explicitly, every
# other attribute is read from the hardcoded configuration for that code
# (see deduce_from_conf).
@registry.register
class LicenseFactory(factory.django.DjangoModelFactory):
    # default license code used when none is given
    code = "cc-by-4.0"
    # the remaining attributes are looked up in licenses.LICENSES_BY_ID
    # using the code above
    url = deduce_from_conf("url")
    commercial = deduce_from_conf("commercial")
    redistribute = deduce_from_conf("redistribute")
    copyleft = deduce_from_conf("copyleft")
    attribution = deduce_from_conf("attribution")
    derivative = deduce_from_conf("derivative")

    class Meta:
        model = "music.License"
        # "code" is the lookup key for get-or-create
        django_get_or_create = ("code",)
@registry.register
class ArtistFactory(factory.django.DjangoModelFactory):
name = factory.Faker("name")
......@@ -70,6 +93,15 @@ class TrackFactory(factory.django.DjangoModelFactory):
class Meta:
model = "music.Track"
@factory.post_generation
def license(self, created, extracted, **kwargs):
    """
    Attach a license to the generated track.

    Nothing happens unless ``created`` is true and a license code was
    passed (``extracted``); in that case the matching License is obtained
    through LicenseFactory, assigned to the track, and the track is saved.
    """
    if created and extracted:
        self.license = LicenseFactory(code=extracted)
        self.save()
@registry.register
class UploadFactory(factory.django.DjangoModelFactory):
......
......@@ -34,6 +34,7 @@ class TrackFilter(filters.FilterSet):
"playable": ["exact"],
"artist": ["exact"],
"album": ["exact"],
"license": ["exact"],
}
def filter_playable(self, queryset, name, value):
......
import logging
import re
from django.db import transaction
from . import models
logger = logging.getLogger(__name__)
# Names of the License attributes that load() copies from each hardcoded
# license row, both when creating new objects and when comparing/updating
# existing ones. "code" is handled separately since it identifies the row.
MODEL_FIELDS = [
    "redistribute",
    "derivative",
    "attribution",
    "copyleft",
    "commercial",
    "url",
]
@transaction.atomic
def load(data):
    """
    Load/update database objects with our hardcoded data

    ``data`` is an iterable of license rows (dicts) containing a ``code``
    key and every key listed in ``MODEL_FIELDS``.

    - rows whose code is not stored yet are inserted with a single
      ``bulk_create`` call;
    - rows whose stored values differ from the hardcoded ones are updated
      and saved one by one;
    - rows that are already up to date are left untouched (and not logged).

    Returns the list of all License objects found in database afterwards,
    sorted by code.
    """
    existing_by_code = {e.code: e for e in models.License.objects.all()}
    to_create = []

    for row in data:
        code = row["code"]
        wanted = {f: row[f] for f in MODEL_FIELDS}
        try:
            stored = existing_by_code[code]
        except KeyError:
            logger.info("Loading new license: %s", code)
            to_create.append(models.License(code=code, **wanted))
            continue

        if all(getattr(stored, f) == value for f, value in wanted.items()):
            # already in sync: skip the query, and do not claim an update
            # happened in the logs
            continue

        # the object in database needs an update
        logger.info("Updating license: %s", code)
        for f, value in wanted.items():
            setattr(stored, f, value)
        stored.save()

    models.License.objects.bulk_create(to_create)
    return sorted(models.License.objects.all(), key=lambda o: o.code)
# License objects returned by load(), kept at module level so that the
# hardcoded data is only loaded once match() has a non-empty result to reuse
_cache = None


def match(*values):
    """
    Given strings extracted from music file tags, return the License
    instance matching the first URL found in one of them.

    Values are tried in order; empty values and values that contain no URL
    are skipped. Returns None when no value leads to a known license.
    """
    global _cache
    # we are looking for the first url in our value
    # This regex is not perfect, but it's good enough for now
    url_pattern = r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"

    for candidate in values:
        if not candidate:
            continue

        found = re.search(url_pattern, candidate)
        if found is None:
            logger.debug(
                'Impossible to guess license from string "{}"'.format(candidate)
            )
            continue
        url = found.group(0)

        if not _cache:
            _cache = load(LICENSES)

        for known in _cache:
            conf = known.conf
            if conf is None:
                # license stored in database but absent from LICENSES
                continue
            if any(match_urls(url, identifier) for identifier in conf["identifiers"]):
                return known
def match_urls(*urls):
    """
    We want to ensure the two url match but don't care for protocol
    or trailing slashes

    Each url is normalized by dropping trailing slashes and a leading
    ``http://`` or ``https://`` prefix. Returns True when all normalized
    urls are identical, False otherwise (including when no url is given).
    """
    normalized = set()
    for url in urls:
        url = url.rstrip("/")
        # str.lstrip() removes a *set of characters*, not a prefix, and
        # would also eat the beginning of hosts such as "test.com", so the
        # scheme is removed explicitly
        for scheme in ("https://", "http://"):
            if url.startswith(scheme):
                url = url[len(scheme):]
                break
        normalized.add(url)
    return len(normalized) == 1
def get_cc_license(version, perks, country=None, country_name=None):
    """
    Build the configuration dict describing one Creative Commons license.

    ``version`` is the license version as a string (e.g. ``"4.0"``).
    ``perks`` is a collection of clause codes among ``by``, ``nc``, ``sa``
    and ``nd``; they are always emitted in that canonical order, whatever
    the order they were given in, and unknown entries are ignored.
    ``country`` is the optional code of a ported license: it is appended to
    the license code and url. ``country_name`` is appended to the license
    name; when it is omitted, the country code is used instead.

    Returns a dict with the ``name``, ``code``, ``url``, ``identifiers``
    keys and the ``redistribute``, ``commercial``, ``derivative``,
    ``copyleft`` and ``attribution`` flags.

    Raises ValueError if ``perks`` is empty or contains no known clause.
    """
    if not perks:
        raise ValueError("No perks!")

    url_template = "//creativecommons.org/licenses/{type}/{version}/"
    perks_data = [
        ("by", "Attribution"),
        ("nc", "NonCommercial"),
        ("sa", "ShareAlike"),
        ("nd", "NoDerivatives"),
    ]
    selected = [(perk, label) for perk, label in perks_data if perk in perks]
    if not selected:
        # otherwise we would generate an url like ".../licenses//4.0/"
        raise ValueError("No known perks in {}".format(list(perks)))

    code_parts = [perk for perk, _ in selected]
    name_parts = [label for _, label in selected]

    # the url only contains the clauses, the code also gets the version
    url = url_template.format(version=version, type="-".join(code_parts))
    code_parts.append(version)
    name = "Creative commons - {perks} {version}".format(
        perks="-".join(name_parts), version=version
    )
    if country:
        code_parts.append(country)
        # avoid a literal " None" suffix when no display name was given
        name += " {}".format(country_name or country)
        url += country + "/"

    return {
        "name": name,
        "code": "cc-{}".format("-".join(code_parts)),
        "redistribute": True,
        "commercial": "nc" not in perks,
        "derivative": "nd" not in perks,
        "copyleft": "sa" in perks,
        "attribution": "by" in perks,
        "url": "https:" + url,
        # the http variant is the one embedded in music files metadata
        "identifiers": ["http:" + url],
    }
# Maps the code used for ported Creative Commons licenses (as passed to
# get_cc_license through ``country``) to the display name appended to the
# license name (``country_name``).
COUNTRIES = {
    "ar": "Argentina",
    "au": "Australia",
    "at": "Austria",
    "be": "Belgium",
    "br": "Brazil",
    "bg": "Bulgaria",
    "ca": "Canada",
    "cl": "Chile",
    "cn": "China Mainland",
    "co": "Colombia",
    "cr": "Costa Rica",
    "hr": "Croatia",
    "cz": "Czech Republic",
    "dk": "Denmark",
    "ec": "Ecuador",
    "eg": "Egypt",
    "ee": "Estonia",
    "fi": "Finland",
    "fr": "France",
    "de": "Germany",
    "gr": "Greece",
    "gt": "Guatemala",
    "hk": "Hong Kong",
    "hu": "Hungary",
    "igo": "IGO",
    "in": "India",
    "ie": "Ireland",
    "il": "Israel",
    "it": "Italy",
    "jp": "Japan",
    "lu": "Luxembourg",
    "mk": "Macedonia",
    "my": "Malaysia",
    "mt": "Malta",
    "mx": "Mexico",
    "nl": "Netherlands",
    "nz": "New Zealand",
    "no": "Norway",
    "pe": "Peru",
    "ph": "Philippines",
    "pl": "Poland",
    "pt": "Portugal",
    "pr": "Puerto Rico",
    "ro": "Romania",
    "rs": "Serbia",
    "sg": "Singapore",
    "si": "Slovenia",
    "za": "South Africa",
    "kr": "South Korea",
    "es": "Spain",
    "se": "Sweden",
    "ch": "Switzerland",
    "tw": "Taiwan",
    "th": "Thailand",
    "uk": "UK: England & Wales",
    "scotland": "UK: Scotland",
    "ug": "Uganda",
    "us": "United States",
    "ve": "Venezuela",
    "vn": "Vietnam",
}
# Codes (keys of COUNTRIES) for which ported version 3.0 licenses are
# generated and added to LICENSES.
CC_30_COUNTRIES = [
    "at",
    "au",
    "br",
    "ch",
    "cl",
    "cn",
    "cr",
    "cz",
    "de",
    "ec",
    "ee",
    "eg",
    "es",
    "fr",
    "gr",
    "gt",
    "hk",
    "hr",
    "ie",
    "igo",
    "it",
    "lu",
    "nl",
    "no",
    "nz",
    "ph",
    "pl",
    "pr",
    "pt",
    "ro",
    "rs",
    "sg",
    "th",
    "tw",
    "ug",
    "us",
    "ve",
    "vn",
    "za",
]
# Codes (keys of COUNTRIES) for which ported version 2.5 licenses are
# generated and added to LICENSES.
CC_25_COUNTRIES = [
    "ar",
    "bg",
    "ca",
    "co",
    "dk",
    "hu",
    "il",
    "in",
    "mk",
    "mt",
    "mx",
    "my",
    "pe",
    "scotland",
]
# Every clause combination we register for a given Creative Commons
# version, in the order the licenses are generated
_CC_PERKS = [
    ["by"],
    ["by", "sa"],
    ["by", "nc"],
    ["by", "nc", "sa"],
    ["by", "nc", "nd"],
    ["by", "nd"],
]

LICENSES = [
    # a non-exhaustive list: http://musique-libre.org/doc/le-tableau-des-licences-libres-et-ouvertes-de-dogmazic/
    {
        "code": "cc0-1.0",
        "name": "CC0 - Public domain",
        "redistribute": True,
        "derivative": True,
        "commercial": True,
        "attribution": False,
        "copyleft": False,
        "url": "https://creativecommons.org/publicdomain/zero/1.0/",
        "identifiers": [
            # note the http here.
            # This is the kind of URL that is embedded in music files metadata
            "http://creativecommons.org/publicdomain/zero/1.0/"
        ],
    },
]

# Creative commons versions 4.0, 3.0, 2.5, 2.0 and 1.0 (not ported)
LICENSES += [
    get_cc_license(version=cc_version, perks=cc_perks)
    for cc_version in ("4.0", "3.0", "2.5", "2.0", "1.0")
    for cc_perks in _CC_PERKS
]

# generate ported (by country) CC licenses:
for country in CC_30_COUNTRIES:
    name = COUNTRIES[country]
    LICENSES += [
        get_cc_license(
            version="3.0", perks=cc_perks, country=country, country_name=name
        )
        for cc_perks in _CC_PERKS
    ]

for country in CC_25_COUNTRIES:
    name = COUNTRIES[country]
    LICENSES += [
        get_cc_license(
            version="2.5", perks=cc_perks, country=country, country_name=name
        )
        for cc_perks in _CC_PERKS
    ]

# final registry: ordered by code, plus an index to fetch a row by its code
LICENSES = sorted(LICENSES, key=lambda row: row["code"])
LICENSES_BY_ID = {row["code"]: row for row in LICENSES}
from django.core.management.base import BaseCommand, CommandError
import requests.exceptions
from funkwhale_api.music import licenses
class Command(BaseCommand):
    """
    Management command fetching the url of every hardcoded license and
    reporting the ones that cannot be retrieved.

    Progress is written to stderr, the final report to stdout. A
    CommandError is raised when at least one license url is not reachable.
    """

    help = "Check that specified licenses URLs are actually reachable"
    # seconds to wait for each request, so that a stalled server cannot
    # block the whole check
    request_timeout = 10

    def handle(self, *args, **options):
        errored = []
        objs = licenses.LICENSES
        total = len(objs)
        for i, data in enumerate(objs):
            self.stderr.write("{}/{} Checking {}...".format(i + 1, total, data["code"]))
            try:
                # the request itself is inside the try block: connection
                # errors and timeouts must be reported like HTTP errors
                # instead of aborting the command
                response = requests.get(data["url"], timeout=self.request_timeout)
                response.raise_for_status()
            except requests.exceptions.RequestException as e:
                self.stderr.write("!!! Error while fetching {}!".format(data["code"]))
                # compare with None explicitly: an error Response is falsy
                failed = getattr(e, "response", None)
                if failed is not None:
                    reason = failed.status_code
                else:
                    # no HTTP response at all (DNS failure, timeout...)
                    reason = type(e).__name__
                errored.append((data, reason))

        if errored:
            self.stdout.write("{} licenses were not reachable!".format(len(errored)))
            for row, reason in errored:
                self.stdout.write(
                    "- {}: error {} at url {}".format(row["code"], reason, row["url"])
                )
            raise CommandError()
        else:
            self.stdout.write("All licenses are valid and reachable :)")
......@@ -25,10 +25,18 @@ def get_id3_tag(f, k):
if k == "pictures":
return f.tags.getall("APIC")
# First we try to grab the standard key
possible_attributes = [("text", True), ("url", False)]
for attr, select_first in possible_attributes:
try:
return f.tags[k].text[0]
v = getattr(f.tags[k], attr)
if select_first:
v = v[0]
return v
except KeyError:
pass
break
except AttributeError:
continue
# then we fallback on parsing non standard tags
all_tags = f.tags.getall("TXXX")
try:
......@@ -162,6 +170,8 @@ CONF = {
"musicbrainz_artistid": {},
"musicbrainz_albumartistid": {},
"musicbrainz_recordingid": {"field": "musicbrainz_trackid"},
"license": {},
"copyright": {},
},
},
"OggVorbis": {
......@@ -183,6 +193,8 @@ CONF = {
"musicbrainz_artistid": {},
"musicbrainz_albumartistid": {},
"musicbrainz_recordingid": {"field": "musicbrainz_trackid"},
"license": {},
"copyright": {},
},
},
"OggTheora": {
......@@ -201,6 +213,9 @@ CONF = {
"musicbrainz_artistid": {"field": "MusicBrainz Artist Id"},
"musicbrainz_albumartistid": {"field": "MusicBrainz Album Artist Id"},
"musicbrainz_recordingid": {"field": "MusicBrainz Track Id"},
# somehow, I cannot successfully create an ogg theora file
# with the proper license field
# "license": {"field": "license"},
},
},
"MP3": {
......@@ -221,6 +236,8 @@ CONF = {
"getter": get_mp3_recording_id,
},
"pictures": {},
"license": {"field": "WCOP"},
"copyright": {"field": "TCOP"},
},
},
"FLAC": {
......@@ -242,6 +259,8 @@ CONF = {
"musicbrainz_recordingid": {"field": "musicbrainz_trackid"},
"test": {},
"pictures": {},
"license": {},
"copyright": {},
},
},
}
......@@ -257,6 +276,8 @@ ALL_FIELDS = [
"musicbrainz_artistid",
"musicbrainz_albumartistid",
"musicbrainz_recordingid",
"license",
"copyright",
]
......
# Generated by Django 2.0.9 on 2018-11-27 03:25
from django.db import migrations, models
import django.db.models.deletion
# Creates the License table, alters the choices of uploadversion.mimetype
# and adds a nullable Track.license foreign key (SET_NULL on delete).
class Migration(migrations.Migration):
    dependencies = [
        ('music', '0033_auto_20181023_1837'),
    ]
    operations = [
        migrations.CreateModel(
            name='License',
            fields=[
                # the license code is used as primary key
                ('code', models.CharField(max_length=100, primary_key=True, serialize=False)),
                ('url', models.URLField(max_length=500)),
                ('copyleft', models.BooleanField()),
                ('commercial', models.BooleanField()),
                ('attribution', models.BooleanField()),
                ('derivative', models.BooleanField()),
                ('redistribute', models.BooleanField()),
            ],
        ),
        migrations.AlterField(
            model_name='uploadversion',
            name='mimetype',
            field=models.CharField(choices=[('audio/ogg', 'ogg'), ('audio/mpeg', 'mp3'), ('audio/x-flac', 'flac'), ('audio/flac', 'flac')], max_length=50),
        ),
        migrations.AddField(
            model_name='track',
            name='license',
            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='tracks', to='music.License'),
        ),
    ]
# Generated by Django 2.0.9 on 2018-12-03 15:15
from django.db import migrations, models
import django.db.models.deletion
# Adds the nullable Track.copyright text field and switches the on_delete
# behaviour of Track.license to DO_NOTHING.
class Migration(migrations.Migration):
    dependencies = [
        ('music', '0034_auto_20181127_0325'),
    ]
    operations = [
        migrations.AddField(
            model_name='track',
            name='copyright',
            field=models.CharField(blank=True, max_length=500, null=True),
        ),
        migrations.AlterField(
            model_name='track',
            name='license',
            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name='tracks', to='music.License'),
        ),
    ]
......@@ -113,6 +113,33 @@ class APIModelMixin(models.Model):
return super().save(**kwargs)
class License(models.Model):
    """
    A license tracks can be released under, identified by its code.

    The stored columns mirror the hardcoded rows of ``licenses.LICENSES``;
    the complete row (including name and identifiers) is available
    through the ``conf`` property.
    """

    code = models.CharField(primary_key=True, max_length=100)
    url = models.URLField(max_length=500)

    # if true, license is a copyleft license, meaning that derivative
    # work must be shared under the same license
    copyleft = models.BooleanField()
    # if true, commercial use of the work is allowed
    commercial = models.BooleanField()
    # if true, attribution to the original author is required when reusing
    # the work
    attribution = models.BooleanField()
    # if true, derivative works are allowed
    derivative = models.BooleanField()
    # if true, redistribution of the work is allowed
    redistribute = models.BooleanField()

    @property
    def conf(self):
        """
        Return the hardcoded row matching this license code, or None
        (after logging a warning) when the code is not registered.
        """
        # imported here rather than at module level, as in the original
        from . import licenses

        try:
            # direct lookup in the code -> row index instead of scanning
            # the whole LICENSES list on every access
            return licenses.LICENSES_BY_ID[self.code]
        except KeyError:
            logger.warning("%s do not match any registered license", self.code)
            return None
class ArtistQuerySet(models.QuerySet):
def with_albums_count(self):
return self.annotate(_albums_count=models.Count("albums"))
......@@ -430,6 +457,14 @@ class Track(APIModelMixin):
work = models.ForeignKey(
Work, related_name="tracks", null=True, blank=True, on_delete=models.CASCADE
)
license = models.ForeignKey(
License,
null=True,
blank=True,
on_delete=models.DO_NOTHING,
related_name="tracks",
)
copyright = models.CharField(max_length=500, null=True, blank=True)
federation_namespace = "tracks"
musicbrainz_model = "recording"
api = musicbrainz.api.recordings
......@@ -547,6 +582,17 @@ class Track(APIModelMixin):
def listen_url(self):
return reverse("api:v1:listen-detail", kwargs={"uuid": self.uuid})
@property
def local_license(self):
    """
    Hardcoded configuration row of this track's license, or None when
    the track's license_id is not a registered license code.

    License primary keys are strings that are also the keys of
    licenses.LICENSES_BY_ID, so the row is fetched from memory without
    any extra SQL join or query.
    """
    from . import licenses

    registry = licenses.LICENSES_BY_ID
    return registry.get(self.license_id)
class UploadQuerySet(models.QuerySet):
def playable_by(self, actor, include=True):
......
......@@ -14,6 +14,21 @@ from . import filters, models, tasks
cover_field = VersatileImageFieldSerializer(allow_null=True, sizes="square")
# Serializes a license configuration row (a dict with an "identifiers"
# list, see get_id), not a License model instance.
class LicenseSerializer(serializers.Serializer):
    # computed by get_id below
    id = serializers.SerializerMethodField()
    url = serializers.URLField()
    code = serializers.CharField()
    name = serializers.CharField()
    redistribute = serializers.BooleanField()
    derivative = serializers.BooleanField()
    commercial = serializers.BooleanField()
    attribution = serializers.BooleanField()
    copyleft = serializers.BooleanField()

    def get_id(self, obj):
        # the first identifier of the row is exposed as the license id
        return obj["identifiers"][0]
class ArtistAlbumSerializer(serializers.ModelSerializer):
tracks_count = serializers.SerializerMethodField()
cover = cover_field
......@@ -76,6 +91,8 @@ class AlbumTrackSerializer(serializers.ModelSerializer):
"uploads",
"listen_url",
"duration",
"copyright",
"license",
)
def get_uploads(self, obj):
......@@ -179,6 +196,8 @@ class TrackSerializer(serializers.ModelSerializer):
"lyrics",
"uploads",
"listen_url",
"copyright",
"license",
)
def get_lyrics(self, obj):
......
......@@ -16,6 +16,7 @@ from funkwhale_api.federation import routes
from funkwhale_api.federation import library as lb
from funkwhale_api.taskapp import celery
from . import licenses
from . import lyrics as lyrics_utils
from . import models
from . import metadata
......@@ -276,6 +277,8 @@ def federation_audio_track_to_metadata(payload):
"artist": payload["artists"][0]["name"],
"album_artist": payload["album"]["artists"][0]["name"],
"date": payload["album"].get("released"),
"license": payload.get("license"),
"copyright": payload.get("copyright"),
# musicbrainz
"musicbrainz_recordingid": str(musicbrainz_recordingid)
if musicbrainz_recordingid
......@@ -496,6 +499,8 @@ def get_track_from_import_metadata(data):
"position": track_number,
"fid": track_fid,
"from_activity_id": from_activity_id,
"license": licenses.match(data.get("license"), data.get("copyright")),
"copyright": data.get("copyright"),
}
if data.get("fdate"):
defaults["creation_date"] = data.get("fdate")
......
......@@ -22,7 +22,7 @@ from funkwhale_api.federation.authentication import SignatureAuthentication
from funkwhale_api.federation import api_serializers as federation_api_serializers
from funkwhale_api.federation import routes
from . import filters, models, serializers, tasks, utils
from . import filters, licenses, models, serializers, tasks, utils
logger = logging.getLogger(__name__)
......@@ -481,3 +481,28 @@ class Search(views.APIView):
)
return qs.filter(query_obj)[: self.max_results]
class LicenseViewSet(viewsets.ReadOnlyModelViewSet):
    """
    Read-only API over licenses.

    Objects are looked up in database, but what gets serialized is the
    hardcoded configuration row of each license (its ``conf``).
    """

    permission_classes = [common_permissions.ConditionalAuthentication]
    serializer_class = serializers.LicenseSerializer
    queryset = models.License.objects.all().order_by("code")
    lookup_value_regex = ".*"

    def get_queryset(self):
        # ensure our licenses are up to date in DB
        licenses.load(licenses.LICENSES)
        queryset = super().get_queryset()
        return queryset

    def get_serializer(self, *args, **kwargs):
        if not args:
            return super().get_serializer(*args, **kwargs)

        # our serializer works with license dict, not License instances
        # so we pass those instead
        target, *remaining = args
        try:
            # single License instance
            payload = target.conf
        except AttributeError:
            # collection of License instances: keep the ones with a conf
            confs = (item.conf for item in target)
            payload = [conf for conf in confs if conf]
        return super().get_serializer(payload, *remaining, **kwargs)
......@@ -12,3 +12,4 @@ pytest-xdist
pytest-cov
pytest-env
requests-mock
pytest-profiling
......@@ -632,7 +632,7 @@ def test_activity_pub_album_serializer_to_ap(factories):
def test_activity_pub_track_serializer_to_ap(factories):
track = factories["music.Track"]()
track = factories["music.Track"](license="cc-by-4.0", copyright="test")
expected = {
"@context": serializers.AP_CONTEXT,
"published": track.creation_date.isoformat(),
......@@ -641,6 +641,8 @@ def test_activity_pub_track_serializer_to_ap(factories):
"id": track.fid,
"name": track.title,
"position": track.position,
"license": track.license.conf["identifiers"][0],
"copyright": "test",
"artists": [
serializers.ArtistSerializer(
track.artist, context={"include_ap_context": False}
......
This diff is collapsed.
No preview for this file type
No preview for this file type
No preview for this file type
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment