Commit e5b46402 authored by Eliot Berriot

Fix #308: Licenses

parent e97f1f0e
......@@ -94,3 +94,4 @@ docs/swagger
_build
front/src/translations.json
front/locales/en_US/LC_MESSAGES/app.po
*.prof
......@@ -19,6 +19,7 @@ router.register(r"libraries", views.LibraryViewSet, "libraries")
router.register(r"listen", views.ListenViewSet, "listen")
router.register(r"artists", views.ArtistViewSet, "artists")
router.register(r"albums", views.AlbumViewSet, "albums")
router.register(r"licenses", views.LicenseViewSet, "licenses")
router.register(r"playlists", playlists_views.PlaylistViewSet, "playlists")
router.register(
r"playlist-tracks", playlists_views.PlaylistTrackViewSet, "playlist-tracks"
......
......@@ -731,6 +731,8 @@ class TrackSerializer(MusicEntitySerializer):
position = serializers.IntegerField(min_value=0, allow_null=True, required=False)
artists = serializers.ListField(child=ArtistSerializer(), min_length=1)
album = AlbumSerializer()
license = serializers.URLField(allow_null=True, required=False)
copyright = serializers.CharField(allow_null=True, required=False)
def to_representation(self, instance):
d = {
......@@ -740,6 +742,10 @@ class TrackSerializer(MusicEntitySerializer):
"published": instance.creation_date.isoformat(),
"musicbrainzId": str(instance.mbid) if instance.mbid else None,
"position": instance.position,
"license": instance.local_license["identifiers"][0]
if instance.local_license
else None,
"copyright": instance.copyright if instance.copyright else None,
"artists": [
ArtistSerializer(
instance.artist, context={"include_ap_context": False}
......
......@@ -4,9 +4,9 @@ import factory
from funkwhale_api.factories import ManyToManyFromList, registry
from funkwhale_api.federation import factories as federation_factories
from funkwhale_api.music import licenses
from funkwhale_api.users import factories as users_factories
SAMPLES_PATH = os.path.join(
os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
"tests",
......@@ -30,6 +30,29 @@ def playable_factory(field):
return inner
def deduce_from_conf(field):
    """
    Build a lazy factory attribute that reads ``field`` from the entry of
    ``licenses.LICENSES_BY_ID`` matching the ``code`` of the object being built.
    """

    def from_conf(obj):
        # ``obj.code`` is only known when the factory runs, hence the lazy lookup
        entry = licenses.LICENSES_BY_ID[obj.code]
        return entry[field]

    return factory.lazy_attribute(from_conf)
@registry.register
class LicenseFactory(factory.django.DjangoModelFactory):
    """
    Factory for ``music.License`` rows.

    Only ``code`` is given literally; every other attribute is resolved
    lazily through ``deduce_from_conf`` from the matching license entry.
    """

    class Meta:
        model = "music.License"
        # look the row up by code first instead of always inserting a new one
        django_get_or_create = ("code",)

    code = "cc-by-4.0"

    # attributes derived from the license configuration for ``code``
    url = deduce_from_conf("url")
    commercial = deduce_from_conf("commercial")
    redistribute = deduce_from_conf("redistribute")
    copyleft = deduce_from_conf("copyleft")
    attribution = deduce_from_conf("attribution")
    derivative = deduce_from_conf("derivative")
@registry.register
class ArtistFactory(factory.django.DjangoModelFactory):
name = factory.Faker("name")
......@@ -70,6 +93,15 @@ class TrackFactory(factory.django.DjangoModelFactory):
class Meta:
model = "music.Track"
@factory.post_generation
def license(self, created, extracted, **kwargs):
    """
    Attach a license to the generated track.

    ``extracted`` is the value passed as ``license=...`` to the factory; it
    is used as the ``code`` of the ``LicenseFactory`` call. Nothing happens
    when ``created`` is falsy or when no value was given.
    """
    if created and extracted:
        self.license = LicenseFactory(code=extracted)
        self.save()
@registry.register
class UploadFactory(factory.django.DjangoModelFactory):
......
......@@ -34,6 +34,7 @@ class TrackFilter(filters.FilterSet):
"playable": ["exact"],
"artist": ["exact"],
"album": ["exact"],
"license": ["exact"],
}
def filter_playable(self, queryset, name, value):
......
This diff is collapsed.
from django.core.management.base import BaseCommand, CommandError
import requests.exceptions
from funkwhale_api.music import licenses
class Command(BaseCommand):
    """Management command checking that every registered license URL answers."""

    help = "Check that specified licenses URLs are actually reachable"
    # seconds to wait for a server before reporting its license as unreachable,
    # so a single dead host cannot hang the whole run
    request_timeout = 10

    def handle(self, *args, **options):
        """
        Fetch the URL of each entry in ``licenses.LICENSES`` and report failures.

        Progress is written to stderr, the final report to stdout.

        Raises:
            CommandError: if at least one license URL could not be fetched
                successfully (HTTP error status, connection failure, timeout...).
        """
        errored = []
        objs = licenses.LICENSES
        total = len(objs)
        for i, data in enumerate(objs, start=1):
            self.stderr.write("{}/{} Checking {}...".format(i, total, data["code"]))
            try:
                # the request itself must be inside the try block: connection
                # errors and timeouts are raised by get(), not raise_for_status()
                response = requests.get(data["url"], timeout=self.request_timeout)
                response.raise_for_status()
            except requests.exceptions.RequestException as e:
                self.stderr.write("!!! Error while fetching {}!".format(data["code"]))
                # HTTP errors carry the response, network failures do not.
                # Compare with None explicitly: a Response with an error
                # status is falsy.
                failed_response = getattr(e, "response", None)
                if failed_response is not None:
                    detail = failed_response.status_code
                else:
                    detail = type(e).__name__
                errored.append((data, detail))

        if not errored:
            self.stdout.write("All licenses are valid and reachable :)")
            return

        self.stdout.write("{} licenses were not reachable!".format(len(errored)))
        for row, detail in errored:
            self.stdout.write(
                "- {}: error {} at url {}".format(row["code"], detail, row["url"])
            )
        raise CommandError("{} licenses were not reachable".format(len(errored)))
......@@ -25,10 +25,18 @@ def get_id3_tag(f, k):
if k == "pictures":
return f.tags.getall("APIC")
# First we try to grab the standard key
try:
return f.tags[k].text[0]
except KeyError:
pass
possible_attributes = [("text", True), ("url", False)]
for attr, select_first in possible_attributes:
try:
v = getattr(f.tags[k], attr)
if select_first:
v = v[0]
return v
except KeyError:
break
except AttributeError:
continue
# then we fallback on parsing non standard tags
all_tags = f.tags.getall("TXXX")
try:
......@@ -162,6 +170,8 @@ CONF = {
"musicbrainz_artistid": {},
"musicbrainz_albumartistid": {},
"musicbrainz_recordingid": {"field": "musicbrainz_trackid"},
"license": {},
"copyright": {},
},
},
"OggVorbis": {
......@@ -183,6 +193,8 @@ CONF = {
"musicbrainz_artistid": {},
"musicbrainz_albumartistid": {},
"musicbrainz_recordingid": {"field": "musicbrainz_trackid"},
"license": {},
"copyright": {},
},
},
"OggTheora": {
......@@ -201,6 +213,9 @@ CONF = {
"musicbrainz_artistid": {"field": "MusicBrainz Artist Id"},
"musicbrainz_albumartistid": {"field": "MusicBrainz Album Artist Id"},
"musicbrainz_recordingid": {"field": "MusicBrainz Track Id"},
# somehow, I cannot successfully create an ogg theora file
# with the proper license field
# "license": {"field": "license"},
},
},
"MP3": {
......@@ -221,6 +236,8 @@ CONF = {
"getter": get_mp3_recording_id,
},
"pictures": {},
"license": {"field": "WCOP"},
"copyright": {"field": "TCOP"},
},
},
"FLAC": {
......@@ -242,6 +259,8 @@ CONF = {
"musicbrainz_recordingid": {"field": "musicbrainz_trackid"},
"test": {},
"pictures": {},
"license": {},
"copyright": {},
},
},
}
......@@ -257,6 +276,8 @@ ALL_FIELDS = [
"musicbrainz_artistid",
"musicbrainz_albumartistid",
"musicbrainz_recordingid",
"license",
"copyright",
]
......
# Generated by Django 2.0.9 on 2018-11-27 03:25
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
    """
    Create the ``License`` model, alter the ``mimetype`` choices of
    ``uploadversion`` and add a nullable ``license`` foreign key on ``track``.
    """

    dependencies = [("music", "0033_auto_20181023_1837")]

    operations = [
        migrations.CreateModel(
            name="License",
            fields=[
                (
                    "code",
                    models.CharField(
                        max_length=100, primary_key=True, serialize=False
                    ),
                ),
                ("url", models.URLField(max_length=500)),
                ("copyleft", models.BooleanField()),
                ("commercial", models.BooleanField()),
                ("attribution", models.BooleanField()),
                ("derivative", models.BooleanField()),
                ("redistribute", models.BooleanField()),
            ],
        ),
        migrations.AlterField(
            model_name="uploadversion",
            name="mimetype",
            field=models.CharField(
                choices=[
                    ("audio/ogg", "ogg"),
                    ("audio/mpeg", "mp3"),
                    ("audio/x-flac", "flac"),
                    ("audio/flac", "flac"),
                ],
                max_length=50,
            ),
        ),
        migrations.AddField(
            model_name="track",
            name="license",
            field=models.ForeignKey(
                blank=True,
                null=True,
                on_delete=django.db.models.deletion.SET_NULL,
                related_name="tracks",
                to="music.License",
            ),
        ),
    ]
# Generated by Django 2.0.9 on 2018-12-03 15:15
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
    """
    Add the nullable ``copyright`` text column on ``track`` and redefine the
    ``license`` foreign key with ``on_delete=DO_NOTHING``.
    """

    dependencies = [("music", "0034_auto_20181127_0325")]

    operations = [
        migrations.AddField(
            model_name="track",
            name="copyright",
            field=models.CharField(blank=True, max_length=500, null=True),
        ),
        migrations.AlterField(
            model_name="track",
            name="license",
            field=models.ForeignKey(
                blank=True,
                null=True,
                on_delete=django.db.models.deletion.DO_NOTHING,
                related_name="tracks",
                to="music.License",
            ),
        ),
    ]
......@@ -113,6 +113,33 @@ class APIModelMixin(models.Model):
return super().save(**kwargs)
class License(models.Model):
    """
    A license, identified by its ``code``, with the URL of its text and
    flags describing what the license permits or requires.
    """

    code = models.CharField(primary_key=True, max_length=100)
    url = models.URLField(max_length=500)

    # if true, the license is a copyleft license, meaning that derivative
    # works must be shared under the same license
    copyleft = models.BooleanField()
    # if true, commercial use of the work is allowed
    commercial = models.BooleanField()
    # if true, attribution to the original author is required when reusing
    # the work
    attribution = models.BooleanField()
    # if true, derivative works are allowed
    derivative = models.BooleanField()
    # if true, redistribution of the work is allowed
    redistribute = models.BooleanField()

    @property
    def conf(self):
        """
        Return the entry of ``licenses.LICENSES`` whose ``code`` equals this
        license's code. When there is none, log a warning and return None.
        """
        # imported here rather than at module level
        # (NOTE(review): presumably to avoid a circular import — confirm)
        from . import licenses

        matching = next(
            (row for row in licenses.LICENSES if self.code == row["code"]), None
        )
        if matching is None:
            logger.warning("%s do not match any registered license", self.code)
        return matching
class ArtistQuerySet(models.QuerySet):
def with_albums_count(self):
return self.annotate(_albums_count=models.Count("albums"))
......@@ -430,6 +457,14 @@ class Track(APIModelMixin):
work = models.ForeignKey(
Work, related_name="tracks", null=True, blank=True, on_delete=models.CASCADE
)
license = models.ForeignKey(
License,
null=True,
blank=True,
on_delete=models.DO_NOTHING,
related_name="tracks",
)
copyright = models.CharField(max_length=500, null=True, blank=True)
federation_namespace = "tracks"
musicbrainz_model = "recording"
api = musicbrainz.api.recordings
......@@ -547,6 +582,17 @@ class Track(APIModelMixin):
def listen_url(self):
return reverse("api:v1:listen-detail", kwargs={"uuid": self.uuid})
@property
def local_license(self):
    """
    Return the hardcoded license data for this track, or None.

    License primary keys are strings, so ``license_id`` can be looked up
    directly in ``licenses.LICENSES_BY_ID`` without any extra SQL
    join or query.
    """
    from .licenses import LICENSES_BY_ID

    return LICENSES_BY_ID.get(self.license_id)
class UploadQuerySet(models.QuerySet):
def playable_by(self, actor, include=True):
......
......@@ -14,6 +14,21 @@ from . import filters, models, tasks
cover_field = VersatileImageFieldSerializer(allow_null=True, sizes="square")
class LicenseSerializer(serializers.Serializer):
    """
    Serializer for license data.

    ``get_id`` subscripts the serialized object (``obj["identifiers"]``),
    so it is expected to receive a license dict rather than a model instance.
    """

    # computed by ``get_id`` below
    id = serializers.SerializerMethodField()
    url = serializers.URLField()
    code = serializers.CharField()
    name = serializers.CharField()

    # permission / requirement flags
    redistribute = serializers.BooleanField()
    derivative = serializers.BooleanField()
    commercial = serializers.BooleanField()
    attribution = serializers.BooleanField()
    copyleft = serializers.BooleanField()

    def get_id(self, obj):
        """Return the first of the license identifiers."""
        identifiers = obj["identifiers"]
        return identifiers[0]
class ArtistAlbumSerializer(serializers.ModelSerializer):
tracks_count = serializers.SerializerMethodField()
cover = cover_field
......@@ -76,6 +91,8 @@ class AlbumTrackSerializer(serializers.ModelSerializer):
"uploads",
"listen_url",
"duration",
"copyright",
"license",
)
def get_uploads(self, obj):
......@@ -179,6 +196,8 @@ class TrackSerializer(serializers.ModelSerializer):
"lyrics",
"uploads",
"listen_url",
"copyright",
"license",
)
def get_lyrics(self, obj):
......
......@@ -16,6 +16,7 @@ from funkwhale_api.federation import routes
from funkwhale_api.federation import library as lb
from funkwhale_api.taskapp import celery
from . import licenses
from . import lyrics as lyrics_utils
from . import models
from . import metadata
......@@ -276,6 +277,8 @@ def federation_audio_track_to_metadata(payload):
"artist": payload["artists"][0]["name"],
"album_artist": payload["album"]["artists"][0]["name"],
"date": payload["album"].get("released"),
"license": payload.get("license"),
"copyright": payload.get("copyright"),
# musicbrainz
"musicbrainz_recordingid": str(musicbrainz_recordingid)
if musicbrainz_recordingid
......@@ -496,6 +499,8 @@ def get_track_from_import_metadata(data):
"position": track_number,
"fid": track_fid,
"from_activity_id": from_activity_id,
"license": licenses.match(data.get("license"), data.get("copyright")),
"copyright": data.get("copyright"),
}
if data.get("fdate"):
defaults["creation_date"] = data.get("fdate")
......
......@@ -22,7 +22,7 @@ from funkwhale_api.federation.authentication import SignatureAuthentication
from funkwhale_api.federation import api_serializers as federation_api_serializers
from funkwhale_api.federation import routes
from . import filters, models, serializers, tasks, utils
from . import filters, licenses, models, serializers, tasks, utils
logger = logging.getLogger(__name__)
......@@ -481,3 +481,28 @@ class Search(views.APIView):
)
return qs.filter(query_obj)[: self.max_results]
class LicenseViewSet(viewsets.ReadOnlyModelViewSet):
    """
    Read-only endpoints for licenses.

    The queryset holds ``License`` rows, but the serializer is fed the
    ``conf`` dict of each row instead of the row itself.
    """

    permission_classes = [common_permissions.ConditionalAuthentication]
    serializer_class = serializers.LicenseSerializer
    queryset = models.License.objects.all().order_by("code")
    lookup_value_regex = ".*"

    def get_queryset(self):
        """Load ``licenses.LICENSES`` into the database, then return the queryset."""
        # ensure our licenses are up to date in DB
        licenses.load(licenses.LICENSES)
        return super().get_queryset()

    def get_serializer(self, *args, **kwargs):
        """
        Build the serializer, replacing the first positional argument (a
        ``License`` or an iterable of them) with the matching ``conf`` data.
        Items whose ``conf`` is falsy are dropped from iterables.
        """
        if not args:
            return super().get_serializer(*args, **kwargs)

        # our serializer works with license dict, not License instances
        # so we pass those instead
        instance_or_qs = args[0]
        try:
            first_arg = instance_or_qs.conf
        except AttributeError:
            # not a single instance: treat it as an iterable of licenses.
            # ``conf`` scans the license list and logs a warning on a miss,
            # so evaluate it exactly once per item.
            confs = (i.conf for i in instance_or_qs)
            first_arg = [conf for conf in confs if conf]
        return super().get_serializer(first_arg, *args[1:], **kwargs)
......@@ -12,3 +12,4 @@ pytest-xdist
pytest-cov
pytest-env
requests-mock
pytest-profiling
......@@ -632,7 +632,7 @@ def test_activity_pub_album_serializer_to_ap(factories):
def test_activity_pub_track_serializer_to_ap(factories):
track = factories["music.Track"]()
track = factories["music.Track"](license="cc-by-4.0", copyright="test")
expected = {
"@context": serializers.AP_CONTEXT,
"published": track.creation_date.isoformat(),
......@@ -641,6 +641,8 @@ def test_activity_pub_track_serializer_to_ap(factories):
"id": track.fid,
"name": track.title,
"position": track.position,
"license": track.license.conf["identifiers"][0],
"copyright": "test",
"artists": [
serializers.ArtistSerializer(
track.artist, context={"include_ap_context": False}
......
This diff is collapsed.
No preview for this file type
No preview for this file type
import json
import os
import pytest
from funkwhale_api.music import models
from funkwhale_api.music import licenses
@pytest.fixture
def purge_license_cache():
    """Reset ``licenses._cache`` to None both before and after the test."""
    # setup: start from an empty cache
    licenses._cache = None
    yield
    # teardown: do not leak the cache into other tests
    licenses._cache = None
def test_licenses_do_not_change():
    """
    We have 100s of licenses static data, and we want to ensure
    that this data does not change without notice.

    So we keep a JSON snapshot of this data next to this test, and ensure
    our python data matches it. The snapshot is generated when missing.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    path = os.path.join(here, "licenses.json")
    from_python = licenses.LICENSES

    if not os.path.exists(path):
        # no snapshot yet: we write the file
        with open(path, "w") as f:
            json.dump(from_python, f, indent=4)
        return

    with open(path) as f:
        from_file = json.load(f)
    assert from_file == from_python
def test_load_licenses_create(db):
    """Loading an unknown license creates a row holding the given values."""
    license_data = {
        "code": "dummy",
        "url": "http://dummy",
        "redistribute": True,
        "derivative": True,
        "commercial": True,
        "attribution": True,
        "copyleft": True,
    }
    licenses.load([license_data])

    created = models.License.objects.get(pk=license_data["code"])

    checked_fields = (
        "url",
        "redistribute",
        "derivative",
        "copyleft",
        "commercial",
        "attribution",
    )
    for field in checked_fields:
        assert getattr(created, field) == license_data[field]
def test_load_hardcoded_licenses_works(db):
    """Smoke test: loading the full hardcoded license list must not raise."""
    hardcoded = licenses.LICENSES
    licenses.load(hardcoded)
def test_license_data():
    """
    Every hardcoded license must have an http(s) URL as first identifier
    and must define all the required fields.
    """
    required_fields = [
        "code",
        "name",
        "url",
        "derivative",
        "commercial",
        "redistribute",
        "attribution",
    ]
    for data in licenses.LICENSES:
        assert data["identifiers"][0].startswith("http")
        for field in required_fields:
            # check the license entry itself; testing membership in
            # ``required_fields`` would always pass
            assert field in data
def test_load_licenses_update(factories):
    """Loading data for an existing code updates the stored license row."""
    existing = models.License.objects.create(
        code="dummy",
        url="http://oldurl",
        redistribute=True,
        derivative=True,
        commercial=True,
        attribution=True,
        copyleft=True,
    )
    license_data = {
        "code": "dummy",
        "url": "http://newurl",
        "redistribute": False,
        "derivative": False,
        "commercial": True,
        "attribution": True,
        "copyleft": True,
    }
    licenses.load([license_data])

    existing.refresh_from_db()

    # ``redistribute`` is one of the values changed above, so it must be
    # verified along with the other fields
    checked_fields = (
        "url",
        "redistribute",
        "derivative",
        "copyleft",
        "commercial",
        "attribution",
    )
    for field in checked_fields:
        assert getattr(existing, field) == license_data[field]
def test_load_skip_update_if_no_change(factories, mocker):
    """Loading data identical to the stored row must not write to the database."""
    existing = models.License.objects.create(
        code="dummy",
        url="http://oldurl",
        redistribute=True,
        derivative=True,
        commercial=True,
        attribution=True,
        copyleft=True,
    )
    update_or_create = mocker.patch.object(models.License.objects, "update_or_create")
    save = mocker.patch.object(models.License, "save")

    # we load licenses but with same data
    same_data = {"code": "dummy"}
    mirrored_fields = (
        "url",
        "derivative",
        "redistribute",
        "commercial",
        "attribution",
        "copyleft",
    )
    for field in mirrored_fields:
        same_data[field] = getattr(existing, field)
    licenses.load([same_data])

    save.assert_not_called()
    update_or_create.assert_not_called()
@pytest.mark.parametrize(
    "value, expected",
    [
        (["http://creativecommons.org/licenses/by-sa/4.0/"], "cc-by-sa-4.0"),
        (["https://creativecommons.org/licenses/by-sa/4.0/"], "cc-by-sa-4.0"),
        (["https://creativecommons.org/licenses/by-sa/4.0"], "cc-by-sa-4.0"),
        (
            [
                "License for this work is: http://creativecommons.org/licenses/by-sa/4.0/"
            ],
            "cc-by-sa-4.0",
        ),
        (
            [
                "License: http://creativecommons.org/licenses/by-sa/4.0/ not http://creativecommons.org/publicdomain/zero/1.0/"  # noqa
            ],
            "cc-by-sa-4.0",
        ),
        (
            [None, "Copyright 2018 http://creativecommons.org/licenses/by-sa/4.0/"],
            "cc-by-sa-4.0",
        ),
        (
            [
                "Unknown",
                "Copyright 2018 http://creativecommons.org/licenses/by-sa/4.0/",
            ],
            "cc-by-sa-4.0",
        ),
        (["Unknown"], None),
        ([""], None),
    ],
)
def test_match(value, expected, db, mocker, purge_license_cache):
    """
    ``licenses.match`` receives the items of ``value`` as positional
    arguments and must return the license with code ``expected``, or None
    when no license is expected.
    """
    load_spy = mocker.spy(licenses, "load")

    matched = licenses.match(*value)

    if not expected:
        assert matched is None
        return

    assert matched == models.License.objects.get(code=expected)
    load_spy.assert_called_once_with(licenses.LICENSES)
def test_match_cache(mocker, db, purge_license_cache):
    """
    The first ``match`` call fills ``licenses._cache``; later calls must
    answer without calling ``licenses.load`` again.
    """
    assert licenses._cache is None

    # first call: no match expected, but the cache gets populated
    licenses.match("http://test.com")
    all_licenses = sorted(models.License.objects.all(), key=lambda o: o.code)
    assert licenses._cache == all_licenses

    # second call: ``load`` is replaced by a mock and must stay untouched
    load = mocker.patch.object(licenses, "load")
    matched = licenses.match("http://creativecommons.org/licenses/by-sa/4.0/")
    assert matched == models.License.objects.get(code="cc-by-sa-4.0")
    load.assert_not_called()
......@@ -24,6 +24,8 @@ def test_get_all_metadata_at_once():
"musicbrainz_recordingid": uuid.UUID("bd21ac48-46d8-4e78-925f-d9cc2a294656"),
"musicbrainz_artistid": uuid.UUID("013c8e5b-d72a-4cd3-8dee-6c64d6125823"),
"musicbrainz_albumartistid": uuid.UUID("013c8e5b-d72a-4cd3-8dee-6c64d6125823"),
"license": "Dummy license: http://creativecommons.org/licenses/by-sa/4.0/",
"copyright": "Someone",
}
assert data.all() == expected
......@@ -45,6 +47,8 @@ def test_get_all_metadata_at_once():
"musicbrainz_albumartistid",
uuid.UUID("013c8e5b-d72a-4cd3-8dee-6c64d6125823"),
),
("license", "Dummy license: http://creativecommons.org/licenses/by-sa/4.0/"),
("copyright", "Someone"),
],
)
def test_can_get_metadata_from_ogg_file(field, value):
......@@ -70,6 +74,8 @@ def test_can_get_metadata_from_ogg_file(field, value):
"musicbrainz_albumartistid",
uuid.UUID("013c8e5b-d72a-4cd3-8dee-6c64d6125823"),
),
("license", "Dummy license: http://creativecommons.org/licenses/by-sa/4.0/"),
("copyright", "Someone"),
],
)
def test_can_get_metadata_from_opus_file(field, value):
......@@ -95,6 +101,9 @@ def test_can_get_metadata_from_opus_file(field, value):
"musicbrainz_albumartistid",
uuid.UUID("c3bc80a6-1f4a-4e17-8cf0-6b1efe8302f1"),
),
# somehow, I cannot successfully create an ogg theora file
# with the proper license field
# ("license", "Dummy license: http://creativecommons.org/licenses/by-sa/4.0/"),
],
)
def test_can_get_metadata_from_ogg_theora_file(field, value):
......@@ -120,6 +129,8 @@ def test_can_get_metadata_from_ogg_theora_file(field, value):
"musicbrainz_albumartistid",
uuid.UUID("9c6bddde-6228-4d9f-ad0d-03f6fcb19e13"),
),
("license", "https://creativecommons.org/licenses/by-nc-nd/2.5/"),
("copyright", "Someone"),
],
)
def test_can_get_metadata_from_id3_mp3_file(field, value):
......@@ -159,6 +170,8 @@ def test_can_get_pictures(name):
"musicbrainz_albumartistid",