[bandcamp:weekly] Improve and extract more metadata (closes #12758)
This commit is contained in:
parent
62bafabc09
commit
6d923aab35
1 changed files with 54 additions and 24 deletions
|
@ -14,6 +14,7 @@ from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
KNOWN_EXTENSIONS,
|
||||||
parse_filesize,
|
parse_filesize,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
|
@ -22,7 +23,7 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class BandcampIE(InfoExtractor):
|
class BandcampIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://.*?\.bandcamp\.com/track/(?P<title>.*)'
|
_VALID_URL = r'https?://.*?\.bandcamp\.com/track/(?P<title>[^/?#&]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
|
'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
|
||||||
'md5': 'c557841d5e50261777a6585648adf439',
|
'md5': 'c557841d5e50261777a6585648adf439',
|
||||||
|
@ -156,7 +157,7 @@ class BandcampIE(InfoExtractor):
|
||||||
|
|
||||||
class BandcampAlbumIE(InfoExtractor):
|
class BandcampAlbumIE(InfoExtractor):
|
||||||
IE_NAME = 'Bandcamp:album'
|
IE_NAME = 'Bandcamp:album'
|
||||||
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^?#]+)|/?(?:$|[?#]))'
|
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^/?#&]+))?'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
||||||
|
@ -225,7 +226,9 @@ class BandcampAlbumIE(InfoExtractor):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
return False if BandcampWeeklyIE.suitable(url) else super(BandcampAlbumIE, cls).suitable(url)
|
return (False
|
||||||
|
if BandcampWeeklyIE.suitable(url) or BandcampIE.suitable(url)
|
||||||
|
else super(BandcampAlbumIE, cls).suitable(url))
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
@ -258,16 +261,22 @@ class BandcampAlbumIE(InfoExtractor):
|
||||||
|
|
||||||
|
|
||||||
class BandcampWeeklyIE(InfoExtractor):
|
class BandcampWeeklyIE(InfoExtractor):
|
||||||
IE_NAME = 'Bandcamp:bandcamp_weekly'
|
IE_NAME = 'Bandcamp:weekly'
|
||||||
_VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*&)?show=(?P<id>\d+)(?:$|[&#])'
|
_VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://bandcamp.com/?show=224',
|
'url': 'https://bandcamp.com/?show=224',
|
||||||
'md5': 'b00df799c733cf7e0c567ed187dea0fd',
|
'md5': 'b00df799c733cf7e0c567ed187dea0fd',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '224',
|
'id': '224',
|
||||||
'ext': 'opus',
|
'ext': 'opus',
|
||||||
'title': 'BC Weekly April 4th 2017: Magic Moments',
|
'title': 'BC Weekly April 4th 2017 - Magic Moments',
|
||||||
'description': 'Stones Throw\'s Vex Ruffin, plus up and coming singer Salami Rose Joe Louis, in conversation about their fantastic DIY albums.',
|
'description': 'md5:5d48150916e8e02d030623a48512c874',
|
||||||
|
'duration': 5829.77,
|
||||||
|
'release_date': '20170404',
|
||||||
|
'series': 'Bandcamp Weekly',
|
||||||
|
'episode': 'Magic Moments',
|
||||||
|
'episode_number': 208,
|
||||||
|
'episode_id': '224',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://bandcamp.com/?blah/blah@&show=228',
|
'url': 'https://bandcamp.com/?blah/blah@&show=228',
|
||||||
|
@ -288,32 +297,53 @@ class BandcampWeeklyIE(InfoExtractor):
|
||||||
|
|
||||||
# This is desired because any invalid show id redirects to `bandcamp.com`
|
# This is desired because any invalid show id redirects to `bandcamp.com`
|
||||||
# which happens to expose the latest Bandcamp Weekly episode.
|
# which happens to expose the latest Bandcamp Weekly episode.
|
||||||
video_id = compat_str(show['show_id'])
|
show_id = int_or_none(show.get('show_id')) or int_or_none(video_id)
|
||||||
|
|
||||||
def to_format_dictionaries(audio_stream):
|
formats = []
|
||||||
dictionaries = [{'format_id': kvp[0], 'url': kvp[1]} for kvp in audio_stream.items()]
|
for format_id, format_url in show['audio_stream'].items():
|
||||||
known_extensions = ['mp3', 'opus']
|
if not isinstance(format_url, compat_str):
|
||||||
|
continue
|
||||||
for dictionary in dictionaries:
|
for known_ext in KNOWN_EXTENSIONS:
|
||||||
for ext in known_extensions:
|
if known_ext in format_id:
|
||||||
if ext in dictionary['format_id']:
|
ext = known_ext
|
||||||
dictionary['ext'] = ext
|
|
||||||
break
|
break
|
||||||
|
else:
|
||||||
return dictionaries
|
ext = None
|
||||||
|
formats.append({
|
||||||
formats = to_format_dictionaries(show['audio_stream'])
|
'format_id': format_id,
|
||||||
|
'url': format_url,
|
||||||
|
'ext': ext,
|
||||||
|
'vcodec': 'none',
|
||||||
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
title = show.get('audio_title') or 'Bandcamp Weekly'
|
||||||
|
subtitle = show.get('subtitle')
|
||||||
|
if subtitle:
|
||||||
|
title += ' - %s' % subtitle
|
||||||
|
|
||||||
|
episode_number = None
|
||||||
|
seq = blob.get('bcw_seq')
|
||||||
|
|
||||||
|
if seq and isinstance(seq, list):
|
||||||
|
try:
|
||||||
|
episode_number = next(
|
||||||
|
int_or_none(e.get('episode_number'))
|
||||||
|
for e in seq
|
||||||
|
if isinstance(e, dict) and int_or_none(e.get('id')) == show_id)
|
||||||
|
except StopIteration:
|
||||||
|
pass
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': show['audio_title'] + ': ' + show['subtitle'],
|
'title': title,
|
||||||
'description': show.get('desc'),
|
'description': show.get('desc') or show.get('short_desc'),
|
||||||
'duration': float_or_none(show.get('audio_duration')),
|
'duration': float_or_none(show.get('audio_duration')),
|
||||||
'webpage_url': 'https://bandcamp.com/?show=' + video_id,
|
|
||||||
'is_live': False,
|
'is_live': False,
|
||||||
'release_date': unified_strdate(show.get('published_date')),
|
'release_date': unified_strdate(show.get('published_date')),
|
||||||
'series': 'Bandcamp Weekly',
|
'series': 'Bandcamp Weekly',
|
||||||
|
'episode': show.get('subtitle'),
|
||||||
|
'episode_number': episode_number,
|
||||||
'episode_id': compat_str(video_id),
|
'episode_id': compat_str(video_id),
|
||||||
'formats': formats
|
'formats': formats
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue