Update to ytdl-commit-6508688
Make default upload_/release_date a compat_str (6508688e88)
Except: "[NDR] Overhaul NDR and NJoy extractors" (https://github.com/ytdl-org/youtube-dl/pull/30531), i.e. commits:
- 01824d275b
- 39a98b09a2
- f0a05a55c2
- 4186e81777
This commit is contained in:
parent
72e995f122
commit
50e93e03a7
21 changed files with 621 additions and 140 deletions
|
@ -2397,7 +2397,7 @@ class YoutubeDL(object):
|
||||||
sanitize_string_field(info_dict, 'id')
|
sanitize_string_field(info_dict, 'id')
|
||||||
sanitize_numeric_fields(info_dict)
|
sanitize_numeric_fields(info_dict)
|
||||||
if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
|
if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
|
||||||
self.report_warning('"duration" field is negative, there is an error in extractor')
|
self.report_warning('"duration" field is negative, there is an error in extractor')
|
||||||
|
|
||||||
if 'playlist' not in info_dict:
|
if 'playlist' not in info_dict:
|
||||||
# It isn't part of a playlist
|
# It isn't part of a playlist
|
||||||
|
|
|
@ -18,7 +18,7 @@ class AliExpressLiveIE(InfoExtractor):
|
||||||
'id': '2800002704436634',
|
'id': '2800002704436634',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'CASIMA7.22',
|
'title': 'CASIMA7.22',
|
||||||
'thumbnail': r're:http://.*\.jpg',
|
'thumbnail': r're:https?://.*\.jpg',
|
||||||
'uploader': 'CASIMA Official Store',
|
'uploader': 'CASIMA Official Store',
|
||||||
'timestamp': 1500717600,
|
'timestamp': 1500717600,
|
||||||
'upload_date': '20170722',
|
'upload_date': '20170722',
|
||||||
|
|
87
yt_dlp/extractor/alsace20tv.py
Normal file
87
yt_dlp/extractor/alsace20tv.py
Normal file
|
@ -0,0 +1,87 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
dict_get,
|
||||||
|
get_element_by_class,
|
||||||
|
int_or_none,
|
||||||
|
unified_strdate,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class Alsace20TVBaseIE(InfoExtractor):
    """Shared extraction logic for alsace20.tv page and embed extractors."""

    def _extract_video(self, video_id, url=None):
        """Build the info dict for ``video_id``.

        The player API is always queried. When ``url`` is given, that page is
        also downloaded (non-fatally) to pick up the description, the
        OpenGraph thumbnail and the OpenGraph duration.

        :param video_id: key passed to the ``visio_v9_js.php`` player API.
        :param url: optional page URL used only for supplementary metadata.
        :return: info dict with id, title, formats and optional metadata.
        """
        info = self._download_json(
            'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key=%s&habillage=0&mode=html' % (video_id, ),
            video_id) or {}
        title = info.get('titre')

        formats = []
        for res, fmt_url in (info.get('files') or {}).items():
            if not fmt_url:
                # an empty/null entry cannot be tested for '/smil:_' nor fetched
                continue
            formats.extend(
                self._extract_smil_formats(fmt_url, video_id, fatal=False)
                if '/smil:_' in fmt_url
                else self._extract_mpd_formats(fmt_url, video_id, mpd_id=res, fatal=False))
        self._sort_formats(formats)

        webpage = (url and self._download_webpage(url, video_id, fatal=False)) or ''
        # default=None: the thumbnail is optional, and for embeds there is no
        # page at all, so a missing og:image must stay silent
        thumbnail = url_or_none(
            dict_get(info, ('image', 'preview', ))
            or self._og_search_thumbnail(webpage, default=None))
        # thumbnail may be None; the regex search needs a string
        upload_date = self._search_regex(
            r'/(\d{6})_', thumbnail or '', 'upload_date', default=None)
        if upload_date:
            # six digits in the thumbnail path, presumably YYMMDD -- the
            # century is hard-coded to 20xx
            upload_date = unified_strdate(
                '20%s-%s-%s' % (upload_date[:2], upload_date[2:4], upload_date[4:]))
        else:
            upload_date = None

        # duration is optional metadata: never let its absence abort extraction
        duration = (
            self._og_search_property('video:duration', webpage, default=None)
            if webpage else None)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': clean_html(get_element_by_class('wysiwyg', webpage)),
            'upload_date': upload_date,
            'thumbnail': thumbnail,
            'duration': int_or_none(duration),
            'view_count': int_or_none(info.get('nb_vues')),
        }
|
||||||
|
|
||||||
|
|
||||||
|
class Alsace20TVIE(Alsace20TVBaseIE):
    """Extractor for alsace20.tv video pages (``.../<slug>-<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?alsace20\.tv/(?:[\w-]+/)+[\w-]+-(?P<id>[\w]+)'
    _TESTS = [
        {
            'url': 'https://www.alsace20.tv/VOD/Actu/JT/Votre-JT-jeudi-3-fevrier-lyNHCXpYJh.html',
            'info_dict': {
                'id': 'lyNHCXpYJh',
                'ext': 'mp4',
                'description': 'md5:fc0bc4a0692d3d2dba4524053de4c7b7',
                'title': 'Votre JT du jeudi 3 février',
                'upload_date': '20220203',
                'thumbnail': r're:https?://.+\.jpg',
                'duration': 1073,
                'view_count': int,
            },
        },
    ]

    def _real_extract(self, url):
        # The page URL is forwarded so the shared helper can also read
        # page-level metadata in addition to the player API response.
        return self._extract_video(self._match_id(url), url)
|
||||||
|
|
||||||
|
|
||||||
|
class Alsace20TVEmbedIE(Alsace20TVBaseIE):
    """Extractor for alsace20.tv embed URLs (``/emb/<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?alsace20\.tv/emb/(?P<id>[\w]+)'
    _TESTS = [
        {
            'url': 'https://www.alsace20.tv/emb/lyNHCXpYJh',
            # 'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb',
            'info_dict': {
                'id': 'lyNHCXpYJh',
                'ext': 'mp4',
                'title': 'Votre JT du jeudi 3 février',
                'upload_date': '20220203',
                'thumbnail': r're:https?://.+\.jpg',
                'view_count': int,
            },
            'params': {
                'format': 'bestvideo',
            },
        },
    ]

    def _real_extract(self, url):
        # No page URL is passed on: only the player API is consulted.
        return self._extract_video(self._match_id(url))
|
|
@ -3,7 +3,9 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
clean_podcast_url,
|
clean_podcast_url,
|
||||||
|
get_element_by_class,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
try_get,
|
try_get,
|
||||||
|
@ -14,16 +16,17 @@ class ApplePodcastsIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)'
|
_VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
|
'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
|
||||||
'md5': 'df02e6acb11c10e844946a39e7222b08',
|
'md5': '41dc31cd650143e530d9423b6b5a344f',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1000482637777',
|
'id': '1000482637777',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': '207 - Whitney Webb Returns',
|
'title': '207 - Whitney Webb Returns',
|
||||||
'description': 'md5:13a73bade02d2e43737751e3987e1399',
|
'description': 'md5:75ef4316031df7b41ced4e7b987f79c6',
|
||||||
'upload_date': '20200705',
|
'upload_date': '20200705',
|
||||||
'timestamp': 1593921600,
|
'timestamp': 1593932400,
|
||||||
'duration': 6425,
|
'duration': 6454,
|
||||||
'series': 'The Tim Dillon Show',
|
'series': 'The Tim Dillon Show',
|
||||||
|
'thumbnail': 're:.+[.](png|jpe?g|webp)',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
|
'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
|
||||||
|
@ -39,24 +42,47 @@ class ApplePodcastsIE(InfoExtractor):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
episode_id = self._match_id(url)
|
episode_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, episode_id)
|
webpage = self._download_webpage(url, episode_id)
|
||||||
ember_data = self._parse_json(self._search_regex(
|
episode_data = {}
|
||||||
r'id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
|
ember_data = {}
|
||||||
webpage, 'ember data'), episode_id)
|
# new page type 2021-11
|
||||||
ember_data = ember_data.get(episode_id) or ember_data
|
amp_data = self._parse_json(self._search_regex(
|
||||||
episode = ember_data['data']['attributes']
|
r'(?s)id="shoebox-media-api-cache-amp-podcasts"[^>]*>\s*({.+?})\s*<',
|
||||||
|
webpage, 'AMP data', default='{}'), episode_id, fatal=False) or {}
|
||||||
|
amp_data = try_get(amp_data,
|
||||||
|
lambda a: self._parse_json(
|
||||||
|
next(a[x] for x in iter(a) if episode_id in x),
|
||||||
|
episode_id),
|
||||||
|
dict) or {}
|
||||||
|
amp_data = amp_data.get('d') or []
|
||||||
|
episode_data = try_get(
|
||||||
|
amp_data,
|
||||||
|
lambda a: next(x for x in a
|
||||||
|
if x['type'] == 'podcast-episodes' and x['id'] == episode_id),
|
||||||
|
dict)
|
||||||
|
if not episode_data:
|
||||||
|
# try pre 2021-11 page type: TODO: consider deleting if no longer used
|
||||||
|
ember_data = self._parse_json(self._search_regex(
|
||||||
|
r'(?s)id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
|
||||||
|
webpage, 'ember data'), episode_id) or {}
|
||||||
|
ember_data = ember_data.get(episode_id) or ember_data
|
||||||
|
episode_data = try_get(ember_data, lambda x: x['data'], dict)
|
||||||
|
episode = episode_data['attributes']
|
||||||
description = episode.get('description') or {}
|
description = episode.get('description') or {}
|
||||||
|
|
||||||
series = None
|
series = None
|
||||||
for inc in (ember_data.get('included') or []):
|
for inc in (amp_data or ember_data.get('included') or []):
|
||||||
if inc.get('type') == 'media/podcast':
|
if inc.get('type') == 'media/podcast':
|
||||||
series = try_get(inc, lambda x: x['attributes']['name'])
|
series = try_get(inc, lambda x: x['attributes']['name'])
|
||||||
|
series = series or clean_html(get_element_by_class('podcast-header__identity', webpage))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': episode_id,
|
'id': episode_id,
|
||||||
'title': episode['name'],
|
'title': episode.get('name'),
|
||||||
'url': clean_podcast_url(episode['assetUrl']),
|
'url': clean_podcast_url(episode['assetUrl']),
|
||||||
'description': description.get('standard') or description.get('short'),
|
'description': description.get('standard') or description.get('short'),
|
||||||
'timestamp': parse_iso8601(episode.get('releaseDateTime')),
|
'timestamp': parse_iso8601(episode.get('releaseDateTime')),
|
||||||
'duration': int_or_none(episode.get('durationInMilliseconds'), 1000),
|
'duration': int_or_none(episode.get('durationInMilliseconds'), 1000),
|
||||||
'series': series,
|
'series': series,
|
||||||
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
|
'vcodec': 'none',
|
||||||
}
|
}
|
||||||
|
|
|
@ -12,6 +12,7 @@ from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_qs,
|
parse_qs,
|
||||||
qualities,
|
qualities,
|
||||||
|
strip_or_none,
|
||||||
try_get,
|
try_get,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
|
@ -253,3 +254,44 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
|
||||||
title = collection.get('title')
|
title = collection.get('title')
|
||||||
description = collection.get('shortDescription') or collection.get('teaserText')
|
description = collection.get('shortDescription') or collection.get('teaserText')
|
||||||
return self.playlist_result(entries, playlist_id, title, description)
|
return self.playlist_result(entries, playlist_id, title, description)
|
||||||
|
|
||||||
|
|
||||||
|
class ArteTVCategoryIE(ArteTVBaseIE):
    """Playlist extractor for arte.tv category pages (``/<lang>/videos/<path>/``)."""

    _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>[\w-]+(?:/[\w-]+)*)/?\s*$' % ArteTVBaseIE._ARTE_LANGUAGES
    _TESTS = [{
        'url': 'https://www.arte.tv/en/videos/politics-and-society/',
        'info_dict': {
            'id': 'politics-and-society',
            'title': 'Politics and society',
            'description': 'Investigative documentary series, geopolitical analysis, and international commentary',
        },
        'playlist_mincount': 13,
    },
    ]

    @classmethod
    def suitable(cls, url):
        """Accept ``url`` only if neither ArteTVIE nor ArteTVPlaylistIE claims it."""
        return (
            not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE, ))
            and super(ArteTVCategoryIE, cls).suitable(url))

    def _real_extract(self, url):
        """Collect the video/playlist links found on the category page.

        :param url: category page URL matching ``_VALID_URL``.
        :return: playlist result whose entries are the linked arte.tv URLs
            that ArteTVIE or ArteTVPlaylistIE can handle.
        """
        lang, playlist_id = self._match_valid_url(url).groups()
        webpage = self._download_webpage(url, playlist_id)

        items = []
        for video in re.finditer(
                r'<a\b[^>]*?href\s*=\s*(?P<q>"|\'|\b)(?P<url>https?://www\.arte\.tv/%s/videos/[\w/-]+)(?P=q)' % lang,
                webpage):
            video = video.group('url')
            if video == url:
                # a link back to the category page itself is not an entry
                continue
            if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE, )):
                items.append(video)

        # Fallback chain: og:title, then <title>, then a title derived from the
        # URL. The <title> search must receive the page text and a field name.
        title = (self._og_search_title(webpage, default=None)
                 or self._html_search_regex(
                     r'<title\b[^>]*>([^<]+)</title>', webpage, 'title', default=None))
        # Both lookups may yield None; drop any trailing "| <site>" part first.
        title = strip_or_none((title or '').rsplit('|', 1)[0]) or self._generic_title(url)

        return self.playlist_from_matches(items, playlist_id=playlist_id, playlist_title=title,
                                          description=self._og_search_description(webpage, default=None))
|
||||||
|
|
|
@ -29,6 +29,7 @@ class AudiomackIE(InfoExtractor):
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
# audiomack wrapper around soundcloud song
|
# audiomack wrapper around soundcloud song
|
||||||
|
# Needs new test URL.
|
||||||
{
|
{
|
||||||
'add_ie': ['Soundcloud'],
|
'add_ie': ['Soundcloud'],
|
||||||
'url': 'http://www.audiomack.com/song/hip-hop-daily/black-mamba-freestyle',
|
'url': 'http://www.audiomack.com/song/hip-hop-daily/black-mamba-freestyle',
|
||||||
|
|
|
@ -11,6 +11,7 @@ from ..compat import (
|
||||||
compat_etree_Element,
|
compat_etree_Element,
|
||||||
compat_HTTPError,
|
compat_HTTPError,
|
||||||
compat_str,
|
compat_str,
|
||||||
|
compat_urllib_error,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
@ -38,7 +39,7 @@ from ..utils import (
|
||||||
class BBCCoUkIE(InfoExtractor):
|
class BBCCoUkIE(InfoExtractor):
|
||||||
IE_NAME = 'bbc.co.uk'
|
IE_NAME = 'bbc.co.uk'
|
||||||
IE_DESC = 'BBC iPlayer'
|
IE_DESC = 'BBC iPlayer'
|
||||||
_ID_REGEX = r'(?:[pbm][\da-z]{7}|w[\da-z]{7,14})'
|
_ID_REGEX = r'(?:[pbml][\da-z]{7}|w[\da-z]{7,14})'
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:www\.)?bbc\.co\.uk/
|
(?:www\.)?bbc\.co\.uk/
|
||||||
|
@ -394,9 +395,17 @@ class BBCCoUkIE(InfoExtractor):
|
||||||
formats.extend(self._extract_mpd_formats(
|
formats.extend(self._extract_mpd_formats(
|
||||||
href, programme_id, mpd_id=format_id, fatal=False))
|
href, programme_id, mpd_id=format_id, fatal=False))
|
||||||
elif transfer_format == 'hls':
|
elif transfer_format == 'hls':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
# TODO: let expected_status be passed into _extract_xxx_formats() instead
|
||||||
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
try:
|
||||||
m3u8_id=format_id, fatal=False))
|
fmts = self._extract_m3u8_formats(
|
||||||
|
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id=format_id, fatal=False)
|
||||||
|
except ExtractorError as e:
|
||||||
|
if not (isinstance(e.exc_info[1], compat_urllib_error.HTTPError)
|
||||||
|
and e.exc_info[1].code in (403, 404)):
|
||||||
|
raise
|
||||||
|
fmts = []
|
||||||
|
formats.extend(fmts)
|
||||||
elif transfer_format == 'hds':
|
elif transfer_format == 'hds':
|
||||||
formats.extend(self._extract_f4m_formats(
|
formats.extend(self._extract_f4m_formats(
|
||||||
href, programme_id, f4m_id=format_id, fatal=False))
|
href, programme_id, f4m_id=format_id, fatal=False))
|
||||||
|
@ -784,21 +793,33 @@ class BBCIE(BBCCoUkIE):
|
||||||
'timestamp': 1437785037,
|
'timestamp': 1437785037,
|
||||||
'upload_date': '20150725',
|
'upload_date': '20150725',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# video with window.__INITIAL_DATA__ and value as JSON string
|
||||||
|
'url': 'https://www.bbc.com/news/av/world-europe-59468682',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'p0b71qth',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Why France is making this woman a national hero',
|
||||||
|
'description': 'md5:7affdfab80e9c3a1f976230a1ff4d5e4',
|
||||||
|
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||||
|
'timestamp': 1638230731,
|
||||||
|
'upload_date': '20211130',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
# single video article embedded with data-media-vpid
|
# single video article embedded with data-media-vpid
|
||||||
'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
|
'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
|
# bbcthreeConfig
|
||||||
'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1',
|
'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'p06556y7',
|
'id': 'p06556y7',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
|
'title': 'Things Not To Say to people that live on council estates',
|
||||||
'description': 'md5:4b7dfd063d5a789a1512e99662be3ddd',
|
'description': "From being labelled a 'chav', to the presumption that they're 'scroungers', people who live on council estates encounter all kinds of prejudices and false assumptions about themselves, their families, and their lifestyles. Here, eight people discuss the common statements, misconceptions, and clichés that they're tired of hearing.",
|
||||||
|
'duration': 360,
|
||||||
|
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||||
},
|
},
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
}
|
|
||||||
}, {
|
}, {
|
||||||
# window.__PRELOADED_STATE__
|
# window.__PRELOADED_STATE__
|
||||||
'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
|
'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
|
||||||
|
@ -1171,9 +1192,16 @@ class BBCIE(BBCCoUkIE):
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
entries, playlist_id, playlist_title, playlist_description)
|
entries, playlist_id, playlist_title, playlist_description)
|
||||||
|
|
||||||
initial_data = self._parse_json(self._parse_json(self._search_regex(
|
initial_data = self._search_regex(
|
||||||
r'window\.__INITIAL_DATA__\s*=\s*("{.+?}");', webpage,
|
r'window\.__INITIAL_DATA__\s*=\s*("{.+?}")\s*;', webpage,
|
||||||
'preload state', default='"{}"'), playlist_id, fatal=False), playlist_id, fatal=False)
|
'quoted preload state', default=None)
|
||||||
|
if initial_data is None:
|
||||||
|
initial_data = self._search_regex(
|
||||||
|
r'window\.__INITIAL_DATA__\s*=\s*({.+?})\s*;', webpage,
|
||||||
|
'preload state', default={})
|
||||||
|
else:
|
||||||
|
initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
|
||||||
|
initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
|
||||||
if initial_data:
|
if initial_data:
|
||||||
def parse_media(media):
|
def parse_media(media):
|
||||||
if not media:
|
if not media:
|
||||||
|
@ -1214,7 +1242,10 @@ class BBCIE(BBCCoUkIE):
|
||||||
if name == 'media-experience':
|
if name == 'media-experience':
|
||||||
parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
|
parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
|
||||||
elif name == 'article':
|
elif name == 'article':
|
||||||
for block in (try_get(resp, lambda x: x['data']['content']['model']['blocks'], list) or []):
|
for block in (try_get(resp,
|
||||||
|
(lambda x: x['data']['blocks'],
|
||||||
|
lambda x: x['data']['content']['model']['blocks'],),
|
||||||
|
list) or []):
|
||||||
if block.get('type') != 'media':
|
if block.get('type') != 'media':
|
||||||
continue
|
continue
|
||||||
parse_media(block.get('model'))
|
parse_media(block.get('model'))
|
||||||
|
|
|
@ -34,9 +34,11 @@ class BigoIE(InfoExtractor):
|
||||||
'https://bigo.tv/studio/getInternalStudioInfo',
|
'https://bigo.tv/studio/getInternalStudioInfo',
|
||||||
user_id, data=urlencode_postdata({'siteId': user_id}))
|
user_id, data=urlencode_postdata({'siteId': user_id}))
|
||||||
|
|
||||||
|
if not isinstance(info_raw, dict):
|
||||||
|
raise ExtractorError('Received invalid JSON data')
|
||||||
if info_raw.get('code'):
|
if info_raw.get('code'):
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
f'{info_raw["msg"]} (code {info_raw["code"]})', expected=True)
|
'Bigo says: %s (code %s)' % (info_raw.get('msg'), info_raw.get('code')), expected=True)
|
||||||
info = info_raw.get('data') or {}
|
info = info_raw.get('data') or {}
|
||||||
|
|
||||||
if not info.get('alive'):
|
if not info.get('alive'):
|
||||||
|
@ -44,7 +46,7 @@ class BigoIE(InfoExtractor):
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': info.get('roomId') or user_id,
|
'id': info.get('roomId') or user_id,
|
||||||
'title': info.get('roomTopic'),
|
'title': info.get('roomTopic') or info.get('nick_name') or user_id,
|
||||||
'formats': [{
|
'formats': [{
|
||||||
'url': info.get('hls_src'),
|
'url': info.get('hls_src'),
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
|
148
yt_dlp/extractor/cpac.py
Normal file
148
yt_dlp/extractor/cpac.py
Normal file
|
@ -0,0 +1,148 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
str_or_none,
|
||||||
|
try_get,
|
||||||
|
unified_timestamp,
|
||||||
|
update_url_query,
|
||||||
|
urljoin,
|
||||||
|
)
|
||||||
|
|
||||||
|
# compat_range: where ``xrange`` exists (Python 2) rebind ``range`` to it;
# where it does not (Python 3) the lookup raises NameError and the builtin
# ``range`` is left untouched.
try:
    range = xrange if callable(xrange) else range
except (NameError, TypeError):
    pass
|
||||||
|
|
||||||
|
|
||||||
|
class CPACIE(InfoExtractor):
    """Extractor for single cpac.ca episodes (``/episode?id=`` and ``/l-episode?id=``)."""

    IE_NAME = 'cpac'
    _VALID_URL = r'https?://(?:www\.)?cpac\.ca/(?P<fr>l-)?episode\?id=(?P<id>[\da-f]{8}(?:-[\da-f]{4}){3}-[\da-f]{12})'
    _TEST = {
        # 'url': 'http://www.cpac.ca/en/programs/primetime-politics/episodes/65490909',
        'url': 'https://www.cpac.ca/episode?id=fc7edcae-4660-47e1-ba61-5b7f29a9db0f',
        'md5': 'e46ad699caafd7aa6024279f2614e8fa',
        'info_dict': {
            'id': 'fc7edcae-4660-47e1-ba61-5b7f29a9db0f',
            'ext': 'mp4',
            'upload_date': '20220215',
            'title': 'News Conference to Celebrate National Kindness Week – February 15, 2022',
            'description': 'md5:466a206abd21f3a6f776cdef290c23fb',
            'timestamp': 1644901200,
        },
        'params': {
            'format': 'bestvideo',
            'hls_prefer_native': True,
        },
    }

    def _real_extract(self, url):
        """Extract one episode from the contentModel JSON API.

        :param url: episode URL; ``/l-episode?`` selects the French metadata
            fields, anything else the English ones.
        :return: info dict. ``formats`` is empty when the API response
            carries no ``videoUrl``.
        """
        video_id = self._match_id(url)
        url_lang = 'fr' if '/l-episode?' in url else 'en'

        content = self._download_json(
            'https://www.cpac.ca/api/1/services/contentModel.json?url=/site/website/episode/index.xml&crafterSite=cpacca&id=' + video_id,
            video_id)
        # Resolve the details dict once, so every later lookup works whether
        # or not a video URL is present in the response.
        details = try_get(content, lambda x: x['page']['details'], dict) or {}
        video_url = try_get(details, lambda x: x['videoUrl'], compat_str)
        title = str_or_none(details.get('title_%s_t' % (url_lang, )))

        formats = []
        if video_url:
            formats = self._extract_m3u8_formats(video_url, video_id, m3u8_id='hls', ext='mp4')
            for fmt in formats:
                # prefer language to match URL
                fmt_lang = fmt.get('language')
                if fmt_lang == url_lang:
                    fmt['language_preference'] = 10
                elif not fmt_lang:
                    fmt['language_preference'] = -1
                else:
                    fmt['language_preference'] = -10

        self._sort_formats(formats)

        # category is optional metadata; a missing key must not be fatal
        category = str_or_none(details.get('category_%s_t' % (url_lang, )))

        # tri-state: None when the API does not say what kind of item this is
        v_type = details.get('type')
        is_live = (v_type == 'live') if v_type is not None else None

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': str_or_none(details.get('description_%s_t' % (url_lang, ))),
            'timestamp': unified_timestamp(details.get('liveDateTime')),
            # 'categories' (a list) is the info_dict field name
            'categories': [category] if category else None,
            'thumbnail': urljoin(url, str_or_none(details.get('image_%s_s' % (url_lang, )))),
            'is_live': is_live,
        }
|
||||||
|
|
||||||
|
|
||||||
|
class CPACPlaylistIE(InfoExtractor):
    """Playlist extractor for cpac.ca program and search listings."""

    IE_NAME = 'cpac:playlist'
    _VALID_URL = r'(?i)https?://(?:www\.)?cpac\.ca/(?:program|search|(?P<fr>emission|rechercher))\?(?:[^&]+&)*?(?P<id>(?:id=\d+|programId=\d+|key=[^&]+))'

    _TESTS = [{
        'url': 'https://www.cpac.ca/program?id=6',
        'info_dict': {
            'id': 'id=6',
            'title': 'Headline Politics',
            'description': 'Watch CPAC’s signature long-form coverage of the day’s pressing political events as they unfold.',
        },
        'playlist_count': 10,
    }, {
        'url': 'https://www.cpac.ca/search?key=hudson&type=all&order=desc',
        'info_dict': {
            'id': 'key=hudson',
            'title': 'hudson',
        },
        'playlist_count': 22,
    }, {
        'url': 'https://www.cpac.ca/search?programId=50',
        'info_dict': {
            'id': 'programId=50',
            'title': '50',
        },
        'playlist_count': 9,
    }, {
        'url': 'https://www.cpac.ca/emission?id=6',
        'only_matching': True,
    }, {
        'url': 'https://www.cpac.ca/rechercher?key=hudson&type=all&order=desc',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Walk every result page of a program or search listing.

        :param url: listing URL; ``/emission?`` and ``/rechercher?`` select
            the French fields, ``/program?`` and ``/emission?`` select the
            program API (anything else uses the search API).
        :return: playlist result whose id is the matched ``key=value`` query
            fragment.
        """
        video_id = self._match_id(url)
        # _VALID_URL is case-insensitive, so the path checks must be too
        url_lc = url.lower()
        url_lang = 'fr' if any(x in url_lc for x in ('/emission?', '/rechercher?')) else 'en'
        pl_type, list_type = ('program', 'itemList') if any(x in url_lc for x in ('/program?', '/emission?')) else ('search', 'searchResult')
        api_url = (
            'https://www.cpac.ca/api/1/services/contentModel.json?url=/site/website/%s/index.xml&crafterSite=cpacca&%s'
            % (pl_type, video_id, ))
        content = self._download_json(api_url, video_id)

        # Take the playlist metadata from the first page: continuation pages
        # are fetched non-fatally and may come back as None.
        program = try_get(content, lambda x: x['page']['program'], dict) or {}
        playlist_title = program.get('title_%s_t' % (url_lang, )) or video_id.split('=')[-1]
        playlist_description = program.get('description_%s_t' % (url_lang, ))

        entries = []
        total_pages = int_or_none(try_get(content, lambda x: x['page'][list_type]['totalPages']), default=1)
        for page in range(1, total_pages + 1):
            if page > 1:
                api_url = update_url_query(api_url, {'page': '%d' % (page, ), })
                content = self._download_json(
                    api_url, video_id,
                    note='Downloading continuation - %d' % (page, ),
                    fatal=False)

            # a failed page (content is None) simply contributes no items
            for item in try_get(content, lambda x: x['page'][list_type]['item'], list) or []:
                episode_url = urljoin(url, try_get(item, lambda x: x['url_%s_s' % (url_lang, )]))
                if episode_url:
                    entries.append(episode_url)

        return self.playlist_result(
            (self.url_result(entry) for entry in entries),
            playlist_id=video_id,
            playlist_title=playlist_title,
            playlist_description=playlist_description,
        )
|
|
@ -68,6 +68,10 @@ from .anvato import AnvatoIE
|
||||||
from .aol import AolIE
|
from .aol import AolIE
|
||||||
from .allocine import AllocineIE
|
from .allocine import AllocineIE
|
||||||
from .aliexpress import AliExpressLiveIE
|
from .aliexpress import AliExpressLiveIE
|
||||||
|
from .alsace20tv import (
|
||||||
|
Alsace20TVIE,
|
||||||
|
Alsace20TVEmbedIE,
|
||||||
|
)
|
||||||
from .apa import APAIE
|
from .apa import APAIE
|
||||||
from .aparat import AparatIE
|
from .aparat import AparatIE
|
||||||
from .appleconnect import AppleConnectIE
|
from .appleconnect import AppleConnectIE
|
||||||
|
@ -91,6 +95,7 @@ from .arte import (
|
||||||
ArteTVIE,
|
ArteTVIE,
|
||||||
ArteTVEmbedIE,
|
ArteTVEmbedIE,
|
||||||
ArteTVPlaylistIE,
|
ArteTVPlaylistIE,
|
||||||
|
ArteTVCategoryIE,
|
||||||
)
|
)
|
||||||
from .arnes import ArnesIE
|
from .arnes import ArnesIE
|
||||||
from .asiancrush import (
|
from .asiancrush import (
|
||||||
|
@ -306,6 +311,10 @@ from .commonprotocols import (
|
||||||
from .condenast import CondeNastIE
|
from .condenast import CondeNastIE
|
||||||
from .contv import CONtvIE
|
from .contv import CONtvIE
|
||||||
from .corus import CorusIE
|
from .corus import CorusIE
|
||||||
|
from .cpac import (
|
||||||
|
CPACIE,
|
||||||
|
CPACPlaylistIE,
|
||||||
|
)
|
||||||
from .cozytv import CozyTVIE
|
from .cozytv import CozyTVIE
|
||||||
from .cracked import CrackedIE
|
from .cracked import CrackedIE
|
||||||
from .crackle import CrackleIE
|
from .crackle import CrackleIE
|
||||||
|
|
|
@ -1,11 +1,14 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
parse_duration,
|
parse_duration,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
try_get,
|
strip_or_none,
|
||||||
|
traverse_obj,
|
||||||
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -20,14 +23,30 @@ class NuvidIE(InfoExtractor):
|
||||||
'title': 'italian babe',
|
'title': 'italian babe',
|
||||||
'duration': 321.0,
|
'duration': 321.0,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
|
'thumbnail': r're:https?://.+\.jpg',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://m.nuvid.com/video/6523263',
|
'url': 'https://m.nuvid.com/video/6523263',
|
||||||
|
'md5': 'ebd22ce8e47e1d9a4d0756a15c67da52',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '6523263',
|
'id': '6523263',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'age_limit': 18,
|
|
||||||
'title': 'Slut brunette college student anal dorm',
|
'title': 'Slut brunette college student anal dorm',
|
||||||
|
'duration': 421.0,
|
||||||
|
'age_limit': 18,
|
||||||
|
'thumbnail': r're:https?://.+\.jpg',
|
||||||
|
'thumbnails': list,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://m.nuvid.com/video/6415801/',
|
||||||
|
'md5': '638d5ececb138d5753593f751ae3f697',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6415801',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'My best friend wanted to fuck my wife for a long time',
|
||||||
|
'duration': 1882,
|
||||||
|
'age_limit': 18,
|
||||||
|
'thumbnail': r're:https?://.+\.jpg',
|
||||||
}
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@ -46,6 +65,16 @@ class NuvidIE(InfoExtractor):
|
||||||
'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
|
'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
|
||||||
})
|
})
|
||||||
|
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
'http://m.nuvid.com/video/%s' % (video_id, ),
|
||||||
|
video_id, 'Downloading video page', fatal=False) or ''
|
||||||
|
|
||||||
|
title = strip_or_none(video_data.get('title') or self._html_search_regex(
|
||||||
|
(r'''<span\s[^>]*?\btitle\s*=\s*(?P<q>"|'|\b)(?P<title>[^"]+)(?P=q)\s*>''',
|
||||||
|
r'''<div\s[^>]*?\bclass\s*=\s*(?P<q>"|'|\b)thumb-holder video(?P=q)>\s*<h5\b[^>]*>(?P<title>[^<]+)</h5''',
|
||||||
|
r'''<span\s[^>]*?\bclass\s*=\s*(?P<q>"|'|\b)title_thumb(?P=q)>(?P<title>[^<]+)</span'''),
|
||||||
|
webpage, 'title', group='title'))
|
||||||
|
|
||||||
formats = [{
|
formats = [{
|
||||||
'url': source,
|
'url': source,
|
||||||
'format_id': qualities.get(quality),
|
'format_id': qualities.get(quality),
|
||||||
|
@ -55,19 +84,19 @@ class NuvidIE(InfoExtractor):
|
||||||
self._check_formats(formats, video_id)
|
self._check_formats(formats, video_id)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
title = video_data.get('title')
|
duration = parse_duration(traverse_obj(video_data, 'duration', 'duration_format'))
|
||||||
thumbnail_base_url = try_get(video_data, lambda x: x['thumbs']['url'])
|
thumbnails = [
|
||||||
thumbnail_extension = try_get(video_data, lambda x: x['thumbs']['extension'])
|
{'url': thumb_url} for thumb_url in re.findall(
|
||||||
thumbnail_id = self._search_regex(
|
r'<div\s+class\s*=\s*"video-tmb-wrap"\s*>\s*<img\s+src\s*=\s*"([^"]+)"\s*/>', webpage)
|
||||||
r'/media/videos/tmb/6523263/preview/(/d+)' + thumbnail_extension, video_data.get('poster', ''), 'thumbnail id', default=19)
|
if url_or_none(thumb_url)]
|
||||||
thumbnail = f'{thumbnail_base_url}player/{thumbnail_id}{thumbnail_extension}'
|
if url_or_none(video_data.get('poster')):
|
||||||
duration = parse_duration(video_data.get('duration') or video_data.get('duration_format'))
|
thumbnails.append({'url': video_data['poster'], 'preference': 1})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': thumbnail,
|
'thumbnails': thumbnails,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,7 +6,8 @@ import re
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none
|
int_or_none,
|
||||||
|
str_to_int
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -179,7 +180,7 @@ class RUTVIE(InfoExtractor):
|
||||||
'player_url': 'http://player.rutv.ru/flash3v/osmf.swf?i=22',
|
'player_url': 'http://player.rutv.ru/flash3v/osmf.swf?i=22',
|
||||||
'rtmp_live': True,
|
'rtmp_live': True,
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'vbr': int(quality),
|
'vbr': str_to_int(quality),
|
||||||
'quality': preference,
|
'quality': preference,
|
||||||
}
|
}
|
||||||
elif transport == 'm3u8':
|
elif transport == 'm3u8':
|
||||||
|
|
|
@ -22,6 +22,20 @@ class StreamCZIE(InfoExtractor):
|
||||||
'title': 'Bůh',
|
'title': 'Bůh',
|
||||||
'display_id': 'buh',
|
'display_id': 'buh',
|
||||||
'description': 'md5:8f5f09b9b7bc67df910486cdd88f7165',
|
'description': 'md5:8f5f09b9b7bc67df910486cdd88f7165',
|
||||||
|
'duration': 1369.6,
|
||||||
|
'view_count': int,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.stream.cz/kdo-to-mluvi/kdo-to-mluvi-velke-odhaleni-prinasi-novy-porad-uz-od-25-srpna-64087937',
|
||||||
|
'md5': '41fd358000086a1ccdb068c77809b158',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '64087937',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Kdo to mluví? Velké odhalení přináší nový pořad už od 25. srpna',
|
||||||
|
'display_id': 'kdo-to-mluvi-velke-odhaleni-prinasi-novy-porad-uz-od-25-srpna',
|
||||||
|
'description': 'md5:97a811000a6460266029d6c1c2ebcd59',
|
||||||
|
'duration': 50.2,
|
||||||
|
'view_count': int,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.stream.cz/tajemno/znicehonic-jim-skrz-strechu-prolitnul-zahadny-predmet-badatele-vse-objasnili-64147267',
|
'url': 'https://www.stream.cz/tajemno/znicehonic-jim-skrz-strechu-prolitnul-zahadny-predmet-badatele-vse-objasnili-64147267',
|
||||||
|
@ -31,7 +45,9 @@ class StreamCZIE(InfoExtractor):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Zničehonic jim skrz střechu prolítnul záhadný předmět. Badatelé vše objasnili',
|
'title': 'Zničehonic jim skrz střechu prolítnul záhadný předmět. Badatelé vše objasnili',
|
||||||
'display_id': 'znicehonic-jim-skrz-strechu-prolitnul-zahadny-predmet-badatele-vse-objasnili',
|
'display_id': 'znicehonic-jim-skrz-strechu-prolitnul-zahadny-predmet-badatele-vse-objasnili',
|
||||||
'description': 'md5:1dcb5e010eb697dedc5942f76c5b3744',
|
'description': 'md5:4b8ada6718d34bb011c4e04ca4bc19bf',
|
||||||
|
'duration': 442.84,
|
||||||
|
'view_count': int,
|
||||||
}
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
|
|
@ -1,19 +1,15 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
from .dplay import DPlayIE
|
||||||
|
from ..compat import compat_urlparse
|
||||||
from .common import InfoExtractor
|
|
||||||
from .jwplatform import JWPlatformIE
|
|
||||||
from .nexx import NexxIE
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
NO_DEFAULT,
|
ExtractorError,
|
||||||
parse_qs,
|
extract_attributes,
|
||||||
smuggle_url,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class Tele5IE(InfoExtractor):
|
class Tele5IE(DPlayIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||||
_GEO_COUNTRIES = ['DE']
|
_GEO_COUNTRIES = ['DE']
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
@ -28,6 +24,7 @@ class Tele5IE(InfoExtractor):
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'No longer available: "404 Seite nicht gefunden"',
|
||||||
}, {
|
}, {
|
||||||
# jwplatform, nexx unavailable
|
# jwplatform, nexx unavailable
|
||||||
'url': 'https://www.tele5.de/filme/ghoul-das-geheimnis-des-friedhofmonsters/',
|
'url': 'https://www.tele5.de/filme/ghoul-das-geheimnis-des-friedhofmonsters/',
|
||||||
|
@ -42,7 +39,20 @@ class Tele5IE(InfoExtractor):
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'add_ie': [JWPlatformIE.ie_key()],
|
'skip': 'No longer available, redirects to Filme page',
|
||||||
|
}, {
|
||||||
|
'url': 'https://tele5.de/mediathek/angel-of-mine/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1252360',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'upload_date': '20220109',
|
||||||
|
'timestamp': 1641762000,
|
||||||
|
'title': 'Angel of Mine',
|
||||||
|
'description': 'md5:a72546a175e1286eb3251843a52d1ad7',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'format': 'bestvideo',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.tele5.de/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191',
|
'url': 'https://www.tele5.de/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -64,45 +74,18 @@ class Tele5IE(InfoExtractor):
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
qs = parse_qs(url)
|
video_id = self._match_id(url)
|
||||||
video_id = (qs.get('vid') or qs.get('ve_id') or [None])[0]
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
player_element = self._search_regex(r'(<hyoga-player\b[^>]+?>)', webpage, 'video player')
|
||||||
NEXX_ID_RE = r'\d{6,}'
|
player_info = extract_attributes(player_element)
|
||||||
JWPLATFORM_ID_RE = r'[a-zA-Z0-9]{8}'
|
asset_id, country, realm = (player_info[x] for x in ('assetid', 'locale', 'realm', ))
|
||||||
|
endpoint = compat_urlparse.urlparse(player_info['endpoint']).hostname
|
||||||
def nexx_result(nexx_id):
|
source_type = player_info.get('sourcetype')
|
||||||
return self.url_result(
|
if source_type:
|
||||||
'https://api.nexx.cloud/v3/759/videos/byid/%s' % nexx_id,
|
endpoint = '%s-%s' % (source_type, endpoint)
|
||||||
ie=NexxIE.ie_key(), video_id=nexx_id)
|
try:
|
||||||
|
return self._get_disco_api_info(url, asset_id, endpoint, realm, country)
|
||||||
nexx_id = jwplatform_id = None
|
except ExtractorError as e:
|
||||||
|
if getattr(e, 'message', '') == 'Missing deviceId in context':
|
||||||
if video_id:
|
self.report_drm(video_id)
|
||||||
if re.match(NEXX_ID_RE, video_id):
|
raise
|
||||||
return nexx_result(video_id)
|
|
||||||
elif re.match(JWPLATFORM_ID_RE, video_id):
|
|
||||||
jwplatform_id = video_id
|
|
||||||
|
|
||||||
if not nexx_id:
|
|
||||||
display_id = self._match_id(url)
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
|
|
||||||
def extract_id(pattern, name, default=NO_DEFAULT):
|
|
||||||
return self._html_search_regex(
|
|
||||||
(r'id\s*=\s*["\']video-player["\'][^>]+data-id\s*=\s*["\'](%s)' % pattern,
|
|
||||||
r'\s+id\s*=\s*["\']player_(%s)' % pattern,
|
|
||||||
r'\bdata-id\s*=\s*["\'](%s)' % pattern), webpage, name,
|
|
||||||
default=default)
|
|
||||||
|
|
||||||
nexx_id = extract_id(NEXX_ID_RE, 'nexx id', default=None)
|
|
||||||
if nexx_id:
|
|
||||||
return nexx_result(nexx_id)
|
|
||||||
|
|
||||||
if not jwplatform_id:
|
|
||||||
jwplatform_id = extract_id(JWPLATFORM_ID_RE, 'jwplatform id')
|
|
||||||
|
|
||||||
return self.url_result(
|
|
||||||
smuggle_url(
|
|
||||||
'jwplatform:%s' % jwplatform_id,
|
|
||||||
{'geo_countries': self._GEO_COUNTRIES}),
|
|
||||||
ie=JWPlatformIE.ie_key(), video_id=jwplatform_id)
|
|
||||||
|
|
|
@ -41,8 +41,16 @@ class TV2DKIE(InfoExtractor):
|
||||||
'duration': 1347,
|
'duration': 1347,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
},
|
},
|
||||||
'params': {
|
'add_ie': ['Kaltura'],
|
||||||
'skip_download': True,
|
}, {
|
||||||
|
'url': 'https://www.tv2lorry.dk/gadekamp/gadekamp-6-hoejhuse-i-koebenhavn',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1_7iwll9n0',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'upload_date': '20211027',
|
||||||
|
'title': 'Gadekamp #6 - Højhuse i København',
|
||||||
|
'uploader_id': 'tv2lorry',
|
||||||
|
'timestamp': 1635345229,
|
||||||
},
|
},
|
||||||
'add_ie': ['Kaltura'],
|
'add_ie': ['Kaltura'],
|
||||||
}, {
|
}, {
|
||||||
|
@ -91,11 +99,14 @@ class TV2DKIE(InfoExtractor):
|
||||||
add_entry(partner_id, kaltura_id)
|
add_entry(partner_id, kaltura_id)
|
||||||
if not entries:
|
if not entries:
|
||||||
kaltura_id = self._search_regex(
|
kaltura_id = self._search_regex(
|
||||||
r'entry_id\s*:\s*["\']([0-9a-z_]+)', webpage, 'kaltura id')
|
(r'entry_id\s*:\s*["\']([0-9a-z_]+)',
|
||||||
|
r'\\u002FentryId\\u002F(\w+)\\u002F'), webpage, 'kaltura id')
|
||||||
partner_id = self._search_regex(
|
partner_id = self._search_regex(
|
||||||
(r'\\u002Fp\\u002F(\d+)\\u002F', r'/p/(\d+)/'), webpage,
|
(r'\\u002Fp\\u002F(\d+)\\u002F', r'/p/(\d+)/'), webpage,
|
||||||
'partner id')
|
'partner id')
|
||||||
add_entry(partner_id, kaltura_id)
|
add_entry(partner_id, kaltura_id)
|
||||||
|
if len(entries) == 1:
|
||||||
|
return entries[0]
|
||||||
return self.playlist_result(entries)
|
return self.playlist_result(entries)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -95,7 +95,6 @@ class UOLIE(InfoExtractor):
|
||||||
if v:
|
if v:
|
||||||
query[k] = v
|
query[k] = v
|
||||||
f_url = update_url_query(f_url, query)
|
f_url = update_url_query(f_url, query)
|
||||||
format_id = format_id
|
|
||||||
if format_id == 'HLS':
|
if format_id == 'HLS':
|
||||||
m3u8_formats = self._extract_m3u8_formats(
|
m3u8_formats = self._extract_m3u8_formats(
|
||||||
f_url, media_id, 'mp4', 'm3u8_native',
|
f_url, media_id, 'mp4', 'm3u8_native',
|
||||||
|
|
|
@ -4,7 +4,11 @@ from __future__ import unicode_literals
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
dict_get,
|
dict_get,
|
||||||
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
ISO639Utils,
|
||||||
|
parse_age_limit,
|
||||||
|
try_get,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -23,9 +27,10 @@ class URPlayIE(InfoExtractor):
|
||||||
'upload_date': '20171214',
|
'upload_date': '20171214',
|
||||||
'series': 'UR Samtiden - Livet, universum och rymdens märkliga musik',
|
'series': 'UR Samtiden - Livet, universum och rymdens märkliga musik',
|
||||||
'duration': 2269,
|
'duration': 2269,
|
||||||
'categories': ['Kultur & historia'],
|
'categories': ['Vetenskap & teknik'],
|
||||||
'tags': ['Kritiskt tänkande', 'Vetenskap', 'Vetenskaplig verksamhet'],
|
'tags': ['Kritiskt tänkande', 'Vetenskap', 'Vetenskaplig verksamhet'],
|
||||||
'episode': 'Om vetenskap, kritiskt tänkande och motstånd',
|
'episode': 'Om vetenskap, kritiskt tänkande och motstånd',
|
||||||
|
'age_limit': 15,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde',
|
'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde',
|
||||||
|
@ -50,11 +55,16 @@ class URPlayIE(InfoExtractor):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
url = url.replace('skola.se/Produkter', 'play.se/program')
|
url = url.replace('skola.se/Produkter', 'play.se/program')
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
vid = int(video_id)
|
urplayer_data = self._search_nextjs_data(webpage, video_id, fatal=False) or {}
|
||||||
accessible_episodes = self._parse_json(self._html_search_regex(
|
if urplayer_data:
|
||||||
r'data-react-class="routes/Product/components/ProgramContainer/ProgramContainer"[^>]+data-react-props="({.+?})"',
|
urplayer_data = try_get(urplayer_data, lambda x: x['props']['pageProps']['program'], dict)
|
||||||
webpage, 'urplayer data'), video_id)['accessibleEpisodes']
|
if not urplayer_data:
|
||||||
urplayer_data = next(e for e in accessible_episodes if e.get('id') == vid)
|
raise ExtractorError('Unable to parse __NEXT_DATA__')
|
||||||
|
else:
|
||||||
|
accessible_episodes = self._parse_json(self._html_search_regex(
|
||||||
|
r'data-react-class="routes/Product/components/ProgramContainer/ProgramContainer"[^>]+data-react-props="({.+?})"',
|
||||||
|
webpage, 'urplayer data'), video_id)['accessibleEpisodes']
|
||||||
|
urplayer_data = next(e for e in accessible_episodes if e.get('id') == int_or_none(video_id))
|
||||||
episode = urplayer_data['title']
|
episode = urplayer_data['title']
|
||||||
|
|
||||||
host = self._download_json('http://streaming-loadbalancer.ur.se/loadbalancer.json', video_id)['redirect']
|
host = self._download_json('http://streaming-loadbalancer.ur.se/loadbalancer.json', video_id)['redirect']
|
||||||
|
@ -72,11 +82,28 @@ class URPlayIE(InfoExtractor):
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
subs = urplayer_streams.get("sweComplete", {}).get("tt", {}).get("location")
|
|
||||||
if subs:
|
def parse_lang_code(code):
|
||||||
subtitles.setdefault('Svenska', []).append({
|
"3-character language code or None (utils candidate)"
|
||||||
'url': subs,
|
if code is None:
|
||||||
})
|
return
|
||||||
|
lang = code.lower()
|
||||||
|
if not ISO639Utils.long2short(lang):
|
||||||
|
lang = ISO639Utils.short2long(lang)
|
||||||
|
return lang or None
|
||||||
|
|
||||||
|
for k, v in (urplayer_data['streamingInfo'].get('sweComplete') or {}).items():
|
||||||
|
if (k in ('sd', 'hd') or not isinstance(v, dict)):
|
||||||
|
continue
|
||||||
|
lang, sttl_url = (v.get(kk) for kk in ('language', 'location', ))
|
||||||
|
if not sttl_url:
|
||||||
|
continue
|
||||||
|
lang = parse_lang_code(lang)
|
||||||
|
if not lang:
|
||||||
|
continue
|
||||||
|
sttl = subtitles.get(lang) or []
|
||||||
|
sttl.append({'ext': k, 'url': sttl_url, })
|
||||||
|
subtitles[lang] = sttl
|
||||||
|
|
||||||
image = urplayer_data.get('image') or {}
|
image = urplayer_data.get('image') or {}
|
||||||
thumbnails = []
|
thumbnails = []
|
||||||
|
@ -98,7 +125,6 @@ class URPlayIE(InfoExtractor):
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'subtitles': subtitles,
|
|
||||||
'title': '%s : %s' % (series_title, episode) if series_title else episode,
|
'title': '%s : %s' % (series_title, episode) if series_title else episode,
|
||||||
'description': urplayer_data.get('description'),
|
'description': urplayer_data.get('description'),
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
|
@ -111,4 +137,7 @@ class URPlayIE(InfoExtractor):
|
||||||
'season': series.get('label'),
|
'season': series.get('label'),
|
||||||
'episode': episode,
|
'episode': episode,
|
||||||
'episode_number': int_or_none(urplayer_data.get('episodeNumber')),
|
'episode_number': int_or_none(urplayer_data.get('episodeNumber')),
|
||||||
|
'age_limit': parse_age_limit(min(try_get(a, lambda x: x['from'], int) or 0
|
||||||
|
for a in urplayer_data.get('ageRanges', []))),
|
||||||
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
|
@ -111,7 +111,6 @@ class VideaIE(InfoExtractor):
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
video_page = self._download_webpage(url, video_id)
|
video_page = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
if 'videa.hu/player' in url:
|
if 'videa.hu/player' in url:
|
||||||
|
@ -146,7 +145,7 @@ class VideaIE(InfoExtractor):
|
||||||
compat_b64decode(b64_info), key), video_id)
|
compat_b64decode(b64_info), key), video_id)
|
||||||
|
|
||||||
video = xpath_element(info, './video', 'video')
|
video = xpath_element(info, './video', 'video')
|
||||||
if not video:
|
if video is None:
|
||||||
raise ExtractorError(xpath_element(
|
raise ExtractorError(xpath_element(
|
||||||
info, './error', fatal=True), expected=True)
|
info, './error', fatal=True), expected=True)
|
||||||
sources = xpath_element(
|
sources = xpath_element(
|
||||||
|
@ -163,9 +162,9 @@ class VideaIE(InfoExtractor):
|
||||||
source_exp = source.get('exp')
|
source_exp = source.get('exp')
|
||||||
if not (source_url and source_name):
|
if not (source_url and source_name):
|
||||||
continue
|
continue
|
||||||
hash_value = None
|
hash_value = (
|
||||||
if hash_values:
|
xpath_text(hash_values, 'hash_value_' + source_name)
|
||||||
hash_value = xpath_text(hash_values, 'hash_value_' + source_name)
|
if hash_values is not None else None)
|
||||||
if hash_value and source_exp:
|
if hash_value and source_exp:
|
||||||
source_url = update_url_query(source_url, {
|
source_url = update_url_query(source_url, {
|
||||||
'md5': hash_value,
|
'md5': hash_value,
|
||||||
|
|
|
@ -636,6 +636,24 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||||
'url': 'https://vimeo.com/392479337/a52724358e',
|
'url': 'https://vimeo.com/392479337/a52724358e',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# similar, but all numeric: ID must be 581039021, not 9603038895
|
||||||
|
# issue #29690
|
||||||
|
'url': 'https://vimeo.com/581039021/9603038895',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '581039021',
|
||||||
|
# these have to be provided but we don't care
|
||||||
|
'ext': 'mp4',
|
||||||
|
'timestamp': 1627621014,
|
||||||
|
'title': 're:.+',
|
||||||
|
'uploader_id': 're:.+',
|
||||||
|
'uploader': 're:.+',
|
||||||
|
'upload_date': r're:\d+',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}
|
||||||
# https://gettingthingsdone.com/workflowmap/
|
# https://gettingthingsdone.com/workflowmap/
|
||||||
# vimeo embed with check-password page protected by Referer header
|
# vimeo embed with check-password page protected by Referer header
|
||||||
]
|
]
|
||||||
|
|
|
@ -10,6 +10,7 @@ from ..compat import (
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
|
dict_get,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
strip_jsonp,
|
strip_jsonp,
|
||||||
|
@ -22,13 +23,14 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class WDRIE(InfoExtractor):
|
class WDRIE(InfoExtractor):
|
||||||
|
__API_URL_TPL = '//deviceids-medp.wdr.de/ondemand/%s/%s'
|
||||||
_VALID_URL = r'''(?x)https?://
|
_VALID_URL = r'''(?x)https?://
|
||||||
(?:deviceids-medp\.wdr\.de/ondemand/\d+/|
|
(?:deviceids-medp\.wdr\.de/ondemand/\d+/|
|
||||||
kinder\.wdr\.de/(?!mediathek/)[^#?]+-)
|
kinder\.wdr\.de/(?!mediathek/)[^#?]+-)
|
||||||
(?P<id>\d+)\.(?:js|assetjsonp)
|
(?P<id>\d+)\.(?:js|assetjsonp)
|
||||||
'''
|
'''
|
||||||
_GEO_COUNTRIES = ['DE']
|
_GEO_COUNTRIES = ['DE']
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://deviceids-medp.wdr.de/ondemand/155/1557833.js',
|
'url': 'http://deviceids-medp.wdr.de/ondemand/155/1557833.js',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'mdb-1557833',
|
'id': 'mdb-1557833',
|
||||||
|
@ -36,11 +38,19 @@ class WDRIE(InfoExtractor):
|
||||||
'title': 'Biathlon-Staffel verpasst Podest bei Olympia-Generalprobe',
|
'title': 'Biathlon-Staffel verpasst Podest bei Olympia-Generalprobe',
|
||||||
'upload_date': '20180112',
|
'upload_date': '20180112',
|
||||||
},
|
},
|
||||||
}
|
}]
|
||||||
|
|
||||||
|
def _asset_url(self, wdr_id):
|
||||||
|
id_len = max(len(wdr_id), 5)
|
||||||
|
return ''.join(('https:', self.__API_URL_TPL % (wdr_id[:id_len - 4], wdr_id, ), '.js'))
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
if url.startswith('wdr:'):
|
||||||
|
video_id = url[4:]
|
||||||
|
url = self._asset_url(video_id)
|
||||||
|
|
||||||
metadata = self._download_json(
|
metadata = self._download_json(
|
||||||
url, video_id, transform_source=strip_jsonp)
|
url, video_id, transform_source=strip_jsonp)
|
||||||
|
|
||||||
|
@ -126,10 +136,10 @@ class WDRIE(InfoExtractor):
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class WDRPageIE(InfoExtractor):
|
class WDRPageIE(WDRIE):
|
||||||
_CURRENT_MAUS_URL = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/){1,2}[^/?#]+\.php5'
|
_MAUS_REGEX = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/)*?(?P<maus_id>[^/?#.]+)(?:/?|/index\.php5|\.php5)$'
|
||||||
_PAGE_REGEX = r'/(?:mediathek/)?(?:[^/]+/)*(?P<display_id>[^/]+)\.html'
|
_PAGE_REGEX = r'/(?:mediathek/)?(?:[^/]+/)*(?P<display_id>[^/]+)\.html'
|
||||||
_VALID_URL = r'https?://(?:www\d?\.)?(?:(?:kinder\.)?wdr\d?|sportschau)\.de' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL
|
_VALID_URL = r'https?://(?:www\d?\.)?(?:(?:kinder\.)?wdr\d?|sportschau)\.de' + _PAGE_REGEX + '|' + _MAUS_REGEX
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
|
@ -170,11 +180,11 @@ class WDRPageIE(InfoExtractor):
|
||||||
{
|
{
|
||||||
'url': 'http://www1.wdr.de/mediathek/video/live/index.html',
|
'url': 'http://www1.wdr.de/mediathek/video/live/index.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'mdb-1406149',
|
'id': 'mdb-2296252',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': r're:^WDR Fernsehen im Livestream \(nur in Deutschland erreichbar\) [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
'title': r're:^WDR Fernsehen im Livestream (?:\(nur in Deutschland erreichbar\) )?[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||||
'alt_title': 'WDR Fernsehen Live',
|
'alt_title': 'WDR Fernsehen Live',
|
||||||
'upload_date': '20150101',
|
'upload_date': '20201112',
|
||||||
'is_live': True,
|
'is_live': True,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
|
@ -183,7 +193,7 @@ class WDRPageIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html',
|
'url': 'http://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html',
|
||||||
'playlist_mincount': 7,
|
'playlist_mincount': 6,
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'aktuelle-stunde-120',
|
'id': 'aktuelle-stunde-120',
|
||||||
},
|
},
|
||||||
|
@ -191,10 +201,10 @@ class WDRPageIE(InfoExtractor):
|
||||||
{
|
{
|
||||||
'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5',
|
'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'mdb-1552552',
|
'id': 'mdb-2627637',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'upload_date': 're:^[0-9]{8}$',
|
'upload_date': 're:^[0-9]{8}$',
|
||||||
'title': 're:^Die Sendung mit der Maus vom [0-9.]{10}$',
|
'title': 're:^Die Sendung (?:mit der Maus )?vom [0-9.]{10}$',
|
||||||
},
|
},
|
||||||
'skip': 'The id changes from week to week because of the new episode'
|
'skip': 'The id changes from week to week because of the new episode'
|
||||||
},
|
},
|
||||||
|
@ -207,6 +217,7 @@ class WDRPageIE(InfoExtractor):
|
||||||
'upload_date': '20130919',
|
'upload_date': '20130919',
|
||||||
'title': 'Sachgeschichte - Achterbahn ',
|
'title': 'Sachgeschichte - Achterbahn ',
|
||||||
},
|
},
|
||||||
|
'skip': 'HTTP Error 404: Not Found',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www1.wdr.de/radio/player/radioplayer116~_layout-popupVersion.html',
|
'url': 'http://www1.wdr.de/radio/player/radioplayer116~_layout-popupVersion.html',
|
||||||
|
@ -232,6 +243,7 @@ class WDRPageIE(InfoExtractor):
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'HTTP Error 404: Not Found',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.sportschau.de/handballem2018/audio-vorschau---die-handball-em-startet-mit-grossem-favoritenfeld-100.html',
|
'url': 'http://www.sportschau.de/handballem2018/audio-vorschau---die-handball-em-startet-mit-grossem-favoritenfeld-100.html',
|
||||||
|
@ -245,7 +257,7 @@ class WDRPageIE(InfoExtractor):
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = self._match_valid_url(url)
|
mobj = self._match_valid_url(url)
|
||||||
display_id = mobj.group('display_id')
|
display_id = dict_get(mobj.groupdict(), ('display_id', 'maus_id'), 'wdrmaus')
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
|
@ -271,6 +283,14 @@ class WDRPageIE(InfoExtractor):
|
||||||
jsonp_url = try_get(
|
jsonp_url = try_get(
|
||||||
media_link_obj, lambda x: x['mediaObj']['url'], compat_str)
|
media_link_obj, lambda x: x['mediaObj']['url'], compat_str)
|
||||||
if jsonp_url:
|
if jsonp_url:
|
||||||
|
# metadata, or player JS with ['ref'] giving WDR id, or just media, perhaps
|
||||||
|
clip_id = media_link_obj['mediaObj'].get('ref')
|
||||||
|
if jsonp_url.endswith('.assetjsonp'):
|
||||||
|
asset = self._download_json(
|
||||||
|
jsonp_url, display_id, fatal=False, transform_source=strip_jsonp)
|
||||||
|
clip_id = try_get(asset, lambda x: x['trackerData']['trackerClipId'], compat_str)
|
||||||
|
if clip_id:
|
||||||
|
jsonp_url = self._asset_url(clip_id[4:])
|
||||||
entries.append(self.url_result(jsonp_url, ie=WDRIE.ie_key()))
|
entries.append(self.url_result(jsonp_url, ie=WDRIE.ie_key()))
|
||||||
|
|
||||||
# Playlist (e.g. https://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html)
|
# Playlist (e.g. https://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html)
|
||||||
|
@ -290,16 +310,14 @@ class WDRPageIE(InfoExtractor):
|
||||||
class WDRElefantIE(InfoExtractor):
|
class WDRElefantIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)wdrmaus\.de/elefantenseite/#(?P<id>.+)'
|
_VALID_URL = r'https?://(?:www\.)wdrmaus\.de/elefantenseite/#(?P<id>.+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.wdrmaus.de/elefantenseite/#folge_ostern_2015',
|
'url': 'http://www.wdrmaus.de/elefantenseite/#elefantenkino_wippe',
|
||||||
|
# adaptive stream: unstable file MD5
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'Folge Oster-Spezial 2015',
|
'title': 'Wippe',
|
||||||
'id': 'mdb-1088195',
|
'id': 'mdb-1198320',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'age_limit': None,
|
'age_limit': None,
|
||||||
'upload_date': '20150406'
|
'upload_date': '20071003'
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -334,6 +352,7 @@ class WDRMobileIE(InfoExtractor):
|
||||||
/[0-9]+/[0-9]+/
|
/[0-9]+/[0-9]+/
|
||||||
(?P<id>[0-9]+)_(?P<title>[0-9]+)'''
|
(?P<id>[0-9]+)_(?P<title>[0-9]+)'''
|
||||||
IE_NAME = 'wdr:mobile'
|
IE_NAME = 'wdr:mobile'
|
||||||
|
_WORKING = False # no such domain
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://mobile-ondemand.wdr.de/CMS2010/mdb/ondemand/weltweit/fsk0/42/421735/421735_4283021.mp4',
|
'url': 'http://mobile-ondemand.wdr.de/CMS2010/mdb/ondemand/weltweit/fsk0/42/421735/421735_4283021.mp4',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
|
|
@ -136,6 +136,34 @@ class ZDFBaseIE(InfoExtractor):
|
||||||
class ZDFIE(ZDFBaseIE):
|
class ZDFIE(ZDFBaseIE):
|
||||||
_VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)\.html'
|
_VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)\.html'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
# Same as https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html
|
||||||
|
'url': 'https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html',
|
||||||
|
'md5': '34ec321e7eb34231fd88616c65c92db0',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '210222_phx_nachgehakt_corona_protest',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Wohin führt der Protest in der Pandemie?',
|
||||||
|
'description': 'md5:7d643fe7f565e53a24aac036b2122fbd',
|
||||||
|
'duration': 1691,
|
||||||
|
'timestamp': 1613948400,
|
||||||
|
'upload_date': '20210221',
|
||||||
|
},
|
||||||
|
'skip': 'No longer available: "Diese Seite wurde leider nicht gefunden"',
|
||||||
|
}, {
|
||||||
|
# Same as https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html
|
||||||
|
'url': 'https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html',
|
||||||
|
'md5': '0aff3e7bc72c8813f5e0fae333316a1d',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '141007_ab18_10wochensommer_film',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Ab 18! - 10 Wochen Sommer',
|
||||||
|
'description': 'md5:8253f41dc99ce2c3ff892dac2d65fe26',
|
||||||
|
'duration': 2660,
|
||||||
|
'timestamp': 1608604200,
|
||||||
|
'upload_date': '20201222',
|
||||||
|
},
|
||||||
|
'skip': 'No longer available: "Diese Seite wurde leider nicht gefunden"',
|
||||||
|
}, {
|
||||||
'url': 'https://www.zdf.de/nachrichten/heute-journal/heute-journal-vom-30-12-2021-100.html',
|
'url': 'https://www.zdf.de/nachrichten/heute-journal/heute-journal-vom-30-12-2021-100.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '211230_sendung_hjo',
|
'id': '211230_sendung_hjo',
|
||||||
|
@ -195,13 +223,16 @@ class ZDFIE(ZDFBaseIE):
|
||||||
'url': 'https://www.zdf.de/dokumentation/planet-e/planet-e-uebersichtsseite-weitere-dokumentationen-von-planet-e-100.html',
|
'url': 'https://www.zdf.de/dokumentation/planet-e/planet-e-uebersichtsseite-weitere-dokumentationen-von-planet-e-100.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# Same as https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html
|
'url': 'https://www.zdf.de/arte/todliche-flucht/page-video-artede-toedliche-flucht-16-100.html',
|
||||||
'url': 'https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html',
|
'info_dict': {
|
||||||
'only_matching': True
|
'id': 'video_artede_083871-001-A',
|
||||||
}, {
|
'ext': 'mp4',
|
||||||
# Same as https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html
|
'title': 'Tödliche Flucht (1/6)',
|
||||||
'url': 'https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html',
|
'description': 'md5:e34f96a9a5f8abd839ccfcebad3d5315',
|
||||||
'only_matching': True
|
'duration': 3193.0,
|
||||||
|
'timestamp': 1641355200,
|
||||||
|
'upload_date': '20220105',
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_entry(self, url, player, content, video_id):
|
def _extract_entry(self, url, player, content, video_id):
|
||||||
|
|
Loading…
Reference in a new issue