Update to ytdl-commit-8a158a9

[NHK] Use new API URL
6508688e88

Closes #2337, Closes #4063
This commit is contained in:
pukkandan 2022-06-20 10:14:12 +05:30
parent 7b2c3f47c6
commit 6d1b34896e
No known key found for this signature in database
GPG key ID: 7EEE9E1E817D0A39
12 changed files with 296 additions and 155 deletions

View file

@ -71,7 +71,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t
# NEW FEATURES # NEW FEATURES
* Based on **youtube-dl 2021.12.17 [commit/6508688](https://github.com/ytdl-org/youtube-dl/commit/6508688e88c83bb811653083db9351702cd39a6a)** ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21)) and **youtube-dlc 2020.11.11-3 [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl) * Based on **youtube-dl 2021.12.17 [commit/8a158a9](https://github.com/ytdl-org/youtube-dl/commit/8a158a936c8b002ef536e9e2b778ded02c09c0fa)**<!--([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))--> and **youtube-dlc 2020.11.11-3 [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl)
* **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in youtube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API * **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in youtube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API

View file

@ -102,9 +102,10 @@ def generator(test_case, tname):
def print_skipping(reason): def print_skipping(reason):
print('Skipping %s: %s' % (test_case['name'], reason)) print('Skipping %s: %s' % (test_case['name'], reason))
self.skipTest(reason)
if not ie.working(): if not ie.working():
print_skipping('IE marked as not _WORKING') print_skipping('IE marked as not _WORKING')
return
for tc in test_cases: for tc in test_cases:
info_dict = tc.get('info_dict', {}) info_dict = tc.get('info_dict', {})
@ -118,11 +119,10 @@ def generator(test_case, tname):
if 'skip' in test_case: if 'skip' in test_case:
print_skipping(test_case['skip']) print_skipping(test_case['skip'])
return
for other_ie in other_ies: for other_ie in other_ies:
if not other_ie.working(): if not other_ie.working():
print_skipping('test depends on %sIE, marked as not WORKING' % other_ie.ie_key()) print_skipping('test depends on %sIE, marked as not WORKING' % other_ie.ie_key())
return
params = get_params(test_case.get('params', {})) params = get_params(test_case.get('params', {}))
params['outtmpl'] = tname + '_' + params['outtmpl'] params['outtmpl'] = tname + '_' + params['outtmpl']

View file

@ -38,6 +38,9 @@ class BaseTestSubtitles(unittest.TestCase):
self.DL = FakeYDL() self.DL = FakeYDL()
self.ie = self.IE() self.ie = self.IE()
self.DL.add_info_extractor(self.ie) self.DL.add_info_extractor(self.ie)
if not self.IE.working():
print('Skipping: %s marked as not _WORKING' % self.IE.ie_key())
self.skipTest('IE marked as not _WORKING')
def getInfoDict(self): def getInfoDict(self):
info_dict = self.DL.extract_info(self.url, download=False) info_dict = self.DL.extract_info(self.url, download=False)
@ -57,6 +60,21 @@ class BaseTestSubtitles(unittest.TestCase):
@is_download_test @is_download_test
class TestYoutubeSubtitles(BaseTestSubtitles): class TestYoutubeSubtitles(BaseTestSubtitles):
# Available subtitles for QRS8MkLhQmM:
# Language formats
# ru vtt, ttml, srv3, srv2, srv1, json3
# fr vtt, ttml, srv3, srv2, srv1, json3
# en vtt, ttml, srv3, srv2, srv1, json3
# nl vtt, ttml, srv3, srv2, srv1, json3
# de vtt, ttml, srv3, srv2, srv1, json3
# ko vtt, ttml, srv3, srv2, srv1, json3
# it vtt, ttml, srv3, srv2, srv1, json3
# zh-Hant vtt, ttml, srv3, srv2, srv1, json3
# hi vtt, ttml, srv3, srv2, srv1, json3
# pt-BR vtt, ttml, srv3, srv2, srv1, json3
# es-MX vtt, ttml, srv3, srv2, srv1, json3
# ja vtt, ttml, srv3, srv2, srv1, json3
# pl vtt, ttml, srv3, srv2, srv1, json3
url = 'QRS8MkLhQmM' url = 'QRS8MkLhQmM'
IE = YoutubeIE IE = YoutubeIE
@ -65,47 +83,60 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
self.DL.params['allsubtitles'] = True self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles() subtitles = self.getSubtitles()
self.assertEqual(len(subtitles.keys()), 13) self.assertEqual(len(subtitles.keys()), 13)
self.assertEqual(md5(subtitles['en']), '688dd1ce0981683867e7fe6fde2a224b') self.assertEqual(md5(subtitles['en']), 'ae1bd34126571a77aabd4d276b28044d')
self.assertEqual(md5(subtitles['it']), '31324d30b8430b309f7f5979a504a769') self.assertEqual(md5(subtitles['it']), '0e0b667ba68411d88fd1c5f4f4eab2f9')
for lang in ['fr', 'de']: for lang in ['fr', 'de']:
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang) self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
def test_youtube_subtitles_ttml_format(self): def _test_subtitles_format(self, fmt, md5_hash, lang='en'):
self.DL.params['writesubtitles'] = True self.DL.params['writesubtitles'] = True
self.DL.params['subtitlesformat'] = 'ttml' self.DL.params['subtitlesformat'] = fmt
subtitles = self.getSubtitles() subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), 'c97ddf1217390906fa9fbd34901f3da2') self.assertEqual(md5(subtitles[lang]), md5_hash)
def test_youtube_subtitles_ttml_format(self):
self._test_subtitles_format('ttml', 'c97ddf1217390906fa9fbd34901f3da2')
def test_youtube_subtitles_vtt_format(self): def test_youtube_subtitles_vtt_format(self):
self.DL.params['writesubtitles'] = True self._test_subtitles_format('vtt', 'ae1bd34126571a77aabd4d276b28044d')
self.DL.params['subtitlesformat'] = 'vtt'
def test_youtube_subtitles_json3_format(self):
self._test_subtitles_format('json3', '688dd1ce0981683867e7fe6fde2a224b')
def _test_automatic_captions(self, url, lang):
self.url = url
self.DL.params['writeautomaticsub'] = True
self.DL.params['subtitleslangs'] = [lang]
subtitles = self.getSubtitles() subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), 'ae1bd34126571a77aabd4d276b28044d') self.assertTrue(subtitles[lang] is not None)
def test_youtube_automatic_captions(self): def test_youtube_automatic_captions(self):
self.url = '8YoUxe5ncPo' # Available automatic captions for 8YoUxe5ncPo:
self.DL.params['writeautomaticsub'] = True # Language formats (all in vtt, ttml, srv3, srv2, srv1, json3)
self.DL.params['subtitleslangs'] = ['it'] # gu, zh-Hans, zh-Hant, gd, ga, gl, lb, la, lo, tt, tr,
subtitles = self.getSubtitles() # lv, lt, tk, th, tg, te, fil, haw, yi, ceb, yo, de, da,
self.assertTrue(subtitles['it'] is not None) # el, eo, en, eu, et, es, ru, rw, ro, bn, be, bg, uk, jv,
# bs, ja, or, xh, co, ca, cy, cs, ps, pt, pa, vi, pl, hy,
def test_youtube_no_automatic_captions(self): # hr, ht, hu, hmn, hi, ha, mg, uz, ml, mn, mi, mk, ur,
self.url = 'QRS8MkLhQmM' # mt, ms, mr, ug, ta, my, af, sw, is, am,
self.DL.params['writeautomaticsub'] = True # *it*, iw, sv, ar,
subtitles = self.getSubtitles() # su, zu, az, id, ig, nl, no, ne, ny, fr, ku, fy, fa, fi,
self.assertTrue(not subtitles) # ka, kk, sr, sq, ko, kn, km, st, sk, si, so, sn, sm, sl,
# ky, sd
# ...
self._test_automatic_captions('8YoUxe5ncPo', 'it')
@unittest.skip('Video unavailable')
def test_youtube_translated_subtitles(self): def test_youtube_translated_subtitles(self):
# This video has a subtitles track, which can be translated # This video has a subtitles track, which can be translated (#4555)
self.url = 'i0ZabxXmH4Y' self._test_automatic_captions('Ky9eprVWzlI', 'it')
self.DL.params['writeautomaticsub'] = True
self.DL.params['subtitleslangs'] = ['it']
subtitles = self.getSubtitles()
self.assertTrue(subtitles['it'] is not None)
def test_youtube_nosubtitles(self): def test_youtube_nosubtitles(self):
self.DL.expect_warning('video doesn\'t have subtitles') self.DL.expect_warning('video doesn\'t have subtitles')
self.url = 'n5BB19UTcdA' # Available automatic captions for 8YoUxe5ncPo:
# ...
# 8YoUxe5ncPo has no subtitles
self.url = '8YoUxe5ncPo'
self.DL.params['writesubtitles'] = True self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles() subtitles = self.getSubtitles()
@ -137,6 +168,7 @@ class TestDailymotionSubtitles(BaseTestSubtitles):
@is_download_test @is_download_test
@unittest.skip('IE broken')
class TestTedSubtitles(BaseTestSubtitles): class TestTedSubtitles(BaseTestSubtitles):
url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html' url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
IE = TedTalkIE IE = TedTalkIE
@ -162,12 +194,12 @@ class TestVimeoSubtitles(BaseTestSubtitles):
self.DL.params['allsubtitles'] = True self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles() subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), {'de', 'en', 'es', 'fr'}) self.assertEqual(set(subtitles.keys()), {'de', 'en', 'es', 'fr'})
self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888') self.assertEqual(md5(subtitles['en']), '386cbc9320b94e25cb364b97935e5dd1')
self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8') self.assertEqual(md5(subtitles['fr']), 'c9b69eef35bc6641c0d4da8a04f9dfac')
def test_nosubtitles(self): def test_nosubtitles(self):
self.DL.expect_warning('video doesn\'t have subtitles') self.DL.expect_warning('video doesn\'t have subtitles')
self.url = 'http://vimeo.com/56015672' self.url = 'http://vimeo.com/68093876'
self.DL.params['writesubtitles'] = True self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles() subtitles = self.getSubtitles()
@ -175,6 +207,7 @@ class TestVimeoSubtitles(BaseTestSubtitles):
@is_download_test @is_download_test
@unittest.skip('IE broken')
class TestWallaSubtitles(BaseTestSubtitles): class TestWallaSubtitles(BaseTestSubtitles):
url = 'http://vod.walla.co.il/movie/2705958/the-yes-men' url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
IE = WallaIE IE = WallaIE
@ -197,6 +230,7 @@ class TestWallaSubtitles(BaseTestSubtitles):
@is_download_test @is_download_test
@unittest.skip('IE broken')
class TestCeskaTelevizeSubtitles(BaseTestSubtitles): class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky' url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
IE = CeskaTelevizeIE IE = CeskaTelevizeIE
@ -219,6 +253,7 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
@is_download_test @is_download_test
@unittest.skip('IE broken')
class TestLyndaSubtitles(BaseTestSubtitles): class TestLyndaSubtitles(BaseTestSubtitles):
url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html' url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'
IE = LyndaIE IE = LyndaIE
@ -232,6 +267,7 @@ class TestLyndaSubtitles(BaseTestSubtitles):
@is_download_test @is_download_test
@unittest.skip('IE broken')
class TestNPOSubtitles(BaseTestSubtitles): class TestNPOSubtitles(BaseTestSubtitles):
url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860' url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'
IE = NPOIE IE = NPOIE
@ -245,6 +281,7 @@ class TestNPOSubtitles(BaseTestSubtitles):
@is_download_test @is_download_test
@unittest.skip('IE broken')
class TestMTVSubtitles(BaseTestSubtitles): class TestMTVSubtitles(BaseTestSubtitles):
url = 'http://www.cc.com/video-clips/p63lk0/adam-devine-s-house-party-chasing-white-swans' url = 'http://www.cc.com/video-clips/p63lk0/adam-devine-s-house-party-chasing-white-swans'
IE = ComedyCentralIE IE = ComedyCentralIE
@ -269,8 +306,8 @@ class TestNRKSubtitles(BaseTestSubtitles):
self.DL.params['writesubtitles'] = True self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles() subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), {'no'}) self.assertEqual(set(subtitles.keys()), {'nb-ttv'})
self.assertEqual(md5(subtitles['no']), '544fa917d3197fcbee64634559221cc2') self.assertEqual(md5(subtitles['nb-ttv']), '67e06ff02d0deaf975e68f6cb8f6a149')
@is_download_test @is_download_test
@ -295,6 +332,7 @@ class TestRaiPlaySubtitles(BaseTestSubtitles):
@is_download_test @is_download_test
@unittest.skip('IE broken - DRM only')
class TestVikiSubtitles(BaseTestSubtitles): class TestVikiSubtitles(BaseTestSubtitles):
url = 'http://www.viki.com/videos/1060846v-punch-episode-18' url = 'http://www.viki.com/videos/1060846v-punch-episode-18'
IE = VikiIE IE = VikiIE
@ -323,6 +361,7 @@ class TestThePlatformSubtitles(BaseTestSubtitles):
@is_download_test @is_download_test
@unittest.skip('IE broken')
class TestThePlatformFeedSubtitles(BaseTestSubtitles): class TestThePlatformFeedSubtitles(BaseTestSubtitles):
url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207' url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207'
IE = ThePlatformFeedIE IE = ThePlatformFeedIE
@ -360,7 +399,7 @@ class TestDemocracynowSubtitles(BaseTestSubtitles):
self.DL.params['allsubtitles'] = True self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles() subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), {'en'}) self.assertEqual(set(subtitles.keys()), {'en'})
self.assertEqual(md5(subtitles['en']), 'acaca989e24a9e45a6719c9b3d60815c') self.assertEqual(md5(subtitles['en']), 'a3cc4c0b5eadd74d9974f1c1f5101045')
def test_subtitles_in_page(self): def test_subtitles_in_page(self):
self.url = 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree' self.url = 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree'
@ -368,7 +407,7 @@ class TestDemocracynowSubtitles(BaseTestSubtitles):
self.DL.params['allsubtitles'] = True self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles() subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), {'en'}) self.assertEqual(set(subtitles.keys()), {'en'})
self.assertEqual(md5(subtitles['en']), 'acaca989e24a9e45a6719c9b3d60815c') self.assertEqual(md5(subtitles['en']), 'a3cc4c0b5eadd74d9974f1c1f5101045')
@is_download_test @is_download_test

View file

@ -401,10 +401,15 @@ def validate_options(opts):
if opts.no_sponsorblock: if opts.no_sponsorblock:
opts.sponsorblock_mark = opts.sponsorblock_remove = set() opts.sponsorblock_mark = opts.sponsorblock_remove = set()
default_downloader = None
for proto, path in opts.external_downloader.items(): for proto, path in opts.external_downloader.items():
if get_external_downloader(path) is None: ed = get_external_downloader(path)
if ed is None:
raise ValueError( raise ValueError(
f'No such {format_field(proto, None, "%s ", ignore="default")}external downloader "{path}"') f'No such {format_field(proto, None, "%s ", ignore="default")}external downloader "{path}"')
elif ed and proto == 'default':
default_downloader = ed.get_basename()
warnings, deprecation_warnings = [], [] warnings, deprecation_warnings = [], []
# Common mistake: -f best # Common mistake: -f best
@ -415,13 +420,18 @@ def validate_options(opts):
'If you know what you are doing and want only the best pre-merged format, use "-f b" instead to suppress this warning'))) 'If you know what you are doing and want only the best pre-merged format, use "-f b" instead to suppress this warning')))
# --(postprocessor/downloader)-args without name # --(postprocessor/downloader)-args without name
def report_args_compat(name, value, key1, key2=None): def report_args_compat(name, value, key1, key2=None, where=None):
if key1 in value and key2 not in value: if key1 in value and key2 not in value:
warnings.append(f'{name} arguments given without specifying name. The arguments will be given to all {name}s') warnings.append(f'{name.title()} arguments given without specifying name. '
f'The arguments will be given to {where or f"all {name}s"}')
return True return True
return False return False
report_args_compat('external downloader', opts.external_downloader_args, 'default') if report_args_compat('external downloader', opts.external_downloader_args,
'default', where=default_downloader) and default_downloader:
# Compat with youtube-dl's behavior. See https://github.com/ytdl-org/youtube-dl/commit/49c5293014bc11ec8c009856cd63cffa6296c1e1
opts.external_downloader_args.setdefault(default_downloader, opts.external_downloader_args.pop('default'))
if report_args_compat('post-processor', opts.postprocessor_args, 'default-compat', 'default'): if report_args_compat('post-processor', opts.postprocessor_args, 'default-compat', 'default'):
opts.postprocessor_args['default'] = opts.postprocessor_args.pop('default-compat') opts.postprocessor_args['default'] = opts.postprocessor_args.pop('default-compat')
opts.postprocessor_args.setdefault('sponskrub', []) opts.postprocessor_args.setdefault('sponskrub', [])

View file

@ -752,6 +752,7 @@ from .kinja import KinjaEmbedIE
from .kinopoisk import KinoPoiskIE from .kinopoisk import KinoPoiskIE
from .konserthusetplay import KonserthusetPlayIE from .konserthusetplay import KonserthusetPlayIE
from .koo import KooIE from .koo import KooIE
from .kth import KTHIE
from .krasview import KrasViewIE from .krasview import KrasViewIE
from .ku6 import Ku6IE from .ku6 import Ku6IE
from .kusi import KUSIIE from .kusi import KUSIIE

View file

@ -677,6 +677,11 @@ class BilibiliAudioIE(BilibiliAudioBaseIE):
'vcodec': 'none' 'vcodec': 'none'
}] }]
for a_format in formats:
a_format.setdefault('http_headers', {}).update({
'Referer': url,
})
song = self._call_api('song/info', au_id) song = self._call_api('song/info', au_id)
title = song['title'] title = song['title']
statistic = song.get('statistic') or {} statistic = song.get('statistic') or {}

View file

@ -382,5 +382,5 @@ class KalturaIE(InfoExtractor):
'duration': info.get('duration'), 'duration': info.get('duration'),
'timestamp': info.get('createdAt'), 'timestamp': info.get('createdAt'),
'uploader_id': format_field(info, 'userId', ignore=('None', None)), 'uploader_id': format_field(info, 'userId', ignore=('None', None)),
'view_count': info.get('plays'), 'view_count': int_or_none(info.get('plays')),
} }

28
yt_dlp/extractor/kth.py Normal file
View file

@ -0,0 +1,28 @@
from .common import InfoExtractor
from ..utils import smuggle_url
class KTHIE(InfoExtractor):
_VALID_URL = r'https?://play\.kth\.se/(?:[^/]+/)+(?P<id>[a-z0-9_]+)'
_TEST = {
'url': 'https://play.kth.se/media/Lunch+breakA+De+nya+aff%C3%A4rerna+inom+Fordonsdalen/0_uoop6oz9',
'md5': 'd83ada6d00ca98b73243a88efe19e8a6',
'info_dict': {
'id': '0_uoop6oz9',
'ext': 'mp4',
'title': 'md5:bd1d6931facb6828762a33e6ce865f37',
'thumbnail': 're:https?://.+/thumbnail/.+',
'duration': 3516,
'timestamp': 1647345358,
'upload_date': '20220315',
'uploader_id': 'md5:0ec23e33a89e795a4512930c8102509f',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
result = self.url_result(
smuggle_url('kaltura:308:%s' % video_id, {
'service_url': 'https://api.kaltura.nordu.net'}),
'Kaltura')
return result

View file

@ -1,11 +1,15 @@
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlparse
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
ExtractorError,
int_or_none, int_or_none,
parse_duration, merge_dicts,
parse_iso8601,
qualities, qualities,
try_get, try_get,
unified_strdate,
urljoin, urljoin,
) )
@ -14,120 +18,139 @@ class NDRBaseIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
mobj = self._match_valid_url(url) mobj = self._match_valid_url(url)
display_id = next(group for group in mobj.groups() if group) display_id = next(group for group in mobj.groups() if group)
id = mobj.group('id')
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
return self._extract_embed(webpage, display_id, id) return self._extract_embed(webpage, display_id, url)
class NDRIE(NDRBaseIE): class NDRIE(NDRBaseIE):
IE_NAME = 'ndr' IE_NAME = 'ndr'
IE_DESC = 'NDR.de - Norddeutscher Rundfunk' IE_DESC = 'NDR.de - Norddeutscher Rundfunk'
_VALID_URL = r'https?://(?:www\.)?(?:daserste\.)?ndr\.de/(?:[^/]+/)*(?P<display_id>[^/?#]+),(?P<id>[\da-z]+)\.html' _VALID_URL = r'https?://(?:\w+\.)*ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html'
_TESTS = [{ _TESTS = [{
# httpVideo, same content id
'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html', 'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html',
'md5': '6515bc255dc5c5f8c85bbc38e035a659',
'info_dict': { 'info_dict': {
'id': 'hafengeburtstag988', 'id': 'hafengeburtstag988',
'display_id': 'Party-Poette-und-Parade',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Party, Pötte und Parade', 'title': 'Party, Pötte und Parade',
'thumbnail': 'https://www.ndr.de/fernsehen/hafengeburtstag990_v-contentxl.jpg',
'description': 'md5:ad14f9d2f91d3040b6930c697e5f6b4c', 'description': 'md5:ad14f9d2f91d3040b6930c697e5f6b4c',
'series': None, 'uploader': 'ndrtv',
'channel': 'NDR Fernsehen', 'timestamp': 1431255671,
'upload_date': '20150508', 'upload_date': '20150510',
'duration': 3498, 'duration': 3498,
}, },
}, { 'params': {
'url': 'https://www.ndr.de/sport/fussball/Rostocks-Matchwinner-Froede-Ein-Hansa-Debuet-wie-im-Maerchen,hansa10312.html', 'skip_download': True,
'only_matching': True
}, {
'url': 'https://www.ndr.de/nachrichten/niedersachsen/kommunalwahl_niedersachsen_2021/Grosse-Parteien-zufrieden-mit-Ergebnissen-der-Kommunalwahl,kommunalwahl1296.html',
'info_dict': {
'id': 'kommunalwahl1296',
'ext': 'mp4',
'title': 'Die Spitzenrunde: Die Wahl aus Sicht der Landespolitik',
'thumbnail': 'https://www.ndr.de/fernsehen/screenshot1194912_v-contentxl.jpg',
'description': 'md5:5c6e2ad744cef499135735a1036d7aa7',
'series': 'Hallo Niedersachsen',
'channel': 'NDR Fernsehen',
'upload_date': '20210913',
'duration': 438,
}, },
'expected_warnings': ['Unable to download f4m manifest'],
}, { }, {
'url': 'https://www.ndr.de/fernsehen/sendungen/extra_3/extra-3-Satiremagazin-mit-Christian-Ehring,sendung1091858.html', # httpVideo, different content id
'url': 'http://www.ndr.de/sport/fussball/40-Osnabrueck-spielt-sich-in-einen-Rausch,osna270.html',
'md5': '1043ff203eab307f0c51702ec49e9a71',
'info_dict': { 'info_dict': {
'id': 'sendung1091858', 'id': 'osna272',
'display_id': '40-Osnabrueck-spielt-sich-in-einen-Rausch',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Extra 3 vom 11.11.2020 mit Christian Ehring', 'title': 'Osnabrück - Wehen Wiesbaden: Die Highlights',
'thumbnail': 'https://www.ndr.de/fernsehen/screenshot983938_v-contentxl.jpg', 'description': 'md5:32e9b800b3d2d4008103752682d5dc01',
'description': 'md5:700f6de264010585012a72f97b0ac0c9', 'uploader': 'ndrtv',
'series': 'extra 3', 'timestamp': 1442059200,
'channel': 'NDR Fernsehen', 'upload_date': '20150912',
'upload_date': '20201111', 'duration': 510,
'duration': 1749, },
} 'params': {
'skip_download': True,
},
'skip': 'No longer available',
}, { }, {
# httpAudio, same content id
'url': 'http://www.ndr.de/info/La-Valette-entgeht-der-Hinrichtung,audio51535.html', 'url': 'http://www.ndr.de/info/La-Valette-entgeht-der-Hinrichtung,audio51535.html',
'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
'info_dict': { 'info_dict': {
'id': 'audio51535', 'id': 'audio51535',
'display_id': 'La-Valette-entgeht-der-Hinrichtung',
'ext': 'mp3', 'ext': 'mp3',
'title': 'La Valette entgeht der Hinrichtung', 'title': 'La Valette entgeht der Hinrichtung',
'thumbnail': 'https://www.ndr.de/mediathek/mediathekbild140_v-podcast.jpg',
'description': 'md5:22f9541913a40fe50091d5cdd7c9f536', 'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
'upload_date': '20140729', 'uploader': 'ndrinfo',
'duration': 884.0, 'timestamp': 1631711863,
'upload_date': '20210915',
'duration': 884,
}, },
'expected_warnings': ['unable to extract json url'], 'params': {
'skip_download': True,
},
}, {
# with subtitles
'url': 'https://www.ndr.de/fernsehen/sendungen/extra_3/extra-3-Satiremagazin-mit-Christian-Ehring,sendung1091858.html',
'info_dict': {
'id': 'extra18674',
'display_id': 'extra-3-Satiremagazin-mit-Christian-Ehring',
'ext': 'mp4',
'title': 'Extra 3 vom 11.11.2020 mit Christian Ehring',
'description': 'md5:700f6de264010585012a72f97b0ac0c9',
'uploader': 'ndrtv',
'upload_date': '20201207',
'timestamp': 1614349457,
'duration': 1749,
'subtitles': {
'de': [{
'ext': 'ttml',
'url': r're:^https://www\.ndr\.de.+',
}],
},
},
'params': {
'skip_download': True,
},
'expected_warnings': ['Unable to download f4m manifest'],
}, {
'url': 'https://www.ndr.de/Fettes-Brot-Ferris-MC-und-Thees-Uhlmann-live-on-stage,festivalsommer116.html',
'only_matching': True,
}] }]
def _extract_embed(self, webpage, display_id, id): def _extract_embed(self, webpage, display_id, url):
formats = [] embed_url = (
base_url = 'https://www.ndr.de' self._html_search_meta(
json_url = self._search_regex(r'<iframe[^>]+src=\"([^\"]+)_theme-ndrde[^\.]*\.html\"', webpage, 'embedURL', webpage, 'embed URL',
'json url', fatal=False) default=None)
if json_url: or self._search_regex(
data_json = self._download_json(base_url + json_url.replace('ardplayer_image', 'ardjson_image') + '.json', r'\bembedUrl["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
id, fatal=False) 'embed URL', group='url', default=None)
info_json = data_json.get('_info', {}) or self._search_regex(
media_json = try_get(data_json, lambda x: x['_mediaArray'][0]['_mediaStreamArray']) r'\bvar\s*sophoraID\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
for media in media_json: 'embed URL', group='url', default=''))
if media.get('_quality') == 'auto': # some more work needed if we only found sophoraID
formats.extend(self._extract_m3u8_formats(media['_stream'], id)) if re.match(r'^[a-z]+\d+$', embed_url):
subtitles = {} # get the initial part of the url path,. eg /panorama/archiv/2022/
sub_url = data_json.get('_subtitleUrl') parsed_url = compat_urllib_parse_urlparse(url)
if sub_url: path = self._search_regex(r'(.+/)%s' % display_id, parsed_url.path or '', 'embed URL', default='')
subtitles.setdefault('de', []).append({ # find tell-tale image with the actual ID
'url': base_url + sub_url, ndr_id = self._search_regex(r'%s([a-z]+\d+)(?!\.)\b' % (path, ), webpage, 'embed URL', default=None)
}) # or try to use special knowledge!
self._sort_formats(formats) NDR_INFO_URL_TPL = 'https://www.ndr.de/info/%s-player.html'
return { embed_url = 'ndr:%s' % (ndr_id, ) if ndr_id else NDR_INFO_URL_TPL % (embed_url, )
'id': id, if not embed_url:
'title': info_json.get('clipTitle'), raise ExtractorError('Unable to extract embedUrl')
'thumbnail': base_url + data_json.get('_previewImage'),
'description': info_json.get('clipDescription'), description = self._search_regex(
'series': info_json.get('seriesTitle') or None, r'<p[^>]+itemprop="description">([^<]+)</p>',
'channel': info_json.get('channelTitle'), webpage, 'description', default=None) or self._og_search_description(webpage)
'upload_date': unified_strdate(info_json.get('clipDate')), timestamp = parse_iso8601(
'duration': data_json.get('_duration'), self._search_regex(
'formats': formats, (r'<span[^>]+itemprop="(?:datePublished|uploadDate)"[^>]+content="(?P<cont>[^"]+)"',
'subtitles': subtitles, r'\bvar\s*pdt\s*=\s*(?P<q>["\'])(?P<cont>(?:(?!(?P=q)).)+)(?P=q)', ),
} webpage, 'upload date', group='cont', default=None))
else: info = self._search_json_ld(webpage, display_id, default={})
json_url = base_url + self._search_regex(r'apiUrl\s?=\s?\'([^\']+)\'', webpage, 'json url').replace( return merge_dicts({
'_belongsToPodcast-', '') '_type': 'url_transparent',
data_json = self._download_json(json_url, id, fatal=False) 'url': embed_url,
return { 'display_id': display_id,
'id': id, 'description': description,
'title': data_json.get('title'), 'timestamp': timestamp,
'thumbnail': base_url + data_json.get('poster'), }, info)
'description': data_json.get('summary'),
'upload_date': unified_strdate(data_json.get('publicationDate')),
'duration': parse_duration(data_json.get('duration')),
'formats': [{
'url': try_get(data_json, (lambda x: x['audio'][0]['url'], lambda x: x['files'][0]['url'])),
'vcodec': 'none',
'ext': 'mp3',
}],
}
class NJoyIE(NDRBaseIE): class NJoyIE(NDRBaseIE):
@ -151,19 +174,19 @@ class NJoyIE(NDRBaseIE):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'skip': 'No longer available',
}, { }, {
# httpVideo, different content id # httpVideo, different content id
'url': 'http://www.n-joy.de/musik/Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-,felixjaehn168.html', 'url': 'http://www.n-joy.de/musik/Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-,felixjaehn168.html',
'md5': '417660fffa90e6df2fda19f1b40a64d8', 'md5': '417660fffa90e6df2fda19f1b40a64d8',
'info_dict': { 'info_dict': {
'id': 'dockville882', 'id': 'livestream283',
'display_id': 'Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-', 'display_id': 'Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-',
'ext': 'mp4', 'ext': 'mp3',
'title': '"Ich hab noch nie" mit Felix Jaehn', 'title': 'Das frueheste DJ Set des Nordens live mit Felix Jaehn',
'description': 'md5:85dd312d53be1b99e1f998a16452a2f3', 'description': 'md5:681698f527b8601e511e7b79edde7d2c',
'uploader': 'njoy', 'uploader': 'njoy',
'upload_date': '20150822', 'upload_date': '20210830',
'duration': 211,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -173,18 +196,25 @@ class NJoyIE(NDRBaseIE):
'only_matching': True, 'only_matching': True,
}] }]
def _extract_embed(self, webpage, display_id, id): def _extract_embed(self, webpage, display_id, url=None):
# find tell-tale URL with the actual ID, or ...
video_id = self._search_regex( video_id = self._search_regex(
r'<iframe[^>]+id="pp_([\da-z]+)"', webpage, 'embed id') (r'''\bsrc\s*=\s*["']?(?:/\w+)+/([a-z]+\d+)(?!\.)\b''',
description = self._search_regex( r'<iframe[^>]+id="pp_([\da-z]+)"', ),
r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>', webpage, 'NDR id', default=None)
webpage, 'description', fatal=False)
description = (
self._html_search_meta('description', webpage)
or self._search_regex(
r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>',
webpage, 'description', fatal=False))
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'ie_key': 'NDREmbedBase', 'ie_key': 'NDREmbedBase',
'url': 'ndr:%s' % video_id, 'url': 'ndr:%s' % video_id,
'display_id': display_id, 'display_id': display_id,
'description': description, 'description': description,
'title': display_id.replace('-', ' ').strip(),
} }
@ -287,7 +317,7 @@ class NDREmbedBaseIE(InfoExtractor):
class NDREmbedIE(NDREmbedBaseIE): class NDREmbedIE(NDREmbedBaseIE):
IE_NAME = 'ndr:embed' IE_NAME = 'ndr:embed'
_VALID_URL = r'https?://(?:www\.)?(?:daserste\.)?ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html' _VALID_URL = r'https?://(?:\w+\.)*ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:(?:ard)?player|externalPlayer)\.html'
_TESTS = [{ _TESTS = [{
'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html', 'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html',
'md5': '8b9306142fe65bbdefb5ce24edb6b0a9', 'md5': '8b9306142fe65bbdefb5ce24edb6b0a9',
@ -300,6 +330,7 @@ class NDREmbedIE(NDREmbedBaseIE):
'upload_date': '20150907', 'upload_date': '20150907',
'duration': 132, 'duration': 132,
}, },
'skip': 'No longer available',
}, { }, {
'url': 'http://www.ndr.de/ndr2/events/soundcheck/soundcheck3366-player.html', 'url': 'http://www.ndr.de/ndr2/events/soundcheck/soundcheck3366-player.html',
'md5': '002085c44bae38802d94ae5802a36e78', 'md5': '002085c44bae38802d94ae5802a36e78',
@ -315,6 +346,7 @@ class NDREmbedIE(NDREmbedBaseIE):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'skip': 'No longer available',
}, { }, {
'url': 'http://www.ndr.de/info/audio51535-player.html', 'url': 'http://www.ndr.de/info/audio51535-player.html',
'md5': 'bb3cd38e24fbcc866d13b50ca59307b8', 'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
@ -324,7 +356,7 @@ class NDREmbedIE(NDREmbedBaseIE):
'title': 'La Valette entgeht der Hinrichtung', 'title': 'La Valette entgeht der Hinrichtung',
'is_live': False, 'is_live': False,
'uploader': 'ndrinfo', 'uploader': 'ndrinfo',
'upload_date': '20140729', 'upload_date': '20210915',
'duration': 884, 'duration': 884,
}, },
'params': { 'params': {
@ -345,15 +377,17 @@ class NDREmbedIE(NDREmbedBaseIE):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'skip': 'No longer available',
}, { }, {
# httpVideoLive # httpVideoLive
'url': 'http://www.ndr.de/fernsehen/livestream/livestream217-externalPlayer.html', 'url': 'http://www.ndr.de/fernsehen/livestream/livestream217-externalPlayer.html',
'info_dict': { 'info_dict': {
'id': 'livestream217', 'id': 'livestream217',
'ext': 'flv', 'ext': 'mp4',
'title': r're:^NDR Fernsehen Niedersachsen \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', 'title': r're:^NDR Fernsehen Niedersachsen \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'is_live': True, 'is_live': True,
'upload_date': '20150910', 'upload_date': '20210409',
'uploader': 'ndrtv',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -391,9 +425,10 @@ class NJoyEmbedIE(NDREmbedBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Zehn Jahre Reeperbahn Festival - die Doku', 'title': 'Zehn Jahre Reeperbahn Festival - die Doku',
'is_live': False, 'is_live': False,
'upload_date': '20150807', 'upload_date': '20200826',
'duration': 1011, 'duration': 1011,
}, },
'expected_warnings': ['Unable to download f4m manifest'],
}, { }, {
# httpAudio # httpAudio
'url': 'http://www.n-joy.de/news_wissen/stefanrichter100-player_image-d5e938b1-f21a-4b9a-86b8-aaba8bca3a13_theme-n-joy.html', 'url': 'http://www.n-joy.de/news_wissen/stefanrichter100-player_image-d5e938b1-f21a-4b9a-86b8-aaba8bca3a13_theme-n-joy.html',
@ -410,6 +445,7 @@ class NJoyEmbedIE(NDREmbedBaseIE):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'skip': 'No longer available',
}, { }, {
# httpAudioLive, no explicit ext # httpAudioLive, no explicit ext
'url': 'http://www.n-joy.de/news_wissen/webradioweltweit100-player_image-3fec0484-2244-4565-8fb8-ed25fd28b173_theme-n-joy.html', 'url': 'http://www.n-joy.de/news_wissen/webradioweltweit100-player_image-3fec0484-2244-4565-8fb8-ed25fd28b173_theme-n-joy.html',
@ -419,7 +455,7 @@ class NJoyEmbedIE(NDREmbedBaseIE):
'title': r're:^N-JOY Weltweit \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', 'title': r're:^N-JOY Weltweit \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'is_live': True, 'is_live': True,
'uploader': 'njoy', 'uploader': 'njoy',
'upload_date': '20150810', 'upload_date': '20210830',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,

View file

@ -11,7 +11,7 @@ from ..utils import (
class NhkBaseIE(InfoExtractor): class NhkBaseIE(InfoExtractor):
_API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7a/%s/%s/%s/all%s.json' _API_URL_TEMPLATE = 'https://nwapi.nhk.jp/nhkworld/%sod%slist/v7b/%s/%s/%s/all%s.json'
_BASE_URL_REGEX = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/ondemand' _BASE_URL_REGEX = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/ondemand'
_TYPE_REGEX = r'/(?P<type>video|audio)/' _TYPE_REGEX = r'/(?P<type>video|audio)/'
@ -27,7 +27,7 @@ class NhkBaseIE(InfoExtractor):
def _extract_episode_info(self, url, episode=None): def _extract_episode_info(self, url, episode=None):
fetch_episode = episode is None fetch_episode = episode is None
lang, m_type, episode_id = NhkVodIE._match_valid_url(url).groups() lang, m_type, episode_id = NhkVodIE._match_valid_url(url).groups()
if episode_id.isdigit(): if len(episode_id) == 7:
episode_id = episode_id[:4] + '-' + episode_id[4:] episode_id = episode_id[:4] + '-' + episode_id[4:]
is_video = m_type == 'video' is_video = m_type == 'video'
@ -89,7 +89,8 @@ class NhkBaseIE(InfoExtractor):
class NhkVodIE(NhkBaseIE): class NhkVodIE(NhkBaseIE):
_VALID_URL = r'%s%s(?P<id>\d{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX) # the 7-character IDs can have alphabetic chars too: assume [a-z] rather than just [a-f], eg
_VALID_URL = r'%s%s(?P<id>[0-9a-z]{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
# Content available only for a limited period of time. Visit # Content available only for a limited period of time. Visit
# https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples. # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
_TESTS = [{ _TESTS = [{
@ -129,6 +130,19 @@ class NhkVodIE(NhkBaseIE):
}, { }, {
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/', 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/',
'only_matching': True, 'only_matching': True,
}, {
# video, alphabetic character in ID #29670
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999a34/',
'only_matching': True,
'info_dict': {
'id': 'qfjay6cg',
'ext': 'mp4',
'title': 'DESIGN TALKS plus - Fishermens Finery',
'description': 'md5:8a8f958aaafb0d7cb59d38de53f1e448',
'thumbnail': r're:^https?:/(/[a-z0-9.-]+)+\.jpg\?w=1920&h=1080$',
'upload_date': '20210615',
'timestamp': 1623722008,
}
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View file

@ -21,7 +21,7 @@ from ..utils import (
class XHamsterIE(InfoExtractor): class XHamsterIE(InfoExtractor):
_DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com)' _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com|xhday\.com)'
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?:.+?\.)?%s/ (?:.+?\.)?%s/
@ -32,7 +32,7 @@ class XHamsterIE(InfoExtractor):
''' % _DOMAINS ''' % _DOMAINS
_TESTS = [{ _TESTS = [{
'url': 'https://xhamster.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445', 'url': 'https://xhamster.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
'md5': '98b4687efb1ffd331c4197854dc09e8f', 'md5': '34e1ab926db5dc2750fed9e1f34304bb',
'info_dict': { 'info_dict': {
'id': '1509445', 'id': '1509445',
'display_id': 'femaleagent-shy-beauty-takes-the-bait', 'display_id': 'femaleagent-shy-beauty-takes-the-bait',
@ -41,6 +41,7 @@ class XHamsterIE(InfoExtractor):
'timestamp': 1350194821, 'timestamp': 1350194821,
'upload_date': '20121014', 'upload_date': '20121014',
'uploader': 'Ruseful2011', 'uploader': 'Ruseful2011',
'uploader_id': 'ruseful2011',
'duration': 893, 'duration': 893,
'age_limit': 18, 'age_limit': 18,
}, },
@ -70,6 +71,7 @@ class XHamsterIE(InfoExtractor):
'timestamp': 1454948101, 'timestamp': 1454948101,
'upload_date': '20160208', 'upload_date': '20160208',
'uploader': 'parejafree', 'uploader': 'parejafree',
'uploader_id': 'parejafree',
'duration': 72, 'duration': 72,
'age_limit': 18, 'age_limit': 18,
}, },
@ -115,6 +117,9 @@ class XHamsterIE(InfoExtractor):
}, { }, {
'url': 'http://de.xhamster.com/videos/skinny-girl-fucks-herself-hard-in-the-forest-xhnBJZx', 'url': 'http://de.xhamster.com/videos/skinny-girl-fucks-herself-hard-in-the-forest-xhnBJZx',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://xhday.com/videos/strapless-threesome-xhh7yVf',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -244,7 +249,6 @@ class XHamsterIE(InfoExtractor):
categories = None categories = None
uploader_url = url_or_none(try_get(video, lambda x: x['author']['pageURL'])) uploader_url = url_or_none(try_get(video, lambda x: x['author']['pageURL']))
return { return {
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,
@ -263,7 +267,7 @@ class XHamsterIE(InfoExtractor):
'dislike_count': int_or_none(try_get( 'dislike_count': int_or_none(try_get(
video, lambda x: x['rating']['dislikes'], int)), video, lambda x: x['rating']['dislikes'], int)),
'comment_count': int_or_none(video.get('views')), 'comment_count': int_or_none(video.get('views')),
'age_limit': age_limit, 'age_limit': age_limit if age_limit is not None else 18,
'categories': categories, 'categories': categories,
'formats': formats, 'formats': formats,
} }
@ -423,6 +427,9 @@ class XHamsterUserIE(InfoExtractor):
'id': 'firatkaan', 'id': 'firatkaan',
}, },
'playlist_mincount': 1, 'playlist_mincount': 1,
}, {
'url': 'https://xhday.com/users/mobhunter',
'only_matching': True,
}] }]
def _entries(self, user_id): def _entries(self, user_id):

View file

@ -135,9 +135,10 @@ class YouPornIE(InfoExtractor):
r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>', r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
webpage, 'uploader', fatal=False) webpage, 'uploader', fatal=False)
upload_date = unified_strdate(self._html_search_regex( upload_date = unified_strdate(self._html_search_regex(
[r'UPLOADED:\s*<span>([^<]+)', (r'UPLOADED:\s*<span>([^<]+)',
r'Date\s+[Aa]dded:\s*<span>([^<]+)', r'Date\s+[Aa]dded:\s*<span>([^<]+)',
r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>'], r'''(?s)<div[^>]+class=["']videoInfo(?:Date|Time)\b[^>]*>(.+?)</div>''',
r'(?s)<label\b[^>]*>Uploaded[^<]*</label>\s*<span\b[^>]*>(.+?)</span>'),
webpage, 'upload date', fatal=False)) webpage, 'upload date', fatal=False))
age_limit = self._rta_search(webpage) age_limit = self._rta_search(webpage)