[ATV.at] Fix extractor for ATV.at (#816)

Authored-by: NeroBurner, coletdjnz
Fixes https://github.com/ytdl-org/youtube-dl/issues/29079
This commit is contained in:
coletdjnz 2021-08-30 09:34:39 +12:00 committed by GitHub
parent 7e55872286
commit 9a292a620c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -4,6 +4,7 @@ from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
dict_get,
int_or_none, int_or_none,
unescapeHTML, unescapeHTML,
) )
@ -12,64 +13,62 @@ from ..utils import (
class ATVAtIE(InfoExtractor): class ATVAtIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?atv\.at/(?:[^/]+/){2}(?P<id>[dv]\d+)' _VALID_URL = r'https?://(?:www\.)?atv\.at/(?:[^/]+/){2}(?P<id>[dv]\d+)'
_TESTS = [{ _TESTS = [{
'url': 'http://atv.at/aktuell/di-210317-2005-uhr/v1698449/', 'url': 'https://www.atv.at/bauer-sucht-frau-die-zweite-chance/folge-1/d3390693/',
'md5': 'c3b6b975fb3150fc628572939df205f2', 'md5': 'c471605591009dfb6e6c54f7e62e2807',
'info_dict': { 'info_dict': {
'id': '1698447', 'id': '3390684',
'ext': 'mp4', 'ext': 'mp4',
'title': 'DI, 21.03.17 | 20:05 Uhr 1/1', 'title': 'Bauer sucht Frau - Die zweite Chance Folge 1',
} }
}, { }, {
'url': 'http://atv.at/aktuell/meinrad-knapp/d8416/', 'url': 'https://www.atv.at/bauer-sucht-frau-staffel-17/fuenfte-eventfolge/d3339537/',
'only_matching': True, 'only_matching': True,
}] }]
def _process_source_entry(self, source, part_id):
    """Convert a single source descriptor into a list of format dicts.

    `source` is a mapping whose 'url' key points at the media; `part_id`
    is forwarded to the HLS helper as the video id.

    Returns None when the source carries no usable 'url'. For a URL whose
    extension is 'm3u8' the result of `_extract_m3u8_formats` is returned;
    any other URL is wrapped as a single direct format.
    """
    media_url = source.get('url')
    if not media_url:
        return None

    if determine_ext(media_url) != 'm3u8':
        # Not an HLS manifest: expose the URL itself as the only format.
        return [{'url': media_url}]

    # fatal=False is passed so the helper is asked not to abort extraction
    # on failure (exact failure behaviour lives in the base class).
    return self._extract_m3u8_formats(
        media_url, part_id, 'mp4', 'm3u8_native',
        m3u8_id='hls', fatal=False)
def _process_entry(self, entry):
    """Build the info dict for one playlist entry.

    `entry` is a mapping read for the keys 'id', 'title', 'duration' and
    'sources'. Returns None when the entry has no truthy 'id'; otherwise
    a dict with 'id', 'title', 'duration' and the sorted 'formats'.
    """
    part_id = entry.get('id')
    if not part_id:
        return None

    # Flatten the formats of every source; a source that yields nothing
    # (None or an empty list) simply contributes no formats.
    formats = [
        fmt
        for source in entry.get('sources', [])
        for fmt in (self._process_source_entry(source, part_id) or [])
    ]
    self._sort_formats(formats)

    title = entry.get('title')
    duration = int_or_none(entry.get('duration'))
    return {
        'id': part_id,
        'title': title,
        'duration': duration,
        'formats': formats
    }
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
video_data = self._parse_json(unescapeHTML(self._search_regex( video_data = self._parse_json(unescapeHTML(self._search_regex(
[r'flashPlayerOptions\s*=\s*(["\'])(?P<json>(?:(?!\1).)+)\1', r'var\splaylist\s*=\s*(?P<json>\[.*\]);',
r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="(?P<json>[^"]+)"'],
webpage, 'player data', group='json')), webpage, 'player data', group='json')),
display_id)['config']['initial_video'] display_id)
video_id = video_data['id'] first_video = video_data[0]
video_title = video_data['title'] video_id = first_video['id']
video_title = dict_get(first_video, ('tvShowTitle', 'title'))
parts = []
for part in video_data.get('parts', []):
part_id = part['id']
part_title = part['title']
formats = []
for source in part.get('sources', []):
source_url = source.get('src')
if not source_url:
continue
ext = determine_ext(source_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
source_url, part_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
else:
formats.append({
'format_id': source.get('delivery'),
'url': source_url,
})
self._sort_formats(formats)
parts.append({
'id': part_id,
'title': part_title,
'thumbnail': part.get('preview_image_url'),
'duration': int_or_none(part.get('duration')),
'is_live': part.get('is_livestream'),
'formats': formats,
})
return { return {
'_type': 'multi_video', '_type': 'multi_video',
'id': video_id, 'id': video_id,
'title': video_title, 'title': video_title,
'entries': parts, 'entries': (self._process_entry(entry) for entry in video_data),
} }