[tv5mondeplus] Fix extractor (#739)

Authored by: korli
This commit is contained in:
Jérôme Duval 2021-08-20 22:34:51 +02:00 committed by GitHub
parent 45b0596290
commit f79ec47d71
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@@ -7,6 +7,7 @@ from ..utils import (
extract_attributes, extract_attributes,
int_or_none, int_or_none,
parse_duration, parse_duration,
try_get,
) )
@@ -15,28 +16,28 @@ class TV5MondePlusIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:tv5mondeplus|revoir\.tv5monde)\.com/toutes-les-videos/[^/]+/(?P<id>[^/?#]+)' _VALID_URL = r'https?://(?:www\.)?(?:tv5mondeplus|revoir\.tv5monde)\.com/toutes-les-videos/[^/]+/(?P<id>[^/?#]+)'
_TESTS = [{ _TESTS = [{
# movie # movie
'url': 'https://revoir.tv5monde.com/toutes-les-videos/cinema/rendez-vous-a-atlit', 'url': 'https://revoir.tv5monde.com/toutes-les-videos/cinema/ceux-qui-travaillent',
'md5': '8cbde5ea7b296cf635073e27895e227f', 'md5': '32fa0cde16a4480d1251502a66856d5f',
'info_dict': { 'info_dict': {
'id': '822a4756-0712-7329-1859-a13ac7fd1407', 'id': 'dc57a011-ec4b-4648-2a9a-4f03f8352ed3',
'display_id': 'rendez-vous-a-atlit', 'display_id': 'ceux-qui-travaillent',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Rendez-vous à Atlit', 'title': 'Ceux qui travaillent',
'description': 'md5:2893a4c5e1dbac3eedff2d87956e4efb', 'description': 'md5:570e8bb688036ace873b2d50d24c026d',
'upload_date': '20200130', 'upload_date': '20210819',
}, },
}, { }, {
# series episode # series episode
'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/c-est-la-vie-ennemie-juree', 'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/vestiaires-caro-actrice',
'info_dict': { 'info_dict': {
'id': '0df7007c-4900-3936-c601-87a13a93a068', 'id': '9e9d599e-23af-6915-843e-ecbf62e97925',
'display_id': 'c-est-la-vie-ennemie-juree', 'display_id': 'vestiaires-caro-actrice',
'ext': 'mp4', 'ext': 'mp4',
'title': "C'est la vie - Ennemie jurée", 'title': "Vestiaires - Caro actrice",
'description': 'md5:dfb5c63087b6f35fe0cc0af4fe44287e', 'description': 'md5:db15d2e1976641e08377f942778058ea',
'upload_date': '20200130', 'upload_date': '20210819',
'series': "C'est la vie", 'series': "Vestiaires",
'episode': 'Ennemie jurée', 'episode': 'Caro actrice',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@@ -63,7 +64,7 @@ class TV5MondePlusIE(InfoExtractor):
webpage, 'video player loader')) webpage, 'video player loader'))
video_files = self._parse_json( video_files = self._parse_json(
vpl_data['data-broadcast'], display_id).get('files', []) vpl_data['data-broadcast'], display_id)
formats = [] formats = []
for video_file in video_files: for video_file in video_files:
v_url = video_file.get('url') v_url = video_file.get('url')
@@ -81,6 +82,11 @@ class TV5MondePlusIE(InfoExtractor):
}) })
self._sort_formats(formats) self._sort_formats(formats)
metadata = self._parse_json(
vpl_data['data-metadata'], display_id)
duration = (int_or_none(try_get(metadata, lambda x: x['content']['duration']))
or parse_duration(self._html_search_meta('duration', webpage)))
description = self._html_search_regex( description = self._html_search_regex(
r'(?s)<div[^>]+class=["\']episode-texte[^>]+>(.+?)</div>', webpage, r'(?s)<div[^>]+class=["\']episode-texte[^>]+>(.+?)</div>', webpage,
'description', fatal=False) 'description', fatal=False)
@@ -109,7 +115,7 @@ class TV5MondePlusIE(InfoExtractor):
'title': title, 'title': title,
'description': description, 'description': description,
'thumbnail': vpl_data.get('data-image'), 'thumbnail': vpl_data.get('data-image'),
'duration': int_or_none(vpl_data.get('data-duration')) or parse_duration(self._html_search_meta('duration', webpage)), 'duration': duration,
'upload_date': upload_date, 'upload_date': upload_date,
'formats': formats, 'formats': formats,
'series': series, 'series': series,