[telemundo] add extractor (#327)

Closes #284
Authored by: king-millez
This commit is contained in:
king-millez 2021-05-22 21:47:49 +10:00 committed by GitHub
parent 81a23040eb
commit 13a49340ed
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 59 additions and 0 deletions

View file

@ -1292,6 +1292,7 @@ from .telebruxelles import TeleBruxellesIE
from .telecinco import TelecincoIE from .telecinco import TelecincoIE
from .telegraaf import TelegraafIE from .telegraaf import TelegraafIE
from .telemb import TeleMBIE from .telemb import TeleMBIE
from .telemundo import TelemundoIE
from .telequebec import ( from .telequebec import (
TeleQuebecIE, TeleQuebecIE,
TeleQuebecSquatIE, TeleQuebecSquatIE,

View file

@ -0,0 +1,58 @@
# coding=utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    HEADRequest,
    try_get,
    unified_timestamp,
)


class TelemundoIE(InfoExtractor):
    """Extractor for telemundo.com video pages.

    The video page embeds a ``__NEXT_DATA__`` JSON blob. The first video
    entry of the first associated playlist supplies the asset's
    ``publicUrl`` and the publication date; the ``publicUrl`` is requested
    with redirect parameters and the final URL is parsed as an m3u8
    manifest.
    """

    _VALID_URL = r'https?:\/\/(?:www\.)?telemundo\.com\/.+?video\/[^\/]+(?P<id>tmvo\d{7})'
    _TESTS = [{
        'url': 'https://www.telemundo.com/noticias/noticias-telemundo-en-la-noche/empleo/video/esta-aplicacion-gratuita-esta-ayudando-los-latinos-encontrar-trabajo-en-estados-unidos-tmvo9829325',
        'info_dict': {
            'id': 'tmvo9829325',
            'timestamp': 1621396800,
            'title': 'Esta aplicación gratuita está ayudando a los latinos a encontrar trabajo en Estados Unidos',
            'uploader': 'Telemundo',
            'uploader_id': 'NBCU_Telemundo',
            'ext': 'mp4',
            'upload_date': '20210519',
        },
        'params': {
            'skip_download': True,
        }
    }, {
        'url': 'https://www.telemundo.com/shows/al-rojo-vivo/empleo/video/personajes-de-times-square-piden-que-la-ciudad-de-nueva-york-los-deje-volver-trabajar-tmvo9816272',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for a single Telemundo video page.

        Args:
            url: Page URL matching ``_VALID_URL``.

        Returns:
            A dict with ``id``, ``title``, ``formats``, ``timestamp``,
            ``uploader`` and ``uploader_id`` (plus the page ``url``).

        Raises:
            ExtractorError: If the embedded metadata does not contain a
                ``publicUrl`` for the first video asset.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        metadata = self._parse_json(
            self._search_regex(
                r'<[^>]+id="__NEXT_DATA__"[^>]+>([^<]+)',
                webpage, 'JSON metadata'),
            video_id)

        # Both the asset URL and the publication date live under the same
        # entry, so resolve that entry once instead of repeating the path.
        video_data = try_get(
            metadata,
            lambda x: x['props']['initialState']['video']['associatedPlaylists'][0]['videos'][0],
            dict)

        redirect_url = try_get(
            video_data, lambda x: x['videoAssets'][0]['publicUrl'])
        if not redirect_url:
            # Without this guard the string concatenation below would fail
            # with a bare TypeError on None.
            raise ExtractorError(
                'Unable to find video asset URL in page metadata',
                video_id=video_id)

        # A HEAD request is enough: only the URL reached after redirects
        # is needed, not the response body.
        m3u8_url = self._request_webpage(HEADRequest(
            redirect_url + '?format=redirect&manifest=m3u&format=redirect&Tracking=true&Embedded=true&formats=MPEG4'),
            video_id, 'Processing m3u8').geturl()
        formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
        self._sort_formats(formats)

        # The first space-separated token of datePublished is dropped
        # before parsing; a missing or malformed field makes try_get
        # return None.
        date = unified_timestamp(try_get(
            video_data, lambda x: x['datePublished'].split(' ', 1)[1]))

        return {
            'url': url,
            'id': video_id,
            'title': self._search_regex(r'<h1[^>]+>([^<]+)', webpage, 'title', fatal=False),
            'formats': formats,
            'timestamp': date,
            'uploader': 'Telemundo',
            # The path segment following "video/" in the resolved manifest
            # URL is reported as the uploader id.
            'uploader_id': self._search_regex(r'https?:\/\/(?:[^/]+\/){3}video\/(?P<id>[^\/]+)', m3u8_url, 'Akamai account', fatal=False)
        }