[extractor/mediastream] Improve WinSports and embed extraction (#6426)

Closes #6419, Closes #6527
Authored by: bashonly
This commit is contained in:
parent
071670cbea
commit
03025b6e10
1 changed file with 66 additions and 36 deletions
|
@ -2,16 +2,44 @@ import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
remove_end,
|
remove_end,
|
||||||
str_or_none,
|
|
||||||
strip_or_none,
|
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
urljoin,
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class MediaStreamIE(InfoExtractor):
|
class MediaStreamBaseIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://mdstrm.com/(?:embed|live-stream)/(?P<id>\w+)'
|
_EMBED_BASE_URL = 'https://mdstrm.com/embed'
|
||||||
|
_BASE_URL_RE = r'https?://mdstrm\.com/(?:embed|live-stream)'
|
||||||
|
|
||||||
|
def _extract_mediastream_urls(self, webpage):
|
||||||
|
yield from traverse_obj(list(self._yield_json_ld(webpage, None)), (
|
||||||
|
lambda _, v: v['@type'] == 'VideoObject', ('embedUrl', 'contentUrl'),
|
||||||
|
{lambda x: x if re.match(rf'{self._BASE_URL_RE}/\w+', x) else None}))
|
||||||
|
|
||||||
|
for mobj in re.finditer(r'<script[^>]+>[^>]*playerMdStream\.mdstreamVideo\(\s*[\'"](?P<video_id>\w+)', webpage):
|
||||||
|
yield f'{self._EMBED_BASE_URL}/{mobj.group("video_id")}'
|
||||||
|
|
||||||
|
yield from re.findall(
|
||||||
|
rf'<iframe[^>]+\bsrc="({self._BASE_URL_RE}/\w+)', webpage)
|
||||||
|
|
||||||
|
for mobj in re.finditer(
|
||||||
|
r'''(?x)
|
||||||
|
<(?:div|ps-mediastream)[^>]+
|
||||||
|
(class="[^"]*MediaStreamVideoPlayer)[^"]*"[^>]+
|
||||||
|
data-video-id="(?P<video_id>\w+)"
|
||||||
|
(?:\s*data-video-type="(?P<video_type>[^"]+))?
|
||||||
|
(?:[^>]*>\s*<div[^>]+\1[^"]*"[^>]+data-mediastream=["\'][^>]+
|
||||||
|
https://mdstrm\.com/(?P<live>live-stream))?
|
||||||
|
''', webpage):
|
||||||
|
|
||||||
|
video_type = 'live-stream' if mobj.group('video_type') == 'live' or mobj.group('live') else 'embed'
|
||||||
|
yield f'https://mdstrm.com/{video_type}/{mobj.group("video_id")}'
|
||||||
|
|
||||||
|
|
||||||
|
class MediaStreamIE(MediaStreamBaseIE):
|
||||||
|
_VALID_URL = MediaStreamBaseIE._BASE_URL_RE + r'/(?P<id>\w+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://mdstrm.com/embed/6318e3f1d1d316083ae48831',
|
'url': 'https://mdstrm.com/embed/6318e3f1d1d316083ae48831',
|
||||||
|
@ -23,6 +51,7 @@ class MediaStreamIE(InfoExtractor):
|
||||||
'thumbnail': r're:^https?://[^?#]+6318e3f1d1d316083ae48831',
|
'thumbnail': r're:^https?://[^?#]+6318e3f1d1d316083ae48831',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
},
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_WEBPAGE_TESTS = [{
|
_WEBPAGE_TESTS = [{
|
||||||
|
@ -35,9 +64,7 @@ class MediaStreamIE(InfoExtractor):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'live_status': 'is_live',
|
'live_status': 'is_live',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {'skip_download': 'Livestream'},
|
||||||
'skip_download': 'Livestream'
|
|
||||||
},
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.multimedios.com/television/clases-de-llaves-y-castigos-quien-sabe-mas',
|
'url': 'https://www.multimedios.com/television/clases-de-llaves-y-castigos-quien-sabe-mas',
|
||||||
'md5': 'de31f0b1ecc321fb35bf22d58734ea40',
|
'md5': 'de31f0b1ecc321fb35bf22d58734ea40',
|
||||||
|
@ -48,6 +75,7 @@ class MediaStreamIE(InfoExtractor):
|
||||||
'thumbnail': 're:^https?://[^?#]+63731bab8ec9b308a2c9ed28',
|
'thumbnail': 're:^https?://[^?#]+63731bab8ec9b308a2c9ed28',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
},
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.americatv.com.pe/videos/esto-es-guerra/facundo-gonzalez-sufrio-fuerte-golpe-durante-competencia-frente-hugo-garcia-eeg-noticia-139120',
|
'url': 'https://www.americatv.com.pe/videos/esto-es-guerra/facundo-gonzalez-sufrio-fuerte-golpe-durante-competencia-frente-hugo-garcia-eeg-noticia-139120',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -57,6 +85,7 @@ class MediaStreamIE(InfoExtractor):
|
||||||
'thumbnail': 're:^https?://[^?#]+63756df1c638b008a5659dec',
|
'thumbnail': 're:^https?://[^?#]+63756df1c638b008a5659dec',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
},
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.americatv.com.pe/videos/al-fondo-hay-sitio/nuevas-lomas-town-bernardo-mata-se-enfrento-sujeto-luchar-amor-macarena-noticia-139083',
|
'url': 'https://www.americatv.com.pe/videos/al-fondo-hay-sitio/nuevas-lomas-town-bernardo-mata-se-enfrento-sujeto-luchar-amor-macarena-noticia-139083',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -66,26 +95,12 @@ class MediaStreamIE(InfoExtractor):
|
||||||
'thumbnail': 're:^https?://[^?#]+637307669609130f74cd3a6e',
|
'thumbnail': 're:^https?://[^?#]+637307669609130f74cd3a6e',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
},
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@classmethod
|
def _extract_from_webpage(self, url, webpage):
|
||||||
def _extract_embed_urls(cls, url, webpage):
|
for embed_url in self._extract_mediastream_urls(webpage):
|
||||||
for mobj in re.finditer(r'<script[^>]+>[^>]*playerMdStream.mdstreamVideo\(\s*[\'"](?P<video_id>\w+)', webpage):
|
yield self.url_result(embed_url, MediaStreamIE, None)
|
||||||
yield f'https://mdstrm.com/embed/{mobj.group("video_id")}'
|
|
||||||
|
|
||||||
yield from re.findall(
|
|
||||||
r'<iframe[^>]src\s*=\s*"(https://mdstrm.com/[\w-]+/\w+)', webpage)
|
|
||||||
|
|
||||||
for mobj in re.finditer(
|
|
||||||
r'''(?x)
|
|
||||||
<(?:div|ps-mediastream)[^>]+
|
|
||||||
class\s*=\s*"[^"]*MediaStreamVideoPlayer[^"]*"[^>]+
|
|
||||||
data-video-id\s*=\s*"(?P<video_id>\w+)\s*"
|
|
||||||
(?:\s*data-video-type\s*=\s*"(?P<video_type>[^"]+))?
|
|
||||||
''', webpage):
|
|
||||||
|
|
||||||
video_type = 'live-stream' if mobj.group('video_type') == 'live' else 'embed'
|
|
||||||
yield f'https://mdstrm.com/{video_type}/{mobj.group("video_id")}'
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
@ -94,7 +109,7 @@ class MediaStreamIE(InfoExtractor):
|
||||||
if 'Debido a tu ubicación no puedes ver el contenido' in webpage:
|
if 'Debido a tu ubicación no puedes ver el contenido' in webpage:
|
||||||
self.raise_geo_restricted()
|
self.raise_geo_restricted()
|
||||||
|
|
||||||
player_config = self._search_json(r'window.MDSTRM.OPTIONS\s*=', webpage, 'metadata', video_id)
|
player_config = self._search_json(r'window\.MDSTRM\.OPTIONS\s*=', webpage, 'metadata', video_id)
|
||||||
|
|
||||||
formats, subtitles = [], {}
|
formats, subtitles = [], {}
|
||||||
for video_format in player_config['src']:
|
for video_format in player_config['src']:
|
||||||
|
@ -122,7 +137,7 @@ class MediaStreamIE(InfoExtractor):
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class WinSportsVideoIE(InfoExtractor):
|
class WinSportsVideoIE(MediaStreamBaseIE):
|
||||||
_VALID_URL = r'https?://www\.winsports\.co/videos/(?P<id>[\w-]+)'
|
_VALID_URL = r'https?://www\.winsports\.co/videos/(?P<id>[\w-]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
@ -158,21 +173,36 @@ class WinSportsVideoIE(InfoExtractor):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
},
|
},
|
||||||
'params': {'skip_download': 'm3u8'},
|
'params': {'skip_download': 'm3u8'},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.winsports.co/videos/bucaramanga-se-quedo-con-el-grito-de-gol-en-la-garganta',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6402adb62bbf3b18d454e1b0',
|
||||||
|
'display_id': 'bucaramanga-se-quedo-con-el-grito-de-gol-en-la-garganta',
|
||||||
|
'title': '⚽Bucaramanga se quedó con el grito de gol en la garganta',
|
||||||
|
'description': 'Gol anulado Bucaramanga',
|
||||||
|
'thumbnail': r're:^https?://[^?#]+6402adb62bbf3b18d454e1b0',
|
||||||
|
'ext': 'mp4',
|
||||||
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
json_ld = self._search_json_ld(webpage, display_id, expected_type='VideoObject', default={})
|
data = self._search_json(
|
||||||
media_setting_json = self._search_json(
|
r'<script\s*[^>]+data-drupal-selector="drupal-settings-json">', webpage, 'data', display_id)
|
||||||
r'<script\s*[^>]+data-drupal-selector="drupal-settings-json">', webpage, 'drupal-setting-json', display_id)
|
|
||||||
|
|
||||||
mediastream_id = traverse_obj(
|
mediastream_url = urljoin(f'{self._EMBED_BASE_URL}/', (
|
||||||
media_setting_json, ('settings', 'mediastream_formatter', ..., 'mediastream_id', {str_or_none}),
|
traverse_obj(data, (
|
||||||
get_all=False) or json_ld.get('url')
|
(('settings', 'mediastream_formatter', ..., 'mediastream_id'), 'url'), {str}), get_all=False)
|
||||||
if not mediastream_id:
|
or next(self._extract_mediastream_urls(webpage), None)))
|
||||||
|
|
||||||
|
if not mediastream_url:
|
||||||
self.raise_no_formats('No MediaStream embed found in webpage')
|
self.raise_no_formats('No MediaStream embed found in webpage')
|
||||||
|
|
||||||
|
title = clean_html(remove_end(
|
||||||
|
self._search_json_ld(webpage, display_id, expected_type='VideoObject', default={}).get('title')
|
||||||
|
or self._og_search_title(webpage), '| Win Sports'))
|
||||||
|
|
||||||
return self.url_result(
|
return self.url_result(
|
||||||
urljoin('https://mdstrm.com/embed/', mediastream_id), MediaStreamIE, display_id, url_transparent=True,
|
mediastream_url, MediaStreamIE, display_id, url_transparent=True, display_id=display_id, video_title=title)
|
||||||
display_id=display_id, video_title=strip_or_none(remove_end(json_ld.get('title'), '| Win Sports')))
|
|
||||||
|
|
Loading…
Reference in a new issue