[tvopengr] Add extractors (#2297)

Authored by: zmousm
This commit is contained in:
Zenon Mousmoulas 2022-01-19 22:43:02 +02:00 committed by GitHub
parent f7085283e1
commit 1a20d29552
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 169 additions and 0 deletions

View file

@ -1679,6 +1679,10 @@ from .tvnow import (
TVNowAnnualIE,
TVNowShowIE,
)
from .tvopengr import (
TVOpenGrWatchIE,
TVOpenGrEmbedIE,
)
from .tvp import (
TVPEmbedIE,
TVPIE,

View file

@ -139,6 +139,7 @@ from .arcpublishing import ArcPublishingIE
from .medialaan import MedialaanIE
from .simplecast import SimplecastIE
from .wimtv import WimTVIE
from .tvopengr import TVOpenGrEmbedIE
from .tvp import TVPEmbedIE
from .blogger import BloggerIE
from .mainstreaming import MainStreamingIE
@ -2227,6 +2228,22 @@ class GenericIE(InfoExtractor):
'skip_download': True,
},
},
{
# tvopengr:embed
'url': 'https://www.ethnos.gr/World/article/190604/hparosiaxekinoynoisynomiliessthgeneyhmethskiatoypolemoypanoapothnoykrania',
'md5': 'eb0c3995d0a6f18f6538c8e057865d7d',
'info_dict': {
'id': '101119',
'ext': 'mp4',
'display_id': 'oikarpoitondiapragmateyseonhparosias',
'title': 'md5:b979f4d640c568617d6547035528a149',
'description': 'md5:e54fc1977c7159b01cc11cd7d9d85550',
'timestamp': 1641772800,
'upload_date': '20220110',
'thumbnail': 'https://opentv-static.siliconweb.com/imgHandler/1920/70bc39fa-895b-4918-a364-c39d2135fc6d.jpg',
}
},
{
# blogger embed
'url': 'https://blog.tomeuvizoso.net/2019/01/a-panfrost-milestone.html',
@ -3671,6 +3688,11 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches(
rumble_urls, video_id, video_title, ie=RumbleEmbedIE.ie_key())
# Look for (tvopen|ethnos).gr embeds
tvopengr_urls = list(TVOpenGrEmbedIE._extract_urls(webpage))
if tvopengr_urls:
return self.playlist_from_matches(tvopengr_urls, video_id, video_title, ie=TVOpenGrEmbedIE.ie_key())
tvp_urls = TVPEmbedIE._extract_urls(webpage)
if tvp_urls:
return self.playlist_from_matches(tvp_urls, video_id, video_title, ie=TVPEmbedIE.ie_key())

View file

@ -0,0 +1,143 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
get_elements_text_and_html_by_attribute,
merge_dicts,
unescapeHTML,
)
class TVOpenGrBaseIE(InfoExtractor):
    """Shared helper for the tvopen.gr / ethnos.gr extractors."""

    def _return_canonical_url(self, url, video_id):
        """Delegate extraction to the page's canonical (Open Graph) URL.

        Downloads ``url``, reads its ``og:url`` and ``og:title`` values and
        returns a URL result that hands the canonical address over to
        ``TVOpenGrWatchIE``.
        """
        page = self._download_webpage(url, video_id)
        target = self._og_search_url(page)
        return self.url_result(
            target,
            ie=TVOpenGrWatchIE.ie_key(),
            video_id=video_id,
            video_title=self._og_search_title(page))
class TVOpenGrWatchIE(TVOpenGrBaseIE):
    """Extractor for ``/watch/<id>/<slug>`` pages on tvopen.gr and ethnos.gr."""

    IE_NAME = 'tvopengr:watch'
    IE_DESC = 'tvopen.gr (and ethnos.gr) videos'
    _VALID_URL = r'https?://(?P<netloc>(?:www\.)?(?:tvopen|ethnos)\.gr)/watch/(?P<id>\d+)/(?P<slug>[^/]+)'
    # Player data endpoint; queried with the video id as the ``cid`` parameter.
    _API_ENDPOINT = 'https://www.tvopen.gr/templates/data/player'
    _TESTS = [{
        'url': 'https://www.ethnos.gr/watch/101009/nikoskaprabelosdenexoymekanenanasthenhsemethmethmetallaxhomikron',
        'md5': '8728570e3a72e0f8d9475ba94859fdc1',
        'info_dict': {
            'id': '101009',
            'title': 'md5:51f68773dcb6c70498cd326f45fefdf0',
            'display_id': 'nikoskaprabelosdenexoymekanenanasthenhsemethmethmetallaxhomikron',
            'description': 'md5:78fff49f18fb3effe41b070e5c7685d6',
            'thumbnail': 'https://opentv-static.siliconweb.com/imgHandler/1920/d573ba71-ec5f-43c6-b4cb-d181f327d3a8.jpg',
            'ext': 'mp4',
            'upload_date': '20220109',
            'timestamp': 1641686400,
        },
    }, {
        'url': 'https://www.tvopen.gr/watch/100979/se28099agapaomenalla7cepeisodio267cmhthrargiapashskakias',
        'md5': '38f98a1be0c577db4ea2d1b1c0770c48',
        'info_dict': {
            'id': '100979',
            'title': 'md5:e021f3001e16088ee40fa79b20df305b',
            'display_id': 'se28099agapaomenalla7cepeisodio267cmhthrargiapashskakias',
            'description': 'md5:ba17db53954134eb8d625d199e2919fb',
            'thumbnail': 'https://opentv-static.siliconweb.com/imgHandler/1920/9bb71cf1-21da-43a9-9d65-367950fde4e3.jpg',
            'ext': 'mp4',
            'upload_date': '20220108',
            'timestamp': 1641600000,
        },
    }]

    def _extract_formats_and_subs(self, response, video_id):
        """Build the format list and subtitle map from the player API response.

        Only the ``stream``, ``httpstream`` and ``mpegdash`` entries of
        ``response`` are considered. HLS and DASH manifests are expanded
        (non-fatally); any other URL is added as a single direct format.

        Returns a ``(formats, subtitles)`` tuple with the formats sorted.
        """
        formats, subs = [], {}
        for format_id, format_url in response.items():
            if format_id not in ('stream', 'httpstream', 'mpegdash'):
                continue
            # An empty/missing URL cannot yield a usable format.
            if not format_url:
                continue
            ext = determine_ext(format_url)
            if ext == 'm3u8':
                formats_, subs_ = self._extract_m3u8_formats_and_subtitles(
                    format_url, video_id, 'mp4', m3u8_id=format_id,
                    fatal=False)
            elif ext == 'mpd':
                # Pass the id by keyword: unlike the m3u8 helper, the third
                # positional parameter of the MPD helper is the manifest id,
                # not an extension, so a positional 'mp4' mislabels the formats.
                formats_, subs_ = self._extract_mpd_formats_and_subtitles(
                    format_url, video_id, mpd_id=format_id, fatal=False)
            else:
                formats.append({
                    'url': format_url,
                    'format_id': format_id,
                })
                continue
            formats.extend(formats_)
            self._merge_subtitles(subs_, target=subs)
        self._sort_formats(formats)
        return formats, subs

    @staticmethod
    def _scale_thumbnails_to_max_width(formats, thumbnails, url_width_re):
        """Rewrite thumbnail URLs to request the largest format's width.

        The largest ``(width, height)`` pair among ``formats`` is determined
        (missing values count as 0). If no width is known the thumbnails are
        returned unchanged; otherwise every thumbnail gets its URL rewritten
        by substituting ``url_width_re`` with that width, and the dimensions
        are merged in alongside the thumbnail's remaining fields.
        """
        _keys = ('width', 'height')
        max_dimensions = max(
            (tuple(fmt.get(k) or 0 for k in _keys) for fmt in formats),
            default=(0, 0))
        if not max_dimensions[0]:
            return thumbnails
        return [
            merge_dicts(
                {'url': re.sub(url_width_re, str(max_dimensions[0]), thumbnail['url'])},
                dict(zip(_keys, max_dimensions)), thumbnail)
            for thumbnail in thumbnails
        ]

    def _real_extract(self, url):
        """Extract metadata, formats and subtitles for a watch page.

        URLs that are not on tvopen.gr are resolved to their canonical URL
        first; the page's JSON-LD ``VideoObject`` supplies the base metadata
        and the player API supplies the media URLs.
        """
        netloc, video_id, display_id = self._match_valid_url(url).group('netloc', 'id', 'slug')
        if 'tvopen.gr' not in netloc:
            return self._return_canonical_url(url, video_id)
        webpage = self._download_webpage(url, video_id)
        info = self._search_json_ld(webpage, video_id, expected_type='VideoObject')
        info['formats'], info['subtitles'] = self._extract_formats_and_subs(
            self._download_json(self._API_ENDPOINT, video_id, query={'cid': video_id}),
            video_id)
        # The JSON-LD result may carry no thumbnails; fall back to an empty list
        # instead of failing the whole extraction with a KeyError.
        info['thumbnails'] = self._scale_thumbnails_to_max_width(
            info['formats'], info.get('thumbnails') or [], r'(?<=/imgHandler/)\d+')
        # Default guards against pages without any class="description" element,
        # where a bare next() would raise StopIteration.
        description, _html = next(
            get_elements_text_and_html_by_attribute('class', 'description', webpage),
            (None, None))
        # Only a <span> description element overrides the JSON-LD description.
        if description and _html.startswith('<span '):
            info['description'] = description
        info['id'] = video_id
        info['display_id'] = display_id
        return info
class TVOpenGrEmbedIE(TVOpenGrBaseIE):
    """Extractor for tvopen.gr / ethnos.gr ``/embed/<id>`` player URLs."""

    IE_NAME = 'tvopengr:embed'
    IE_DESC = 'tvopen.gr embedded videos'
    # The dot before "gr" is escaped so that only the literal domains match
    # (an unescaped "." would also accept e.g. "tvopenXgr").
    _VALID_URL = r'(?:https?:)?//(?:www\.|cdn\.|)(?:tvopen|ethnos)\.gr/embed/(?P<id>\d+)'
    # Matches <iframe> tags whose quoted src attribute is an embed URL.
    _EMBED_RE = re.compile(rf'''<iframe[^>]+?src=(?P<_q1>["'])(?P<url>{_VALID_URL})(?P=_q1)''')
    _TESTS = [{
        'url': 'https://cdn.ethnos.gr/embed/100963',
        'md5': '2da147881f45571d81662d94d086628b',
        'info_dict': {
            'id': '100963',
            'display_id': 'koronoiosapotoysdieythyntestonsxoleionselftestgiaosoysdenbrhkan',
            'title': 'md5:2c71876fadf0cda6043da0da5fca2936',
            'description': 'md5:17482b4432e5ed30eccd93b05d6ea509',
            'thumbnail': 'https://opentv-static.siliconweb.com/imgHandler/1920/5804e07f-799a-4247-a696-33842c94ca37.jpg',
            'ext': 'mp4',
            'upload_date': '20220108',
            'timestamp': 1641600000,
        },
    }]

    @classmethod
    def _extract_urls(cls, webpage):
        """Yield the HTML-unescaped embed URL of every matching iframe in ``webpage``."""
        for mobj in cls._EMBED_RE.finditer(webpage):
            yield unescapeHTML(mobj.group('url'))

    def _real_extract(self, url):
        """Resolve the embed URL to its canonical watch page."""
        video_id = self._match_id(url)
        return self._return_canonical_url(url, video_id)