parent
3f771f75d7
commit
c0599d4fe4
2 changed files with 181 additions and 0 deletions
|
@ -1788,6 +1788,10 @@ from .wistia import (
|
|||
WistiaPlaylistIE,
|
||||
)
|
||||
from .worldstarhiphop import WorldStarHipHopIE
|
||||
from .wppilot import (
|
||||
WPPilotIE,
|
||||
WPPilotChannelsIE,
|
||||
)
|
||||
from .wsj import (
|
||||
WSJIE,
|
||||
WSJArticleIE,
|
||||
|
|
177
yt_dlp/extractor/wppilot.py
Normal file
177
yt_dlp/extractor/wppilot.py
Normal file
|
@ -0,0 +1,177 @@
|
|||
# coding: utf-8
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
try_get,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
|
||||
|
||||
class WPPilotBaseIE(InfoExtractor):
    """Shared helpers for the WP Pilot (pilot.wp.pl) extractors."""

    # Stream API endpoints; both are %-formatted with the channel id.
    _VIDEO_URL = 'https://pilot.wp.pl/api/v1/channel/%s'
    _VIDEO_GUEST_URL = 'https://pilot.wp.pl/api/v1/guest/channel/%s'

    # Headers sent along with the API requests.
    _HEADERS_WEB = {
        'Content-Type': 'application/json; charset=UTF-8',
        'Referer': 'https://pilot.wp.pl/tv/',
    }

    def _get_channel_list(self, cache=True):
        """Return a ``(channel_list, from_cache)`` tuple.

        When ``cache`` is True and the downloader cache holds a non-empty
        channel list, that list is returned with ``from_cache`` set to True.
        Otherwise the list is looked up in the static query results
        referenced by the site's page data, stored in the cache and returned
        with ``from_cache`` set to False.

        Raises ExtractorError if none of the static queries contains the
        channel list.
        """
        if cache is True:
            cached_channels = self._downloader.cache.load('wppilot', 'channel-list')
            if cached_channels:
                return cached_channels, True

        tv_page = self._download_webpage(
            'https://pilot.wp.pl/tv/', None, 'Downloading webpage')
        build_url = self._search_regex(
            r'<script src="(https://wp-pilot-gatsby\.wpcdn\.pl/v[\d.-]+/desktop)',
            tv_page, 'gatsby build version')
        page_data_base_url = build_url + '/page-data'
        page_data = self._download_json(
            f'{page_data_base_url}/tv/page-data.json', None, 'Downloading page data')

        # The hash of the query holding the channels is not known up front,
        # so every static query is fetched until one exposes the channel nodes.
        for query_hash in page_data['staticQueryHashes']:
            query_result = self._download_json(
                f'{page_data_base_url}/sq/d/{query_hash}.json', None,
                'Searching for channel list')
            nodes = try_get(query_result, lambda x: x['data']['allChannels']['nodes'])
            if nodes is not None:
                self._downloader.cache.store('wppilot', 'channel-list', nodes)
                return nodes, False

        raise ExtractorError('Unable to find the channel list')

    def _parse_channel(self, chan):
        """Convert a channel entry from the channel list into an info dict."""
        info = {
            'id': str(chan['id']),
            'title': chan['name'],
            'is_live': True,
        }
        # Only image fields that are present and non-empty become thumbnails.
        thumbnails = []
        for field in ('thumbnail', 'thumbnail_mobile', 'icon'):
            if chan.get(field):
                thumbnails.append({
                    'id': field,
                    'url': chan[field],
                })
        info['thumbnails'] = thumbnails
        return info
|
||||
|
||||
|
||||
class WPPilotIE(WPPilotBaseIE):
    """Extractor for a single WP Pilot live channel.

    Accepts ``https://pilot.wp.pl/tv/#<slug>`` URLs as well as the internal
    ``wppilot:<id or slug>`` form.
    """

    _VALID_URL = r'(?:https?://pilot\.wp\.pl/tv/?#|wppilot:)(?P<id>[a-z\d-]+)'
    IE_NAME = 'wppilot'

    _TESTS = [{
        'url': 'https://pilot.wp.pl/tv/#telewizja-wp-hd',
        'info_dict': {
            'id': '158',
            'ext': 'mp4',
            'title': 'Telewizja WP HD',
        },
        'params': {
            'format': 'bestvideo',
        },
    }, {
        # audio only
        'url': 'https://pilot.wp.pl/tv/#radio-nowy-swiat',
        'info_dict': {
            'id': '238',
            'ext': 'm4a',
            'title': 'Radio Nowy Świat',
        },
        'params': {
            'format': 'bestaudio',
        },
    }, {
        'url': 'wppilot:9',
        'only_matching': True,
    }]

    def _find_channel(self, channels, key, value):
        """Return the first entry of ``channels`` whose ``key`` equals ``value``.

        Values are compared as strings because ``value`` always comes from the
        URL (a string) while the channel list may carry non-string ids.
        Returns None when no entry matches.
        """
        for chan in channels:
            candidate = chan.get(key)
            if candidate is not None and str(candidate) == value:
                return chan
        return None

    def _get_channel(self, id_or_slug):
        """Return the parsed info dict of the channel with the given id or slug.

        A purely numeric ``id_or_slug`` is matched against the ``id`` field,
        anything else against ``slug``. If the channel is missing from a
        cached list, the list is downloaded again and searched once more.

        Raises ExtractorError if the channel cannot be found.
        """
        key = 'id' if re.match(r'^\d+$', id_or_slug) else 'slug'

        channels, is_cached = self._get_channel_list(cache=True)
        chan = self._find_channel(channels, key, id_or_slug)

        # if cached channel not found, download and retry
        if chan is None and is_cached:
            channels, _ = self._get_channel_list(cache=False)
            chan = self._find_channel(channels, key, id_or_slug)

        if chan is None:
            raise ExtractorError('Channel not found')
        return self._parse_channel(chan)

    def _real_extract(self, url):
        """Extract the HLS formats of the live channel referenced by ``url``.

        Returns the channel info dict with ``formats`` filled in, or a URL
        result pointing back at this extractor after a previous stream session
        has been invalidated.

        Raises ExtractorError if the channel is unknown or the API response
        contains no stream list.
        """
        channel = self._get_channel(self._match_id(url))
        video_id = str(channel['id'])

        session_cookie = next(
            (c for c in self._downloader.cookiejar if c.name == 'netviapisessid'), None)
        # cookies starting with "g:" are assigned to guests
        is_authorized = (
            session_cookie is not None
            and not session_cookie.value.startswith('g:'))

        video = self._download_json(
            (self._VIDEO_URL if is_authorized else self._VIDEO_GUEST_URL) % video_id,
            video_id, query={
                'device_type': 'web',
            }, headers=self._HEADERS_WEB,
            expected_status=(200, 422))

        # An error response carrying a stream token is answered by closing
        # that session and then restarting the extraction from scratch.
        stream_token = try_get(video, lambda x: x['_meta']['error']['info']['stream_token'])
        if stream_token:
            close = self._download_json(
                'https://pilot.wp.pl/api/v1/channels/close', video_id,
                'Invalidating previous stream session', headers=self._HEADERS_WEB,
                data=json.dumps({
                    'channelId': video_id,
                    't': stream_token,
                }).encode('utf-8'))
            if try_get(close, lambda x: x['data']['status']) == 'ok':
                return self.url_result(url, ie=WPPilotIE.ie_key())

        # HTTP 422 responses are accepted above, so the payload may be an
        # error object without any stream data; report that explicitly
        # instead of failing with a bare KeyError/TypeError.
        streams = try_get(video, lambda x: x['data']['stream_channel']['streams'], list)
        if streams is None:
            message = 'Unable to find the stream list'
            api_error = try_get(video, lambda x: x['_meta']['error'])
            if api_error:
                message += f' (API error: {api_error!r})'
            raise ExtractorError(message)

        formats = []
        for fmt in streams:
            # Live DASH ("dash@live:abr") does not work for now, so only the
            # HLS variant is extracted. Entries without any URL are skipped.
            if fmt.get('type') != 'hls@live:abr' or not fmt.get('url'):
                continue
            formats.extend(
                self._extract_m3u8_formats(
                    random.choice(fmt['url']),
                    video_id, live=True))

        self._sort_formats(formats)

        channel['formats'] = formats
        return channel
|
||||
|
||||
|
||||
class WPPilotChannelsIE(WPPilotBaseIE):
    """Playlist extractor listing every channel from the WP Pilot channel list."""

    _VALID_URL = r'(?:https?://pilot\.wp\.pl/(?:tv/?)?(?:\?[^#]*)?#?|wppilot:)$'
    IE_NAME = 'wppilot:channels'

    _TESTS = [{
        'url': 'wppilot:',
        'info_dict': {
            'id': 'wppilot',
            'title': 'WP Pilot',
        },
        'playlist_mincount': 100,
    }, {
        'url': 'https://pilot.wp.pl/',
        'only_matching': True,
    }]

    def _entries(self):
        """Yield one ``url_transparent`` entry per channel in the channel list.

        Each entry carries the parsed channel metadata and points at
        ``wppilot:<id>`` so that WPPilotIE handles the actual extraction.
        """
        channels, _from_cache = self._get_channel_list()
        for channel in channels:
            yield {
                **self._parse_channel(channel),
                '_type': 'url_transparent',
                'url': f'wppilot:{channel["id"]}',
                'ie_key': WPPilotIE.ie_key(),
            }

    def _real_extract(self, url):
        """Return a playlist of all channels; ``url`` itself is not inspected."""
        entries = self._entries()
        return self.playlist_result(entries, 'wppilot', 'WP Pilot')
|
Loading…
Reference in a new issue