[TrovoLive] Add extractor (partially fix #20)

Only the VOD extractor has been implemented

Related: https://github.com/ytdl-org/youtube-dl/issues/26125
Related: https://github.com/blackjack4494/yt-dlc/issues/220
This commit is contained in:
pukkandan 2021-01-20 00:35:50 +05:30
parent 8a51f56439
commit 5c610515c9
3 changed files with 117 additions and 1 deletions

View file

@ -1265,6 +1265,7 @@ from .toutv import TouTvIE
from .toypics import ToypicsUserIE, ToypicsIE from .toypics import ToypicsUserIE, ToypicsIE
from .traileraddict import TrailerAddictIE from .traileraddict import TrailerAddictIE
from .trilulilu import TriluliluIE from .trilulilu import TriluliluIE
from .trovolive import TrovoLiveIE
from .trunews import TruNewsIE from .trunews import TruNewsIE
from .trutv import TruTVIE from .trutv import TruTVIE
from .tube8 import Tube8IE from .tube8 import Tube8IE

View file

@ -0,0 +1,111 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
js_to_json,
try_get,
int_or_none,
str_or_none,
url_or_none,
)
from ..compat import compat_str
class TrovoLiveIE(InfoExtractor):
    """Information extractor for trovo.live VOD pages (``/video/<id>`` URLs)."""

    _VALID_URL = r'https?://(?:www\.)?trovo\.live/video/(?P<id>[\w-]+)'
    _TEST = {
        'url': 'https://trovo.live/video/ltv-100759829_100759829_1610625308',
        'md5': 'ea7b58427910e9af66a462d895201a30',
        'info_dict': {
            'id': 'ltv-100759829_100759829_1610625308',
            'ext': 'ts',
            'title': 'GTA RP ASTERIX doa najjaca',
            'uploader': 'Peroo42',
            'duration': 5872,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
            'categories': list,
            'is_live': False,
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader_id': '100759829',
        }
    }

    def _real_extract(self, url):
        """Download the VOD page at ``url`` and build its info dict.

        Metadata is read from the ``VodDetailInfos`` object embedded in the
        page when the ``window.__NUXT__`` state can be parsed; otherwise the
        title, uploader and formats fall back to the HTML and to the
        ``_playerInfo`` object.

        Returns a dict with ``id``, ``title``, ``formats`` and the optional
        metadata fields (``None`` where the page does not provide a value).
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The state is matched as a function taking `arg_names` whose body
        # returns an object literal, and which is called with `args`.
        nuxt = self._search_regex(r'\bwindow\.__NUXT__\s*=\s*(.+?);?\s*</script>', webpage, 'nuxt', default='')
        mobj = re.search(r'\((?P<arg_names>[^(]+)\)\s*{\s*return\s+(?P<json>{.+})\s*\((?P<args>.+?)\)\s*\)$', nuxt)

        vod_details = vod_info = {}
        if mobj:
            # Pair each parameter name with the value it was called with so
            # js_to_json can substitute the variables used in the literal.
            js_vars = dict(zip(
                (i.strip() for i in mobj.group('arg_names').split(',')),
                (i.strip() for i in mobj.group('args').split(','))))
            vod_details = self._parse_json(
                js_to_json(
                    self._search_regex(r'VodDetailInfos\s*:({.+?}),\s*_', webpage, 'VodDetailInfos'),
                    js_vars),
                video_id)
            vod_info = try_get(vod_details, lambda x: x['json'][video_id]['vodInfo'], dict) or {}

        player_info = self._parse_json(
            self._search_regex(
                r'_playerInfo\s*=\s*({.+?})\s*</script>', webpage, 'player info'),
            video_id)

        title = (
            vod_info.get('title')
            or self._html_search_regex(r'<h3>(.+?)</h3>', webpage, 'title', fatal=False)
            or self._og_search_title(webpage))
        uploader = (
            try_get(vod_details, lambda x: x['json'][video_id]['streamerInfo']['userName'], compat_str)
            or self._search_regex(r'<div[^>]+userName\s=\s[\'"](.+?)[\'"]', webpage, 'uploader', fatal=False))

        format_dicts = vod_info.get('playInfos') or player_info.get('urlArray') or []

        def _extract_format_data(format_dict):
            """Convert one entry of the site's format list into a format dict."""
            res = format_dict.get('desc')

            # `encodeType` and `levelType` are both optional (read via .get),
            # so the list must exist even when neither is present.
            notes = []
            enc = str_or_none(format_dict.get('encodeType'))
            if enc:
                notes.append(enc.replace('VOD_ENCODE_TYPE_', ''))
            level = str_or_none(format_dict.get('levelType'))
            if level:
                notes.append('level %s' % level)
            note = ' '.join(notes) or None

            # Drops the last character of `desc` before parsing the height;
            # presumably values look like '720p' - TODO confirm.
            height = int_or_none(res[:-1]) if res else None
            bitrate = format_dict.get('bitrate')
            fid = res or ('%sk' % str_or_none(bitrate) if bitrate else None) or note
            return {
                'url': format_dict['playUrl'],
                'format_id': fid,
                'format_note': note,
                'height': height,
                'resolution': str_or_none(res),
                'tbr': int_or_none(bitrate),
                'filesize': int_or_none(format_dict.get('fileSize')),
                'vcodec': 'avc3',
                'acodec': 'aac',
                'ext': 'ts'
            }

        formats = [_extract_format_data(f) for f in format_dicts]
        self._sort_formats(formats)

        # Report no categories at all rather than a list holding None.
        category = str_or_none(vod_info.get('categoryName'))

        return {
            'id': video_id,
            'title': title,
            'uploader': uploader,
            'duration': int_or_none(vod_info.get('duration')),
            'formats': formats,
            'view_count': int_or_none(vod_info.get('watchNum')),
            'like_count': int_or_none(vod_info.get('likeNum')),
            'comment_count': int_or_none(vod_info.get('commentNum')),
            'categories': [category] if category else None,
            'is_live': try_get(player_info, lambda x: x['isLive'], bool),
            'thumbnail': url_or_none(vod_info.get('coverUrl')),
            'uploader_id': str_or_none(try_get(vod_details, lambda x: x['json'][video_id]['streamerInfo']['uid'])),
        }

View file

@ -4099,7 +4099,8 @@ def strip_jsonp(code):
r'\g<callback_data>', code) r'\g<callback_data>', code)
def js_to_json(code): def js_to_json(code, vars={}):
# vars is a dict of var, val pairs to substitute
COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*' COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE) SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
INTEGER_TABLE = ( INTEGER_TABLE = (
@ -4128,6 +4129,9 @@ def js_to_json(code):
i = int(im.group(1), base) i = int(im.group(1), base)
return '"%d":' % i if v.endswith(':') else '%d' % i return '"%d":' % i if v.endswith(':') else '%d' % i
if v in vars:
return vars[v]
return '"%s"' % v return '"%s"' % v
return re.sub(r'''(?sx) return re.sub(r'''(?sx)