#49 [niconico] Improved extraction and support encrypted/SMILE movies

Co-authored-by: tsukumijima <tsukumijima@users.noreply.github.com>
Co-authored-by: tsukumi <39271166+tsukumijima@users.noreply.github.com>
Co-authored-by: Bepis <36346617+bbepis@users.noreply.github.com>
Co-authored-by: pukkandan <pukkandan@gmail.com>
This commit is contained in:
kurumigi 2021-02-10 15:45:20 +09:00 committed by GitHub
parent 8d801631cf
commit fb198a8a9c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 338 additions and 168 deletions

View file

@ -2681,7 +2681,7 @@ class YoutubeDL(object):
'|', '|',
format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes), format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
format_field(f, 'tbr', '%4dk'), format_field(f, 'tbr', '%4dk'),
f.get('protocol').replace('http_dash_segments', 'dash').replace("native", "n"), f.get('protocol').replace('http_dash_segments', 'dash').replace("native", "n").replace('niconico_', ''),
'|', '|',
format_field(f, 'vcodec', default='unknown').replace('none', ''), format_field(f, 'vcodec', default='unknown').replace('none', ''),
format_field(f, 'vbr', '%4dk'), format_field(f, 'vbr', '%4dk'),

View file

@ -21,6 +21,7 @@ from .http import HttpFD
from .rtmp import RtmpFD from .rtmp import RtmpFD
from .rtsp import RtspFD from .rtsp import RtspFD
from .ism import IsmFD from .ism import IsmFD
from .niconico import NiconicoDmcFD
from .youtube_live_chat import YoutubeLiveChatReplayFD from .youtube_live_chat import YoutubeLiveChatReplayFD
from .external import ( from .external import (
get_external_downloader, get_external_downloader,
@ -36,6 +37,7 @@ PROTOCOL_MAP = {
'f4m': F4mFD, 'f4m': F4mFD,
'http_dash_segments': DashSegmentsFD, 'http_dash_segments': DashSegmentsFD,
'ism': IsmFD, 'ism': IsmFD,
'niconico_dmc': NiconicoDmcFD,
'youtube_live_chat_replay': YoutubeLiveChatReplayFD, 'youtube_live_chat_replay': YoutubeLiveChatReplayFD,
} }

View file

@ -0,0 +1,54 @@
# coding: utf-8
from __future__ import unicode_literals
import threading
from .common import FileDownloader
from ..downloader import _get_real_downloader
from ..extractor.niconico import NiconicoIE
from ..compat import compat_urllib_request
class NiconicoDmcFD(FileDownloader):
""" Downloading niconico douga from DMC with heartbeat """
FD_NAME = 'niconico_dmc'
def real_download(self, filename, info_dict):
self.to_screen('[%s] Downloading from DMC' % self.FD_NAME)
ie = NiconicoIE(self.ydl)
info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict)
fd = _get_real_downloader(info_dict, params=self.params)(self.ydl, self.params)
success = download_complete = False
timer = [None]
heartbeat_lock = threading.Lock()
heartbeat_url = heartbeat_info_dict['url']
heartbeat_data = heartbeat_info_dict['data']
heartbeat_interval = heartbeat_info_dict.get('interval', 30)
self.to_screen('[%s] Heartbeat with %s second interval...' % (self.FD_NAME, heartbeat_interval))
def heartbeat():
try:
compat_urllib_request.urlopen(url=heartbeat_url, data=heartbeat_data.encode())
except Exception:
self.to_screen('[%s] Heartbeat failed' % self.FD_NAME)
with heartbeat_lock:
if not download_complete:
timer[0] = threading.Timer(heartbeat_interval, heartbeat)
timer[0].start()
try:
heartbeat()
success = fd.real_download(filename, info_dict)
finally:
if heartbeat_lock:
with heartbeat_lock:
timer[0].cancel()
download_complete = True
return success

View file

@ -1,25 +1,25 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import datetime import re
import functools
import json import json
import math import datetime
from .common import InfoExtractor from .common import InfoExtractor
from ..postprocessor.ffmpeg import FFmpegPostProcessor
from ..compat import ( from ..compat import (
compat_parse_qs, compat_parse_qs,
compat_urllib_parse_urlparse, compat_urllib_parse_urlparse,
) )
from ..utils import ( from ..utils import (
determine_ext,
dict_get, dict_get,
ExtractorError, ExtractorError,
float_or_none,
InAdvancePagedList,
int_or_none, int_or_none,
float_or_none,
OnDemandPagedList,
parse_duration, parse_duration,
parse_iso8601, parse_iso8601,
PostProcessingError,
remove_start, remove_start,
try_get, try_get,
unified_timestamp, unified_timestamp,
@ -191,37 +191,87 @@ class NiconicoIE(InfoExtractor):
self._downloader.report_warning('unable to log in: bad username or password') self._downloader.report_warning('unable to log in: bad username or password')
return login_ok return login_ok
def _extract_format_for_quality(self, api_data, video_id, audio_quality, video_quality): def _get_heartbeat_info(self, info_dict):
def yesno(boolean):
return 'yes' if boolean else 'no'
session_api_data = api_data['video']['dmcInfo']['session_api'] video_id, video_src_id, audio_src_id = info_dict['url'].split(':')[1].split('/')
session_api_endpoint = session_api_data['urls'][0]
format_id = '-'.join(map(lambda s: remove_start(s['id'], 'archive_'), [video_quality, audio_quality])) # Get video webpage for API data.
webpage, handle = self._download_webpage_handle(
'http://www.nicovideo.jp/watch/' + video_id, video_id)
api_data = self._parse_json(self._html_search_regex(
'data-api-data="([^"]+)"', webpage,
'API data', default='{}'), video_id)
session_api_data = try_get(api_data, lambda x: x['video']['dmcInfo']['session_api'])
session_api_endpoint = try_get(session_api_data, lambda x: x['urls'][0])
# ping
self._download_json(
'https://nvapi.nicovideo.jp/v1/2ab0cbaa/watch', video_id,
query={'t': try_get(api_data, lambda x: x['video']['dmcInfo']['tracking_id'])},
headers={
'Origin': 'https://www.nicovideo.jp',
'Referer': 'https://www.nicovideo.jp/watch/' + video_id,
'X-Frontend-Id': '6',
'X-Frontend-Version': '0'
})
yesno = lambda x: 'yes' if x else 'no'
# m3u8 (encryption)
if 'encryption' in try_get(api_data, lambda x: x['video']['dmcInfo']) or {}:
protocol = 'm3u8'
session_api_http_parameters = {
'parameters': {
'hls_parameters': {
'encryption': {
'hls_encryption_v1': {
'encrypted_key': try_get(api_data, lambda x: x['video']['dmcInfo']['encryption']['hls_encryption_v1']['encrypted_key']),
'key_uri': try_get(api_data, lambda x: x['video']['dmcInfo']['encryption']['hls_encryption_v1']['key_uri'])
}
},
'transfer_preset': '',
'use_ssl': yesno(session_api_endpoint['is_ssl']),
'use_well_known_port': yesno(session_api_endpoint['is_well_known_port']),
'segment_duration': 6000
}
}
}
# http
else:
protocol = 'http'
session_api_http_parameters = {
'parameters': {
'http_output_download_parameters': {
'use_ssl': yesno(session_api_endpoint['is_ssl']),
'use_well_known_port': yesno(session_api_endpoint['is_well_known_port']),
}
}
}
session_response = self._download_json( session_response = self._download_json(
session_api_endpoint['url'], video_id, session_api_endpoint['url'], video_id,
query={'_format': 'json'}, query={'_format': 'json'},
headers={'Content-Type': 'application/json'}, headers={'Content-Type': 'application/json'},
note='Downloading JSON metadata for %s' % format_id, note='Downloading JSON metadata for %s' % info_dict['format_id'],
data=json.dumps({ data=json.dumps({
'session': { 'session': {
'client_info': { 'client_info': {
'player_id': session_api_data['player_id'], 'player_id': session_api_data.get('player_id'),
}, },
'content_auth': { 'content_auth': {
'auth_type': session_api_data['auth_types'][session_api_data['protocols'][0]], 'auth_type': try_get(session_api_data, lambda x: x['auth_types'][session_api_data['protocols'][0]]),
'content_key_timeout': session_api_data['content_key_timeout'], 'content_key_timeout': session_api_data.get('content_key_timeout'),
'service_id': 'nicovideo', 'service_id': 'nicovideo',
'service_user_id': session_api_data['service_user_id'] 'service_user_id': session_api_data.get('service_user_id')
}, },
'content_id': session_api_data['content_id'], 'content_id': session_api_data.get('content_id'),
'content_src_id_sets': [{ 'content_src_id_sets': [{
'content_src_ids': [{ 'content_src_ids': [{
'src_id_to_mux': { 'src_id_to_mux': {
'audio_src_ids': [audio_quality['id']], 'audio_src_ids': [audio_src_id],
'video_src_ids': [video_quality['id']], 'video_src_ids': [video_src_id],
} }
}] }]
}], }],
@ -229,52 +279,78 @@ class NiconicoIE(InfoExtractor):
'content_uri': '', 'content_uri': '',
'keep_method': { 'keep_method': {
'heartbeat': { 'heartbeat': {
'lifetime': session_api_data['heartbeat_lifetime'] 'lifetime': session_api_data.get('heartbeat_lifetime')
} }
}, },
'priority': session_api_data['priority'], 'priority': session_api_data.get('priority'),
'protocol': { 'protocol': {
'name': 'http', 'name': 'http',
'parameters': { 'parameters': {
'http_parameters': { 'http_parameters': session_api_http_parameters
'parameters': {
'http_output_download_parameters': {
'use_ssl': yesno(session_api_endpoint['is_ssl']),
'use_well_known_port': yesno(session_api_endpoint['is_well_known_port']),
}
}
}
} }
}, },
'recipe_id': session_api_data['recipe_id'], 'recipe_id': session_api_data.get('recipe_id'),
'session_operation_auth': { 'session_operation_auth': {
'session_operation_auth_by_signature': { 'session_operation_auth_by_signature': {
'signature': session_api_data['signature'], 'signature': session_api_data.get('signature'),
'token': session_api_data['token'], 'token': session_api_data.get('token'),
} }
}, },
'timing_constraint': 'unlimited' 'timing_constraint': 'unlimited'
} }
}).encode()) }).encode())
resolution = video_quality.get('resolution', {}) info_dict['url'] = session_response['data']['session']['content_uri']
info_dict['protocol'] = protocol
# get heartbeat info
heartbeat_info_dict = {
'url': session_api_endpoint['url'] + '/' + session_response['data']['session']['id'] + '?_format=json&_method=PUT',
'data': json.dumps(session_response['data']),
# interval, convert milliseconds to seconds, then halve to make a buffer.
'interval': float_or_none(session_api_data.get('heartbeat_lifetime'), scale=2000),
}
return info_dict, heartbeat_info_dict
def _extract_format_for_quality(self, api_data, video_id, audio_quality, video_quality):
def parse_format_id(id_code):
mobj = re.match(r'''(?x)
(?:archive_)?
(?:(?P<codec>[^_]+)_)?
(?:(?P<br>[\d]+)kbps_)?
(?:(?P<res>[\d+]+)p_)?
''', '%s_' % id_code)
return mobj.groupdict() if mobj else {}
protocol = 'niconico_dmc'
format_id = '-'.join(map(lambda s: remove_start(s['id'], 'archive_'), [video_quality, audio_quality]))
vdict = parse_format_id(video_quality['id'])
adict = parse_format_id(audio_quality['id'])
resolution = video_quality.get('resolution', {'height': vdict.get('res')})
return { return {
'url': session_response['data']['session']['content_uri'], 'url': '%s:%s/%s/%s' % (protocol, video_id, video_quality['id'], audio_quality['id']),
'format_id': format_id, 'format_id': format_id,
'ext': 'mp4', # Session API are used in HTML5, which always serves mp4 'ext': 'mp4', # Session API are used in HTML5, which always serves mp4
'abr': float_or_none(audio_quality.get('bitrate'), 1000), 'vcodec': vdict.get('codec'),
'vbr': float_or_none(video_quality.get('bitrate'), 1000), 'acodec': adict.get('codec'),
'height': resolution.get('height'), 'vbr': float_or_none(video_quality.get('bitrate'), 1000) or float_or_none(vdict.get('br')),
'width': resolution.get('width'), 'abr': float_or_none(audio_quality.get('bitrate'), 1000) or float_or_none(adict.get('br')),
'height': int_or_none(resolution.get('height', vdict.get('res'))),
'width': int_or_none(resolution.get('width')),
'quality': -2 if 'low' in format_id else -1, # Default quality value is -1
'protocol': protocol,
'http_headers': {
'Origin': 'https://www.nicovideo.jp',
'Referer': 'https://www.nicovideo.jp/watch/' + video_id,
}
} }
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
# Get video webpage. We are not actually interested in it for normal # Get video webpage for API data.
# cases, but need the cookies in order to be able to download the
# info webpage
webpage, handle = self._download_webpage_handle( webpage, handle = self._download_webpage_handle(
'http://www.nicovideo.jp/watch/' + video_id, video_id) 'http://www.nicovideo.jp/watch/' + video_id, video_id)
if video_id.startswith('so'): if video_id.startswith('so'):
@ -284,80 +360,134 @@ class NiconicoIE(InfoExtractor):
'data-api-data="([^"]+)"', webpage, 'data-api-data="([^"]+)"', webpage,
'API data', default='{}'), video_id) 'API data', default='{}'), video_id)
def _format_id_from_url(video_url): def get_video_info_web(items):
return 'economy' if video_real_url.endswith('low') else 'normal' return dict_get(api_data['video'], items)
# Get video info
video_info_xml = self._download_xml(
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id,
video_id, note='Downloading video info page')
def get_video_info_xml(items):
if not isinstance(items, list):
items = [items]
for item in items:
ret = xpath_text(video_info_xml, './/' + item)
if ret:
return ret
if get_video_info_xml('error'):
error_code = get_video_info_xml('code')
if error_code == 'DELETED':
raise ExtractorError('The video has been deleted.',
expected=True)
elif error_code == 'NOT_FOUND':
raise ExtractorError('The video is not found.',
expected=True)
elif error_code == 'COMMUNITY':
self.to_screen('%s: The video is community members only.' % video_id)
else:
raise ExtractorError('%s reports error: %s' % (self.IE_NAME, error_code))
# Start extracting video formats
formats = []
# Get HTML5 videos info
try:
dmc_info = api_data['video']['dmcInfo']
except KeyError:
raise ExtractorError('The video can\'t downloaded.',
expected=True)
quality_info = dmc_info.get('quality')
for audio_quality in quality_info.get('audios') or {}:
for video_quality in quality_info.get('videos') or {}:
if not audio_quality.get('available') or not video_quality.get('available'):
continue
formats.append(self._extract_format_for_quality(
api_data, video_id, audio_quality, video_quality))
# Get flv/swf info
video_real_url = try_get(api_data, lambda x: x['video']['smileInfo']['url'])
is_economy = video_real_url.endswith('low')
if is_economy:
self.report_warning('Site is currently in economy mode! You will only have access to lower quality streams')
# Invoking ffprobe to determine resolution
pp = FFmpegPostProcessor(self._downloader)
cookies = self._get_cookies('https://nicovideo.jp').output(header='', sep='; path=/; domain=nicovideo.jp;\n')
self.to_screen('%s: %s' % (video_id, 'Checking smile format with ffprobe'))
try: try:
video_real_url = api_data['video']['smileInfo']['url'] metadata = pp.get_metadata_object(video_real_url, ['-cookies', cookies])
except KeyError: # Flash videos except PostProcessingError as err:
# Get flv info raise ExtractorError(err.msg, expected=True)
flv_info_webpage = self._download_webpage(
'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
video_id, 'Downloading flv info')
flv_info = compat_parse_qs(flv_info_webpage) v_stream = a_stream = {}
if 'url' not in flv_info:
if 'deleted' in flv_info:
raise ExtractorError('The video has been deleted.',
expected=True)
elif 'closed' in flv_info:
raise ExtractorError('Niconico videos now require logging in',
expected=True)
elif 'error' in flv_info:
raise ExtractorError('%s reports error: %s' % (
self.IE_NAME, flv_info['error'][0]), expected=True)
else:
raise ExtractorError('Unable to find video URL')
video_info_xml = self._download_xml( # Some complex swf files doesn't have video stream (e.g. nm4809023)
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, for stream in metadata['streams']:
video_id, note='Downloading video info page') if stream['codec_type'] == 'video':
v_stream = stream
elif stream['codec_type'] == 'audio':
a_stream = stream
def get_video_info(items): # Community restricted videos seem to have issues with the thumb API not returning anything at all
if not isinstance(items, list): filesize = int(
items = [items] (get_video_info_xml('size_high') if not is_economy else get_video_info_xml('size_low'))
for item in items: or metadata['format']['size']
ret = xpath_text(video_info_xml, './/' + item) )
if ret: extension = (
return ret get_video_info_xml('movie_type')
or 'mp4' if 'mp4' in metadata['format']['format_name'] else metadata['format']['format_name']
)
video_real_url = flv_info['url'][0] # 'creation_time' tag on video stream of re-encoded SMILEVIDEO mp4 files are '1970-01-01T00:00:00.000000Z'.
timestamp = (
parse_iso8601(get_video_info_web('first_retrieve'))
or unified_timestamp(get_video_info_web('postedDateTime'))
)
metadata_timestamp = (
parse_iso8601(try_get(v_stream, lambda x: x['tags']['creation_time']))
or timestamp if extension != 'mp4' else 0
)
extension = get_video_info('movie_type') # According to compconf, smile videos from pre-2017 are always better quality than their DMC counterparts
if not extension: smile_threshold_timestamp = parse_iso8601('2016-12-08T00:00:00+09:00')
extension = determine_ext(video_real_url)
formats = [{ is_source = timestamp < smile_threshold_timestamp or metadata_timestamp > 0
# If movie file size is unstable, old server movie is not source movie.
if filesize > 1:
formats.append({
'url': video_real_url, 'url': video_real_url,
'format_id': 'smile' if not is_economy else 'smile_low',
'format_note': 'SMILEVIDEO source' if not is_economy else 'SMILEVIDEO low quality',
'ext': extension, 'ext': extension,
'format_id': _format_id_from_url(video_real_url), 'container': extension,
}] 'vcodec': v_stream.get('codec_name'),
else: 'acodec': a_stream.get('codec_name'),
formats = [] # Some complex swf files doesn't have total bit rate metadata (e.g. nm6049209)
'tbr': int_or_none(metadata['format'].get('bit_rate'), scale=1000),
'vbr': int_or_none(v_stream.get('bit_rate'), scale=1000),
'abr': int_or_none(a_stream.get('bit_rate'), scale=1000),
'height': int_or_none(v_stream.get('height')),
'width': int_or_none(v_stream.get('width')),
'source_preference': 5 if not is_economy else -2,
'quality': 5 if is_source and not is_economy else None,
'filesize': filesize
})
dmc_info = api_data['video'].get('dmcInfo') if len(formats) == 0:
if dmc_info: # "New" HTML5 videos raise ExtractorError('Unable to find video info.')
quality_info = dmc_info['quality']
for audio_quality in quality_info['audios']:
for video_quality in quality_info['videos']:
if not audio_quality['available'] or not video_quality['available']:
continue
formats.append(self._extract_format_for_quality(
api_data, video_id, audio_quality, video_quality))
self._sort_formats(formats) self._sort_formats(formats)
else: # "Old" HTML5 videos
formats = [{
'url': video_real_url,
'ext': 'mp4',
'format_id': _format_id_from_url(video_real_url),
}]
def get_video_info(items):
return dict_get(api_data['video'], items)
# Start extracting information # Start extracting information
title = get_video_info('title') title = get_video_info_web('originalTitle')
if not title: if not title:
title = self._og_search_title(webpage, default=None) title = self._og_search_title(webpage, default=None)
if not title: if not title:
@ -372,14 +502,13 @@ class NiconicoIE(InfoExtractor):
video_detail = watch_api_data.get('videoDetail', {}) video_detail = watch_api_data.get('videoDetail', {})
thumbnail = ( thumbnail = (
get_video_info(['thumbnail_url', 'thumbnailURL']) self._html_search_regex(r'<meta property="og:image" content="([^"]+)">', webpage, 'thumbnail data', default=None)
or get_video_info_web(['thumbnail_url', 'largeThumbnailURL', 'thumbnailURL'])
or self._html_search_meta('image', webpage, 'thumbnail', default=None) or self._html_search_meta('image', webpage, 'thumbnail', default=None)
or video_detail.get('thumbnail')) or video_detail.get('thumbnail'))
description = get_video_info('description') description = get_video_info_web('description')
timestamp = (parse_iso8601(get_video_info('first_retrieve'))
or unified_timestamp(get_video_info('postedDateTime')))
if not timestamp: if not timestamp:
match = self._html_search_meta('datePublished', webpage, 'date published', default=None) match = self._html_search_meta('datePublished', webpage, 'date published', default=None)
if match: if match:
@ -389,7 +518,7 @@ class NiconicoIE(InfoExtractor):
video_detail['postedAt'].replace('/', '-'), video_detail['postedAt'].replace('/', '-'),
delimiter=' ', timezone=datetime.timedelta(hours=9)) delimiter=' ', timezone=datetime.timedelta(hours=9))
view_count = int_or_none(get_video_info(['view_counter', 'viewCount'])) view_count = int_or_none(get_video_info_web(['view_counter', 'viewCount']))
if not view_count: if not view_count:
match = self._html_search_regex( match = self._html_search_regex(
r'>Views: <strong[^>]*>([^<]+)</strong>', r'>Views: <strong[^>]*>([^<]+)</strong>',
@ -398,7 +527,7 @@ class NiconicoIE(InfoExtractor):
view_count = int_or_none(match.replace(',', '')) view_count = int_or_none(match.replace(',', ''))
view_count = view_count or video_detail.get('viewCount') view_count = view_count or video_detail.get('viewCount')
comment_count = (int_or_none(get_video_info('comment_num')) comment_count = (int_or_none(get_video_info_web('comment_num'))
or video_detail.get('commentCount') or video_detail.get('commentCount')
or try_get(api_data, lambda x: x['thread']['commentCount'])) or try_get(api_data, lambda x: x['thread']['commentCount']))
if not comment_count: if not comment_count:
@ -409,19 +538,19 @@ class NiconicoIE(InfoExtractor):
comment_count = int_or_none(match.replace(',', '')) comment_count = int_or_none(match.replace(',', ''))
duration = (parse_duration( duration = (parse_duration(
get_video_info('length') get_video_info_web('length')
or self._html_search_meta( or self._html_search_meta(
'video:duration', webpage, 'video duration', default=None)) 'video:duration', webpage, 'video duration', default=None))
or video_detail.get('length') or video_detail.get('length')
or get_video_info('duration')) or get_video_info_web('duration'))
webpage_url = get_video_info('watch_url') or url webpage_url = get_video_info_web('watch_url') or url
# Note: cannot use api_data.get('owner', {}) because owner may be set to "null" # Note: cannot use api_data.get('owner', {}) because owner may be set to "null"
# in the JSON, which will cause None to be returned instead of {}. # in the JSON, which will cause None to be returned instead of {}.
owner = try_get(api_data, lambda x: x.get('owner'), dict) or {} owner = try_get(api_data, lambda x: x.get('owner'), dict) or {}
uploader_id = get_video_info(['ch_id', 'user_id']) or owner.get('id') uploader_id = get_video_info_web(['ch_id', 'user_id']) or owner.get('id')
uploader = get_video_info(['ch_name', 'user_nickname']) or owner.get('nickname') uploader = get_video_info_web(['ch_name', 'user_nickname']) or owner.get('nickname')
return { return {
'id': video_id, 'id': video_id,
@ -456,60 +585,45 @@ class NiconicoPlaylistIE(InfoExtractor):
'url': 'https://www.nicovideo.jp/user/805442/mylist/27411728', 'url': 'https://www.nicovideo.jp/user/805442/mylist/27411728',
'only_matching': True, 'only_matching': True,
}] }]
_PAGE_SIZE = 100
def _call_api(self, list_id, resource, query):
return self._download_json(
'https://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id,
'Downloading %s JSON metatdata' % resource, query=query,
headers={'X-Frontend-Id': 6})['data']['mylist']
def _parse_owner(self, item):
owner = item.get('owner') or {}
if owner:
return {
'uploader': owner.get('name'),
'uploader_id': owner.get('id'),
}
return {}
def _fetch_page(self, list_id, page):
page += 1
items = self._call_api(list_id, 'page %d' % page, {
'page': page,
'pageSize': self._PAGE_SIZE,
})['items']
for item in items:
video = item.get('video') or {}
video_id = video.get('id')
if not video_id:
continue
count = video.get('count') or {}
get_count = lambda x: int_or_none(count.get(x))
info = {
'_type': 'url',
'id': video_id,
'title': video.get('title'),
'url': 'https://www.nicovideo.jp/watch/' + video_id,
'description': video.get('shortDescription'),
'duration': int_or_none(video.get('duration')),
'view_count': get_count('view'),
'comment_count': get_count('comment'),
'ie_key': NiconicoIE.ie_key(),
}
info.update(self._parse_owner(video))
yield info
def _real_extract(self, url): def _real_extract(self, url):
list_id = self._match_id(url) list_id = self._match_id(url)
mylist = self._call_api(list_id, 'list', { webpage = self._download_webpage(url, list_id)
'pageSize': 1,
}) header = self._parse_json(self._html_search_regex(
entries = InAdvancePagedList( r'data-common-header="([^"]+)"', webpage,
functools.partial(self._fetch_page, list_id), 'webpage header'), list_id)
math.ceil(mylist['totalItemCount'] / self._PAGE_SIZE), frontendId = header.get('initConfig').get('frontendId')
self._PAGE_SIZE) frontendVersion = header.get('initConfig').get('frontendVersion')
result = self.playlist_result(
entries, list_id, mylist.get('name'), mylist.get('description')) def get_page_data(pagenum, pagesize):
result.update(self._parse_owner(mylist)) return self._download_json(
return result 'http://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id,
query={'page': 1 + pagenum, 'pageSize': pagesize},
headers={
'X-Frontend-Id': frontendId,
'X-Frontend-Version': frontendVersion,
}).get('data').get('mylist')
data = get_page_data(0, 1)
title = data.get('name')
description = data.get('description')
uploader = data.get('owner').get('name')
uploader_id = data.get('owner').get('id')
def pagefunc(pagenum):
data = get_page_data(pagenum, 25)
return ({
'_type': 'url',
'url': 'http://www.nicovideo.jp/watch/' + item.get('watchId'),
} for item in data.get('items'))
return {
'_type': 'playlist',
'id': list_id,
'title': title,
'description': description,
'uploader': uploader,
'uploader_id': uploader_id,
'entries': OnDemandPagedList(pagefunc, 25),
}