[niconico] Add playlist extractors and refactor (#2915)

Authored by: Lesmiscore
2022-03-03 02:08:13 +09:00 · 2022-03-03 02:08:13 +09:00 · f494ddada8
commit f494ddada8
parent 02fc6feb6e
2 changed files with 153 additions and 33 deletions
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@ -1011,11 +1011,12 @@ from .nick import (
    NickNightIE,
    NickRuIE,
 )
 from .niconico import (
    NiconicoIE,
    NiconicoPlaylistIE,
    NiconicoUserIE,
    NiconicoSeriesIE,
    NiconicoHistoryIE,
    NicovideoSearchDateIE,
    NicovideoSearchIE,
    NicovideoSearchURLIE,
--- a/yt_dlp/extractor/niconico.py
+++ b/yt_dlp/extractor/niconico.py
@ -3,6 +3,7 @@ from __future__ import unicode_literals
 import datetime
 import itertools
 import functools
 import json
 import re
@ -12,6 +13,7 @@ from ..compat import (
    compat_str,
    compat_parse_qs,
    compat_urllib_parse_urlparse,
    compat_HTTPError,
 )
 from ..utils import (
    ExtractorError,
@ -24,7 +26,9 @@ from ..utils import (
    PostProcessingError,
    remove_start,
    str_or_none,
    traverse_obj,
    try_get,
    unescapeHTML,
    unified_timestamp,
    urlencode_postdata,
    xpath_text,
@ -606,8 +610,61 @@ class NiconicoIE(InfoExtractor):
        }
-class NiconicoPlaylistIE(InfoExtractor):
+class NiconicoPlaylistBaseIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/|my/)?mylist/(?P<id>\d+)'
+    _PAGE_SIZE = 100
    _API_HEADERS = {
        'X-Frontend-ID': '6',
        'X-Frontend-Version': '0',
        'X-Niconico-Language': 'en-us'
    }
    def _call_api(self, list_id, resource, query):
        "Implement this in child class"
        pass
    @staticmethod
    def _parse_owner(item):
        """Map the `owner` sub-object of `item` onto uploader fields.

        Returns a dict with 'uploader' (owner name) and 'uploader_id'
        (owner id), each looked up through traverse_obj.
        """
        owner_name = traverse_obj(item, ('owner', 'name'))
        owner_id = traverse_obj(item, ('owner', 'id'))
        return {'uploader': owner_name, 'uploader_id': owner_id}
    def _fetch_page(self, list_id, page):
        """Yield one url-type entry per video found on a single API page.

        `page` is the index handed in by the caller; it is incremented by
        one before being sent to the API. Entries without an id are skipped.
        """
        api_page = page + 1
        query = {
            'page': api_page,
            'pageSize': self._PAGE_SIZE,
        }
        response = self._call_api(list_id, 'page %d' % api_page, query)
        # Every item is looked at both through its 'video' member and as the
        # item itself; this is needed to support both mylist and user
        candidates = traverse_obj(response, ('items', ..., ('video', None))) or []
        for entry in candidates:
            entry_id = entry.get('id')
            if not entry_id:
                # skip {"video": {"id": "blablabla", ...}}
                continue
            counters = entry.get('count') or {}
            yield {
                '_type': 'url',
                'id': entry_id,
                'title': entry.get('title'),
                'url': f'https://www.nicovideo.jp/watch/{entry_id}',
                'description': entry.get('shortDescription'),
                'duration': int_or_none(entry.get('duration')),
                'view_count': int_or_none(counters.get('view')),
                'comment_count': int_or_none(counters.get('comment')),
                'thumbnail': traverse_obj(entry, ('thumbnail', ('nHdUrl', 'largeUrl', 'listingUrl', 'url'))),
                'ie_key': NiconicoIE.ie_key(),
                **self._parse_owner(entry),
            }
    def _entries(self, list_id):
        """Wrap `_fetch_page` for `list_id` in an OnDemandPagedList.

        `_PAGE_SIZE` is passed as the page size.
        """
        fetch_page = functools.partial(self._fetch_page, list_id)
        return OnDemandPagedList(fetch_page, self._PAGE_SIZE)
 class NiconicoPlaylistIE(NiconicoPlaylistBaseIE):
    IE_NAME = 'niconico:playlist'
    _VALID_URL = r'https?://(?:(?:www\.|sp\.)?nicovideo\.jp|nico\.ms)/(?:user/\d+/)?(?:my/)?mylist/(?:#/)?(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.nicovideo.jp/mylist/27411728',
@ -618,48 +675,110 @@ class NiconicoPlaylistIE(InfoExtractor):
            'uploader': 'のっく',
            'uploader_id': '805442',
        },
-        'playlist_mincount': 225,
+        'playlist_mincount': 291,
    }, {
        'url': 'https://www.nicovideo.jp/user/805442/mylist/27411728',
        'only_matching': True,
    }, {
        'url': 'https://www.nicovideo.jp/my/mylist/#/68048635',
        'only_matching': True,
    }]
-    _API_HEADERS = {
+    def _call_api(self, list_id, resource, query):
-        'X-Frontend-ID': '6',
+        return self._download_json(
-        'X-Frontend-Version': '0'
+            f'https://nvapi.nicovideo.jp/v2/mylists/{list_id}', list_id,
-    }
+            f'Downloading {resource}', query=query,
            headers=self._API_HEADERS)['data']['mylist']
    def _real_extract(self, url):
        """Build the playlist result for a mylist URL.

        The list id is taken from the URL. One API call with pageSize 1
        supplies the list name, description and owner; the entries come from
        `_entries`.
        """
        list_id = self._match_id(url)
        # Only the list-level metadata is read from this response
        metadata = self._call_api(list_id, 'list', {'pageSize': 1})
        entries = self._entries(list_id)
        list_name = metadata.get('name')
        list_description = metadata.get('description')
        owner_fields = self._parse_owner(metadata)
        return self.playlist_result(
            entries, list_id, list_name, list_description, **owner_fields)
        def get_page_data(pagenum, pagesize):
            return self._download_json(
                'http://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id,
                query={'page': 1 + pagenum, 'pageSize': pagesize},
                headers=self._API_HEADERS).get('data').get('mylist')
-        data = get_page_data(0, 1)
+class NiconicoSeriesIE(InfoExtractor):
-        title = data.get('name')
+    IE_NAME = 'niconico:series'
-        description = data.get('description')
+    _VALID_URL = r'https?://(?:(?:www\.|sp\.)?nicovideo\.jp|nico\.ms)/series/(?P<id>\d+)'
        uploader = data.get('owner').get('name')
        uploader_id = data.get('owner').get('id')
-        def pagefunc(pagenum):
+    _TESTS = [{
-            data = get_page_data(pagenum, 25)
+        'url': 'https://www.nicovideo.jp/series/110226',
-            return ({
+        'info_dict': {
-                '_type': 'url',
+            'id': '110226',
-                'url': 'http://www.nicovideo.jp/watch/' + item.get('watchId'),
+            'title': 'ご立派ァ！のシリーズ',
-            } for item in data.get('items'))
+        },
        'playlist_mincount': 10,  # as of 2021/03/17
    }, {
        'url': 'https://www.nicovideo.jp/series/12312/',
        'info_dict': {
            'id': '12312',
            'title': 'バトルスピリッツ　お勧めカード紹介(調整中)',
        },
        'playlist_mincount': 97,  # as of 2021/03/17
    }, {
        'url': 'https://nico.ms/series/203559',
        'only_matching': True,
    }]
-        return {
+    def _real_extract(self, url):
-            '_type': 'playlist',
+        list_id = self._match_id(url)
-            'id': list_id,
+        webpage = self._download_webpage(f'https://www.nicovideo.jp/series/{list_id}', list_id)
-            'title': title,
+
-            'description': description,
+        title = self._search_regex(
-            'uploader': uploader,
+            (r'<title>「(.+)（全',
-            'uploader_id': uploader_id,
+             r'<div class="TwitterShareButton"\s+data-text="(.+)\s+https:'),
-            'entries': OnDemandPagedList(pagefunc, 25),
+            webpage, 'title', fatal=False)
-        }
+        if title:
            title = unescapeHTML(title)
        playlist = [
            self.url_result(f'https://www.nicovideo.jp/watch/{v_id}', video_id=v_id)
            for v_id in re.findall(r'href="/watch/([a-z0-9]+)" data-href="/watch/\1', webpage)]
        return self.playlist_result(playlist, list_id, title)
 class NiconicoHistoryIE(NiconicoPlaylistBaseIE):
    IE_NAME = 'niconico:history'
    IE_DESC = 'NicoNico user history. Requires cookies.'
    _VALID_URL = r'https?://(?:www\.|sp\.)?nicovideo\.jp/my/history'
    _TESTS = [{
        'note': 'PC page, with /video',
        'url': 'https://www.nicovideo.jp/my/history/video',
        'only_matching': True,
    }, {
        'note': 'PC page, without /video',
        'url': 'https://www.nicovideo.jp/my/history',
        'only_matching': True,
    }, {
        'note': 'mobile page, with /video',
        'url': 'https://sp.nicovideo.jp/my/history/video',
        'only_matching': True,
    }, {
        'note': 'mobile page, without /video',
        'url': 'https://sp.nicovideo.jp/my/history',
        'only_matching': True,
    }]
    def _call_api(self, list_id, resource, query):
        """Download the watch-history JSON and return its 'data' member.

        `list_id` is not used to build the request; the endpoint always
        refers to the current user ("me"). `resource` only appears in the
        download note and `query` is sent as the query parameters.
        """
        response = self._download_json(
            'https://nvapi.nicovideo.jp/v1/users/me/watch/history', 'history',
            f'Downloading {resource}', query=query,
            headers=self._API_HEADERS)
        return response['data']
    def _real_extract(self, url):
        """Build the playlist result for the watch history.

        An HTTP 401 from the API is reported through raise_login_required;
        the caught ExtractorError is re-raised if that call returns, and for
        every other failure.
        """
        list_id = 'history'
        try:
            history = self._call_api(list_id, 'list', {'pageSize': 1})
        except ExtractorError as err:
            unauthorized = isinstance(err.cause, compat_HTTPError) and err.cause.code == 401
            if unauthorized:
                self.raise_login_required('You have to be logged in to get your watch history')
            raise
        entries = self._entries(list_id)
        owner_fields = self._parse_owner(history)
        return self.playlist_result(entries, list_id, **owner_fields)
 class NicovideoSearchBaseIE(InfoExtractor):