parent
9bd13fe5bb
commit
bfbecd1174
3 changed files with 57 additions and 2 deletions
|
@ -1083,6 +1083,7 @@ from .newgrounds import (
|
|||
NewgroundsPlaylistIE,
|
||||
NewgroundsUserIE,
|
||||
)
|
||||
from .newspicks import NewsPicksIE
|
||||
from .newstube import NewstubeIE
|
||||
from .newsy import NewsyIE
|
||||
from .nextmedia import (
|
||||
|
|
|
@ -3260,7 +3260,7 @@ class InfoExtractor:
|
|||
'subtitles': {},
|
||||
}
|
||||
media_attributes = extract_attributes(media_tag)
|
||||
src = strip_or_none(media_attributes.get('src'))
|
||||
src = strip_or_none(dict_get(media_attributes, ('src', 'data-video-src', 'data-src', 'data-source')))
|
||||
if src:
|
||||
f = parse_content_type(media_attributes.get('type'))
|
||||
_, formats = _media_formats(src, media_type, f)
|
||||
|
@ -3271,7 +3271,7 @@ class InfoExtractor:
|
|||
s_attr = extract_attributes(source_tag)
|
||||
# data-video-src, data-src and data-source are non standard but seen
|
||||
# several times in the wild
|
||||
src = strip_or_none(dict_get(s_attr, ('src', 'data-video-src', 'data-src')))
|
||||
src = strip_or_none(dict_get(s_attr, ('src', 'data-video-src', 'data-src', 'data-source')))
|
||||
if not src:
|
||||
continue
|
||||
f = parse_content_type(s_attr.get('type'))
|
||||
|
|
54
yt_dlp/extractor/newspicks.py
Normal file
54
yt_dlp/extractor/newspicks.py
Normal file
|
@ -0,0 +1,54 @@
|
|||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class NewsPicksIE(InfoExtractor):
    """Extractor for NewsPicks movie-series pages.

    Handles URLs of the form
    ``https://newspicks.com/movie-series/<channel_id>?movieId=<id>`` and
    builds the info dict from the HTML5 media element found in the page plus
    metadata scraped from the page's meta tags and markup.
    """

    # The dot is escaped so that only the literal host "newspicks.com" matches.
    _VALID_URL = r'https://newspicks\.com/movie-series/(?P<channel_id>\d+)\?movieId=(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://newspicks.com/movie-series/11?movieId=1813',
        'info_dict': {
            'id': '1813',
            'title': '日本の課題を破壊せよ【ゲスト:成田悠輔】',
            'description': 'md5:09397aad46d6ded6487ff13f138acadf',
            'channel': 'HORIE ONE',
            'channel_id': '11',
            'release_date': '20220117',
            'thumbnail': r're:https://.+jpg',
            'ext': 'mp4',
        },
    }]

    def _real_extract(self, url):
        """Download the page at *url* and return its info dict.

        Raises ExtractorError when the page contains no HTML5 media element.
        """
        video_id, channel_id = self._match_valid_url(url).group('id', 'channel_id')
        webpage = self._download_webpage(url, video_id)

        # The page is parsed with 'movie-for-pc' rewritten to 'movie';
        # NOTE(review): presumably this makes the site's custom element
        # recognisable to the generic HTML5 media parser - confirm against
        # the live markup.
        entries = self._parse_html5_media_entries(
            url, webpage.replace('movie-for-pc', 'movie'), video_id, 'hls')
        if not entries:
            raise ExtractorError('No HTML5 media elements found')
        info = entries[0]
        self._sort_formats(info['formats'])

        title = self._html_search_meta('og:title', webpage, fatal=False)
        description = self._html_search_meta(
            ('og:description', 'twitter:title'), webpage, fatal=False)
        # Look up the name of the channel actually requested rather than a
        # hard-coded channel id.
        channel = self._html_search_regex(
            r'value="%s".+?<div\s+class="title">(.+?)</div' % re.escape(channel_id),
            webpage, 'channel name', fatal=False)

        if not title or not channel:
            # Fall back to the page <title>, expected to look like
            # "<title> | <channel>". The pipe must be escaped: an unescaped
            # '|' is regex alternation of two empty-matching patterns, which
            # splits around every character.
            page_title = self._html_extract_title(webpage) or ''
            title_parts = re.split(r'\s*\|\s*', page_title, maxsplit=1)
            # Only fill in what is still missing; keep values already found.
            title = title or title_parts[0] or None
            if not channel and len(title_parts) > 1:
                channel = title_parts[1] or None

        # Japanese-style date, e.g. "2022年1月17日" -> ('2022', '1', '17').
        date_parts = self._search_regex(
            r'<span\s+class="on-air-date">\s*(\d+)年(\d+)月(\d+)日\s*</span>',
            webpage, 'release date', fatal=False, group=(1, 2, 3))
        release_date = None
        if date_parts:
            release_date = '%04d%02d%02d' % tuple(map(int, date_parts))

        info.update({
            'id': video_id,
            'title': title,
            'description': description,
            'channel': channel,
            'channel_id': channel_id,
            'release_date': release_date,
        })
        return info
|
Loading…
Reference in a new issue