parent
1cf376f55a
commit
277d6ff5f2
6 changed files with 57 additions and 17 deletions
|
@ -245,7 +245,7 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||||
"OUTPUT TEMPLATE" for a list of available
|
"OUTPUT TEMPLATE" for a list of available
|
||||||
keys) to match if the key is present, !key
|
keys) to match if the key is present, !key
|
||||||
to check if the key is not present,
|
to check if the key is not present,
|
||||||
key>NUMBER (like "comment_count > 12", also
|
key>NUMBER (like "view_count > 12", also
|
||||||
works with >=, <, <=, !=, =) to compare
|
works with >=, <, <=, !=, =) to compare
|
||||||
against a number, key = 'LITERAL' (like
|
against a number, key = 'LITERAL' (like
|
||||||
"uploader = 'Mike Smith'", also works with
|
"uploader = 'Mike Smith'", also works with
|
||||||
|
@ -403,7 +403,9 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
|
||||||
--no-write-playlist-metafiles Do not write playlist metadata when using
|
--no-write-playlist-metafiles Do not write playlist metadata when using
|
||||||
--write-info-json, --write-description etc.
|
--write-info-json, --write-description etc.
|
||||||
--get-comments Retrieve video comments to be placed in the
|
--get-comments Retrieve video comments to be placed in the
|
||||||
.info.json file
|
.info.json file. The comments are fetched
|
||||||
|
even without this option if the extraction
|
||||||
|
is known to be quick
|
||||||
--load-info-json FILE JSON file containing the video information
|
--load-info-json FILE JSON file containing the video information
|
||||||
(created with the "--write-info-json"
|
(created with the "--write-info-json"
|
||||||
option)
|
option)
|
||||||
|
@ -814,7 +816,7 @@ The available fields are:
|
||||||
- `dislike_count` (numeric): Number of negative ratings of the video
|
- `dislike_count` (numeric): Number of negative ratings of the video
|
||||||
- `repost_count` (numeric): Number of reposts of the video
|
- `repost_count` (numeric): Number of reposts of the video
|
||||||
- `average_rating` (numeric): Average rating given by users, the scale used depends on the webpage
|
- `average_rating` (numeric): Average rating given by users, the scale used depends on the webpage
|
||||||
- `comment_count` (numeric): Number of comments on the video
|
- `comment_count` (numeric): Number of comments on the video (For some extractors, comments are only downloaded at the end, and so this field cannot be used)
|
||||||
- `age_limit` (numeric): Age restriction for the video (years)
|
- `age_limit` (numeric): Age restriction for the video (years)
|
||||||
- `is_live` (boolean): Whether this video is a live stream or a fixed-length video
|
- `is_live` (boolean): Whether this video is a live stream or a fixed-length video
|
||||||
- `was_live` (boolean): Whether this video was originally a live stream
|
- `was_live` (boolean): Whether this video was originally a live stream
|
||||||
|
|
|
@ -2041,6 +2041,7 @@ class YoutubeDL(object):
|
||||||
self.to_stdout(formatSeconds(info_dict['duration']))
|
self.to_stdout(formatSeconds(info_dict['duration']))
|
||||||
print_mandatory('format')
|
print_mandatory('format')
|
||||||
if self.params.get('forcejson', False):
|
if self.params.get('forcejson', False):
|
||||||
|
self.post_extract(info_dict)
|
||||||
self.to_stdout(json.dumps(info_dict))
|
self.to_stdout(json.dumps(info_dict))
|
||||||
|
|
||||||
def process_info(self, info_dict):
|
def process_info(self, info_dict):
|
||||||
|
@ -2064,6 +2065,7 @@ class YoutubeDL(object):
|
||||||
if self._match_entry(info_dict, incomplete=False) is not None:
|
if self._match_entry(info_dict, incomplete=False) is not None:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
self.post_extract(info_dict)
|
||||||
self._num_downloads += 1
|
self._num_downloads += 1
|
||||||
|
|
||||||
info_dict = self.pre_process(info_dict)
|
info_dict = self.pre_process(info_dict)
|
||||||
|
@ -2497,6 +2499,7 @@ class YoutubeDL(object):
|
||||||
raise
|
raise
|
||||||
else:
|
else:
|
||||||
if self.params.get('dump_single_json', False):
|
if self.params.get('dump_single_json', False):
|
||||||
|
self.post_extract(res)
|
||||||
self.to_stdout(json.dumps(res))
|
self.to_stdout(json.dumps(res))
|
||||||
|
|
||||||
return self._download_retcode
|
return self._download_retcode
|
||||||
|
@ -2545,6 +2548,24 @@ class YoutubeDL(object):
|
||||||
del files_to_move[old_filename]
|
del files_to_move[old_filename]
|
||||||
return files_to_move, infodict
|
return files_to_move, infodict
|
||||||
|
|
||||||
|
@staticmethod
def post_extract(info_dict):
    """Run deferred extraction hooks and merge their results into info_dict.

    An info dict may carry a zero-argument callable under the
    '__post_extractor' key. The callable is invoked, the items of the dict
    it returns are merged into the info dict, and the key is removed.
    Dicts of type 'playlist' or 'multi_video' are not processed themselves;
    each of their entries is processed recursively instead.

    info_dict is modified in place; nothing is returned.
    """
    def actual_post_extract(info_dict):
        if info_dict.get('_type') in ('playlist', 'multi_video'):
            # 'entries' may be absent or explicitly None, and individual
            # entries may be None; skip those instead of crashing.
            for video_dict in info_dict.get('entries') or ():
                if video_dict is not None:
                    actual_post_extract(video_dict)
            return

        if '__post_extractor' not in info_dict:
            return
        post_extractor = info_dict['__post_extractor']
        if post_extractor:
            info_dict.update(post_extractor().items())
        # Always drop the hook: a function object cannot be serialized
        # by json.dumps.
        del info_dict['__post_extractor']

    actual_post_extract(info_dict)
|
||||||
|
|
||||||
def pre_process(self, ie_info):
|
def pre_process(self, ie_info):
|
||||||
info = dict(ie_info)
|
info = dict(ie_info)
|
||||||
for pp in self._pps['beforedl']:
|
for pp in self._pps['beforedl']:
|
||||||
|
|
|
@ -255,10 +255,6 @@ class BiliBiliIE(InfoExtractor):
|
||||||
info['uploader'] = self._html_search_meta(
|
info['uploader'] = self._html_search_meta(
|
||||||
'author', webpage, 'uploader', default=None)
|
'author', webpage, 'uploader', default=None)
|
||||||
|
|
||||||
comments = None
|
|
||||||
if self._downloader.params.get('getcomments', False):
|
|
||||||
comments = self._get_all_comment_pages(video_id)
|
|
||||||
|
|
||||||
raw_danmaku = self._get_raw_danmaku(video_id, cid)
|
raw_danmaku = self._get_raw_danmaku(video_id, cid)
|
||||||
|
|
||||||
raw_tags = self._get_tags(video_id)
|
raw_tags = self._get_tags(video_id)
|
||||||
|
@ -266,11 +262,18 @@ class BiliBiliIE(InfoExtractor):
|
||||||
|
|
||||||
top_level_info = {
|
top_level_info = {
|
||||||
'raw_danmaku': raw_danmaku,
|
'raw_danmaku': raw_danmaku,
|
||||||
'comments': comments,
|
|
||||||
'comment_count': len(comments) if comments is not None else None,
|
|
||||||
'tags': tags,
|
'tags': tags,
|
||||||
'raw_tags': raw_tags,
|
'raw_tags': raw_tags,
|
||||||
}
|
}
|
||||||
|
if self._downloader.params.get('getcomments', False):
|
||||||
|
def get_comments():
|
||||||
|
comments = self._get_all_comment_pages(video_id)
|
||||||
|
return {
|
||||||
|
'comments': comments,
|
||||||
|
'comment_count': len(comments)
|
||||||
|
}
|
||||||
|
|
||||||
|
top_level_info['__post_extractor'] = get_comments
|
||||||
|
|
||||||
'''
|
'''
|
||||||
# Requires https://github.com/m13253/danmaku2ass which is licenced under GPL3
|
# Requires https://github.com/m13253/danmaku2ass which is licenced under GPL3
|
||||||
|
|
|
@ -294,6 +294,14 @@ class InfoExtractor(object):
|
||||||
players on other sites. Can be True (=always allowed),
|
players on other sites. Can be True (=always allowed),
|
||||||
False (=never allowed), None (=unknown), or a string
|
False (=never allowed), None (=unknown), or a string
|
||||||
specifying the criteria for embedability (Eg: 'whitelist').
|
specifying the criteria for embedability (Eg: 'whitelist').
|
||||||
|
__post_extractor: A function to be called just before the metadata is
|
||||||
|
written to either disk, logger or console. The function
|
||||||
|
must return a dict which will be added to the info_dict.
|
||||||
|
This is useful for additional information that is
|
||||||
|
time-consuming to extract. Note that the fields thus
|
||||||
|
extracted will not be available to output template and
|
||||||
|
match_filter. So, only "comments" and "comment_count" are
|
||||||
|
currently allowed to be extracted via this method.
|
||||||
|
|
||||||
The following fields should only be used when the video belongs to some logical
|
The following fields should only be used when the video belongs to some logical
|
||||||
chapter or section:
|
chapter or section:
|
||||||
|
|
|
@ -2012,9 +2012,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
|
|
||||||
# Get comments
|
# Get comments
|
||||||
# TODO: Refactor and move to separate function
|
# TODO: Refactor and move to separate function
|
||||||
if get_comments:
|
def extract_comments():
|
||||||
expected_video_comment_count = 0
|
expected_video_comment_count = 0
|
||||||
video_comments = []
|
video_comments = []
|
||||||
|
comment_xsrf = xsrf_token
|
||||||
|
|
||||||
def find_value(html, key, num_chars=2, separator='"'):
|
def find_value(html, key, num_chars=2, separator='"'):
|
||||||
pos_begin = html.find(key) + len(key) + num_chars
|
pos_begin = html.find(key) + len(key) + num_chars
|
||||||
|
@ -2083,7 +2084,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
self.to_screen('Downloading comments')
|
self.to_screen('Downloading comments')
|
||||||
while continuations:
|
while continuations:
|
||||||
continuation = continuations.pop()
|
continuation = continuations.pop()
|
||||||
comment_response = get_continuation(continuation, xsrf_token)
|
comment_response = get_continuation(continuation, comment_xsrf)
|
||||||
if not comment_response:
|
if not comment_response:
|
||||||
continue
|
continue
|
||||||
if list(search_dict(comment_response, 'externalErrorMessage')):
|
if list(search_dict(comment_response, 'externalErrorMessage')):
|
||||||
|
@ -2094,7 +2095,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
continue
|
continue
|
||||||
# not sure if this actually helps
|
# not sure if this actually helps
|
||||||
if 'xsrf_token' in comment_response:
|
if 'xsrf_token' in comment_response:
|
||||||
xsrf_token = comment_response['xsrf_token']
|
comment_xsrf = comment_response['xsrf_token']
|
||||||
|
|
||||||
item_section = comment_response['response']['continuationContents']['itemSectionContinuation']
|
item_section = comment_response['response']['continuationContents']['itemSectionContinuation']
|
||||||
if first_continuation:
|
if first_continuation:
|
||||||
|
@ -2123,7 +2124,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
while reply_continuations:
|
while reply_continuations:
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
continuation = reply_continuations.pop()
|
continuation = reply_continuations.pop()
|
||||||
replies_data = get_continuation(continuation, xsrf_token, True)
|
replies_data = get_continuation(continuation, comment_xsrf, True)
|
||||||
if not replies_data or 'continuationContents' not in replies_data[1]['response']:
|
if not replies_data or 'continuationContents' not in replies_data[1]['response']:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
@ -2152,10 +2153,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
|
||||||
self.to_screen('Total comments downloaded: %d of ~%d' % (len(video_comments), expected_video_comment_count))
|
self.to_screen('Total comments downloaded: %d of ~%d' % (len(video_comments), expected_video_comment_count))
|
||||||
info.update({
|
return {
|
||||||
'comments': video_comments,
|
'comments': video_comments,
|
||||||
'comment_count': expected_video_comment_count
|
'comment_count': expected_video_comment_count
|
||||||
})
|
}
|
||||||
|
|
||||||
|
if get_comments:
|
||||||
|
info['__post_extractor'] = extract_comments
|
||||||
|
|
||||||
self.mark_watched(video_id, player_response)
|
self.mark_watched(video_id, player_response)
|
||||||
|
|
||||||
|
|
|
@ -347,7 +347,7 @@ def parseOpts(overrideArguments=None):
|
||||||
'Specify any key (see "OUTPUT TEMPLATE" for a list of available keys) to '
|
'Specify any key (see "OUTPUT TEMPLATE" for a list of available keys) to '
|
||||||
'match if the key is present, '
|
'match if the key is present, '
|
||||||
'!key to check if the key is not present, '
|
'!key to check if the key is not present, '
|
||||||
'key>NUMBER (like "comment_count > 12", also works with '
|
'key>NUMBER (like "view_count > 12", also works with '
|
||||||
'>=, <, <=, !=, =) to compare against a number, '
|
'>=, <, <=, !=, =) to compare against a number, '
|
||||||
'key = \'LITERAL\' (like "uploader = \'Mike Smith\'", also works with !=) '
|
'key = \'LITERAL\' (like "uploader = \'Mike Smith\'", also works with !=) '
|
||||||
'to match against a string literal '
|
'to match against a string literal '
|
||||||
|
@ -985,7 +985,9 @@ def parseOpts(overrideArguments=None):
|
||||||
filesystem.add_option(
|
filesystem.add_option(
|
||||||
'--get-comments',
|
'--get-comments',
|
||||||
action='store_true', dest='getcomments', default=False,
|
action='store_true', dest='getcomments', default=False,
|
||||||
help='Retrieve video comments to be placed in the .info.json file')
|
help=(
|
||||||
|
'Retrieve video comments to be placed in the .info.json file. '
|
||||||
|
'The comments are fetched even without this option if the extraction is known to be quick'))
|
||||||
filesystem.add_option(
|
filesystem.add_option(
|
||||||
'--load-info-json', '--load-info',
|
'--load-info-json', '--load-info',
|
||||||
dest='load_info_filename', metavar='FILE',
|
dest='load_info_filename', metavar='FILE',
|
||||||
|
|
Loading…
Reference in a new issue