[youtube] Fix comment extraction (Closes #53)

:ci skip dl
This commit is contained in:
pukkandan 2021-02-05 16:28:31 +05:30
parent 0fd1a2b0bf
commit 885d36d4e4

View file

@@ -1999,8 +1999,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
raise ExtractorError('Unexpected HTTP error code: %s' % response_code) raise ExtractorError('Unexpected HTTP error code: %s' % response_code)
first_continuation = True first_continuation = True
chain_msg = ''
self.to_screen('Downloading comments')
while continuations: while continuations:
continuation, itct = continuations.pop() continuation = continuations.pop()
comment_response = get_continuation(continuation, xsrf_token) comment_response = get_continuation(continuation, xsrf_token)
if not comment_response: if not comment_response:
continue continue
@@ -2046,9 +2048,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
continue continue
if self._downloader.params.get('verbose', False): if self._downloader.params.get('verbose', False):
self.to_screen('[debug] Comments downloaded (chain %s) %s of ~%s' % (comment['commentId'], len(video_comments), expected_video_comment_count)) chain_msg = ' (chain %s)' % comment['commentId']
self.to_screen('Comments downloaded: %d of ~%d%s' % (len(video_comments), expected_video_comment_count, chain_msg))
reply_comment_meta = replies_data[1]['response']['continuationContents']['commentRepliesContinuation'] reply_comment_meta = replies_data[1]['response']['continuationContents']['commentRepliesContinuation']
for reply_meta in replies_data[1]['response']['continuationContents']['commentRepliesContinuation']['contents']: for reply_meta in reply_comment_meta.get('contents', {}):
reply_comment = reply_meta['commentRenderer'] reply_comment = reply_meta['commentRenderer']
video_comments.append({ video_comments.append({
'id': reply_comment['commentId'], 'id': reply_comment['commentId'],
@@ -2063,12 +2066,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
continue continue
reply_continuations += [rcn['nextContinuationData']['continuation'] for rcn in reply_comment_meta['continuations']] reply_continuations += [rcn['nextContinuationData']['continuation'] for rcn in reply_comment_meta['continuations']]
self.to_screen('Comments downloaded %s of ~%s' % (len(video_comments), expected_video_comment_count)) self.to_screen('Comments downloaded: %d of ~%d' % (len(video_comments), expected_video_comment_count))
if 'continuations' in item_section: if 'continuations' in item_section:
continuations += [ncd['nextContinuationData']['continuation'] for ncd in item_section['continuations']] continuations += [ncd['nextContinuationData']['continuation'] for ncd in item_section['continuations']]
time.sleep(1) time.sleep(1)
self.to_screen('Total comments downloaded %s of ~%s' % (len(video_comments), expected_video_comment_count)) self.to_screen('Total comments downloaded: %d of ~%d' % (len(video_comments), expected_video_comment_count))
info.update({ info.update({
'comments': video_comments, 'comments': video_comments,
'comment_count': expected_video_comment_count 'comment_count': expected_video_comment_count