[extractor/youtube:tab] Extract metadata from channel items (#5569)
Authored by: coletdjnz
This commit is contained in:
parent
81388c0954
commit
c733555106
1 changed file with 77 additions and 6 deletions
|
@ -4382,6 +4382,25 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
|
||||||
elif key.startswith('grid') and key.endswith('Renderer'):
|
elif key.startswith('grid') and key.endswith('Renderer'):
|
||||||
return renderer
|
return renderer
|
||||||
|
|
||||||
|
def _extract_channel_renderer(self, renderer):
    """Turn a channel renderer into a flat ``url``-type result dict.

    The channel ID is read with ``renderer['channelId']``, so the key must
    be present (the visible caller only invokes this after checking it).
    Every other field is obtained through the extractor helpers
    ``_get_text``, ``_get_count`` and ``_extract_thumbnails``.

    The returned entry points at ``https://www.youtube.com/channel/<id>``
    and is tagged with ``YoutubeTabIE.ie_key()``.
    """
    ucid = renderer['channelId']
    name = self._get_text(renderer, 'title')
    canonical_url = f'https://www.youtube.com/channel/{ucid}'

    # Routing fields first: what kind of result this is and where it leads.
    info = {
        '_type': 'url',
        'url': canonical_url,
        'id': ucid,
        'ie_key': YoutubeTabIE.ie_key(),
    }

    # The channel's display name is reported both as channel and as title.
    info.update(
        channel=name,
        channel_id=ucid,
        channel_url=canonical_url,
        title=name,
    )

    # Remaining metadata, each read from its own renderer field.
    info['channel_follower_count'] = self._get_count(renderer, 'subscriberCountText')
    info['thumbnails'] = self._extract_thumbnails(renderer, 'thumbnail')
    info['playlist_count'] = self._get_count(renderer, 'videoCountText')
    info['description'] = self._get_text(renderer, 'descriptionSnippet')
    return info
|
||||||
|
|
||||||
def _grid_entries(self, grid_renderer):
|
def _grid_entries(self, grid_renderer):
|
||||||
for item in grid_renderer['items']:
|
for item in grid_renderer['items']:
|
||||||
if not isinstance(item, dict):
|
if not isinstance(item, dict):
|
||||||
|
@ -4407,9 +4426,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
|
||||||
# channel
|
# channel
|
||||||
channel_id = renderer.get('channelId')
|
channel_id = renderer.get('channelId')
|
||||||
if channel_id:
|
if channel_id:
|
||||||
yield self.url_result(
|
yield self._extract_channel_renderer(renderer)
|
||||||
'https://www.youtube.com/channel/%s' % channel_id,
|
|
||||||
ie=YoutubeTabIE.ie_key(), video_title=title)
|
|
||||||
continue
|
continue
|
||||||
# generic endpoint URL support
|
# generic endpoint URL support
|
||||||
ep_url = urljoin('https://www.youtube.com/', try_get(
|
ep_url = urljoin('https://www.youtube.com/', try_get(
|
||||||
|
@ -5762,7 +5779,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||||
'uploader': 'cole-dlp-test-acc',
|
'uploader': 'cole-dlp-test-acc',
|
||||||
'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
|
'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
|
||||||
'channel': 'cole-dlp-test-acc',
|
'channel': 'cole-dlp-test-acc',
|
||||||
'channel_follower_count': int,
|
|
||||||
},
|
},
|
||||||
'playlist_mincount': 1,
|
'playlist_mincount': 1,
|
||||||
'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
|
'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
|
||||||
|
@ -5930,7 +5946,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||||
'title': 'cole-dlp-test-acc - Shorts',
|
'title': 'cole-dlp-test-acc - Shorts',
|
||||||
'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
|
'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
|
||||||
'channel': 'cole-dlp-test-acc',
|
'channel': 'cole-dlp-test-acc',
|
||||||
'channel_follower_count': int,
|
|
||||||
'description': 'test description',
|
'description': 'test description',
|
||||||
'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
|
'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
|
||||||
'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
|
'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
|
||||||
|
@ -5976,8 +5991,40 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||||
'channel': str,
|
'channel': str,
|
||||||
}
|
}
|
||||||
}],
|
}],
|
||||||
'params': {'extract_flat': True},
|
'params': {'extract_flat': True, 'playlist_items': '1'},
|
||||||
'playlist_mincount': 1
|
'playlist_mincount': 1
|
||||||
|
}, {
|
||||||
|
# Channel renderer metadata. Contains number of videos on the channel
|
||||||
|
'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'UCiu-3thuViMebBjw_5nWYrA',
|
||||||
|
'title': 'cole-dlp-test-acc - Channels',
|
||||||
|
'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
|
||||||
|
'channel': 'cole-dlp-test-acc',
|
||||||
|
'description': 'test description',
|
||||||
|
'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
|
||||||
|
'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
|
||||||
|
'tags': [],
|
||||||
|
'uploader': 'cole-dlp-test-acc',
|
||||||
|
'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
|
||||||
|
|
||||||
|
},
|
||||||
|
'playlist': [{
|
||||||
|
'info_dict': {
|
||||||
|
'_type': 'url',
|
||||||
|
'ie_key': 'YoutubeTab',
|
||||||
|
'url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
|
||||||
|
'id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
|
||||||
|
'channel_id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
|
||||||
|
'title': 'PewDiePie',
|
||||||
|
'channel': 'PewDiePie',
|
||||||
|
'channel_url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
|
||||||
|
'thumbnails': list,
|
||||||
|
'channel_follower_count': int,
|
||||||
|
'playlist_count': int
|
||||||
|
}
|
||||||
|
}],
|
||||||
|
'params': {'extract_flat': True},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -6531,6 +6578,30 @@ class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
|
||||||
# 'title': '#cats',
|
# 'title': '#cats',
|
||||||
# }],
|
# }],
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# Channel results
|
||||||
|
'url': 'https://www.youtube.com/results?search_query=kurzgesagt&sp=EgIQAg%253D%253D',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'kurzgesagt',
|
||||||
|
'title': 'kurzgesagt',
|
||||||
|
},
|
||||||
|
'playlist': [{
|
||||||
|
'info_dict': {
|
||||||
|
'_type': 'url',
|
||||||
|
'id': 'UCsXVk37bltHxD1rDPwtNM8Q',
|
||||||
|
'url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
|
||||||
|
'ie_key': 'YoutubeTab',
|
||||||
|
'channel': 'Kurzgesagt – In a Nutshell',
|
||||||
|
'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
|
||||||
|
'title': 'Kurzgesagt – In a Nutshell',
|
||||||
|
'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
|
||||||
|
'playlist_count': int, # XXX: should have a way of saying > 1
|
||||||
|
'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
|
||||||
|
'thumbnails': list
|
||||||
|
}
|
||||||
|
}],
|
||||||
|
'params': {'extract_flat': True, 'playlist_items': '1'},
|
||||||
|
'playlist_mincount': 1,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
|
'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
|
Loading…
Reference in a new issue