[ie/theplatform] Extract more metadata (#8635)
Authored by: trainman261
This commit is contained in:
parent
e370f9ec36
commit
7e09c147fd
7 changed files with 73 additions and 11 deletions
|
@ -121,11 +121,21 @@ class AENetworksIE(AENetworksBaseIE):
|
|||
'info_dict': {
|
||||
'id': '22253814',
|
||||
'ext': 'mp4',
|
||||
'title': 'Winter is Coming',
|
||||
'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
|
||||
'title': 'Winter Is Coming',
|
||||
'description': 'md5:a40e370925074260b1c8a633c632c63a',
|
||||
'timestamp': 1338306241,
|
||||
'upload_date': '20120529',
|
||||
'uploader': 'AENE-NEW',
|
||||
'duration': 2592.0,
|
||||
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||
'chapters': 'count:5',
|
||||
'tags': 'count:14',
|
||||
'categories': ['Mountain Men'],
|
||||
'episode_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'series': 'Mountain Men',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
|
@ -143,6 +153,15 @@ class AENetworksIE(AENetworksBaseIE):
|
|||
'timestamp': 1452634428,
|
||||
'upload_date': '20160112',
|
||||
'uploader': 'AENE-NEW',
|
||||
'duration': 1277.695,
|
||||
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||
'chapters': 'count:4',
|
||||
'tags': 'count:23',
|
||||
'episode': 'Episode 1',
|
||||
'episode_number': 1,
|
||||
'season': 'Season 9',
|
||||
'season_number': 9,
|
||||
'series': 'Duck Dynasty',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
|
|
|
@ -180,6 +180,13 @@ class CBCPlayerIE(InfoExtractor):
|
|||
'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg',
|
||||
'chapters': [],
|
||||
'duration': 494.811,
|
||||
'categories': ['AudioMobile/All in a Weekend Montreal'],
|
||||
'tags': 'count:8',
|
||||
'location': 'Quebec',
|
||||
'series': 'All in a Weekend Montreal',
|
||||
'season': 'Season 2015',
|
||||
'season_number': 2015,
|
||||
'media_type': 'Excerpt',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.cbc.ca/player/play/2164402062',
|
||||
|
@ -195,25 +202,37 @@ class CBCPlayerIE(InfoExtractor):
|
|||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg',
|
||||
'chapters': [],
|
||||
'duration': 186.867,
|
||||
'series': 'CBC News: Windsor at 6:00',
|
||||
'categories': ['News/Canada/Windsor'],
|
||||
'location': 'Windsor',
|
||||
'tags': ['cancer'],
|
||||
'creator': 'Allison Johnson',
|
||||
'media_type': 'Excerpt',
|
||||
},
|
||||
}, {
|
||||
# Has subtitles
|
||||
# These broadcasts expire after ~1 month; a new test URL can be found here:
|
||||
# https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast
|
||||
'url': 'http://www.cbc.ca/player/play/2249992771553',
|
||||
'md5': '2f2fb675dd4f0f8a5bb7588d1b13bacd',
|
||||
'url': 'http://www.cbc.ca/player/play/2284799043667',
|
||||
'md5': '9b49f0839e88b6ec0b01d840cf3d42b5',
|
||||
'info_dict': {
|
||||
'id': '2249992771553',
|
||||
'id': '2284799043667',
|
||||
'ext': 'mp4',
|
||||
'title': 'The National | Women’s soccer pay, Florida seawater, Swift quake',
|
||||
'description': 'md5:adba28011a56cfa47a080ff198dad27a',
|
||||
'timestamp': 1690596000,
|
||||
'duration': 2716.333,
|
||||
'title': 'The National | Hockey coach charged, Green grants, Safer drugs',
|
||||
'description': 'md5:84ef46321c94bcf7d0159bb565d26bfa',
|
||||
'timestamp': 1700272800,
|
||||
'duration': 2718.833,
|
||||
'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
|
||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/481/326/thumbnail.jpeg',
|
||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/907/171/thumbnail.jpeg',
|
||||
'uploader': 'CBCC-NEW',
|
||||
'chapters': 'count:5',
|
||||
'upload_date': '20230729',
|
||||
'upload_date': '20231118',
|
||||
'categories': 'count:4',
|
||||
'series': 'The National - Full Show',
|
||||
'tags': 'count:1',
|
||||
'creator': 'News',
|
||||
'location': 'Canada',
|
||||
'media_type': 'Full Program',
|
||||
},
|
||||
}]
|
||||
|
||||
|
|
|
@ -46,6 +46,10 @@ class CWTVIE(InfoExtractor):
|
|||
'timestamp': 1444107300,
|
||||
'age_limit': 14,
|
||||
'uploader': 'CWTV',
|
||||
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||
'chapters': 'count:4',
|
||||
'episode': 'Episode 20',
|
||||
'season': 'Season 11',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
|
|
|
@ -73,6 +73,7 @@ class MediasetIE(ThePlatformBaseIE):
|
|||
'season_number': 5,
|
||||
'episode_number': 5,
|
||||
'chapters': [{'start_time': 0.0, 'end_time': 3409.08}, {'start_time': 3409.08, 'end_time': 6565.008}],
|
||||
'categories': ['Informazione'],
|
||||
},
|
||||
}, {
|
||||
# DRM
|
||||
|
@ -149,6 +150,7 @@ class MediasetIE(ThePlatformBaseIE):
|
|||
'season_number': 12,
|
||||
'episode': 'Episode 8',
|
||||
'episode_number': 8,
|
||||
'categories': ['Intrattenimento'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
|
|
|
@ -53,6 +53,8 @@ class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
|||
'chapters': 'count:1',
|
||||
'tags': 'count:4',
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'categories': ['Series/The Tonight Show Starring Jimmy Fallon'],
|
||||
'media_type': 'Full Episode',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
|
@ -131,6 +133,8 @@ class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
|||
'tags': 'count:10',
|
||||
'age_limit': 0,
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'categories': ['Series/Quantum Leap 2022'],
|
||||
'media_type': 'Highlight',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
|
|
|
@ -114,6 +114,8 @@ class ScrippsNetworksIE(InfoExtractor):
|
|||
'timestamp': 1475678834,
|
||||
'upload_date': '20161005',
|
||||
'uploader': 'SCNI-SCND',
|
||||
'tags': 'count:10',
|
||||
'creator': 'Cooking Channel',
|
||||
'duration': 29.995,
|
||||
'chapters': [{'start_time': 0.0, 'end_time': 29.995, 'title': '<Untitled Chapter 1>'}],
|
||||
'thumbnail': 'https://images.dds.discovery.com/up/tp/Scripps_-_Food_Category_Prod/122/987/0260338_630x355.jpg',
|
||||
|
|
|
@ -104,6 +104,10 @@ class ThePlatformBaseIE(OnceIE):
|
|||
_add_chapter(chapter.get('startTime'), chapter.get('endTime'))
|
||||
_add_chapter(tp_chapters[-1].get('startTime'), tp_chapters[-1].get('endTime') or duration)
|
||||
|
||||
def extract_site_specific_field(field):
    """Look up a site-specific metadata field in `info` by its key suffix.

    A number of sites have custom-prefixed keys, e.g. 'cbc$seasonNumber',
    so an entry qualifies when its key ends with '$' followed by `field`
    and its value is truthy.
    """
    suffix = f'${field}'

    def has_value_and_suffix(key, value):
        # Same test as the inline predicate: truthy value AND matching key suffix
        return value and key.endswith(suffix)

    # NOTE(review): get_all=False presumably limits the result to the first
    # match rather than a list of all matches -- confirm against traverse_obj
    return traverse_obj(info, has_value_and_suffix, get_all=False)
|
||||
|
||||
return {
|
||||
'title': info['title'],
|
||||
'subtitles': subtitles,
|
||||
|
@ -113,6 +117,14 @@ class ThePlatformBaseIE(OnceIE):
|
|||
'timestamp': int_or_none(info.get('pubDate'), 1000) or None,
|
||||
'uploader': info.get('billingCode'),
|
||||
'chapters': chapters,
|
||||
'creator': traverse_obj(info, ('author', {str})) or None,
|
||||
'categories': traverse_obj(info, (
|
||||
'categories', lambda _, v: v.get('label') in ('category', None), 'name', {str})) or None,
|
||||
'tags': traverse_obj(info, ('keywords', {lambda x: re.split(r'[;,]\s?', x) if x else None})),
|
||||
'location': extract_site_specific_field('region'),
|
||||
'series': extract_site_specific_field('show'),
|
||||
'season_number': int_or_none(extract_site_specific_field('seasonNumber')),
|
||||
'media_type': extract_site_specific_field('programmingType') or extract_site_specific_field('type'),
|
||||
}
|
||||
|
||||
def _extract_theplatform_metadata(self, path, video_id):
|
||||
|
|
Loading…
Reference in a new issue