[Mediaite] Add Extractor (#973)

Closes #969 
Authored by: Ashish0804
This commit is contained in:
Ashish Gupta 2021-09-16 23:42:45 +05:30 committed by GitHub
parent 23dd2d9a32
commit 2fac2e9136
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 86 additions and 6 deletions

View file

@ -729,6 +729,7 @@ from .massengeschmacktv import MassengeschmackTVIE
from .matchtv import MatchTVIE
from .mdr import MDRIE
from .medaltv import MedalTVIE
from .mediaite import MediaiteIE
from .mediaklikk import MediaKlikkIE
from .mediaset import MediasetIE
from .mediasite import (

View file

@ -1215,14 +1215,13 @@ class GenericIE(InfoExtractor):
},
{
# JWPlatform iframe
'url': 'https://www.mediaite.com/tv/dem-senator-claims-gary-cohn-faked-a-bad-connection-during-trump-call-to-get-him-off-the-phone/',
'md5': 'ca00a040364b5b439230e7ebfd02c4e9',
'url': 'https://www.covermagazine.co.uk/feature/2465255/business-protection-involved',
'info_dict': {
'id': 'O0c5JcKT',
'id': 'AG26UQXM',
'ext': 'mp4',
'upload_date': '20171122',
'timestamp': 1511366290,
'title': 'Dem Senator Claims Gary Cohn Faked a Bad Connection During Trump Call to Get Him Off the Phone',
'upload_date': '20160719',
'timestamp': 468923808,
'title': '2016_05_18 Cover L&G Business Protection V1 FINAL.mp4',
},
'add_ie': [JWPlatformIE.ie_key()],
},

View file

@ -0,0 +1,80 @@
from __future__ import unicode_literals
from .common import InfoExtractor
class MediaiteIE(InfoExtractor):
    """Extractor for JW Player media embedded in mediaite.com article pages.

    Handles article URLs under the ``tv``, ``sports``, ``politics``,
    ``podcasts`` and ``opinion`` sections. The JW Player media id is read
    from the page's ``data-video-id`` attribute and the metadata is fetched
    from the ``cdn.jwplayer.com`` v2 media endpoint.
    """

    # The host-name dot is escaped so that only "mediaite.com" matches (an
    # unescaped "." would accept any character in that position). The article
    # slug is captured as ``id`` so it can serve as a display id while the
    # page is being downloaded.
    _VALID_URL = r'https?://(?:www\.)?mediaite\.com/(?:tv|sports|politics|podcasts|opinion)/(?P<id>[\w-]+)/'
    _TESTS = [{
        'url': 'https://www.mediaite.com/sports/bill-burr-roasts-nfl-for-promoting-black-lives-matter-while-scheduling-more-games-after-all-the-sht-they-know-about-cte/',
        'info_dict': {
            'id': 'vPHKITzy',
            'ext': 'm4a',
            'title': 'Bill Burr On NFL And Black Lives Matter',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'thumbnail': 'https://cdn.jwplayer.com/v2/media/vPHKITzy/poster.jpg?width=720',
            'duration': 55,
            'timestamp': 1631630185,
            'upload_date': '20210914',
        },
        'params': {'skip_download': True}
    }, {
        'url': 'https://www.mediaite.com/tv/joe-scarborough-goes-off-on-tax-breaks-for-super-wealthy-largest-income-redistribution-scam-in-american-history/',
        'info_dict': {
            'id': 'eeFcK4Xm',
            'ext': 'mp4',
            'title': 'Morning Joe-6_16_52 am - 6_21_10 am-2021-09-14.mp4',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'thumbnail': 'https://cdn.jwplayer.com/v2/media/eeFcK4Xm/poster.jpg?width=720',
            'duration': 258,
            'timestamp': 1631618057,
            'upload_date': '20210914',
        },
        'params': {'skip_download': True}
    }, {
        'url': 'https://www.mediaite.com/politics/watch-rudy-giuliani-impersonates-queen-elizabeth-calls-mark-milley-an-asshle-in-bizarre-9-11-speech/',
        'info_dict': {
            'id': 'EiyiXKcr',
            'ext': 'mp4',
            'title': 'Giuliani 1',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'thumbnail': 'https://cdn.jwplayer.com/v2/media/EiyiXKcr/poster.jpg?width=720',
            'duration': 39,
            'timestamp': 1631536476,
            'upload_date': '20210913',
        },
        'params': {'skip_download': True}
    }, {
        'url': 'https://www.mediaite.com/podcasts/clarissa-ward-says-she-decided-to-become-a-journalist-on-9-11/',
        'info_dict': {
            'id': 'TxavoRTx',
            'ext': 'mp4',
            'title': 'clarissa-ward-3.mp4',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'thumbnail': 'https://cdn.jwplayer.com/v2/media/TxavoRTx/poster.jpg?width=720',
            'duration': 83,
            'timestamp': 1631311188,
            'upload_date': '20210910',
        },
        'params': {'skip_download': True}
    }, {
        'url': 'https://www.mediaite.com/opinion/mainstream-media-ignores-rose-mcgowans-bombshell-allegation-that-newsoms-wife-tried-to-silence-her-on-weinstein/',
        'info_dict': {
            'id': 'sEIWvKR7',
            'ext': 'mp4',
            'title': 'KTTV_09-13-2021_05.34.21',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'thumbnail': 'https://cdn.jwplayer.com/v2/media/sEIWvKR7/poster.jpg?width=720',
            'duration': 52,
            'timestamp': 1631553328,
            'upload_date': '20210913',
        },
        'params': {'skip_download': True}
    }]

    def _real_extract(self, url):
        """Resolve a Mediaite article URL to its embedded JW Player media.

        Downloads the article page, pulls the media id out of the
        ``data-video-id`` attribute, requests that id's JSON from
        ``https://cdn.jwplayer.com/v2/media/<id>`` and hands the decoded
        JSON to ``_parse_jwplayer_data``, whose result is returned as-is.

        ``_search_regex`` is called without a default, so a page lacking
        the attribute is expected to surface as an extraction error
        (NOTE(review): confirm against InfoExtractor._search_regex).
        """
        # Use the article slug rather than None so progress/error messages
        # for the page download identify which article is being processed.
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # Named ``video_id`` rather than ``id`` to avoid shadowing the builtin.
        video_id = self._search_regex(
            r'data-video-id\s?=\s?\"([^\"]+)\"', webpage, 'id')
        data_json = self._download_json(
            f'https://cdn.jwplayer.com/v2/media/{video_id}', video_id)
        return self._parse_jwplayer_data(data_json)