[blogger] Add extractor (#1629)

Authored by: pabs3
This commit is contained in:
Paul Wise 2021-11-19 06:15:41 +08:00 committed by GitHub
parent cfcaf64a4b
commit 764f5de2f4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 72 additions and 0 deletions

View file

@ -0,0 +1,54 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from ..utils import (
mimetype2ext,
parse_duration,
parse_qs,
str_or_none,
traverse_obj,
)
from .common import InfoExtractor
class BloggerIE(InfoExtractor):
IE_NAME = 'blogger.com'
_VALID_URL = r'https?://(?:www\.)?blogger\.com/video\.g\?token=(?P<id>.+)'
_VALID_EMBED = r'''<iframe[^>]+src=["']((?:https?:)?//(?:www\.)?blogger\.com/video\.g\?token=[^"']+)["']'''
_TESTS = [{
'url': 'https://www.blogger.com/video.g?token=AD6v5dzEe9hfcARr5Hlq1WTkYy6t-fXH3BBahVhGvVHe5szdEUBEloSEDSTA8-b111089KbfWuBvTN7fnbxMtymsHhXAXwVvyzHH4Qch2cfLQdGxKQrrEuFpC1amSl_9GuLWODjPgw',
'md5': 'f1bc19b6ea1b0fd1d81e84ca9ec467ac',
'info_dict': {
'id': 'BLOGGER-video-3c740e3a49197e16-796',
'title': 'BLOGGER-video-3c740e3a49197e16-796',
'ext': 'mp4',
'thumbnail': r're:^https?://.*',
'duration': 76.068,
}
}]
@staticmethod
def _extract_urls(webpage):
return re.findall(BloggerIE._VALID_EMBED, webpage)
def _real_extract(self, url):
token_id = self._match_id(url)
webpage = self._download_webpage(url, token_id)
data_json = self._search_regex(r'var\s+VIDEO_CONFIG\s*=\s*(\{.*)', webpage, 'JSON data')
data = self._parse_json(data_json.encode('utf-8').decode('unicode_escape'), token_id)
streams = data['streams']
formats = [{
'ext': mimetype2ext(traverse_obj(parse_qs(stream['play_url']), ('mime', 0))),
'url': stream['play_url'],
'format_id': str_or_none(stream.get('format_id')),
} for stream in streams]
return {
'id': data.get('iframe_id', token_id),
'title': data.get('iframe_id', token_id),
'formats': formats,
'thumbnail': data.get('thumbnail'),
'duration': parse_duration(traverse_obj(parse_qs(streams[0]['play_url']), ('dur', 0))),
}

View file

@ -166,6 +166,7 @@ from .bleacherreport import (
BleacherReportIE, BleacherReportIE,
BleacherReportCMSIE, BleacherReportCMSIE,
) )
from .blogger import BloggerIE
from .bloomberg import BloombergIE from .bloomberg import BloombergIE
from .bokecc import BokeCCIE from .bokecc import BokeCCIE
from .bongacams import BongaCamsIE from .bongacams import BongaCamsIE

View file

@ -136,6 +136,7 @@ from .medialaan import MedialaanIE
from .simplecast import SimplecastIE from .simplecast import SimplecastIE
from .wimtv import WimTVIE from .wimtv import WimTVIE
from .tvp import TVPEmbedIE from .tvp import TVPEmbedIE
from .blogger import BloggerIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -2173,6 +2174,17 @@ class GenericIE(InfoExtractor):
'skip_download': True, 'skip_download': True,
}, },
}, },
{
# blogger embed
'url': 'https://blog.tomeuvizoso.net/2019/01/a-panfrost-milestone.html',
'md5': 'f1bc19b6ea1b0fd1d81e84ca9ec467ac',
'info_dict': {
'id': 'BLOGGER-video-3c740e3a49197e16-796',
'ext': 'mp4',
'title': 'Blogger',
'thumbnail': r're:^https?://.*',
},
},
# { # {
# # TODO: find another test # # TODO: find another test
# # http://schema.org/VideoObject # # http://schema.org/VideoObject
@ -3216,6 +3228,11 @@ class GenericIE(InfoExtractor):
if onionstudios_url: if onionstudios_url:
return self.url_result(onionstudios_url) return self.url_result(onionstudios_url)
# Look for Blogger embeds
blogger_urls = BloggerIE._extract_urls(webpage)
if blogger_urls:
return self.playlist_from_matches(blogger_urls, video_id, video_title, ie=BloggerIE.ie_key())
# Look for ViewLift embeds # Look for ViewLift embeds
viewlift_url = ViewLiftEmbedIE._extract_url(webpage) viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
if viewlift_url: if viewlift_url: