[heise] Fix description, thumbnail and format ID
This commit is contained in:
parent
a32f253112
commit
711ede6e1b
2 changed files with 21 additions and 14 deletions
|
@ -404,7 +404,7 @@ class InfoExtractor(object):
|
||||||
video_info['title'] = playlist_title
|
video_info['title'] = playlist_title
|
||||||
return video_info
|
return video_info
|
||||||
|
|
||||||
def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0):
|
def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
|
||||||
"""
|
"""
|
||||||
Perform a regex search on the given string, using a single or a list of
|
Perform a regex search on the given string, using a single or a list of
|
||||||
patterns returning the first matching group.
|
patterns returning the first matching group.
|
||||||
|
@ -425,8 +425,11 @@ class InfoExtractor(object):
|
||||||
_name = name
|
_name = name
|
||||||
|
|
||||||
if mobj:
|
if mobj:
|
||||||
|
if group is None:
|
||||||
# return the first matching group
|
# return the first matching group
|
||||||
return next(g for g in mobj.groups() if g is not None)
|
return next(g for g in mobj.groups() if g is not None)
|
||||||
|
else:
|
||||||
|
return mobj.group(group)
|
||||||
elif default is not _NO_DEFAULT:
|
elif default is not _NO_DEFAULT:
|
||||||
return default
|
return default
|
||||||
elif fatal:
|
elif fatal:
|
||||||
|
@ -436,11 +439,11 @@ class InfoExtractor(object):
|
||||||
'please report this issue on http://yt-dl.org/bug' % _name)
|
'please report this issue on http://yt-dl.org/bug' % _name)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0):
|
def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
|
||||||
"""
|
"""
|
||||||
Like _search_regex, but strips HTML tags and unescapes entities.
|
Like _search_regex, but strips HTML tags and unescapes entities.
|
||||||
"""
|
"""
|
||||||
res = self._search_regex(pattern, string, name, default, fatal, flags)
|
res = self._search_regex(pattern, string, name, default, fatal, flags, group)
|
||||||
if res:
|
if res:
|
||||||
return clean_html(res).strip()
|
return clean_html(res).strip()
|
||||||
else:
|
else:
|
||||||
|
@ -534,9 +537,9 @@ class InfoExtractor(object):
|
||||||
display_name = name
|
display_name = name
|
||||||
return self._html_search_regex(
|
return self._html_search_regex(
|
||||||
r'''(?ix)<meta
|
r'''(?ix)<meta
|
||||||
(?=[^>]+(?:itemprop|name|property)=["\']?%s["\']?)
|
(?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
|
||||||
[^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
|
[^>]+content=(["\'])(?P<content>.*?)\1''' % re.escape(name),
|
||||||
html, display_name, fatal=fatal, **kwargs)
|
html, display_name, fatal=fatal, group='content', **kwargs)
|
||||||
|
|
||||||
def _dc_search_uploader(self, html):
|
def _dc_search_uploader(self, html):
|
||||||
return self._html_search_meta('dc.creator', html, 'uploader')
|
return self._html_search_meta('dc.creator', html, 'uploader')
|
||||||
|
|
|
@ -3,7 +3,7 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
get_meta_content,
|
determine_ext,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
)
|
)
|
||||||
|
@ -25,11 +25,11 @@ class HeiseIE(InfoExtractor):
|
||||||
'title': (
|
'title': (
|
||||||
"Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone"
|
"Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone"
|
||||||
),
|
),
|
||||||
'format_id': 'mp4_720',
|
'format_id': 'mp4_720p',
|
||||||
'timestamp': 1411812600,
|
'timestamp': 1411812600,
|
||||||
'upload_date': '20140927',
|
'upload_date': '20140927',
|
||||||
'description': 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.',
|
'description': 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.',
|
||||||
'thumbnail': 're:https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpe?g$',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -49,11 +49,12 @@ class HeiseIE(InfoExtractor):
|
||||||
info = {
|
info = {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'timestamp': parse_iso8601(get_meta_content('date', webpage)),
|
'timestamp': parse_iso8601(
|
||||||
|
self._html_search_meta('date', webpage)),
|
||||||
'description': self._og_search_description(webpage),
|
'description': self._og_search_description(webpage),
|
||||||
}
|
}
|
||||||
|
|
||||||
title = get_meta_content('fulltitle', webpage)
|
title = self._html_search_meta('fulltitle', webpage)
|
||||||
if title:
|
if title:
|
||||||
info['title'] = title
|
info['title'] = title
|
||||||
else:
|
else:
|
||||||
|
@ -64,9 +65,12 @@ class HeiseIE(InfoExtractor):
|
||||||
label = source_node.attrib['label']
|
label = source_node.attrib['label']
|
||||||
height = int_or_none(self._search_regex(
|
height = int_or_none(self._search_regex(
|
||||||
r'^(.*?_)?([0-9]+)p$', label, 'height', default=None))
|
r'^(.*?_)?([0-9]+)p$', label, 'height', default=None))
|
||||||
|
video_url = source_node.attrib['file']
|
||||||
|
ext = determine_ext(video_url, '')
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': source_node.attrib['file'],
|
'url': video_url,
|
||||||
'format_note': label,
|
'format_note': label,
|
||||||
|
'format_id': '%s_%s' % (ext, label),
|
||||||
'height': height,
|
'height': height,
|
||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
Loading…
Reference in a new issue