[mtv] Fixup incorrectly encoded XML documents
This commit is contained in:
parent
a30a60d8eb
commit
e2b38da931
2 changed files with 11 additions and 3 deletions
|
@@ -230,9 +230,12 @@ class InfoExtractor(object):
|
||||||
return content
|
return content
|
||||||
|
|
||||||
def _download_xml(self, url_or_request, video_id,
|
def _download_xml(self, url_or_request, video_id,
|
||||||
note=u'Downloading XML', errnote=u'Unable to download XML'):
|
note=u'Downloading XML', errnote=u'Unable to download XML',
|
||||||
|
transform_source=None):
|
||||||
"""Return the xml as an xml.etree.ElementTree.Element"""
|
"""Return the xml as an xml.etree.ElementTree.Element"""
|
||||||
xml_string = self._download_webpage(url_or_request, video_id, note, errnote)
|
xml_string = self._download_webpage(url_or_request, video_id, note, errnote)
|
||||||
|
if transform_source:
|
||||||
|
xml_string = transform_source(xml_string)
|
||||||
return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
|
return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
|
||||||
|
|
||||||
def to_screen(self, msg):
|
def to_screen(self, msg):
|
||||||
|
|
|
@@ -82,8 +82,13 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||||
def _get_videos_info(self, uri):
|
def _get_videos_info(self, uri):
|
||||||
video_id = self._id_from_uri(uri)
|
video_id = self._id_from_uri(uri)
|
||||||
data = compat_urllib_parse.urlencode({'uri': uri})
|
data = compat_urllib_parse.urlencode({'uri': uri})
|
||||||
idoc = self._download_xml(self._FEED_URL +'?' + data, video_id,
|
|
||||||
u'Downloading info')
|
def fix_ampersand(s):
|
||||||
|
""" Fix unencoded ampersand in XML """
|
||||||
|
return s.replace(u'& ', '&amp; ')
|
||||||
|
idoc = self._download_xml(
|
||||||
|
self._FEED_URL + '?' + data, video_id,
|
||||||
|
u'Downloading info', transform_source=fix_ampersand)
|
||||||
return [self._get_video_info(item) for item in idoc.findall('.//item')]
|
return [self._get_video_info(item) for item in idoc.findall('.//item')]
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue