[theplatform] Correctly extract videos that don't use f4m or rtmp (reported in #3176)

2014-09-21 16:08:38 +02:00 · 2014-09-21 16:08:38 +02:00 · e35cb78c40
commit e35cb78c40
parent 224ce0d872
2 changed files with 31 additions and 19 deletions
--- a/youtube_dl/extractor/sbs.py
+++ b/youtube_dl/extractor/sbs.py
@@ -21,7 +21,7 @@ class SBSIE(InfoExtractor):
        'md5': '3150cf278965eeabb5b4cea1c963fe0a',
        'info_dict': {
            'id': '320403011771',
-            'ext': 'flv',
+            'ext': 'mp4',
            'title': 'Dingo Conservation',
            'description': 'Dingoes are on the brink of extinction; most of the animals we think are dingoes are in fact crossbred with wild dogs. This family run a dingo conservation park to prevent their extinction',
            'thumbnail': 're:http://.*\.jpg',
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -5,6 +5,7 @@ import json
 from .common import InfoExtractor
 from ..utils import (
+    compat_str,
    ExtractorError,
    xpath_with_ns,
 )
@@ -55,7 +56,7 @@ class ThePlatformIE(InfoExtractor):
        body = meta.find(_x('smil:body'))
        f4m_node = body.find(_x('smil:seq//smil:video'))
-        if f4m_node is not None:
+        if f4m_node is not None and '.f4m' in f4m_node.attrib['src']:
            f4m_url = f4m_node.attrib['src']
            if 'manifest.f4m?' not in f4m_url:
                f4m_url += '?'
@@ -64,24 +65,35 @@ class ThePlatformIE(InfoExtractor):
            f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3'
            formats = self._extract_f4m_formats(f4m_url, video_id)
        else:
-            base_url = head.find(_x('smil:meta')).attrib['base']
-            switch = body.find(_x('smil:switch'))
            formats = []
-            for f in switch.findall(_x('smil:video')):
-                attr = f.attrib
-                width = int(attr['width'])
-                height = int(attr['height'])
-                vbr = int(attr['system-bitrate']) // 1000
-                format_id = '%dx%d_%dk' % (width, height, vbr)
-                formats.append({
-                    'format_id': format_id,
-                    'url': base_url,
-                    'play_path': 'mp4:' + attr['src'],
-                    'ext': 'flv',
-                    'width': width,
-                    'height': height,
-                    'vbr': vbr,
-                })
+            switch = body.find(_x('smil:switch'))
+            if switch is not None:
+                base_url = head.find(_x('smil:meta')).attrib['base']
+                for f in switch.findall(_x('smil:video')):
+                    attr = f.attrib
+                    width = int(attr['width'])
+                    height = int(attr['height'])
+                    vbr = int(attr['system-bitrate']) // 1000
+                    format_id = '%dx%d_%dk' % (width, height, vbr)
+                    formats.append({
+                        'format_id': format_id,
+                        'url': base_url,
+                        'play_path': 'mp4:' + attr['src'],
+                        'ext': 'flv',
+                        'width': width,
+                        'height': height,
+                        'vbr': vbr,
+                    })
+            else:
+                switch = body.find(_x('smil:seq//smil:switch'))
+                for f in switch.findall(_x('smil:video')):
+                    attr = f.attrib
+                    vbr = int(attr['system-bitrate']) // 1000
+                    formats.append({
+                        'format_id': compat_str(vbr),
+                        'url': attr['src'],
+                        'vbr': vbr,
+                    })
            self._sort_formats(formats)
        return {