Merge remote-tracking branch 'upstream/master'
This commit is contained in:
commit
99859d436c
10 changed files with 127 additions and 18 deletions
|
@ -26,9 +26,9 @@ tests = [
|
||||||
# 85
|
# 85
|
||||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
|
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
|
||||||
".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"),
|
".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"),
|
||||||
# 84
|
# 84 - vflh9ybst 2013/08/23 (sporadic)
|
||||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
|
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
|
||||||
"<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ09876543q1mnbvcxzasdfghjklpoiuew2"),
|
"yuioplkjhgfdsazxcvbnm1234567890QWERrYUIOPLKqHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<"),
|
||||||
# 83
|
# 83
|
||||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
|
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
|
||||||
".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),
|
".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),
|
||||||
|
|
|
@ -29,6 +29,7 @@ from .gametrailers import GametrailersIE
|
||||||
from .generic import GenericIE
|
from .generic import GenericIE
|
||||||
from .googleplus import GooglePlusIE
|
from .googleplus import GooglePlusIE
|
||||||
from .googlesearch import GoogleSearchIE
|
from .googlesearch import GoogleSearchIE
|
||||||
|
from .hark import HarkIE
|
||||||
from .hotnewhiphop import HotNewHipHopIE
|
from .hotnewhiphop import HotNewHipHopIE
|
||||||
from .howcast import HowcastIE
|
from .howcast import HowcastIE
|
||||||
from .hypem import HypemIE
|
from .hypem import HypemIE
|
||||||
|
@ -57,6 +58,7 @@ from .pornotube import PornotubeIE
|
||||||
from .rbmaradio import RBMARadioIE
|
from .rbmaradio import RBMARadioIE
|
||||||
from .redtube import RedTubeIE
|
from .redtube import RedTubeIE
|
||||||
from .ringtv import RingTVIE
|
from .ringtv import RingTVIE
|
||||||
|
from .ro220 import Ro220IE
|
||||||
from .roxwel import RoxwelIE
|
from .roxwel import RoxwelIE
|
||||||
from .rtlnow import RTLnowIE
|
from .rtlnow import RTLnowIE
|
||||||
from .sina import SinaIE
|
from .sina import SinaIE
|
||||||
|
@ -116,12 +118,14 @@ _ALL_CLASSES = [
|
||||||
]
|
]
|
||||||
_ALL_CLASSES.append(GenericIE)
|
_ALL_CLASSES.append(GenericIE)
|
||||||
|
|
||||||
|
|
||||||
def gen_extractors():
    """Instantiate every class listed in _ALL_CLASSES, in list order.

    Order is significant: the first extractor that matches a URL is the
    one that handles it, so the returned list keeps the order of
    _ALL_CLASSES.
    """
    extractors = []
    for extractor_class in _ALL_CLASSES:
        extractors.append(extractor_class())
    return extractors
|
||||||
|
|
||||||
|
|
||||||
def get_info_extractor(ie_name):
    """Look up an info extractor class by its name without the 'IE' suffix.

    The class is fetched from this module's globals, so a name with no
    matching '<ie_name>IE' global raises KeyError.
    """
    class_name = ie_name + 'IE'
    return globals()[class_name]
|
||||||
|
|
|
@ -12,8 +12,8 @@ class C56IE(InfoExtractor):
|
||||||
|
|
||||||
_TEST ={
|
_TEST ={
|
||||||
u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html',
|
u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html',
|
||||||
u'file': u'93440716.mp4',
|
u'file': u'93440716.flv',
|
||||||
u'md5': u'9dc07b5c8e978112a6441f9e75d2b59e',
|
u'md5': u'e59995ac63d0457783ea05f93f12a866',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u'title': u'网事知多少 第32期:车怒',
|
u'title': u'网事知多少 第32期:车怒',
|
||||||
},
|
},
|
||||||
|
|
|
@ -21,7 +21,7 @@ class DailymotionIE(InfoExtractor):
|
||||||
u'file': u'x33vw9.mp4',
|
u'file': u'x33vw9.mp4',
|
||||||
u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
|
u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u"uploader": u"Alex and Van .",
|
u"uploader": u"Amphora Alex and Van .",
|
||||||
u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
|
u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,12 +7,14 @@ from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_urllib_error,
|
compat_urllib_error,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
from .brightcove import BrightcoveIE
|
from .brightcove import BrightcoveIE
|
||||||
|
|
||||||
|
|
||||||
class GenericIE(InfoExtractor):
|
class GenericIE(InfoExtractor):
|
||||||
IE_DESC = u'Generic downloader that works on some sites'
|
IE_DESC = u'Generic downloader that works on some sites'
|
||||||
_VALID_URL = r'.*'
|
_VALID_URL = r'.*'
|
||||||
|
@ -124,7 +126,7 @@ class GenericIE(InfoExtractor):
|
||||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
# Look for BrigthCove:
|
# Look for BrightCove:
|
||||||
m_brightcove = re.search(r'<object.+?class=([\'"]).*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)
|
m_brightcove = re.search(r'<object.+?class=([\'"]).*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)
|
||||||
if m_brightcove is not None:
|
if m_brightcove is not None:
|
||||||
self.to_screen(u'Brightcove video detected.')
|
self.to_screen(u'Brightcove video detected.')
|
||||||
|
@ -161,6 +163,10 @@ class GenericIE(InfoExtractor):
|
||||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||||
|
|
||||||
video_url = compat_urllib_parse.unquote(mobj.group(1))
|
video_url = compat_urllib_parse.unquote(mobj.group(1))
|
||||||
|
if video_url.startswith('//'):
|
||||||
|
video_url = compat_urllib_parse_urlparse(url).scheme + ':' + video_url
|
||||||
|
if '://' not in video_url:
|
||||||
|
video_url = url + ('' if url.endswith('/') else '/') + video_url
|
||||||
video_id = os.path.basename(video_url)
|
video_id = os.path.basename(video_url)
|
||||||
|
|
||||||
# here's a fun little line of code for you:
|
# here's a fun little line of code for you:
|
||||||
|
|
35
youtube_dl/extractor/hark.py
Normal file
35
youtube_dl/extractor/hark.py
Normal file
|
@ -0,0 +1,35 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import determine_ext
|
||||||
|
|
||||||
|
class HarkIE(InfoExtractor):
    """Information extractor for sound clips on www.hark.com."""

    # Group 1 is the clip id: the text between "/clips/" and the first "-"
    # of the URL slug.
    _VALID_URL = r'https?://www\.hark\.com/clips/(.+?)-.+'
    _TEST = {
        u'url': u'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013',
        u'file': u'mmbzyhkgny.mp3',
        u'md5': u'6783a58491b47b92c7c1af5a77d4cbee',
        u'info_dict': {
            u"title": u"Obama: 'Beyond The Afghan Theater, We Only Target Al Qaeda' On May 23, 2013 ",
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the clip addressed by `url`.

        Returns a dict with the keys 'id', 'url', 'title' and 'ext'.
        NOTE(review): assumes `url` already matches _VALID_URL (presumably
        checked by the caller); otherwise `mobj` is None and the group
        lookup raises AttributeError.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)

        # The media location is read from the clip's embed page rather than
        # from the page the user passed in.
        embed_url = "http://www.hark.com/clips/%s/homepage_embed" % (video_id)
        webpage = self._download_webpage(embed_url, video_id)

        # The dot is escaped so that only a literal ".mp3" extension ends the
        # match; the extension is captured as part of the URL.
        final_url = self._search_regex(r'src="(.+?\.mp3)"',
                                       webpage, 'video url')

        # Remove the site's boilerplate from the page title. The second
        # marker has no leading space, so a title that used it keeps its
        # trailing space (as in the _TEST title above).
        title = self._html_search_regex(r'<title>(.+?)</title>',
                                        webpage, 'video title')
        for boilerplate in (' Sound Clip and Quote - Hark',
                            'Sound Clip , Quote, MP3, and Ringtone - Hark'):
            title = title.replace(boilerplate, '')

        return {
            'id': video_id,
            'url': final_url,
            'title': title,
            'ext': determine_ext(final_url),
        }
|
42
youtube_dl/extractor/ro220.py
Normal file
42
youtube_dl/extractor/ro220.py
Normal file
|
@ -0,0 +1,42 @@
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
compat_parse_qs,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class Ro220IE(InfoExtractor):
    """Information extractor for videos on 220.ro."""

    IE_NAME = '220.ro'
    _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<video_id>[^/]+)'
    _TEST = {
        u"url": u"http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/",
        u'file': u'LYV6doKo7f.mp4',
        u'md5': u'03af18b73a07b4088753930db7a34add',
        u'info_dict': {
            u"title": u"Luati-le Banii sez 4 ep 1",
            u"description": u"Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video page at `url`.

        All metadata is read from the query-string style value of the
        page's "flashVars" <param> element.
        """
        video_id = re.match(self._VALID_URL, url).group('video_id')
        page = self._download_webpage(url, video_id)

        raw_flash_vars = self._search_regex(
            r'<param name="flashVars" value="([^"]+)"',
            page, u'flashVars')
        parsed_vars = compat_parse_qs(raw_flash_vars)

        def first(key):
            # Each parsed entry is indexed with [0] to take its first value.
            return parsed_vars[key][0]

        return {
            '_type': 'video',
            'id': video_id,
            'ext': 'mp4',
            'url': first('videoURL'),
            'title': first('title'),
            'description': clean_html(first('desc')),
            'thumbnail': first('preview'),
        }
|
|
@ -8,8 +8,8 @@ from ..utils import (
|
||||||
)
|
)
|
||||||
|
|
||||||
class RTLnowIE(InfoExtractor):
|
class RTLnowIE(InfoExtractor):
|
||||||
"""Information Extractor for RTLnow, RTL2now and VOXnow"""
|
"""Information Extractor for RTL NOW, RTL2 NOW, SUPER RTL NOW and VOX NOW"""
|
||||||
_VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl(?:(?P<is_rtl2>2)|-)now\.rtl(?(is_rtl2)2|)\.de/|(?:www\.)?voxnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
|
_VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl-now\.rtl\.de/|rtl2now\.rtl2\.de/|(?:www\.)?voxnow\.de/|(?:www\.)?superrtlnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
|
u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
|
||||||
u'file': u'90419.flv',
|
u'file': u'90419.flv',
|
||||||
|
@ -48,6 +48,19 @@ class RTLnowIE(InfoExtractor):
|
||||||
u'params': {
|
u'params': {
|
||||||
u'skip_download': True,
|
u'skip_download': True,
|
||||||
},
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
u'url': u'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1',
|
||||||
|
u'file': u'99205.flv',
|
||||||
|
u'info_dict': {
|
||||||
|
u'upload_date': u'20080928',
|
||||||
|
u'title': u'Medicopter 117 - Angst!',
|
||||||
|
u'description': u'Angst!',
|
||||||
|
u'thumbnail': u'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg'
|
||||||
|
},
|
||||||
|
u'params': {
|
||||||
|
u'skip_download': True,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self,url):
|
def _real_extract(self,url):
|
||||||
|
|
|
@ -427,7 +427,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
elif len(s) == 85:
|
elif len(s) == 85:
|
||||||
return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
|
return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
|
||||||
elif len(s) == 84:
|
elif len(s) == 84:
|
||||||
return s[83:27:-1] + s[0] + s[26:5:-1] + s[2:0:-1] + s[27]
|
return s[5:40] + s[3] + s[41:48] + s[0] + s[49:84]
|
||||||
elif len(s) == 83:
|
elif len(s) == 83:
|
||||||
return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
|
return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
|
||||||
elif len(s) == 82:
|
elif len(s) == 82:
|
||||||
|
|
|
@ -476,7 +476,7 @@ def formatSeconds(secs):
|
||||||
def make_HTTPS_handler(opts):
|
def make_HTTPS_handler(opts):
|
||||||
if sys.version_info < (3,2):
|
if sys.version_info < (3,2):
|
||||||
# Python's 2.x handler is very simplistic
|
# Python's 2.x handler is very simplistic
|
||||||
return compat_urllib_request.HTTPSHandler()
|
return YoutubeDLHandlerHTTPS()
|
||||||
else:
|
else:
|
||||||
import ssl
|
import ssl
|
||||||
context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
|
context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
|
||||||
|
@ -485,7 +485,7 @@ def make_HTTPS_handler(opts):
|
||||||
context.verify_mode = (ssl.CERT_NONE
|
context.verify_mode = (ssl.CERT_NONE
|
||||||
if opts.no_check_certificate
|
if opts.no_check_certificate
|
||||||
else ssl.CERT_REQUIRED)
|
else ssl.CERT_REQUIRED)
|
||||||
return compat_urllib_request.HTTPSHandler(context=context)
|
return YoutubeDLHandlerHTTPS(context=context)
|
||||||
|
|
||||||
class ExtractorError(Exception):
|
class ExtractorError(Exception):
|
||||||
"""Error during info extraction."""
|
"""Error during info extraction."""
|
||||||
|
@ -569,7 +569,8 @@ class ContentTooShortError(Exception):
|
||||||
self.downloaded = downloaded
|
self.downloaded = downloaded
|
||||||
self.expected = expected
|
self.expected = expected
|
||||||
|
|
||||||
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
|
|
||||||
|
class YoutubeDLHandler_Template: # Old-style class, like HTTPHandler
|
||||||
"""Handler for HTTP requests and responses.
|
"""Handler for HTTP requests and responses.
|
||||||
|
|
||||||
This class, when installed with an OpenerDirector, automatically adds
|
This class, when installed with an OpenerDirector, automatically adds
|
||||||
|
@ -602,8 +603,8 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
|
||||||
ret.code = code
|
ret.code = code
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
def http_request(self, req):
|
def _http_request(self, req):
|
||||||
for h,v in std_headers.items():
|
for h, v in std_headers.items():
|
||||||
if h in req.headers:
|
if h in req.headers:
|
||||||
del req.headers[h]
|
del req.headers[h]
|
||||||
req.add_header(h, v)
|
req.add_header(h, v)
|
||||||
|
@ -618,7 +619,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
|
||||||
del req.headers['Youtubedl-user-agent']
|
del req.headers['Youtubedl-user-agent']
|
||||||
return req
|
return req
|
||||||
|
|
||||||
def http_response(self, req, resp):
|
def _http_response(self, req, resp):
|
||||||
old_resp = resp
|
old_resp = resp
|
||||||
# gzip
|
# gzip
|
||||||
if resp.headers.get('Content-encoding', '') == 'gzip':
|
if resp.headers.get('Content-encoding', '') == 'gzip':
|
||||||
|
@ -632,8 +633,16 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
|
||||||
resp.msg = old_resp.msg
|
resp.msg = old_resp.msg
|
||||||
return resp
|
return resp
|
||||||
|
|
||||||
https_request = http_request
|
|
||||||
https_response = http_response
|
class YoutubeDLHandler(YoutubeDLHandler_Template, compat_urllib_request.HTTPHandler):
    """HTTP handler that exposes the template's hooks as the HTTP hooks.

    The request/response processing lives in YoutubeDLHandler_Template;
    this class only binds it under the http_* names.
    """
    http_response = YoutubeDLHandler_Template._http_response
    http_request = YoutubeDLHandler_Template._http_request
|
||||||
|
|
||||||
|
|
||||||
|
class YoutubeDLHandlerHTTPS(YoutubeDLHandler_Template, compat_urllib_request.HTTPSHandler):
    """HTTPS handler that exposes the template's hooks as the HTTPS hooks.

    The request/response processing lives in YoutubeDLHandler_Template;
    this class only binds it under the https_* names.
    """
    https_response = YoutubeDLHandler_Template._http_response
    https_request = YoutubeDLHandler_Template._http_request
|
||||||
|
|
||||||
|
|
||||||
def unified_strdate(date_str):
|
def unified_strdate(date_str):
|
||||||
"""Return a string with the date in the format YYYYMMDD"""
|
"""Return a string with the date in the format YYYYMMDD"""
|
||||||
|
|
Loading…
Reference in a new issue