Fix some IEs that didn't return the upload_date in the YYYYMMDD format
Create a function unified_strdate in utils.py to fix these problems
This commit is contained in:
parent
bd55852517
commit
bf50b0383e
3 changed files with 29 additions and 11 deletions
|
@ -15,6 +15,7 @@ from youtube_dl.utils import sanitize_filename
|
||||||
from youtube_dl.utils import unescapeHTML
|
from youtube_dl.utils import unescapeHTML
|
||||||
from youtube_dl.utils import orderedSet
|
from youtube_dl.utils import orderedSet
|
||||||
from youtube_dl.utils import DateRange
|
from youtube_dl.utils import DateRange
|
||||||
|
from youtube_dl.utils import unified_strdate
|
||||||
|
|
||||||
if sys.version_info < (3, 0):
|
if sys.version_info < (3, 0):
|
||||||
_compat_str = lambda b: b.decode('unicode-escape')
|
_compat_str = lambda b: b.decode('unicode-escape')
|
||||||
|
@ -105,5 +106,11 @@ class TestUtil(unittest.TestCase):
|
||||||
_firstmilenium = DateRange(end="10000101")
|
_firstmilenium = DateRange(end="10000101")
|
||||||
self.assertTrue("07110427" in _firstmilenium)
|
self.assertTrue("07110427" in _firstmilenium)
|
||||||
|
|
||||||
|
def test_unified_dates(self):
    """Check that unified_strdate normalises several date spellings to YYYYMMDD."""
    # (raw input, expected YYYYMMDD) pairs, checked in this order.
    cases = (
        ('December 21, 2010', '20101221'),
        ('8/7/2009', '20090708'),
        ('Dec 14, 2012', '20121214'),
        ('2012/10/11 01:56:38 +0000', '20121011'),
    )
    for raw_date, expected in cases:
        self.assertEqual(unified_strdate(raw_date), expected)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -562,12 +562,7 @@ class YoutubeIE(InfoExtractor):
|
||||||
mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
|
mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
|
upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
|
||||||
format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
|
upload_date = unified_strdate(upload_date)
|
||||||
for expression in format_expressions:
|
|
||||||
try:
|
|
||||||
upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
# description
|
# description
|
||||||
video_description = get_element_by_id("eow-description", video_webpage)
|
video_description = get_element_by_id("eow-description", video_webpage)
|
||||||
|
@ -2385,7 +2380,7 @@ class ComedyCentralIE(InfoExtractor):
|
||||||
shortMediaId = mediaId.split(':')[-1]
|
shortMediaId = mediaId.split(':')[-1]
|
||||||
showId = mediaId.split(':')[-2].replace('.com', '')
|
showId = mediaId.split(':')[-2].replace('.com', '')
|
||||||
officialTitle = itemEl.findall('./title')[0].text
|
officialTitle = itemEl.findall('./title')[0].text
|
||||||
officialDate = itemEl.findall('./pubDate')[0].text
|
officialDate = unified_strdate(itemEl.findall('./pubDate')[0].text)
|
||||||
|
|
||||||
configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
|
configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
|
||||||
compat_urllib_parse.urlencode({'uri': mediaId}))
|
compat_urllib_parse.urlencode({'uri': mediaId}))
|
||||||
|
@ -2695,12 +2690,13 @@ class SoundcloudIE(InfoExtractor):
|
||||||
|
|
||||||
streams = json.loads(stream_json)
|
streams = json.loads(stream_json)
|
||||||
mediaURL = streams['http_mp3_128_url']
|
mediaURL = streams['http_mp3_128_url']
|
||||||
|
upload_date = unified_strdate(info['created_at'])
|
||||||
|
|
||||||
return [{
|
return [{
|
||||||
'id': info['id'],
|
'id': info['id'],
|
||||||
'url': mediaURL,
|
'url': mediaURL,
|
||||||
'uploader': info['user']['username'],
|
'uploader': info['user']['username'],
|
||||||
'upload_date': info['created_at'],
|
'upload_date': upload_date,
|
||||||
'title': info['title'],
|
'title': info['title'],
|
||||||
'ext': u'mp3',
|
'ext': u'mp3',
|
||||||
'description': info['description'],
|
'description': info['description'],
|
||||||
|
@ -3759,7 +3755,7 @@ class YouPornIE(InfoExtractor):
|
||||||
self._downloader.report_warning(u'unable to extract video date')
|
self._downloader.report_warning(u'unable to extract video date')
|
||||||
upload_date = None
|
upload_date = None
|
||||||
else:
|
else:
|
||||||
upload_date = result.group('date').strip()
|
upload_date = unified_strdate(result.group('date').strip())
|
||||||
|
|
||||||
# Get the video uploader
|
# Get the video uploader
|
||||||
result = re.search(r'Submitted:</label>(?P<uploader>.*)</li>', webpage)
|
result = re.search(r'Submitted:</label>(?P<uploader>.*)</li>', webpage)
|
||||||
|
@ -3866,7 +3862,7 @@ class PornotubeIE(InfoExtractor):
|
||||||
if result is None:
|
if result is None:
|
||||||
self._downloader.report_error(u'unable to extract video title')
|
self._downloader.report_error(u'unable to extract video title')
|
||||||
return
|
return
|
||||||
upload_date = result.group('date')
|
upload_date = unified_strdate(result.group('date'))
|
||||||
|
|
||||||
info = {'id': video_id,
|
info = {'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
|
|
|
@ -570,6 +570,21 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
|
||||||
https_request = http_request
|
https_request = http_request
|
||||||
https_response = http_response
|
https_response = http_response
|
||||||
|
|
||||||
|
def unified_strdate(date_str):
    """Return a string with the date in the format YYYYMMDD.

    date_str is a human-readable date such as 'December 21, 2010',
    '8/7/2009' (day/month/year), '2012-10-11' or
    '2012/10/11 01:56:38 +0000'.

    Returns the normalised 'YYYYMMDD' string, or None when date_str is
    None or matches none of the known formats.
    """
    if date_str is None:
        # Callers may pass a missing value; report "no date" instead of
        # failing on the string operations below.
        return None
    # Replace commas with spaces so 'Dec 14, 2012' matches '%b %d %Y'
    # (strptime treats any run of whitespace as a single separator).
    date_str = date_str.replace(',', ' ')
    # %z (UTC offset) is only supported in python>=3.2, so drop a trailing
    # ' +0000' / ' -0500' style offset before parsing.
    date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
    format_expressions = (
        '%d %B %Y',
        '%B %d %Y',
        '%b %d %Y',
        '%Y-%m-%d',
        '%d/%m/%Y',
        '%Y/%m/%d %H:%M:%S',
    )
    for expression in format_expressions:
        try:
            return datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            # This format does not fit the input; try the next one.
            continue
    return None
|
||||||
|
|
||||||
def date_from_str(date_str):
|
def date_from_str(date_str):
|
||||||
"""Return a datetime object from a string in the format YYYYMMDD"""
|
"""Return a datetime object from a string in the format YYYYMMDD"""
|
||||||
return datetime.datetime.strptime(date_str, "%Y%m%d").date()
|
return datetime.datetime.strptime(date_str, "%Y%m%d").date()
|
||||||
|
|
Loading…
Reference in a new issue