[xminus] Simplify and extend (#4302)

This commit is contained in:
Philipp Hagemeister 2014-11-25 09:54:54 +01:00
parent c3e74731c2
commit be64b5b098
3 changed files with 95 additions and 20 deletions

View file

@ -47,6 +47,7 @@ from youtube_dl.utils import (
js_to_json, js_to_json,
intlist_to_bytes, intlist_to_bytes,
args_to_str, args_to_str,
parse_filesize,
) )
@ -367,5 +368,14 @@ class TestUtil(unittest.TestCase):
'foo ba/r -baz \'2 be\' \'\'' 'foo ba/r -baz \'2 be\' \'\''
) )
def test_parse_filesize(self):
    # Each pair is (input handed to parse_filesize, expected result).
    # None, the empty string and text without a leading number must all
    # yield None; the rest cover plain bytes, a binary (MiB) unit, a
    # decimal (GB) unit and a lower-case unit with a fractional number.
    expectations = (
        (None, None),
        ('', None),
        ('91 B', 91),
        ('foobar', None),
        ('2 MiB', 2097152),
        ('5 GB', 5000000000),
        ('1.2Tb', 1200000000000),
    )
    for text, expected in expectations:
        self.assertEqual(parse_filesize(text), expected)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View file

@ -2,7 +2,14 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import int_or_none from ..compat import (
compat_chr,
compat_ord,
)
from ..utils import (
int_or_none,
parse_filesize,
)
class XMinusIE(InfoExtractor): class XMinusIE(InfoExtractor):
@ -15,39 +22,46 @@ class XMinusIE(InfoExtractor):
'ext': 'mp3', 'ext': 'mp3',
'title': 'Леонид Агутин-Песенка шофера', 'title': 'Леонид Агутин-Песенка шофера',
'duration': 156, 'duration': 156,
'tbr': 320,
'filesize_approx': 5900000,
'view_count': int,
} }
} }
def _real_extract(self, url):
    """Scrape one track page and build its info dict.

    Downloads the page for ``url``, pulls artist, title, duration,
    approximate file size, bitrate and view count out of the markup,
    and derives the MP3 download URL from the page's ``data-mt`` token.
    Artist, title and token are mandatory; the remaining fields are
    looked up with ``fatal=False``.
    """
    video_id = self._match_id(url)
    webpage = self._download_webpage(url, video_id)

    # Mandatory metadata: the title is "<artist>-<track title>".
    artist = self._html_search_regex(
        r'minus_track\.artist="(.+?)"', webpage, 'artist')
    track_title = self._html_search_regex(
        r'minus_track\.title="(.+?)"', webpage, 'title')
    title = artist + '-' + track_title

    # Optional metadata, each searched with fatal=False.
    duration_text = self._html_search_regex(
        r'minus_track\.dur_sec=\'([0-9]*?)\'',
        webpage, 'duration', fatal=False)
    duration = int_or_none(duration_text)

    filesize_text = self._html_search_regex(
        r'<div class="filesize[^"]*"></div>\s*([0-9.]+\s*[a-zA-Z][bB])',
        webpage, 'approximate filesize', fatal=False)
    filesize_approx = parse_filesize(filesize_text)

    bitrate_text = self._html_search_regex(
        r'<div class="quality[^"]*"></div>\s*([0-9]+)\s*kbps',
        webpage, 'bitrate', fatal=False)
    tbr = int_or_none(bitrate_text)

    views_text = self._html_search_regex(
        r'<div class="quality.*?► ([0-9]+)',
        webpage, 'view count', fatal=False)
    view_count = int_or_none(views_text)

    # The download token is the data-mt attribute read backwards, with
    # every character shifted down by one code point except the one at
    # index 3 of the reversed string, which is kept as-is.
    enc_token = self._html_search_regex(
        r'data-mt="(.*?)"', webpage, 'enc_token')
    decoded_chars = []
    for pos, c in enumerate(reversed(enc_token)):
        if pos == 3:
            decoded_chars.append(c)
        else:
            decoded_chars.append(compat_chr(compat_ord(c) - 1))
    token = ''.join(decoded_chars)
    video_url = 'http://x-minus.org/dwlf/%s/%s.mp3' % (video_id, token)

    return {
        'id': video_id,
        'title': title,
        'url': video_url,
        'duration': duration,
        'filesize_approx': filesize_approx,
        'tbr': tbr,
        'view_count': view_count,
    }
def _decode_token(self, enc_token):
token = ''
pos = 0
for c in reversed(enc_token):
if pos != 3:
token += chr(ord(c) - 1)
else:
token += c
pos += 1
return token

View file

@ -1046,6 +1046,57 @@ def format_bytes(bytes):
return '%.2f%s' % (converted, suffix) return '%.2f%s' % (converted, suffix)
def parse_filesize(s):
    """Convert a human-readable file size such as '5 GB' into bytes.

    The string must begin with a number (digits with an optional '.'
    fraction), followed by optional whitespace and a unit that ends at a
    word boundary. Anything after the unit is ignored.

    Supported units are 'B' plus the prefixes K, M, G, T, P, E, Z and Y:
      * '<P>iB' is binary  (1024 ** n), e.g. 'MiB'
      * '<P>B'  is decimal (1000 ** n), e.g. 'MB'

    Returns the size as an int, or None when s is None or cannot be
    parsed.
    """
    if s is None:
        return None

    unit_table = {
        'B': 1,
        'b': 1,
    }
    for exponent, prefix in enumerate('KMGTPEZY', 1):
        binary = 1024 ** exponent
        decimal = 1000 ** exponent
        unit_table[prefix + 'iB'] = binary
        unit_table[prefix + 'B'] = decimal
        # The lower-case forms are of course incorrect and unofficial,
        # but we support those too: 'kB' counts as binary, 'Kb' as decimal.
        unit_table[prefix.lower() + 'B'] = binary
        unit_table[prefix + 'b'] = decimal

    units_re = '|'.join(re.escape(u) for u in unit_table)
    # The trailing \b rejects words that merely start with a unit
    # ('3 Mbps', '5 Bananas'), which would otherwise be read as sizes.
    m = re.match(
        r'(?P<num>[0-9]+(?:\.[0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
    if not m:
        return None
    return int(float(m.group('num')) * unit_table[m.group('unit')])
def get_term_width(): def get_term_width():
columns = compat_getenv('COLUMNS', None) columns = compat_getenv('COLUMNS', None)
if columns: if columns: