[extractor, test] Basic framework for embed tests (#4307)

Also split the download tests so that they can be run more easily in CI.

Authored by: coletdjnz
This commit is contained in:
pukkandan 2022-07-08 16:53:05 +05:30
parent 8f97a15d1c
commit f2e8dbcc00
5 changed files with 89 additions and 53 deletions

View file

@ -92,6 +92,13 @@ def gettestcases(include_onlymatching=False):
yield from ie.get_testcases(include_onlymatching) yield from ie.get_testcases(include_onlymatching)
def getwebpagetestcases():
    """Yield the webpage test cases of every extractor, with Generic added.

    Walks the extractors produced by ``yt_dlp.extractor.gen_extractors()`` and
    yields each dict obtained from ``ie.get_webpage_testcases()`` after making
    sure ``'Generic'`` is present in its ``add_ie`` list.

    The dicts are modified in place (``add_ie`` is created when missing) and
    the very same objects are yielded.
    """
    for ie in yt_dlp.extractor.gen_extractors():
        for tc in ie.get_webpage_testcases():
            add_ie = tc.setdefault('add_ie', [])
            # Append only once: the dict is mutated in place, so an unconditional
            # append would add another 'Generic' each time this generator is consumed
            if 'Generic' not in add_ie:
                add_ie.append('Generic')
            yield tc
md5 = lambda s: hashlib.md5(s.encode()).hexdigest() md5 = lambda s: hashlib.md5(s.encode()).hexdigest()

View file

@ -8,6 +8,7 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import collections
import hashlib import hashlib
import http.client import http.client
import json import json
@ -20,6 +21,7 @@ from test.helper import (
expect_warnings, expect_warnings,
get_params, get_params,
gettestcases, gettestcases,
getwebpagetestcases,
is_download_test, is_download_test,
report_warning, report_warning,
try_rm, try_rm,
@ -32,6 +34,7 @@ from yt_dlp.utils import (
ExtractorError, ExtractorError,
UnavailableVideoError, UnavailableVideoError,
format_bytes, format_bytes,
join_nonempty,
) )
RETRIES = 3 RETRIES = 3
@ -57,7 +60,9 @@ def _file_md5(fn):
return hashlib.md5(f.read()).hexdigest() return hashlib.md5(f.read()).hexdigest()
defs = gettestcases() normal_test_cases = gettestcases()
webpage_test_cases = getwebpagetestcases()
tests_counter = collections.defaultdict(collections.Counter)
@is_download_test @is_download_test
@ -72,24 +77,13 @@ class TestDownload(unittest.TestCase):
def __str__(self): def __str__(self):
"""Identify each test with the `add_ie` attribute, if available.""" """Identify each test with the `add_ie` attribute, if available."""
cls, add_ie = type(self), getattr(self, self._testMethodName).add_ie
return f'{self._testMethodName} ({cls.__module__}.{cls.__name__}){f" [{add_ie}]" if add_ie else ""}:'
def strclass(cls):
"""From 2.7's unittest; 2.6 had _strclass so we can't import it."""
return f'{cls.__module__}.{cls.__name__}'
add_ie = getattr(self, self._testMethodName).add_ie
return '%s (%s)%s:' % (self._testMethodName,
strclass(self.__class__),
' [%s]' % add_ie if add_ie else '')
def setUp(self):
self.defs = defs
# Dynamically generate tests # Dynamically generate tests
def generator(test_case, tname): def generator(test_case, tname):
def test_template(self): def test_template(self):
if self.COMPLETED_TESTS.get(tname): if self.COMPLETED_TESTS.get(tname):
return return
@ -255,25 +249,29 @@ def generator(test_case, tname):
# And add them to TestDownload # And add them to TestDownload
tests_counter = {} def inject_tests(test_cases, label=''):
for test_case in defs: for test_case in test_cases:
name = test_case['name'] name = test_case['name']
i = tests_counter.get(name, 0) tname = join_nonempty('test', name, label, tests_counter[name][label], delim='_')
tests_counter[name] = i + 1 tests_counter[name][label] += 1
tname = f'test_{name}_{i}' if i else f'test_{name}'
test_method = generator(test_case, tname) test_method = generator(test_case, tname)
test_method.__name__ = str(tname) test_method.__name__ = tname
ie_list = test_case.get('add_ie') test_method.add_ie = ','.join(test_case.get('add_ie', []))
test_method.add_ie = ie_list and ','.join(ie_list)
setattr(TestDownload, test_method.__name__, test_method) setattr(TestDownload, test_method.__name__, test_method)
del test_method
def batch_generator(name, num_tests): inject_tests(normal_test_cases)
# TODO: disable redirection to the IE to ensure we are actually testing the webpage extraction
inject_tests(webpage_test_cases, 'webpage')
def batch_generator(name):
def test_template(self): def test_template(self):
for label, num_tests in tests_counter[name].items():
for i in range(num_tests): for i in range(num_tests):
test_name = f'test_{name}_{i}' if i else f'test_{name}' test_name = join_nonempty('test', name, label, i, delim='_')
try: try:
getattr(self, test_name)() getattr(self, test_name)()
except unittest.SkipTest: except unittest.SkipTest:
@ -282,12 +280,12 @@ def batch_generator(name, num_tests):
return test_template return test_template
for name, num_tests in tests_counter.items(): for name in tests_counter:
test_method = batch_generator(name, num_tests) test_method = batch_generator(name)
test_method.__name__ = f'test_{name}_all' test_method.__name__ = f'test_{name}_all'
test_method.add_ie = '' test_method.add_ie = ''
setattr(TestDownload, test_method.__name__, test_method) setattr(TestDownload, test_method.__name__, test_method)
del test_method del test_method
if __name__ == '__main__': if __name__ == '__main__':

View file

@ -3665,11 +3665,18 @@ class InfoExtractor:
t['name'] = cls.ie_key() t['name'] = cls.ie_key()
yield t yield t
@classmethod
def get_webpage_testcases(cls):
    """Return `_WEBPAGE_TESTS` of the class (empty list if unset), with each test's 'name' set to the IE key"""
    webpage_tests = getattr(cls, '_WEBPAGE_TESTS', [])
    for test in webpage_tests:
        # The test dicts are updated in place, not copied
        test.update(name=cls.ie_key())
    return webpage_tests
@classproperty @classproperty
def age_limit(cls): def age_limit(cls):
"""Get age limit from the testcases""" """Get age limit from the testcases"""
return max(traverse_obj( return max(traverse_obj(
tuple(cls.get_testcases(include_onlymatching=False)), (*cls.get_testcases(include_onlymatching=False), *cls.get_webpage_testcases()),
(..., (('playlist', 0), None), 'info_dict', 'age_limit')) or [0]) (..., (('playlist', 0), None), 'info_dict', 'age_limit')) or [0])
@classmethod @classmethod
@ -3844,7 +3851,10 @@ class InfoExtractor:
def extract_from_webpage(cls, ydl, url, webpage): def extract_from_webpage(cls, ydl, url, webpage):
ie = (cls if isinstance(cls._extract_from_webpage, types.MethodType) ie = (cls if isinstance(cls._extract_from_webpage, types.MethodType)
else ydl.get_info_extractor(cls.ie_key())) else ydl.get_info_extractor(cls.ie_key()))
yield from ie._extract_from_webpage(url, webpage) or [] for info in ie._extract_from_webpage(url, webpage) or []:
# url = None since we do not want to set (webpage/original)_url
ydl.add_default_extra_info(info, ie, None)
yield info
@classmethod @classmethod
def _extract_from_webpage(cls, url, webpage): def _extract_from_webpage(cls, url, webpage):

View file

@ -933,21 +933,6 @@ class GenericIE(InfoExtractor):
'skip_download': True, 'skip_download': True,
} }
}, },
# YouTube <object> embed
{
'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
'md5': '516718101ec834f74318df76259fb3cc',
'info_dict': {
'id': 'msN87y-iEx0',
'ext': 'webm',
'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
'upload_date': '20080526',
'description': 'md5:0ffc78ea3f01b2e2c247d5f8d1d3c18d',
'uploader': 'Christopher Sykes',
'uploader_id': 'ChristopherJSykes',
},
'add_ie': ['Youtube'],
},
# Camtasia studio # Camtasia studio
{ {
'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/', 'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',

View file

@ -2266,6 +2266,42 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
} }
] ]
_WEBPAGE_TESTS = [
# YouTube <object> embed
{
'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
'md5': '873c81d308b979f0e23ee7e620b312a3',
'info_dict': {
'id': 'msN87y-iEx0',
'ext': 'mp4',
'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
'upload_date': '20080526',
'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
'uploader': 'Christopher Sykes',
'uploader_id': 'ChristopherJSykes',
'age_limit': 0,
'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
'playable_in_embed': True,
'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
'like_count': int,
'comment_count': int,
'channel': 'Christopher Sykes',
'live_status': 'not_live',
'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
'availability': 'public',
'duration': 195,
'view_count': int,
'categories': ['Science & Technology'],
'channel_follower_count': int,
'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',
},
'params': {
'skip_download': True,
}
},
]
@classmethod @classmethod
def suitable(cls, url): def suitable(cls, url):
from ..utils import parse_qs from ..utils import parse_qs