[extractor] Add a way to distinguish IEs that return only videos

This commit is contained in:
pukkandan 2022-11-13 10:56:04 +05:30
parent 83cc7b8aae
commit 171a31dbe8
No known key found for this signature in database
GPG key ID: 7EEE9E1E817D0A39
2 changed files with 20 additions and 0 deletions

View file

@ -3702,6 +3702,24 @@ class InfoExtractor:
(*cls.get_testcases(include_onlymatching=False), *cls.get_webpage_testcases()), (*cls.get_testcases(include_onlymatching=False), *cls.get_webpage_testcases()),
(..., (('playlist', 0), None), 'info_dict', 'age_limit')) or [0]) (..., (('playlist', 0), None), 'info_dict', 'age_limit')) or [0])
@classproperty(cache=True)
def _RETURN_TYPE(cls):
    """Kind of result the extractor returns, inferred from its test cases.

    The value is "video" when no test case has a key starting with
    "playlist", "playlist" when every test case has such a key, "any" when
    only some do, and None (unknown) when there are no test cases at all.
    """
    testcases = tuple(cls.get_testcases(include_onlymatching=False))
    if not testcases:
        return None

    def expects_playlist(testcase):
        # A test case is playlist-like if any of its keys starts with "playlist"
        return any(key.startswith('playlist') for key in testcase)

    if not any(map(expects_playlist, testcases)):
        return 'video'
    if all(map(expects_playlist, testcases)):
        return 'playlist'
    return 'any'
@classmethod
def is_single_video(cls, url):
    """Tell whether the given URL refers to a single video.

    Returns True when the extractor's _RETURN_TYPE is "video", False when
    it is "playlist", and None (unknown) for anything else. The URL must be
    one that the extractor reports as suitable.
    """
    assert cls.suitable(url), 'The URL must be suitable for the extractor'
    return_type = cls._RETURN_TYPE
    if return_type == 'video':
        return True
    if return_type == 'playlist':
        return False
    return None
@classmethod @classmethod
def is_suitable(cls, age_limit): def is_suitable(cls, age_limit):
"""Test whether the extractor is generally suitable for the given age limit""" """Test whether the extractor is generally suitable for the given age limit"""
@ -3953,6 +3971,7 @@ class SearchInfoExtractor(InfoExtractor):
""" """
_MAX_RESULTS = float('inf') _MAX_RESULTS = float('inf')
_RETURN_TYPE = 'playlist'
@classproperty @classproperty
def _VALID_URL(cls): def _VALID_URL(cls):

View file

@ -1050,6 +1050,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
<a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})" <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
\s[^>]*\bclass="[^"]*\blazy-load-youtube''', \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
] ]
_RETURN_TYPE = 'video' # While there are "multifeed" test cases, they don't seem to actually exist anymore
_PLAYER_INFO_RE = ( _PLAYER_INFO_RE = (
r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player', r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',