Add option --use-extractors
Deprecates `--force-generic-extractor`. Closes #3234, closes #2044. Related: #4307, #1791.
This commit is contained in:
parent
5314b52192
commit
fe7866d0ed
5 changed files with 58 additions and 18 deletions
|
@ -375,7 +375,13 @@ You can also fork the project on github and run your fork's [build workflow](.gi
|
||||||
--list-extractors List all supported extractors and exit
|
--list-extractors List all supported extractors and exit
|
||||||
--extractor-descriptions Output descriptions of all supported
|
--extractor-descriptions Output descriptions of all supported
|
||||||
extractors and exit
|
extractors and exit
|
||||||
--force-generic-extractor Force extraction to use the generic extractor
|
--use-extractors, --ies NAMES Extractor names to use separated by commas.
|
||||||
|
You can also use regexes, "all", "default"
|
||||||
|
and "end" (end URL matching); e.g. --ies
|
||||||
|
"holodex.*,end,youtube". Prefix the name
|
||||||
|
with a "-" to exclude it, e.g. --ies
|
||||||
|
default,-generic. Use --list-extractors for
|
||||||
|
a list of available extractor names
|
||||||
--default-search PREFIX Use this prefix for unqualified URLs. E.g.
|
--default-search PREFIX Use this prefix for unqualified URLs. E.g.
|
||||||
"gvsearch2:python" downloads two videos from
|
"gvsearch2:python" downloads two videos from
|
||||||
google videos for the search term "python".
|
google videos for the search term "python".
|
||||||
|
@ -2058,6 +2064,7 @@ While these options are redundant, they are still expected to be used due to the
|
||||||
#### Not recommended
|
#### Not recommended
|
||||||
While these options still work, their use is not recommended since there are other alternatives to achieve the same
|
While these options still work, their use is not recommended since there are other alternatives to achieve the same
|
||||||
|
|
||||||
|
--force-generic-extractor --ies generic,default
|
||||||
--exec-before-download CMD --exec "before_dl:CMD"
|
--exec-before-download CMD --exec "before_dl:CMD"
|
||||||
--no-exec-before-download --no-exec
|
--no-exec-before-download --no-exec
|
||||||
--all-formats -f all
|
--all-formats -f all
|
||||||
|
|
|
@ -29,6 +29,7 @@ from .cookies import load_cookies
|
||||||
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
|
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
|
||||||
from .downloader.rtmp import rtmpdump_version
|
from .downloader.rtmp import rtmpdump_version
|
||||||
from .extractor import gen_extractor_classes, get_info_extractor
|
from .extractor import gen_extractor_classes, get_info_extractor
|
||||||
|
from .extractor.common import UnsupportedURLIE
|
||||||
from .extractor.openload import PhantomJSwrapper
|
from .extractor.openload import PhantomJSwrapper
|
||||||
from .minicurses import format_text
|
from .minicurses import format_text
|
||||||
from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
|
from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
|
||||||
|
@ -237,7 +238,7 @@ class YoutubeDL:
|
||||||
Default is 'only_download' for CLI, but False for API
|
Default is 'only_download' for CLI, but False for API
|
||||||
skip_playlist_after_errors: Number of allowed failures until the rest of
|
skip_playlist_after_errors: Number of allowed failures until the rest of
|
||||||
the playlist is skipped
|
the playlist is skipped
|
||||||
force_generic_extractor: Force downloader to use the generic extractor
|
allowed_extractors: List of regexes to match against extractor names that are allowed
|
||||||
overwrites: Overwrite all video and metadata files if True,
|
overwrites: Overwrite all video and metadata files if True,
|
||||||
overwrite only non-video files if None
|
overwrite only non-video files if None
|
||||||
and don't overwrite any file if False
|
and don't overwrite any file if False
|
||||||
|
@ -477,6 +478,8 @@ class YoutubeDL:
|
||||||
|
|
||||||
The following options are deprecated and may be removed in the future:
|
The following options are deprecated and may be removed in the future:
|
||||||
|
|
||||||
|
force_generic_extractor: Force downloader to use the generic extractor
|
||||||
|
- Use allowed_extractors = ['generic', 'default']
|
||||||
playliststart: - Use playlist_items
|
playliststart: - Use playlist_items
|
||||||
Playlist item to start at.
|
Playlist item to start at.
|
||||||
playlistend: - Use playlist_items
|
playlistend: - Use playlist_items
|
||||||
|
@ -758,13 +761,6 @@ class YoutubeDL:
|
||||||
self._ies_instances[ie_key] = ie
|
self._ies_instances[ie_key] = ie
|
||||||
ie.set_downloader(self)
|
ie.set_downloader(self)
|
||||||
|
|
||||||
def _get_info_extractor_class(self, ie_key):
|
|
||||||
ie = self._ies.get(ie_key)
|
|
||||||
if ie is None:
|
|
||||||
ie = get_info_extractor(ie_key)
|
|
||||||
self.add_info_extractor(ie)
|
|
||||||
return ie
|
|
||||||
|
|
||||||
def get_info_extractor(self, ie_key):
|
def get_info_extractor(self, ie_key):
|
||||||
"""
|
"""
|
||||||
Get an instance of an IE with name ie_key, it will try to get one from
|
Get an instance of an IE with name ie_key, it will try to get one from
|
||||||
|
@ -781,8 +777,19 @@ class YoutubeDL:
|
||||||
"""
|
"""
|
||||||
Add the InfoExtractors returned by gen_extractors to the end of the list
|
Add the InfoExtractors returned by gen_extractors to the end of the list
|
||||||
"""
|
"""
|
||||||
for ie in gen_extractor_classes():
|
all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
|
||||||
self.add_info_extractor(ie)
|
all_ies['end'] = UnsupportedURLIE()
|
||||||
|
try:
|
||||||
|
ie_names = orderedSet_from_options(
|
||||||
|
self.params.get('allowed_extractors', ['default']), {
|
||||||
|
'all': list(all_ies),
|
||||||
|
'default': [name for name, ie in all_ies.items() if ie._ENABLED],
|
||||||
|
}, use_regex=True)
|
||||||
|
except re.error as e:
|
||||||
|
raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')
|
||||||
|
for name in ie_names:
|
||||||
|
self.add_info_extractor(all_ies[name])
|
||||||
|
self.write_debug(f'Loaded {len(ie_names)} extractors')
|
||||||
|
|
||||||
def add_post_processor(self, pp, when='post_process'):
|
def add_post_processor(self, pp, when='post_process'):
|
||||||
"""Add a PostProcessor object to the end of the chain."""
|
"""Add a PostProcessor object to the end of the chain."""
|
||||||
|
@ -1413,11 +1420,11 @@ class YoutubeDL:
|
||||||
ie_key = 'Generic'
|
ie_key = 'Generic'
|
||||||
|
|
||||||
if ie_key:
|
if ie_key:
|
||||||
ies = {ie_key: self._get_info_extractor_class(ie_key)}
|
ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
|
||||||
else:
|
else:
|
||||||
ies = self._ies
|
ies = self._ies
|
||||||
|
|
||||||
for ie_key, ie in ies.items():
|
for key, ie in ies.items():
|
||||||
if not ie.suitable(url):
|
if not ie.suitable(url):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
@ -1426,14 +1433,16 @@ class YoutubeDL:
|
||||||
'and will probably not work.')
|
'and will probably not work.')
|
||||||
|
|
||||||
temp_id = ie.get_temp_id(url)
|
temp_id = ie.get_temp_id(url)
|
||||||
if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
|
if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
|
||||||
self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
|
self.to_screen(f'[{key}] {temp_id}: has already been recorded in the archive')
|
||||||
if self.params.get('break_on_existing', False):
|
if self.params.get('break_on_existing', False):
|
||||||
raise ExistingVideoReached()
|
raise ExistingVideoReached()
|
||||||
break
|
break
|
||||||
return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
|
return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
|
||||||
else:
|
else:
|
||||||
self.report_error('no suitable InfoExtractor for URL %s' % url)
|
extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
|
||||||
|
self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
|
||||||
|
tb=False if extractors_restricted else None)
|
||||||
|
|
||||||
def _handle_extraction_exceptions(func):
|
def _handle_extraction_exceptions(func):
|
||||||
@functools.wraps(func)
|
@functools.wraps(func)
|
||||||
|
|
|
@ -766,6 +766,7 @@ def parse_options(argv=None):
|
||||||
'windowsfilenames': opts.windowsfilenames,
|
'windowsfilenames': opts.windowsfilenames,
|
||||||
'ignoreerrors': opts.ignoreerrors,
|
'ignoreerrors': opts.ignoreerrors,
|
||||||
'force_generic_extractor': opts.force_generic_extractor,
|
'force_generic_extractor': opts.force_generic_extractor,
|
||||||
|
'allowed_extractors': opts.allowed_extractors or ['default'],
|
||||||
'ratelimit': opts.ratelimit,
|
'ratelimit': opts.ratelimit,
|
||||||
'throttledratelimit': opts.throttledratelimit,
|
'throttledratelimit': opts.throttledratelimit,
|
||||||
'overwrites': opts.overwrites,
|
'overwrites': opts.overwrites,
|
||||||
|
|
|
@ -480,6 +480,9 @@ class InfoExtractor:
|
||||||
will be used by geo restriction bypass mechanism similarly
|
will be used by geo restriction bypass mechanism similarly
|
||||||
to _GEO_COUNTRIES.
|
to _GEO_COUNTRIES.
|
||||||
|
|
||||||
|
The _ENABLED attribute should be set to False for IEs that
|
||||||
|
are disabled by default and must be explicitly enabled.
|
||||||
|
|
||||||
The _WORKING attribute should be set to False for broken IEs
|
The _WORKING attribute should be set to False for broken IEs
|
||||||
in order to warn the users and skip the tests.
|
in order to warn the users and skip the tests.
|
||||||
"""
|
"""
|
||||||
|
@ -491,6 +494,7 @@ class InfoExtractor:
|
||||||
_GEO_COUNTRIES = None
|
_GEO_COUNTRIES = None
|
||||||
_GEO_IP_BLOCKS = None
|
_GEO_IP_BLOCKS = None
|
||||||
_WORKING = True
|
_WORKING = True
|
||||||
|
_ENABLED = True
|
||||||
_NETRC_MACHINE = None
|
_NETRC_MACHINE = None
|
||||||
IE_DESC = None
|
IE_DESC = None
|
||||||
SEARCH_KEY = None
|
SEARCH_KEY = None
|
||||||
|
@ -3941,3 +3945,12 @@ class SearchInfoExtractor(InfoExtractor):
|
||||||
@classproperty
|
@classproperty
|
||||||
def SEARCH_KEY(cls):
|
def SEARCH_KEY(cls):
|
||||||
return cls._SEARCH_KEY
|
return cls._SEARCH_KEY
|
||||||
|
|
||||||
|
|
||||||
|
class UnsupportedURLIE(InfoExtractor):
|
||||||
|
_VALID_URL = '.*'
|
||||||
|
_ENABLED = False
|
||||||
|
IE_DESC = False
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
raise UnsupportedError(url)
|
||||||
|
|
|
@ -353,10 +353,20 @@ def create_parser():
|
||||||
'--extractor-descriptions',
|
'--extractor-descriptions',
|
||||||
action='store_true', dest='list_extractor_descriptions', default=False,
|
action='store_true', dest='list_extractor_descriptions', default=False,
|
||||||
help='Output descriptions of all supported extractors and exit')
|
help='Output descriptions of all supported extractors and exit')
|
||||||
|
general.add_option(
|
||||||
|
'--use-extractors', '--ies',
|
||||||
|
action='callback', dest='allowed_extractors', metavar='NAMES', type='str',
|
||||||
|
default=[], callback=_list_from_options_callback,
|
||||||
|
help=(
|
||||||
|
'Extractor names to use separated by commas. '
|
||||||
|
'You can also use regexes, "all", "default" and "end" (end URL matching); '
|
||||||
|
'e.g. --ies "holodex.*,end,youtube". '
|
||||||
|
'Prefix the name with a "-" to exclude it, e.g. --ies default,-generic. '
|
||||||
|
'Use --list-extractors for a list of available extractor names'))
|
||||||
general.add_option(
|
general.add_option(
|
||||||
'--force-generic-extractor',
|
'--force-generic-extractor',
|
||||||
action='store_true', dest='force_generic_extractor', default=False,
|
action='store_true', dest='force_generic_extractor', default=False,
|
||||||
help='Force extraction to use the generic extractor')
|
help=optparse.SUPPRESS_HELP)
|
||||||
general.add_option(
|
general.add_option(
|
||||||
'--default-search',
|
'--default-search',
|
||||||
dest='default_search', metavar='PREFIX',
|
dest='default_search', metavar='PREFIX',
|
||||||
|
|
Loading…
Reference in a new issue