[extractor] Import _ALL_CLASSES lazily

This significantly speeds up `import yt_dlp` in the absence of `lazy_extractors`
This commit is contained in:
pukkandan 2022-06-15 18:00:34 +05:30
parent 99d10bf607
commit 560738f34d
No known key found for this signature in database
GPG key ID: 7EEE9E1E817D0A39
6 changed files with 49 additions and 30 deletions

View file

@ -53,7 +53,7 @@ def get_all_ies():
if os.path.exists(PLUGINS_DIRNAME): if os.path.exists(PLUGINS_DIRNAME):
os.rename(PLUGINS_DIRNAME, BLOCKED_DIRNAME) os.rename(PLUGINS_DIRNAME, BLOCKED_DIRNAME)
try: try:
from yt_dlp.extractor import _ALL_CLASSES from yt_dlp.extractor.extractors import _ALL_CLASSES
finally: finally:
if os.path.exists(BLOCKED_DIRNAME): if os.path.exists(BLOCKED_DIRNAME):
os.rename(BLOCKED_DIRNAME, PLUGINS_DIRNAME) os.rename(BLOCKED_DIRNAME, PLUGINS_DIRNAME)

View file

@ -38,8 +38,6 @@ from .compat import (
from .cookies import load_cookies from .cookies import load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version from .downloader.rtmp import rtmpdump_version
from .extractor import _LAZY_LOADER
from .extractor import _PLUGIN_CLASSES as plugin_extractors
from .extractor import gen_extractor_classes, get_info_extractor from .extractor import gen_extractor_classes, get_info_extractor
from .extractor.openload import PhantomJSwrapper from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text from .minicurses import format_text
@ -3659,6 +3657,10 @@ class YoutubeDL:
if not self.params.get('verbose'): if not self.params.get('verbose'):
return return
# These imports can be slow. So import them only as needed
from .extractor.extractors import _LAZY_LOADER
from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors
def get_encoding(stream): def get_encoding(stream):
ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)) ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
if not supports_terminal_sequences(stream): if not supports_terminal_sequences(stream):

View file

@ -12,7 +12,7 @@ import sys
from .compat import compat_getpass, compat_shlex_quote from .compat import compat_getpass, compat_shlex_quote
from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
from .downloader import FileDownloader from .downloader import FileDownloader
from .extractor import GenericIE, list_extractor_classes from .extractor import list_extractor_classes
from .extractor.adobepass import MSO_INFO from .extractor.adobepass import MSO_INFO
from .extractor.common import InfoExtractor from .extractor.common import InfoExtractor
from .options import parseOpts from .options import parseOpts
@ -79,6 +79,10 @@ def get_urls(urls, batchfile, verbose):
def print_extractor_information(opts, urls): def print_extractor_information(opts, urls):
# Importing GenericIE is currently slow since it imports other extractors
# TODO: Move this back to module level after generalization of embed detection
from .extractor.generic import GenericIE
out = '' out = ''
if opts.list_extractors: if opts.list_extractors:
urls = dict.fromkeys(urls, False) urls = dict.fromkeys(urls, False)

View file

@ -33,7 +33,7 @@ def _is_package(module):
def passthrough_module(parent, child, *, callback=lambda _: None): def passthrough_module(parent, child, *, callback=lambda _: None):
parent_module = importlib.import_module(parent) parent_module = importlib.import_module(parent)
child_module = importlib.import_module(child, parent) child_module = None # Import child module only as needed
class PassthroughModule(types.ModuleType): class PassthroughModule(types.ModuleType):
def __getattr__(self, attr): def __getattr__(self, attr):
@ -41,6 +41,9 @@ def passthrough_module(parent, child, *, callback=lambda _: None):
with contextlib.suppress(ImportError): with contextlib.suppress(ImportError):
return importlib.import_module(f'.{attr}', parent) return importlib.import_module(f'.{attr}', parent)
nonlocal child_module
child_module = child_module or importlib.import_module(child, parent)
ret = _NO_ATTRIBUTE ret = _NO_ATTRIBUTE
with contextlib.suppress(AttributeError): with contextlib.suppress(AttributeError):
ret = getattr(child_module, attr) ret = getattr(child_module, attr)

View file

@ -1,32 +1,15 @@
import contextlib from ..compat.compat_utils import passthrough_module
import os
from ..utils import load_plugins passthrough_module(__name__, '.extractors')
del passthrough_module
_LAZY_LOADER = False
if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
with contextlib.suppress(ImportError):
from .lazy_extractors import * # noqa: F403
from .lazy_extractors import _ALL_CLASSES
_LAZY_LOADER = True
if not _LAZY_LOADER:
from ._extractors import * # noqa: F403
_ALL_CLASSES = [ # noqa: F811
klass
for name, klass in globals().items()
if name.endswith('IE') and name != 'GenericIE'
]
_ALL_CLASSES.append(GenericIE) # noqa: F405
_PLUGIN_CLASSES = load_plugins('extractor', 'IE', globals())
_ALL_CLASSES = list(_PLUGIN_CLASSES.values()) + _ALL_CLASSES
def gen_extractor_classes(): def gen_extractor_classes():
""" Return a list of supported extractors. """ Return a list of supported extractors.
The order does matter; the first extractor matched is the one handling the URL. The order does matter; the first extractor matched is the one handling the URL.
""" """
from .extractors import _ALL_CLASSES
return _ALL_CLASSES return _ALL_CLASSES
@ -39,10 +22,12 @@ def gen_extractors():
def list_extractor_classes(age_limit=None): def list_extractor_classes(age_limit=None):
"""Return a list of extractors that are suitable for the given age, sorted by extractor name""" """Return a list of extractors that are suitable for the given age, sorted by extractor name"""
from .generic import GenericIE
yield from sorted(filter( yield from sorted(filter(
lambda ie: ie.is_suitable(age_limit) and ie != GenericIE, # noqa: F405 lambda ie: ie.is_suitable(age_limit) and ie != GenericIE,
gen_extractor_classes()), key=lambda ie: ie.IE_NAME.lower()) gen_extractor_classes()), key=lambda ie: ie.IE_NAME.lower())
yield GenericIE # noqa: F405 yield GenericIE
def list_extractors(age_limit=None): def list_extractors(age_limit=None):
@ -52,4 +37,6 @@ def list_extractors(age_limit=None):
def get_info_extractor(ie_name): def get_info_extractor(ie_name):
"""Returns the info extractor class with the given ie_name""" """Returns the info extractor class with the given ie_name"""
return globals()[ie_name + 'IE'] from . import extractors
return getattr(extractors, f'{ie_name}IE')

View file

@ -0,0 +1,23 @@
import contextlib
import os
from ..utils import load_plugins
_LAZY_LOADER = False
if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
with contextlib.suppress(ImportError):
from .lazy_extractors import * # noqa: F403
from .lazy_extractors import _ALL_CLASSES
_LAZY_LOADER = True
if not _LAZY_LOADER:
from ._extractors import * # noqa: F403
_ALL_CLASSES = [ # noqa: F811
klass
for name, klass in globals().items()
if name.endswith('IE') and name != 'GenericIE'
]
_ALL_CLASSES.append(GenericIE) # noqa: F405
_PLUGIN_CLASSES = load_plugins('extractor', 'IE', globals())
_ALL_CLASSES = list(_PLUGIN_CLASSES.values()) + _ALL_CLASSES