[extractor] Import _ALL_CLASSES lazily

This significantly speeds up `import yt_dlp` in the absence of `lazy_extractors`
2022-06-15 18:00:34 +05:30 · 2022-06-15 18:00:34 +05:30 · 560738f34d
commit 560738f34d
parent 99d10bf607
6 changed files with 49 additions and 30 deletions
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@@ -53,7 +53,7 @@ def get_all_ies():
    if os.path.exists(PLUGINS_DIRNAME):
        os.rename(PLUGINS_DIRNAME, BLOCKED_DIRNAME)
    try:
-        from yt_dlp.extractor import _ALL_CLASSES
+        from yt_dlp.extractor.extractors import _ALL_CLASSES
    finally:
        if os.path.exists(BLOCKED_DIRNAME):
            os.rename(BLOCKED_DIRNAME, PLUGINS_DIRNAME)
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -38,8 +38,6 @@ from .compat import (
 from .cookies import load_cookies
 from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
 from .downloader.rtmp import rtmpdump_version
 from .extractor import _LAZY_LOADER
 from .extractor import _PLUGIN_CLASSES as plugin_extractors
 from .extractor import gen_extractor_classes, get_info_extractor
 from .extractor.openload import PhantomJSwrapper
 from .minicurses import format_text
@@ -3659,6 +3657,10 @@ class YoutubeDL:
        if not self.params.get('verbose'):
            return
        # These imports can be slow. So import them only as needed
        from .extractor.extractors import _LAZY_LOADER
        from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors
        def get_encoding(stream):
            ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
            if not supports_terminal_sequences(stream):
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -12,7 +12,7 @@ import sys
 from .compat import compat_getpass, compat_shlex_quote
 from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
 from .downloader import FileDownloader
-from .extractor import GenericIE, list_extractor_classes
+from .extractor import list_extractor_classes
 from .extractor.adobepass import MSO_INFO
 from .extractor.common import InfoExtractor
 from .options import parseOpts
@@ -79,6 +79,10 @@ def get_urls(urls, batchfile, verbose):
 def print_extractor_information(opts, urls):
    # Importing GenericIE is currently slow since it imports other extractors
    # TODO: Move this back to module level after generalization of embed detection
    from .extractor.generic import GenericIE
    out = ''
    if opts.list_extractors:
        urls = dict.fromkeys(urls, False)
--- a/yt_dlp/compat/compat_utils.py
+++ b/yt_dlp/compat/compat_utils.py
@@ -33,7 +33,7 @@ def _is_package(module):
 def passthrough_module(parent, child, *, callback=lambda _: None):
    parent_module = importlib.import_module(parent)
-    child_module = importlib.import_module(child, parent)
+    child_module = None  # Import child module only as needed
    class PassthroughModule(types.ModuleType):
        def __getattr__(self, attr):
@@ -41,6 +41,9 @@ def passthrough_module(parent, child, *, callback=lambda _: None):
                with contextlib.suppress(ImportError):
                    return importlib.import_module(f'.{attr}', parent)
            nonlocal child_module
            child_module = child_module or importlib.import_module(child, parent)
            ret = _NO_ATTRIBUTE
            with contextlib.suppress(AttributeError):
                ret = getattr(child_module, attr)
--- a/yt_dlp/extractor/__init__.py
+++ b/yt_dlp/extractor/__init__.py
@@ -1,32 +1,15 @@
-import contextlib
+from ..compat.compat_utils import passthrough_module
 import os
-from ..utils import load_plugins
+passthrough_module(__name__, '.extractors')
-
+del passthrough_module
 _LAZY_LOADER = False
 if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
    with contextlib.suppress(ImportError):
        from .lazy_extractors import *  # noqa: F403
        from .lazy_extractors import _ALL_CLASSES
        _LAZY_LOADER = True
 if not _LAZY_LOADER:
    from ._extractors import *  # noqa: F403
    _ALL_CLASSES = [  # noqa: F811
        klass
        for name, klass in globals().items()
        if name.endswith('IE') and name != 'GenericIE'
    ]
    _ALL_CLASSES.append(GenericIE)  # noqa: F405
 _PLUGIN_CLASSES = load_plugins('extractor', 'IE', globals())
 _ALL_CLASSES = list(_PLUGIN_CLASSES.values()) + _ALL_CLASSES
 def gen_extractor_classes():
    """ Return a list of supported extractors.
    The order does matter; the first extractor matched is the one handling the URL.
    """
    from .extractors import _ALL_CLASSES
    return _ALL_CLASSES
@@ -39,10 +22,12 @@ def gen_extractors():
 def list_extractor_classes(age_limit=None):
    """Return a list of extractors that are suitable for the given age, sorted by extractor name"""
    from .generic import GenericIE
    yield from sorted(filter(
-        lambda ie: ie.is_suitable(age_limit) and ie != GenericIE,  # noqa: F405
+        lambda ie: ie.is_suitable(age_limit) and ie != GenericIE,
        gen_extractor_classes()), key=lambda ie: ie.IE_NAME.lower())
-    yield GenericIE  # noqa: F405
+    yield GenericIE
 def list_extractors(age_limit=None):
@@ -52,4 +37,6 @@ def list_extractors(age_limit=None):
 def get_info_extractor(ie_name):
    """Returns the info extractor class with the given ie_name"""
-    return globals()[ie_name + 'IE']
+    from . import extractors
    return getattr(extractors, f'{ie_name}IE')
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -0,0 +1,23 @@
 import contextlib
 import os
 from ..utils import load_plugins
 _LAZY_LOADER = False
 if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
    with contextlib.suppress(ImportError):
        from .lazy_extractors import *  # noqa: F403
        from .lazy_extractors import _ALL_CLASSES
        _LAZY_LOADER = True
 if not _LAZY_LOADER:
    from ._extractors import *  # noqa: F403
    _ALL_CLASSES = [  # noqa: F811
        klass
        for name, klass in globals().items()
        if name.endswith('IE') and name != 'GenericIE'
    ]
    _ALL_CLASSES.append(GenericIE)  # noqa: F405
 _PLUGIN_CLASSES = load_plugins('extractor', 'IE', globals())
 _ALL_CLASSES = list(_PLUGIN_CLASSES.values()) + _ALL_CLASSES