Add option --lazy-playlist
to process entries as they are received
This commit is contained in:
parent
0df111a371
commit
7e9a612585
6 changed files with 97 additions and 66 deletions
|
@ -540,6 +540,11 @@ You can also fork the project on github and run your fork's [build workflow](.gi
|
||||||
bandwidth throttling imposed by a webserver
|
bandwidth throttling imposed by a webserver
|
||||||
(experimental)
|
(experimental)
|
||||||
--playlist-random Download playlist videos in random order
|
--playlist-random Download playlist videos in random order
|
||||||
|
--lazy-playlist Process entries in the playlist as they are
|
||||||
|
received. This disables n_entries,
|
||||||
|
--playlist-random and --playlist-reverse
|
||||||
|
--no-lazy-playlist Process videos in the playlist only after
|
||||||
|
the entire playlist is parsed (default)
|
||||||
--xattr-set-filesize Set file xattribute ytdl.filesize with
|
--xattr-set-filesize Set file xattribute ytdl.filesize with
|
||||||
expected file size
|
expected file size
|
||||||
--hls-use-mpegts Use the mpegts container for HLS videos;
|
--hls-use-mpegts Use the mpegts container for HLS videos;
|
||||||
|
|
|
@ -1046,7 +1046,7 @@ class TestYoutubeDL(unittest.TestCase):
|
||||||
for name, func, expected_eval in (
|
for name, func, expected_eval in (
|
||||||
('list', list_entries, INDICES),
|
('list', list_entries, INDICES),
|
||||||
('Generator', generator_entries, generator_eval),
|
('Generator', generator_entries, generator_eval),
|
||||||
('LazyList', lazylist_entries, generator_eval),
|
# ('LazyList', lazylist_entries, generator_eval), # Generator and LazyList follow the exact same code path
|
||||||
('PagedList', pagedlist_entries, pagedlist_eval),
|
('PagedList', pagedlist_entries, pagedlist_eval),
|
||||||
):
|
):
|
||||||
evaluated = []
|
evaluated = []
|
||||||
|
|
|
@ -242,11 +242,9 @@ class YoutubeDL:
|
||||||
and don't overwrite any file if False
|
and don't overwrite any file if False
|
||||||
For compatibility with youtube-dl,
|
For compatibility with youtube-dl,
|
||||||
"nooverwrites" may also be used instead
|
"nooverwrites" may also be used instead
|
||||||
playliststart: Playlist item to start at.
|
|
||||||
playlistend: Playlist item to end at.
|
|
||||||
playlist_items: Specific indices of playlist to download.
|
playlist_items: Specific indices of playlist to download.
|
||||||
playlistreverse: Download playlist items in reverse order.
|
|
||||||
playlistrandom: Download playlist items in random order.
|
playlistrandom: Download playlist items in random order.
|
||||||
|
lazy_playlist: Process playlist entries as they are received.
|
||||||
matchtitle: Download only matching titles.
|
matchtitle: Download only matching titles.
|
||||||
rejecttitle: Reject downloads for matching titles.
|
rejecttitle: Reject downloads for matching titles.
|
||||||
logger: Log messages to a logging.Logger instance.
|
logger: Log messages to a logging.Logger instance.
|
||||||
|
@ -469,6 +467,12 @@ class YoutubeDL:
|
||||||
|
|
||||||
The following options are deprecated and may be removed in the future:
|
The following options are deprecated and may be removed in the future:
|
||||||
|
|
||||||
|
playliststart: - Use playlist_items
|
||||||
|
Playlist item to start at.
|
||||||
|
playlistend: - Use playlist_items
|
||||||
|
Playlist item to end at.
|
||||||
|
playlistreverse: - Use playlist_items
|
||||||
|
Download playlist items in reverse order.
|
||||||
forceurl: - Use forceprint
|
forceurl: - Use forceprint
|
||||||
Force printing final URL.
|
Force printing final URL.
|
||||||
forcetitle: - Use forceprint
|
forcetitle: - Use forceprint
|
||||||
|
@ -1671,16 +1675,26 @@ class YoutubeDL:
|
||||||
self.to_screen(f'[download] Downloading playlist: {title}')
|
self.to_screen(f'[download] Downloading playlist: {title}')
|
||||||
|
|
||||||
all_entries = PlaylistEntries(self, ie_result)
|
all_entries = PlaylistEntries(self, ie_result)
|
||||||
entries = orderedSet(all_entries.get_requested_items())
|
entries = orderedSet(all_entries.get_requested_items(), lazy=True)
|
||||||
ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*entries)) or ([], [])
|
|
||||||
n_entries, ie_result['playlist_count'] = len(entries), all_entries.full_count
|
lazy = self.params.get('lazy_playlist')
|
||||||
|
if lazy:
|
||||||
|
resolved_entries, n_entries = [], 'N/A'
|
||||||
|
ie_result['requested_entries'], ie_result['entries'] = None, None
|
||||||
|
else:
|
||||||
|
entries = resolved_entries = list(entries)
|
||||||
|
n_entries = len(resolved_entries)
|
||||||
|
ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
|
||||||
|
if not ie_result.get('playlist_count'):
|
||||||
|
# Better to do this after potentially exhausting entries
|
||||||
|
ie_result['playlist_count'] = all_entries.get_full_count()
|
||||||
|
|
||||||
_infojson_written = False
|
_infojson_written = False
|
||||||
write_playlist_files = self.params.get('allow_playlist_files', True)
|
write_playlist_files = self.params.get('allow_playlist_files', True)
|
||||||
if write_playlist_files and self.params.get('list_thumbnails'):
|
if write_playlist_files and self.params.get('list_thumbnails'):
|
||||||
self.list_thumbnails(ie_result)
|
self.list_thumbnails(ie_result)
|
||||||
if write_playlist_files and not self.params.get('simulate'):
|
if write_playlist_files and not self.params.get('simulate'):
|
||||||
ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries)
|
ie_copy = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
|
||||||
_infojson_written = self._write_info_json(
|
_infojson_written = self._write_info_json(
|
||||||
'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
|
'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
|
||||||
if _infojson_written is None:
|
if _infojson_written is None:
|
||||||
|
@ -1691,9 +1705,12 @@ class YoutubeDL:
|
||||||
# TODO: This should be passed to ThumbnailsConvertor if necessary
|
# TODO: This should be passed to ThumbnailsConvertor if necessary
|
||||||
self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
|
self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
|
||||||
|
|
||||||
if self.params.get('playlistreverse', False):
|
if lazy:
|
||||||
entries = entries[::-1]
|
if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
|
||||||
if self.params.get('playlistrandom', False):
|
self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
|
||||||
|
elif self.params.get('playlistreverse'):
|
||||||
|
entries.reverse()
|
||||||
|
elif self.params.get('playlistrandom'):
|
||||||
random.shuffle(entries)
|
random.shuffle(entries)
|
||||||
|
|
||||||
self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} videos'
|
self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} videos'
|
||||||
|
@ -1701,23 +1718,27 @@ class YoutubeDL:
|
||||||
|
|
||||||
failures = 0
|
failures = 0
|
||||||
max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
|
max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
|
||||||
for i, (playlist_index, entry) in enumerate(entries, 1):
|
for i, (playlist_index, entry) in enumerate(entries):
|
||||||
|
if lazy:
|
||||||
|
resolved_entries.append((playlist_index, entry))
|
||||||
|
|
||||||
# TODO: Add auto-generated fields
|
# TODO: Add auto-generated fields
|
||||||
if self._match_entry(entry, incomplete=True) is not None:
|
if self._match_entry(entry, incomplete=True) is not None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if 'playlist-index' in self.params.get('compat_opts', []):
|
|
||||||
playlist_index = ie_result['requested_entries'][i - 1]
|
|
||||||
self.to_screen('[download] Downloading video %s of %s' % (
|
self.to_screen('[download] Downloading video %s of %s' % (
|
||||||
self._format_screen(i, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
|
self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
|
||||||
|
|
||||||
entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
|
entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
|
||||||
|
if not lazy and 'playlist-index' in self.params.get('compat_opts', []):
|
||||||
|
playlist_index = ie_result['requested_entries'][i]
|
||||||
|
|
||||||
entry_result = self.__process_iterable_entry(entry, download, {
|
entry_result = self.__process_iterable_entry(entry, download, {
|
||||||
'n_entries': n_entries,
|
'n_entries': int_or_none(n_entries),
|
||||||
'__last_playlist_index': max(ie_result['requested_entries']),
|
'__last_playlist_index': max(ie_result['requested_entries'] or (0, 0)),
|
||||||
'playlist_count': ie_result.get('playlist_count'),
|
'playlist_count': ie_result.get('playlist_count'),
|
||||||
'playlist_index': playlist_index,
|
'playlist_index': playlist_index,
|
||||||
'playlist_autonumber': i,
|
'playlist_autonumber': i + 1,
|
||||||
'playlist': title,
|
'playlist': title,
|
||||||
'playlist_id': ie_result.get('id'),
|
'playlist_id': ie_result.get('id'),
|
||||||
'playlist_title': ie_result.get('title'),
|
'playlist_title': ie_result.get('title'),
|
||||||
|
@ -1735,10 +1756,10 @@ class YoutubeDL:
|
||||||
self.report_error(
|
self.report_error(
|
||||||
f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
|
f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
|
||||||
break
|
break
|
||||||
entries[i - 1] = (playlist_index, entry_result)
|
resolved_entries[i] = (playlist_index, entry_result)
|
||||||
|
|
||||||
# Update with processed data
|
# Update with processed data
|
||||||
ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*entries)) or ([], [])
|
ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
|
||||||
|
|
||||||
# Write the updated info to json
|
# Write the updated info to json
|
||||||
if _infojson_written is True and self._write_info_json(
|
if _infojson_written is True and self._write_info_json(
|
||||||
|
|
|
@ -434,6 +434,9 @@ def validate_options(opts):
|
||||||
setattr(opts, opt1, default)
|
setattr(opts, opt1, default)
|
||||||
|
|
||||||
# Conflicting options
|
# Conflicting options
|
||||||
|
report_conflict('--playlist-reverse', 'playlist_reverse', '--playlist-random', 'playlist_random')
|
||||||
|
report_conflict('--playlist-reverse', 'playlist_reverse', '--lazy-playlist', 'lazy_playlist')
|
||||||
|
report_conflict('--playlist-random', 'playlist_random', '--lazy-playlist', 'lazy_playlist')
|
||||||
report_conflict('--dateafter', 'dateafter', '--date', 'date', default=None)
|
report_conflict('--dateafter', 'dateafter', '--date', 'date', default=None)
|
||||||
report_conflict('--datebefore', 'datebefore', '--date', 'date', default=None)
|
report_conflict('--datebefore', 'datebefore', '--date', 'date', default=None)
|
||||||
report_conflict('--exec-before-download', 'exec_before_dl_cmd',
|
report_conflict('--exec-before-download', 'exec_before_dl_cmd',
|
||||||
|
@ -740,6 +743,7 @@ def parse_options(argv=None):
|
||||||
'playlistend': opts.playlistend,
|
'playlistend': opts.playlistend,
|
||||||
'playlistreverse': opts.playlist_reverse,
|
'playlistreverse': opts.playlist_reverse,
|
||||||
'playlistrandom': opts.playlist_random,
|
'playlistrandom': opts.playlist_random,
|
||||||
|
'lazy_playlist': opts.lazy_playlist,
|
||||||
'noplaylist': opts.noplaylist,
|
'noplaylist': opts.noplaylist,
|
||||||
'logtostderr': opts.outtmpl.get('default') == '-',
|
'logtostderr': opts.outtmpl.get('default') == '-',
|
||||||
'consoletitle': opts.consoletitle,
|
'consoletitle': opts.consoletitle,
|
||||||
|
|
|
@ -888,7 +888,7 @@ def create_parser():
|
||||||
help=optparse.SUPPRESS_HELP)
|
help=optparse.SUPPRESS_HELP)
|
||||||
downloader.add_option(
|
downloader.add_option(
|
||||||
'--playlist-reverse',
|
'--playlist-reverse',
|
||||||
action='store_true',
|
action='store_true', dest='playlist_reverse',
|
||||||
help=optparse.SUPPRESS_HELP)
|
help=optparse.SUPPRESS_HELP)
|
||||||
downloader.add_option(
|
downloader.add_option(
|
||||||
'--no-playlist-reverse',
|
'--no-playlist-reverse',
|
||||||
|
@ -896,8 +896,16 @@ def create_parser():
|
||||||
help=optparse.SUPPRESS_HELP)
|
help=optparse.SUPPRESS_HELP)
|
||||||
downloader.add_option(
|
downloader.add_option(
|
||||||
'--playlist-random',
|
'--playlist-random',
|
||||||
action='store_true',
|
action='store_true', dest='playlist_random',
|
||||||
help='Download playlist videos in random order')
|
help='Download playlist videos in random order')
|
||||||
|
downloader.add_option(
|
||||||
|
'--lazy-playlist',
|
||||||
|
action='store_true', dest='lazy_playlist',
|
||||||
|
help='Process entries in the playlist as they are received. This disables n_entries, --playlist-random and --playlist-reverse')
|
||||||
|
downloader.add_option(
|
||||||
|
'--no-lazy-playlist',
|
||||||
|
action='store_false', dest='lazy_playlist',
|
||||||
|
help='Process videos in the playlist only after the entire playlist is parsed (default)')
|
||||||
downloader.add_option(
|
downloader.add_option(
|
||||||
'--xattr-set-filesize',
|
'--xattr-set-filesize',
|
||||||
dest='xattr_set_filesize', action='store_true',
|
dest='xattr_set_filesize', action='store_true',
|
||||||
|
|
|
@ -770,13 +770,16 @@ def expand_path(s):
|
||||||
return os.path.expandvars(compat_expanduser(s))
|
return os.path.expandvars(compat_expanduser(s))
|
||||||
|
|
||||||
|
|
||||||
def orderedSet(iterable):
|
def orderedSet(iterable, *, lazy=False):
|
||||||
""" Remove all duplicates from the input iterable """
|
"""Remove all duplicates from the input iterable"""
|
||||||
res = []
|
def _iter():
|
||||||
for el in iterable:
|
seen = [] # Do not use set since the items can be unhashable
|
||||||
if el not in res:
|
for x in iterable:
|
||||||
res.append(el)
|
if x not in seen:
|
||||||
return res
|
seen.append(x)
|
||||||
|
yield x
|
||||||
|
|
||||||
|
return _iter() if lazy else list(_iter())
|
||||||
|
|
||||||
|
|
||||||
def _htmlentity_transform(entity_with_semicolon):
|
def _htmlentity_transform(entity_with_semicolon):
|
||||||
|
@ -2820,7 +2823,26 @@ class PlaylistEntries:
|
||||||
is_exhausted = False
|
is_exhausted = False
|
||||||
|
|
||||||
def __init__(self, ydl, info_dict):
|
def __init__(self, ydl, info_dict):
|
||||||
self.ydl, self.info_dict = ydl, info_dict
|
self.ydl = ydl
|
||||||
|
|
||||||
|
# _entries must be assigned now since infodict can change during iteration
|
||||||
|
entries = info_dict.get('entries')
|
||||||
|
if entries is None:
|
||||||
|
raise EntryNotInPlaylist('There are no entries')
|
||||||
|
elif isinstance(entries, list):
|
||||||
|
self.is_exhausted = True
|
||||||
|
|
||||||
|
requested_entries = info_dict.get('requested_entries')
|
||||||
|
self.is_incomplete = bool(requested_entries)
|
||||||
|
if self.is_incomplete:
|
||||||
|
assert self.is_exhausted
|
||||||
|
self._entries = [self.MissingEntry] * max(requested_entries)
|
||||||
|
for i, entry in zip(requested_entries, entries):
|
||||||
|
self._entries[i - 1] = entry
|
||||||
|
elif isinstance(entries, (list, PagedList, LazyList)):
|
||||||
|
self._entries = entries
|
||||||
|
else:
|
||||||
|
self._entries = LazyList(entries)
|
||||||
|
|
||||||
PLAYLIST_ITEMS_RE = re.compile(r'''(?x)
|
PLAYLIST_ITEMS_RE = re.compile(r'''(?x)
|
||||||
(?P<start>[+-]?\d+)?
|
(?P<start>[+-]?\d+)?
|
||||||
|
@ -2863,37 +2885,13 @@ class PlaylistEntries:
|
||||||
except (ExistingVideoReached, RejectedVideoReached):
|
except (ExistingVideoReached, RejectedVideoReached):
|
||||||
return
|
return
|
||||||
|
|
||||||
@property
|
def get_full_count(self):
|
||||||
def full_count(self):
|
if self.is_exhausted and not self.is_incomplete:
|
||||||
if self.info_dict.get('playlist_count'):
|
|
||||||
return self.info_dict['playlist_count']
|
|
||||||
elif self.is_exhausted and not self.is_incomplete:
|
|
||||||
return len(self)
|
return len(self)
|
||||||
elif isinstance(self._entries, InAdvancePagedList):
|
elif isinstance(self._entries, InAdvancePagedList):
|
||||||
if self._entries._pagesize == 1:
|
if self._entries._pagesize == 1:
|
||||||
return self._entries._pagecount
|
return self._entries._pagecount
|
||||||
|
|
||||||
@functools.cached_property
|
|
||||||
def _entries(self):
|
|
||||||
entries = self.info_dict.get('entries')
|
|
||||||
if entries is None:
|
|
||||||
raise EntryNotInPlaylist('There are no entries')
|
|
||||||
elif isinstance(entries, list):
|
|
||||||
self.is_exhausted = True
|
|
||||||
|
|
||||||
indices = self.info_dict.get('requested_entries')
|
|
||||||
self.is_incomplete = bool(indices)
|
|
||||||
if self.is_incomplete:
|
|
||||||
assert self.is_exhausted
|
|
||||||
ret = [self.MissingEntry] * max(indices)
|
|
||||||
for i, entry in zip(indices, entries):
|
|
||||||
ret[i - 1] = entry
|
|
||||||
return ret
|
|
||||||
|
|
||||||
if isinstance(entries, (list, PagedList, LazyList)):
|
|
||||||
return entries
|
|
||||||
return LazyList(entries)
|
|
||||||
|
|
||||||
@functools.cached_property
|
@functools.cached_property
|
||||||
def _getter(self):
|
def _getter(self):
|
||||||
if isinstance(self._entries, list):
|
if isinstance(self._entries, list):
|
||||||
|
@ -2937,17 +2935,12 @@ class PlaylistEntries:
|
||||||
if i < 0:
|
if i < 0:
|
||||||
continue
|
continue
|
||||||
try:
|
try:
|
||||||
try:
|
entry = self._getter(i)
|
||||||
entry = self._getter(i)
|
except self.IndexError:
|
||||||
except self.IndexError:
|
self.is_exhausted = True
|
||||||
self.is_exhausted = True
|
if step > 0:
|
||||||
if step > 0:
|
|
||||||
break
|
|
||||||
continue
|
|
||||||
except IndexError:
|
|
||||||
if self.is_exhausted:
|
|
||||||
break
|
break
|
||||||
raise
|
continue
|
||||||
yield i + 1, entry
|
yield i + 1, entry
|
||||||
|
|
||||||
def __len__(self):
|
def __len__(self):
|
||||||
|
|
Loading…
Reference in a new issue