[ie/youtube] Raise a warning for Incomplete Data
instead of an error (#8238)
Closes https://github.com/yt-dlp/yt-dlp/issues/8206. Adds the `raise_incomplete_data` extractor arg to revert this behaviour and raise an error. Authored by: coletdjnz. Co-authored-by: Simon Sawicki <contact@grub4k.xyz>
This commit is contained in:
parent
c54ddfba0f
commit
eb5bdbfa70
2 changed files with 20 additions and 7 deletions
|
@ -1809,6 +1809,7 @@ The following extractors use this feature:
|
||||||
* `formats`: Change the types of formats to return. `dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8)
|
* `formats`: Change the types of formats to return. `dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8)
|
||||||
* `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others
|
* `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others
|
||||||
* `innertube_key`: Innertube API key to use for all API requests
|
* `innertube_key`: Innertube API key to use for all API requests
|
||||||
|
* `raise_incomplete_data`: Raise an error instead of reporting a warning when `Incomplete Data Received` occurs
|
||||||
|
|
||||||
#### youtubetab (YouTube playlists, channels, feeds, etc.)
|
#### youtubetab (YouTube playlists, channels, feeds, etc.)
|
||||||
* `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details)
|
* `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details)
|
||||||
|
|
|
@ -941,7 +941,13 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||||
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
|
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
|
||||||
ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
|
ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
|
||||||
default_client='web'):
|
default_client='web'):
|
||||||
for retry in self.RetryManager():
|
raise_for_incomplete = bool(self._configuration_arg('raise_incomplete_data', ie_key=YoutubeIE))
|
||||||
|
# Incomplete Data should be a warning by default when retries are exhausted, while other errors should be fatal.
|
||||||
|
icd_retries = iter(self.RetryManager(fatal=raise_for_incomplete))
|
||||||
|
icd_rm = next(icd_retries)
|
||||||
|
main_retries = iter(self.RetryManager())
|
||||||
|
main_rm = next(main_retries)
|
||||||
|
for _ in range(main_rm.retries + icd_rm.retries + 1):
|
||||||
try:
|
try:
|
||||||
response = self._call_api(
|
response = self._call_api(
|
||||||
ep=ep, fatal=True, headers=headers,
|
ep=ep, fatal=True, headers=headers,
|
||||||
|
@ -953,7 +959,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||||
if not isinstance(e.cause, network_exceptions):
|
if not isinstance(e.cause, network_exceptions):
|
||||||
return self._error_or_warning(e, fatal=fatal)
|
return self._error_or_warning(e, fatal=fatal)
|
||||||
elif not isinstance(e.cause, HTTPError):
|
elif not isinstance(e.cause, HTTPError):
|
||||||
retry.error = e
|
main_rm.error = e
|
||||||
|
next(main_retries)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
first_bytes = e.cause.response.read(512)
|
first_bytes = e.cause.response.read(512)
|
||||||
|
@ -965,27 +972,32 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||||
if yt_error:
|
if yt_error:
|
||||||
self._report_alerts([('ERROR', yt_error)], fatal=False)
|
self._report_alerts([('ERROR', yt_error)], fatal=False)
|
||||||
# Downloading page may result in intermittent 5xx HTTP error
|
# Downloading page may result in intermittent 5xx HTTP error
|
||||||
# Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
|
# Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
|
||||||
# We also want to catch all other network exceptions since errors in later pages can be troublesome
|
# We also want to catch all other network exceptions since errors in later pages can be troublesome
|
||||||
# See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
|
# See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
|
||||||
if e.cause.status not in (403, 429):
|
if e.cause.status not in (403, 429):
|
||||||
retry.error = e
|
main_rm.error = e
|
||||||
|
next(main_retries)
|
||||||
continue
|
continue
|
||||||
return self._error_or_warning(e, fatal=fatal)
|
return self._error_or_warning(e, fatal=fatal)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self._extract_and_report_alerts(response, only_once=True)
|
self._extract_and_report_alerts(response, only_once=True)
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
# YouTube servers may return errors we want to retry on in a 200 OK response
|
# YouTube's servers may return errors we want to retry on in a 200 OK response
|
||||||
# See: https://github.com/yt-dlp/yt-dlp/issues/839
|
# See: https://github.com/yt-dlp/yt-dlp/issues/839
|
||||||
if 'unknown error' in e.msg.lower():
|
if 'unknown error' in e.msg.lower():
|
||||||
retry.error = e
|
main_rm.error = e
|
||||||
|
next(main_retries)
|
||||||
continue
|
continue
|
||||||
return self._error_or_warning(e, fatal=fatal)
|
return self._error_or_warning(e, fatal=fatal)
|
||||||
# Youtube sometimes sends incomplete data
|
# Youtube sometimes sends incomplete data
|
||||||
# See: https://github.com/ytdl-org/youtube-dl/issues/28194
|
# See: https://github.com/ytdl-org/youtube-dl/issues/28194
|
||||||
if not traverse_obj(response, *variadic(check_get_keys)):
|
if not traverse_obj(response, *variadic(check_get_keys)):
|
||||||
retry.error = ExtractorError('Incomplete data received', expected=True)
|
icd_rm.error = ExtractorError('Incomplete data received', expected=True)
|
||||||
|
should_retry = next(icd_retries, None)
|
||||||
|
if not should_retry:
|
||||||
|
return None
|
||||||
continue
|
continue
|
||||||
|
|
||||||
return response
|
return response
|
||||||
|
|
Loading…
Reference in a new issue