From 8e3fd7e034cdd54972d13394821cd9e55e1c3735 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 26 Sep 2021 20:00:56 +0530 Subject: [PATCH] [reddit] Fix 429 by generating a random `reddit_session` Related: a76e2e0f8898c06939b6a123fa863ab8876cfa20, #1014, https://github.com/ytdl-org/youtube-dl/issues/29986 Original PR: https://github.com/ytdl-org/youtube-dl/pull/30017 Authored by: AjaxGb --- yt_dlp/extractor/reddit.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py index 14592bc62..e5a1f6920 100644 --- a/yt_dlp/extractor/reddit.py +++ b/yt_dlp/extractor/reddit.py @@ -1,5 +1,4 @@ -from __future__ import unicode_literals - +import random from .common import InfoExtractor from ..utils import ( @@ -49,7 +48,7 @@ class RedditIE(InfoExtractor): class RedditRIE(InfoExtractor): - _VALID_URL = r'https?://(?:[^/]+\.)?reddit(?:media)?\.com/r/(?P<slug>[^/]+/comments/(?P<id>[^/?#&]+))' + _VALID_URL = r'https?://(?P<subdomain>[^/]+\.)?reddit(?:media)?\.com/r/(?P<slug>[^/]+/comments/(?P<id>[^/?#&]+))' _TESTS = [{ 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/', 'info_dict': { @@ -99,13 +98,22 @@ class RedditRIE(InfoExtractor): 'only_matching': True, }] + @staticmethod + def _gen_session_id(): + id_length = 16 + rand_max = 1 << (id_length * 4) + return '%0.*x' % (id_length, random.randrange(rand_max)) + def _real_extract(self, url): - slug, video_id = self._match_valid_url(url).group('slug', 'id') - - self._set_cookie('reddit.com', '_options', '%7B%22pref_quarantine_optin%22%3A%20true%7D') - data = self._download_json( - f'https://old.reddit.com/r/{slug}/.json', video_id)[0]['data']['children'][0]['data'] + subdomain, slug, video_id = self._match_valid_url(url).group('subdomain', 'slug', 'id') + self._set_cookie('.reddit.com', 'reddit_session', self._gen_session_id()) + self._set_cookie('.reddit.com', '_options', '%7B%22pref_quarantine_optin%22%3A%20true%7D') + data = 
self._download_json(f'https://{subdomain}.reddit.com/r/{slug}/.json', video_id, fatal=False) + if not data: + # Fall back to old.reddit.com in case the requested subdomain fails + data = self._download_json(f'https://old.reddit.com/r/{slug}/.json', video_id) + data = data[0]['data']['children'][0]['data'] video_url = data['url'] # Avoid recursing into the same reddit URL