[ie/weibo] Fix extraction (#8463)

Closes #8445 Authored by: c-basalt
2023-11-11 15:02:59 -05:00 · 2023-11-11 15:02:59 -05:00 · 15b252dfd2
commit 15b252dfd2
parent 312a2d1e8b
1 changed files with 19 additions and 9 deletions
--- a/yt_dlp/extractor/weibo.py
+++ b/yt_dlp/extractor/weibo.py
@ -1,3 +1,4 @@
 import json
 import random
 import itertools
 import urllib.parse
@ -18,24 +19,33 @@ from ..utils import (
 class WeiboBaseIE(InfoExtractor):
-    def _update_visitor_cookies(self, video_id):
+    def _update_visitor_cookies(self, visitor_url, video_id):
        headers = {'Referer': visitor_url}
        chrome_ver = self._search_regex(
            r'Chrome/(\d+)', self.get_param('http_headers')['User-Agent'], 'user agent version', default='90')
        visitor_data = self._download_json(
            'https://passport.weibo.com/visitor/genvisitor', video_id,
            note='Generating first-visit guest request',
-            transform_source=strip_jsonp,
+            headers=headers, transform_source=strip_jsonp,
            data=urlencode_postdata({
                'cb': 'gen_callback',
-                'fp': '{"os":"2","browser":"Gecko57,0,0,0","fonts":"undefined","screenInfo":"1440*900*24","plugins":""}',
+                'fp': json.dumps({
-            }))
+                    'os': '1',
                    'browser': f'Chrome{chrome_ver},0,0,0',
                    'fonts': 'undefined',
                    'screenInfo': '1920*1080*24',
                    'plugins': ''
                }, separators=(',', ':'))}))['data']
        self._download_webpage(
            'https://passport.weibo.com/visitor/visitor', video_id,
            note='Running first-visit callback to get guest cookies',
-            query={
+            headers=headers, query={
                'a': 'incarnate',
-                't': visitor_data['data']['tid'],
+                't': visitor_data['tid'],
-                'w': 2,
+                'w': 3 if visitor_data.get('new_tid') else 2,
-                'c': '%03d' % visitor_data['data']['confidence'],
+                'c': f'{visitor_data.get("confidence", 100):03d}',
                'gc': '',
                'cb': 'cross_domain',
                'from': 'weibo',
                '_rand': random.random(),
@ -44,7 +54,7 @@ class WeiboBaseIE(InfoExtractor):
    def _weibo_download_json(self, url, video_id, *args, fatal=True, note='Downloading JSON metadata', **kwargs):
        webpage, urlh = self._download_webpage_handle(url, video_id, *args, fatal=fatal, note=note, **kwargs)
        if urllib.parse.urlparse(urlh.url).netloc == 'passport.weibo.com':
-            self._update_visitor_cookies(video_id)
+            self._update_visitor_cookies(urlh.url, video_id)
            webpage = self._download_webpage(url, video_id, *args, fatal=fatal, note=note, **kwargs)
        return self._parse_json(webpage, video_id, fatal=fatal)