[extractor/tiktok] Fix TikTokIE (#4984)

Authored by: bashonly
This commit is contained in:
bashonly 2022-09-21 09:12:54 +00:00 committed by GitHub
parent fada8272b6
commit f7c5a5e967
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -25,7 +25,7 @@ from ..utils import (
class TikTokBaseIE(InfoExtractor): class TikTokBaseIE(InfoExtractor):
_APP_VERSIONS = [('20.9.3', '293'), ('20.4.3', '243'), ('20.2.1', '221'), ('20.1.2', '212'), ('20.0.4', '204')] _APP_VERSIONS = [('26.1.3', '260103'), ('26.1.2', '260102'), ('26.1.1', '260101'), ('25.6.2', '250602')]
_WORKING_APP_VERSION = None _WORKING_APP_VERSION = None
_APP_NAME = 'trill' _APP_NAME = 'trill'
_AID = 1180 _AID = 1180
@ -33,7 +33,6 @@ class TikTokBaseIE(InfoExtractor):
_UPLOADER_URL_FORMAT = 'https://www.tiktok.com/@%s' _UPLOADER_URL_FORMAT = 'https://www.tiktok.com/@%s'
_WEBPAGE_HOST = 'https://www.tiktok.com/' _WEBPAGE_HOST = 'https://www.tiktok.com/'
QUALITIES = ('360p', '540p', '720p', '1080p') QUALITIES = ('360p', '540p', '720p', '1080p')
_session_initialized = False
@staticmethod @staticmethod
def _create_url(user_id, video_id): def _create_url(user_id, video_id):
@ -43,12 +42,6 @@ class TikTokBaseIE(InfoExtractor):
return self._parse_json(get_element_by_id( return self._parse_json(get_element_by_id(
'SIGI_STATE|sigi-persisted-data', webpage, escape_value=False), display_id) 'SIGI_STATE|sigi-persisted-data', webpage, escape_value=False), display_id)
def _real_initialize(self):
if self._session_initialized:
return
self._request_webpage(HEADRequest('https://www.tiktok.com'), None, note='Setting up session', fatal=False)
TikTokBaseIE._session_initialized = True
def _call_api_impl(self, ep, query, manifest_app_version, video_id, fatal=True, def _call_api_impl(self, ep, query, manifest_app_version, video_id, fatal=True,
note='Downloading API JSON', errnote='Unable to download API page'): note='Downloading API JSON', errnote='Unable to download API page'):
self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choice('0123456789abcdef') for _ in range(160))) self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choice('0123456789abcdef') for _ in range(160)))
@ -289,7 +282,7 @@ class TikTokBaseIE(InfoExtractor):
'uploader_url': user_url, 'uploader_url': user_url,
'track': music_track, 'track': music_track,
'album': str_or_none(music_info.get('album')) or None, 'album': str_or_none(music_info.get('album')) or None,
'artist': music_author, 'artist': music_author or None,
'timestamp': int_or_none(aweme_detail.get('create_time')), 'timestamp': int_or_none(aweme_detail.get('create_time')),
'formats': formats, 'formats': formats,
'subtitles': self.extract_subtitles(aweme_detail, aweme_id), 'subtitles': self.extract_subtitles(aweme_detail, aweme_id),
@ -522,7 +515,7 @@ class TikTokIE(TikTokBaseIE):
'repost_count': int, 'repost_count': int,
'comment_count': int, 'comment_count': int,
}, },
'expected_warnings': ['trying feed workaround', 'Unable to find video in feed'] 'skip': 'This video is unavailable',
}, { }, {
# Auto-captions available # Auto-captions available
'url': 'https://www.tiktok.com/@hankgreen1/video/7047596209028074758', 'url': 'https://www.tiktok.com/@hankgreen1/video/7047596209028074758',
@ -530,13 +523,6 @@ class TikTokIE(TikTokBaseIE):
}] }]
def _extract_aweme_app(self, aweme_id): def _extract_aweme_app(self, aweme_id):
try:
aweme_detail = self._call_api('aweme/detail', {'aweme_id': aweme_id}, aweme_id,
note='Downloading video details', errnote='Unable to download video details').get('aweme_detail')
if not aweme_detail:
raise ExtractorError('Video not available', video_id=aweme_id)
except ExtractorError as e:
self.report_warning(f'{e.orig_msg}; trying feed workaround')
feed_list = self._call_api('feed', {'aweme_id': aweme_id}, aweme_id, feed_list = self._call_api('feed', {'aweme_id': aweme_id}, aweme_id,
note='Downloading video feed', errnote='Unable to download video feed').get('aweme_list') or [] note='Downloading video feed', errnote='Unable to download video feed').get('aweme_list') or []
aweme_detail = next((aweme for aweme in feed_list if str(aweme.get('aweme_id')) == aweme_id), None) aweme_detail = next((aweme for aweme in feed_list if str(aweme.get('aweme_id')) == aweme_id), None)
@ -572,6 +558,7 @@ class TikTokIE(TikTokBaseIE):
class TikTokUserIE(TikTokBaseIE): class TikTokUserIE(TikTokBaseIE):
IE_NAME = 'tiktok:user' IE_NAME = 'tiktok:user'
_VALID_URL = r'https?://(?:www\.)?tiktok\.com/@(?P<id>[\w\.-]+)/?(?:$|[#?])' _VALID_URL = r'https?://(?:www\.)?tiktok\.com/@(?P<id>[\w\.-]+)/?(?:$|[#?])'
_WORKING = False
_TESTS = [{ _TESTS = [{
'url': 'https://tiktok.com/@corgibobaa?lang=en', 'url': 'https://tiktok.com/@corgibobaa?lang=en',
'playlist_mincount': 45, 'playlist_mincount': 45,
@ -708,6 +695,7 @@ class TikTokBaseListIE(TikTokBaseIE):
class TikTokSoundIE(TikTokBaseListIE): class TikTokSoundIE(TikTokBaseListIE):
IE_NAME = 'tiktok:sound' IE_NAME = 'tiktok:sound'
_VALID_URL = r'https?://(?:www\.)?tiktok\.com/music/[\w\.-]+-(?P<id>[\d]+)[/?#&]?' _VALID_URL = r'https?://(?:www\.)?tiktok\.com/music/[\w\.-]+-(?P<id>[\d]+)[/?#&]?'
_WORKING = False
_QUERY_NAME = 'music_id' _QUERY_NAME = 'music_id'
_API_ENDPOINT = 'music/aweme' _API_ENDPOINT = 'music/aweme'
_TESTS = [{ _TESTS = [{
@ -731,6 +719,7 @@ class TikTokSoundIE(TikTokBaseListIE):
class TikTokEffectIE(TikTokBaseListIE): class TikTokEffectIE(TikTokBaseListIE):
IE_NAME = 'tiktok:effect' IE_NAME = 'tiktok:effect'
_VALID_URL = r'https?://(?:www\.)?tiktok\.com/sticker/[\w\.-]+-(?P<id>[\d]+)[/?#&]?' _VALID_URL = r'https?://(?:www\.)?tiktok\.com/sticker/[\w\.-]+-(?P<id>[\d]+)[/?#&]?'
_WORKING = False
_QUERY_NAME = 'sticker_id' _QUERY_NAME = 'sticker_id'
_API_ENDPOINT = 'sticker/aweme' _API_ENDPOINT = 'sticker/aweme'
_TESTS = [{ _TESTS = [{
@ -750,6 +739,7 @@ class TikTokEffectIE(TikTokBaseListIE):
class TikTokTagIE(TikTokBaseListIE): class TikTokTagIE(TikTokBaseListIE):
IE_NAME = 'tiktok:tag' IE_NAME = 'tiktok:tag'
_VALID_URL = r'https?://(?:www\.)?tiktok\.com/tag/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?tiktok\.com/tag/(?P<id>[^/?#&]+)'
_WORKING = False
_QUERY_NAME = 'ch_id' _QUERY_NAME = 'ch_id'
_API_ENDPOINT = 'challenge/aweme' _API_ENDPOINT = 'challenge/aweme'
_TESTS = [{ _TESTS = [{