[ie/twitter] Extract stale tweets (#8724)
Closes #8691. Authored by: bashonly
This commit is contained in:
parent
00a3e47bf5
commit
1c54a98e19
1 changed file with 61 additions and 24 deletions
|
@ -479,9 +479,9 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'view_count': int,
|
|
||||||
'tags': [],
|
'tags': [],
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
|
'_old_archive_ids': ['twitter 643211948184596480'],
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
|
'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
|
||||||
|
@ -515,6 +515,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'tags': ['TV', 'StarWars', 'TheForceAwakens'],
|
'tags': ['TV', 'StarWars', 'TheForceAwakens'],
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
|
'_old_archive_ids': ['twitter 665052190608723968'],
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
|
'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
|
||||||
|
@ -558,9 +559,9 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'view_count': int,
|
|
||||||
'tags': ['Damndaniel'],
|
'tags': ['Damndaniel'],
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
|
'_old_archive_ids': ['twitter 700207533655363584'],
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
|
'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
|
||||||
|
@ -599,9 +600,9 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'view_count': int,
|
|
||||||
'tags': [],
|
'tags': [],
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
|
'_old_archive_ids': ['twitter 719944021058060289'],
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
|
'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
|
||||||
|
@ -616,6 +617,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
},
|
},
|
||||||
'add_ie': ['Periscope'],
|
'add_ie': ['Periscope'],
|
||||||
|
'skip': 'Broadcast not found',
|
||||||
}, {
|
}, {
|
||||||
# has mp4 formats via mobile API
|
# has mp4 formats via mobile API
|
||||||
'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
|
'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
|
||||||
|
@ -635,9 +637,9 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'tags': [],
|
'tags': [],
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'view_count': int,
|
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
|
'_old_archive_ids': ['twitter 852138619213144067'],
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://twitter.com/i/web/status/910031516746514432',
|
'url': 'https://twitter.com/i/web/status/910031516746514432',
|
||||||
|
@ -657,9 +659,9 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'view_count': int,
|
|
||||||
'tags': ['Maria'],
|
'tags': ['Maria'],
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
|
'_old_archive_ids': ['twitter 910031516746514432'],
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True, # requires ffmpeg
|
'skip_download': True, # requires ffmpeg
|
||||||
|
@ -683,9 +685,9 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'view_count': int,
|
|
||||||
'tags': [],
|
'tags': [],
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
|
'_old_archive_ids': ['twitter 1001551623938805763'],
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True, # requires ffmpeg
|
'skip_download': True, # requires ffmpeg
|
||||||
|
@ -749,6 +751,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'tags': [],
|
'tags': [],
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
|
'_old_archive_ids': ['twitter 1349794411333394432'],
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
|
@ -771,18 +774,18 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'view_count': int,
|
|
||||||
'tags': [],
|
'tags': [],
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
|
'_old_archive_ids': ['twitter 1577855540407197696'],
|
||||||
},
|
},
|
||||||
'params': {'skip_download': True},
|
'params': {'skip_download': True},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
|
'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1577719286659006464',
|
'id': '1577719286659006464',
|
||||||
'title': 'Ultima📛| New Era - Test',
|
'title': 'Ultima - Test',
|
||||||
'description': 'Test https://t.co/Y3KEZD7Dad',
|
'description': 'Test https://t.co/Y3KEZD7Dad',
|
||||||
'uploader': 'Ultima📛| New Era',
|
'uploader': 'Ultima',
|
||||||
'uploader_id': 'UltimaShadowX',
|
'uploader_id': 'UltimaShadowX',
|
||||||
'uploader_url': 'https://twitter.com/UltimaShadowX',
|
'uploader_url': 'https://twitter.com/UltimaShadowX',
|
||||||
'upload_date': '20221005',
|
'upload_date': '20221005',
|
||||||
|
@ -813,9 +816,9 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'view_count': int,
|
|
||||||
'tags': ['HurricaneIan'],
|
'tags': ['HurricaneIan'],
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
|
'_old_archive_ids': ['twitter 1575560063510810624'],
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# Adult content, fails if not logged in
|
# Adult content, fails if not logged in
|
||||||
|
@ -951,10 +954,10 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'uploader_url': 'https://twitter.com/CTVJLaidlaw',
|
'uploader_url': 'https://twitter.com/CTVJLaidlaw',
|
||||||
'display_id': '1600649710662213632',
|
'display_id': '1600649710662213632',
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'view_count': int,
|
|
||||||
'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
|
'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
|
||||||
'upload_date': '20221208',
|
'upload_date': '20221208',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
|
'_old_archive_ids': ['twitter 1600649710662213632'],
|
||||||
},
|
},
|
||||||
'params': {'noplaylist': True},
|
'params': {'noplaylist': True},
|
||||||
}, {
|
}, {
|
||||||
|
@ -979,7 +982,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'view_count': int,
|
'_old_archive_ids': ['twitter 1621117700482416640'],
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
|
'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
|
||||||
|
@ -995,13 +998,13 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'duration': 9.531,
|
'duration': 9.531,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'view_count': int,
|
|
||||||
'upload_date': '20221203',
|
'upload_date': '20221203',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
'timestamp': 1670092210.0,
|
'timestamp': 1670092210.0,
|
||||||
'tags': [],
|
'tags': [],
|
||||||
'uploader': '\u06ea',
|
'uploader': '\u06ea',
|
||||||
'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
|
'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
|
||||||
|
'_old_archive_ids': ['twitter 1599108751385972737'],
|
||||||
},
|
},
|
||||||
'params': {'noplaylist': True},
|
'params': {'noplaylist': True},
|
||||||
}, {
|
}, {
|
||||||
|
@ -1012,7 +1015,6 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'uploader_url': 'https://twitter.com/MunTheShinobi',
|
'uploader_url': 'https://twitter.com/MunTheShinobi',
|
||||||
'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
|
'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
|
||||||
'view_count': int,
|
|
||||||
'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
|
'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
'uploader': 'Mün',
|
'uploader': 'Mün',
|
||||||
|
@ -1025,6 +1027,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'uploader_id': 'MunTheShinobi',
|
'uploader_id': 'MunTheShinobi',
|
||||||
'duration': 139.987,
|
'duration': 139.987,
|
||||||
'timestamp': 1670306984.0,
|
'timestamp': 1670306984.0,
|
||||||
|
'_old_archive_ids': ['twitter 1600009574919962625'],
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# retweeted_status (private)
|
# retweeted_status (private)
|
||||||
|
@ -1068,8 +1071,8 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
|
'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'view_count': int,
|
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
|
'_old_archive_ids': ['twitter 1695424220702888009'],
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# retweeted_status w/ legacy API
|
# retweeted_status w/ legacy API
|
||||||
|
@ -1091,18 +1094,24 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
|
'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
|
'_old_archive_ids': ['twitter 1695424220702888009'],
|
||||||
},
|
},
|
||||||
'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
|
'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
|
||||||
}, {
|
}, {
|
||||||
# Broadcast embedded in tweet
|
# Broadcast embedded in tweet
|
||||||
'url': 'https://twitter.com/JessicaDobsonWX/status/1693057346933600402',
|
'url': 'https://twitter.com/JessicaDobsonWX/status/1731121063248175384',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1yNGaNLjEblJj',
|
'id': '1rmxPMjLzAXKN',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Jessica Dobson - WAVE Weather Now - Saturday 8/19/23 Update',
|
'title': 'WAVE Weather Now - Saturday 12/2/23 Update',
|
||||||
'uploader': 'Jessica Dobson',
|
'uploader': 'Jessica Dobson',
|
||||||
'uploader_id': '1DZEoDwDovRQa',
|
'uploader_id': 'JessicaDobsonWX',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'uploader_url': 'https://twitter.com/JessicaDobsonWX',
|
||||||
|
'timestamp': 1701566398,
|
||||||
|
'upload_date': '20231203',
|
||||||
|
'live_status': 'was_live',
|
||||||
|
'thumbnail': r're:https://[^/]+pscp\.tv/.+\.jpg',
|
||||||
|
'concurrent_view_count': int,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
},
|
},
|
||||||
'add_ie': ['TwitterBroadcast'],
|
'add_ie': ['TwitterBroadcast'],
|
||||||
|
@ -1125,6 +1134,30 @@ class TwitterIE(TwitterBaseIE):
|
||||||
},
|
},
|
||||||
'params': {'extractor_args': {'twitter': {'api': ['syndication']}}},
|
'params': {'extractor_args': {'twitter': {'api': ['syndication']}}},
|
||||||
'expected_warnings': ['Not all metadata'],
|
'expected_warnings': ['Not all metadata'],
|
||||||
|
}, {
|
||||||
|
# "stale tweet" with typename "TweetWithVisibilityResults"
|
||||||
|
'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
|
||||||
|
'md5': '62b1e11cdc2cdd0e527f83adb081f536',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1724883339285544960',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'md5:cc56716f9ed0b368de2ba54c478e493c',
|
||||||
|
'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164',
|
||||||
|
'display_id': '1724884212803834154',
|
||||||
|
'uploader': 'Robert F. Kennedy Jr',
|
||||||
|
'uploader_id': 'RobertKennedyJr',
|
||||||
|
'uploader_url': 'https://twitter.com/RobertKennedyJr',
|
||||||
|
'upload_date': '20231115',
|
||||||
|
'timestamp': 1700079417.0,
|
||||||
|
'duration': 341.048,
|
||||||
|
'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
|
||||||
|
'tags': ['Kennedy24'],
|
||||||
|
'repost_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'age_limit': 0,
|
||||||
|
'_old_archive_ids': ['twitter 1724884212803834154'],
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
# onion route
|
# onion route
|
||||||
'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
|
'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
|
||||||
|
@ -1179,19 +1212,23 @@ class TwitterIE(TwitterBaseIE):
|
||||||
), default={}, get_all=False) if self.is_logged_in else traverse_obj(
|
), default={}, get_all=False) if self.is_logged_in else traverse_obj(
|
||||||
data, ('tweetResult', 'result', {dict}), default={})
|
data, ('tweetResult', 'result', {dict}), default={})
|
||||||
|
|
||||||
if result.get('__typename') not in ('Tweet', 'TweetTombstone', 'TweetUnavailable', None):
|
typename = result.get('__typename')
|
||||||
self.report_warning(f'Unknown typename: {result.get("__typename")}', twid, only_once=True)
|
if typename not in ('Tweet', 'TweetWithVisibilityResults', 'TweetTombstone', 'TweetUnavailable', None):
|
||||||
|
self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)
|
||||||
|
|
||||||
if 'tombstone' in result:
|
if 'tombstone' in result:
|
||||||
cause = remove_end(traverse_obj(result, ('tombstone', 'text', 'text', {str})), '. Learn more')
|
cause = remove_end(traverse_obj(result, ('tombstone', 'text', 'text', {str})), '. Learn more')
|
||||||
raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)
|
raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)
|
||||||
elif result.get('__typename') == 'TweetUnavailable':
|
elif typename == 'TweetUnavailable':
|
||||||
reason = result.get('reason')
|
reason = result.get('reason')
|
||||||
if reason == 'NsfwLoggedOut':
|
if reason == 'NsfwLoggedOut':
|
||||||
self.raise_login_required('NSFW tweet requires authentication')
|
self.raise_login_required('NSFW tweet requires authentication')
|
||||||
elif reason == 'Protected':
|
elif reason == 'Protected':
|
||||||
self.raise_login_required('You are not authorized to view this protected tweet')
|
self.raise_login_required('You are not authorized to view this protected tweet')
|
||||||
raise ExtractorError(reason or 'Requested tweet is unavailable', expected=True)
|
raise ExtractorError(reason or 'Requested tweet is unavailable', expected=True)
|
||||||
|
# Result for "stale tweet" needs additional transformation
|
||||||
|
elif typename == 'TweetWithVisibilityResults':
|
||||||
|
result = traverse_obj(result, ('tweet', {dict})) or {}
|
||||||
|
|
||||||
status = result.get('legacy', {})
|
status = result.get('legacy', {})
|
||||||
status.update(traverse_obj(result, {
|
status.update(traverse_obj(result, {
|
||||||
|
@ -1377,7 +1414,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),
|
'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})), # No longer available
|
||||||
'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
|
'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
|
||||||
# The codec of http formats are unknown
|
# The codec of http formats are unknown
|
||||||
'_format_sort_fields': ('res', 'br', 'size', 'proto'),
|
'_format_sort_fields': ('res', 'br', 'size', 'proto'),
|
||||||
|
|
Loading…
Reference in a new issue