From 070f6a85ea8d9c8c75dd77b7c2415bb8c78ab082 Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Fri, 24 Dec 2021 22:25:44 +0000 Subject: [PATCH] [Steam] Fix extractor (#2029) Closes #1992 Authored by: u-spec-png --- yt_dlp/extractor/steam.py | 146 +++++++++++++++++--------------------- 1 file changed, 64 insertions(+), 82 deletions(-) diff --git a/yt_dlp/extractor/steam.py b/yt_dlp/extractor/steam.py index 7f777c40b..4ed0fb592 100644 --- a/yt_dlp/extractor/steam.py +++ b/yt_dlp/extractor/steam.py @@ -7,14 +7,13 @@ from ..utils import ( extract_attributes, ExtractorError, get_element_by_class, - js_to_json, ) class SteamIE(InfoExtractor): _VALID_URL = r"""(?x) - https?://store\.steampowered\.com/ - (agecheck/)? + https?://(?:store\.steampowered|steamcommunity)\.com/ + (?:agecheck/)? (?Pvideo|app)/ #If the page is only for videos or for a game (?P\d+)/? (?P\d*)(?P\??) # For urltype == video we sometimes get the videoID @@ -26,22 +25,25 @@ class SteamIE(InfoExtractor): _TESTS = [{ 'url': 'http://store.steampowered.com/video/105600/', 'playlist': [ + { + 'md5': '695242613303ffa2a4c44c9374ddc067', + 'info_dict': { + 'id': '256785003', + 'ext': 'mp4', + 'title': 'Terraria video 256785003', + 'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com', + 'n_entries': 2, + } + }, { 'md5': '6a294ee0c4b1f47f5bb76a65e31e3592', 'info_dict': { 'id': '2040428', 'ext': 'mp4', - 'title': 'Terraria 1.3 Trailer', - 'playlist_index': 1, - } - }, - { - 'md5': '911672b20064ca3263fa89650ba5a7aa', - 'info_dict': { - 'id': '2029566', - 'ext': 'mp4', - 'title': 'Terraria 1.2 Trailer', + 'title': 'Terraria video 2040428', 'playlist_index': 2, + 'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com', + 'n_entries': 2, } } ], @@ -53,96 +55,76 @@ class SteamIE(InfoExtractor): 'playlistend': 2, } }, { - 'url': 'http://steamcommunity.com/sharedfiles/filedetails/?id=242472205', + 'url': 'https://store.steampowered.com/app/271590/Grand_Theft_Auto_V/', 'info_dict': { - 'id': 'X8kpJBlzD2E', + 'id': '256757115', + 'title': 'Grand Theft Auto V video 256757115', 'ext': 'mp4', - 'upload_date': '20140617', - 'title': 'FRONTIERS - Trapping', - 'description': 'md5:bf6f7f773def614054089e5769c12a6e', - 'uploader': 'AAD Productions', - 'uploader_id': 'AtomicAgeDogGames', - } + 'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com', + 'n_entries': 20, + }, }] def _real_extract(self, url): m = self._match_valid_url(url) fileID = m.group('fileID') if fileID: - videourl = url + video_url = url playlist_id = fileID else: gameID = m.group('gameID') playlist_id = gameID - videourl = self._VIDEO_PAGE_TEMPLATE % playlist_id + video_url = self._VIDEO_PAGE_TEMPLATE % playlist_id - self._set_cookie('steampowered.com', 'mature_content', '1') + self._set_cookie('steampowered.com', 'wants_mature_content', '1') + self._set_cookie('steampowered.com', 'birthtime', '944006401') + self._set_cookie('steampowered.com', 'lastagecheckage', '1-0-2000') - webpage = self._download_webpage(videourl, playlist_id) + webpage = self._download_webpage(video_url, playlist_id) - if re.search('

Please enter your birth date to continue:

', webpage) is not None: - videourl = self._AGECHECK_TEMPLATE % playlist_id + if re.search(']+>Please enter your birth date to continue:', webpage) is not None: + video_url = self._AGECHECK_TEMPLATE % playlist_id self.report_age_confirmation() - webpage = self._download_webpage(videourl, playlist_id) + webpage = self._download_webpage(video_url, playlist_id) - flash_vars = self._parse_json(self._search_regex( - r'(?s)rgMovieFlashvars\s*=\s*({.+?});', webpage, - 'flash vars'), playlist_id, js_to_json) - - playlist_title = None + videos = re.findall(r'(]+id=[\'"]highlight_movie_(\d+)[\'"][^>]+>)', webpage) entries = [] - if fileID: - playlist_title = get_element_by_class('workshopItemTitle', webpage) - for movie in flash_vars.values(): - if not movie: - continue - youtube_id = movie.get('YOUTUBE_VIDEO_ID') - if not youtube_id: - continue + playlist_title = get_element_by_class('apphub_AppName', webpage) + for movie, movie_id in videos: + if not movie: + continue + movie = extract_attributes(movie) + if not movie_id: + continue + entry = { + 'id': movie_id, + 'title': f'{playlist_title} video {movie_id}', + } + formats = [] + if movie: + entry['thumbnail'] = movie.get('data-poster') + for quality in ('', '-hd'): + for ext in ('webm', 'mp4'): + video_url = movie.get('data-%s%s-source' % (ext, quality)) + if video_url: + formats.append({ + 'format_id': ext + quality, + 'url': video_url, + }) + self._sort_formats(formats) + entry['formats'] = formats + entries.append(entry) + embedded_videos = re.findall(r'(]+>)', webpage) + for evideos in embedded_videos: + evideos = extract_attributes(evideos).get('src') + video_id = self._search_regex(r'youtube\.com/embed/([0-9A-Za-z_-]{11})', evideos, 'youtube_video_id', default=None) + if video_id: entries.append({ - '_type': 'url', - 'url': youtube_id, + '_type': 'url_transparent', + 'id': video_id, + 'url': video_id, 'ie_key': 'Youtube', }) - else: - playlist_title = get_element_by_class('apphub_AppName', webpage) - for movie_id, movie in flash_vars.items(): - if not movie: - continue - video_id = self._search_regex(r'movie_(\d+)', movie_id, 'video id', fatal=False) - title = movie.get('MOVIE_NAME') - if not title or not video_id: - continue - entry = { - 'id': video_id, - 'title': title.replace('+', ' '), - } - formats = [] - flv_url = movie.get('FILENAME') - if flv_url: - formats.append({ - 'format_id': 'flv', - 'url': flv_url, - }) - highlight_element = self._search_regex( - r'(]+id="highlight_movie_%s"[^>]+>)' % video_id, - webpage, 'highlight element', fatal=False) - if highlight_element: - highlight_attribs = extract_attributes(highlight_element) - if highlight_attribs: - entry['thumbnail'] = highlight_attribs.get('data-poster') - for quality in ('', '-hd'): - for ext in ('webm', 'mp4'): - video_url = highlight_attribs.get('data-%s%s-source' % (ext, quality)) - if video_url: - formats.append({ - 'format_id': ext + quality, - 'url': video_url, - }) - if not formats and not self.get_param('ignore_no_formats'): - continue - entry['formats'] = formats - entries.append(entry) if not entries: raise ExtractorError('Could not find any videos')