From 1c1b2f96ae9696ef16b1b27d1a007bf89c683a0c Mon Sep 17 00:00:00 2001 From: coletdev Date: Mon, 28 Mar 2022 13:49:42 +1300 Subject: [PATCH] [youtube:tab] Fix duration extraction for shorts (#3171) Related: https://github.com/TeamNewPipe/NewPipe/issues/8034 Authored-by: coletdjnz --- test/test_utils.py | 2 ++ yt_dlp/extractor/youtube.py | 6 ++++++ yt_dlp/utils.py | 12 ++++++------ 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 660ce03bf..31f168998 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -627,6 +627,8 @@ class TestUtil(unittest.TestCase): self.assertEqual(parse_duration('3h 11m 53s'), 11513) self.assertEqual(parse_duration('3 hours 11 minutes 53 seconds'), 11513) self.assertEqual(parse_duration('3 hours 11 mins 53 secs'), 11513) + self.assertEqual(parse_duration('3 hours, 11 minutes, 53 seconds'), 11513) + self.assertEqual(parse_duration('3 hours, 11 mins, 53 secs'), 11513) self.assertEqual(parse_duration('62m45s'), 3765) self.assertEqual(parse_duration('6m59s'), 419) self.assertEqual(parse_duration('49s'), 49) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 4d7e79fbf..e5097c264 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -807,6 +807,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor): description = self._get_text(renderer, 'descriptionSnippet') duration = parse_duration(self._get_text( renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))) + if duration is None: + duration = parse_duration(self._search_regex( + r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$', + traverse_obj(renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str), + video_id, default=None, group='duration')) + view_count = self._get_count(renderer, 'viewCountText') uploader = self._get_text(renderer, 'ownerText', 'shortBylineText') 
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 6854dbb63..72f11691f 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -2644,23 +2644,23 @@ def parse_duration(s): m = re.match( r'''(?ix)(?:P? (?: - [0-9]+\s*y(?:ears?)?\s* + [0-9]+\s*y(?:ears?)?,?\s* )? (?: - [0-9]+\s*m(?:onths?)?\s* + [0-9]+\s*m(?:onths?)?,?\s* )? (?: - [0-9]+\s*w(?:eeks?)?\s* + [0-9]+\s*w(?:eeks?)?,?\s* )? (?: - (?P<days>[0-9]+)\s*d(?:ays?)?\s* + (?P<days>[0-9]+)\s*d(?:ays?)?,?\s* )? T)? (?: - (?P<hours>[0-9]+)\s*h(?:ours?)?\s* + (?P<hours>[0-9]+)\s*h(?:ours?)?,?\s* )? (?: - (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s* + (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?,?\s* )? (?: (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*