[youtube:tab] Fix duration extraction for shorts (#3171)

Related: https://github.com/TeamNewPipe/NewPipe/issues/8034
Authored-by: coletdjnz
This commit is contained in:
coletdev 2022-03-28 13:49:42 +13:00 committed by GitHub
parent 47b8bf207b
commit 1c1b2f96ae
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 14 additions and 6 deletions

View file

@@ -627,6 +627,8 @@ class TestUtil(unittest.TestCase):
self.assertEqual(parse_duration('3h 11m 53s'), 11513) self.assertEqual(parse_duration('3h 11m 53s'), 11513)
self.assertEqual(parse_duration('3 hours 11 minutes 53 seconds'), 11513) self.assertEqual(parse_duration('3 hours 11 minutes 53 seconds'), 11513)
self.assertEqual(parse_duration('3 hours 11 mins 53 secs'), 11513) self.assertEqual(parse_duration('3 hours 11 mins 53 secs'), 11513)
self.assertEqual(parse_duration('3 hours, 11 minutes, 53 seconds'), 11513)
self.assertEqual(parse_duration('3 hours, 11 mins, 53 secs'), 11513)
self.assertEqual(parse_duration('62m45s'), 3765) self.assertEqual(parse_duration('62m45s'), 3765)
self.assertEqual(parse_duration('6m59s'), 419) self.assertEqual(parse_duration('6m59s'), 419)
self.assertEqual(parse_duration('49s'), 49) self.assertEqual(parse_duration('49s'), 49)

View file

@@ -807,6 +807,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
description = self._get_text(renderer, 'descriptionSnippet') description = self._get_text(renderer, 'descriptionSnippet')
duration = parse_duration(self._get_text( duration = parse_duration(self._get_text(
renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))) renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
if duration is None:
duration = parse_duration(self._search_regex(
r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
traverse_obj(renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str),
video_id, default=None, group='duration'))
view_count = self._get_count(renderer, 'viewCountText') view_count = self._get_count(renderer, 'viewCountText')
uploader = self._get_text(renderer, 'ownerText', 'shortBylineText') uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')

View file

@@ -2644,23 +2644,23 @@ def parse_duration(s):
m = re.match( m = re.match(
r'''(?ix)(?:P? r'''(?ix)(?:P?
(?: (?:
[0-9]+\s*y(?:ears?)?\s* [0-9]+\s*y(?:ears?)?,?\s*
)? )?
(?: (?:
[0-9]+\s*m(?:onths?)?\s* [0-9]+\s*m(?:onths?)?,?\s*
)? )?
(?: (?:
[0-9]+\s*w(?:eeks?)?\s* [0-9]+\s*w(?:eeks?)?,?\s*
)? )?
(?: (?:
(?P<days>[0-9]+)\s*d(?:ays?)?\s* (?P<days>[0-9]+)\s*d(?:ays?)?,?\s*
)? )?
T)? T)?
(?: (?:
(?P<hours>[0-9]+)\s*h(?:ours?)?\s* (?P<hours>[0-9]+)\s*h(?:ours?)?,?\s*
)? )?
(?: (?:
(?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s* (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?,?\s*
)? )?
(?: (?:
(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s* (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*