[extractor/youtube] Mark videos as fully watched (#4146)

* Also fixes videos appearing as shorts in watch history

Closes #2555
Authored by: Brett824
This commit is contained in:
Brett824 2022-06-23 19:30:17 -04:00 committed by GitHub
parent 34baaced11
commit 06cc8f103b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -2643,30 +2643,45 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return sts return sts
def _mark_watched(self, video_id, player_responses):
    """Ping YouTube's playback-tracking URLs so the video counts as watched.

    Two tracking URLs are tried in order: ``videostatsPlaybackUrl`` (reported
    as "watched") and ``videostatsWatchtimeUrl`` (reported as "fully
    watched"). The whole routine is best-effort: if a tracking URL cannot be
    found, a warning is reported and the method returns without trying the
    remaining one; a failed request is non-fatal.

    :param video_id: video ID passed on to ``_download_webpage``.
    :param player_responses: player responses searched with ``get_first``
        for ``playbackTracking/<key>/baseUrl``.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'

    for is_full, key in enumerate(('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return
        parsed_url = compat_urlparse.urlparse(url)
        qs = compat_urlparse.parse_qs(parsed_url.query)

        # 16 uniformly chosen alphabet characters. `randint(0, 256) & 63`
        # would include 256 and therefore slightly favour index 0.
        cpn = ''.join(random.choices(CPN_ALPHABET, k=16))

        # More consistent results setting it to right before the end.
        try:
            length = float((qs.get('len') or ['1.5'])[0])
        except ValueError:
            # Malformed `len`: use the same fallback as when it is absent
            # instead of aborting a best-effort operation
            length = 1.5
        # Never send a negative position for videos shorter than a second
        video_length = [str(max(length - 1, 0.0))]

        qs.update({
            'ver': ['2'],
            'cpn': [cpn],
            'cmt': video_length,
            'el': 'detailpage',  # otherwise defaults to "shorts"
        })

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            qs.update({
                'st': video_length,
                'et': video_length,
            })

        url = compat_urlparse.urlunparse(
            parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            f'Unable to mark {label}watched', fatal=False)
@staticmethod @staticmethod
def _extract_urls(webpage): def _extract_urls(webpage):