[extractor/udemy] Fix lectures that have no URL and detect DRM
Closes #5662
This commit is contained in:
parent
9bb856998b
commit
8d1ddb0805
1 changed file with 15 additions and 5 deletions
|
@@ -11,8 +11,10 @@ from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
|
smuggle_url,
|
||||||
try_get,
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
|
unsmuggle_url,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
@@ -106,7 +108,7 @@ class UdemyIE(InfoExtractor):
|
||||||
% (course_id, lecture_id),
|
% (course_id, lecture_id),
|
||||||
lecture_id, 'Downloading lecture JSON', query={
|
lecture_id, 'Downloading lecture JSON', query={
|
||||||
'fields[lecture]': 'title,description,view_html,asset',
|
'fields[lecture]': 'title,description,view_html,asset',
|
||||||
'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,stream_urls,captions,data',
|
'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,stream_urls,captions,data,course_is_drmed',
|
||||||
})
|
})
|
||||||
|
|
||||||
def _handle_error(self, response):
|
def _handle_error(self, response):
|
||||||
|
@@ -199,9 +201,11 @@ class UdemyIE(InfoExtractor):
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
lecture_id = self._match_id(url)
|
lecture_id = self._match_id(url)
|
||||||
|
course_id = unsmuggle_url(url, {})[1].get('course_id')
|
||||||
|
|
||||||
|
webpage = None
|
||||||
|
if not course_id:
|
||||||
webpage = self._download_webpage(url, lecture_id)
|
webpage = self._download_webpage(url, lecture_id)
|
||||||
|
|
||||||
course_id, _ = self._extract_course_info(webpage, lecture_id)
|
course_id, _ = self._extract_course_info(webpage, lecture_id)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@@ -209,6 +213,7 @@ class UdemyIE(InfoExtractor):
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
# Error could possibly mean we are not enrolled in the course
|
# Error could possibly mean we are not enrolled in the course
|
||||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||||
|
webpage = webpage or self._download_webpage(url, lecture_id)
|
||||||
self._enroll_course(url, webpage, course_id)
|
self._enroll_course(url, webpage, course_id)
|
||||||
lecture = self._download_lecture(course_id, lecture_id)
|
lecture = self._download_lecture(course_id, lecture_id)
|
||||||
else:
|
else:
|
||||||
|
@@ -391,6 +396,9 @@ class UdemyIE(InfoExtractor):
|
||||||
if f.get('url'):
|
if f.get('url'):
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
|
|
||||||
|
if not formats and asset.get('course_is_drmed'):
|
||||||
|
self.report_drm(video_id)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
@@ -449,7 +457,9 @@ class UdemyCourseIE(UdemyIE): # XXX: Do not subclass from concrete IE
|
||||||
if lecture_id:
|
if lecture_id:
|
||||||
entry = {
|
entry = {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'url': 'https://www.udemy.com/%s/learn/v4/t/lecture/%s' % (course_path, entry['id']),
|
'url': smuggle_url(
|
||||||
|
f'https://www.udemy.com/{course_path}/learn/v4/t/lecture/{entry["id"]}',
|
||||||
|
{'course_id': course_id}),
|
||||||
'title': entry.get('title'),
|
'title': entry.get('title'),
|
||||||
'ie_key': UdemyIE.ie_key(),
|
'ie_key': UdemyIE.ie_key(),
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue