[linuxacademy] Improve regex
TODO: We need a more robust, standard regex for extracting JS objects from HTML.
This commit is contained in:
parent
9160a0c6a2
commit
037cc66ec8
1 changed file with 13 additions and 3 deletions
|
@ -38,8 +38,8 @@ class LinuxAcademyIE(InfoExtractor):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'What Is Data Science',
|
'title': 'What Is Data Science',
|
||||||
'description': 'md5:c574a3c20607144fb36cb65bdde76c99',
|
'description': 'md5:c574a3c20607144fb36cb65bdde76c99',
|
||||||
'timestamp': 1607387907,
|
'timestamp': int, # The timestamp and upload date changes
|
||||||
'upload_date': '20201208',
|
'upload_date': r're:\d+',
|
||||||
'duration': 304,
|
'duration': 304,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
|
@ -59,6 +59,16 @@ class LinuxAcademyIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
'playlist_count': 41,
|
'playlist_count': 41,
|
||||||
'skip': 'Requires Linux Academy account credentials',
|
'skip': 'Requires Linux Academy account credentials',
|
||||||
|
}, {
|
||||||
|
'url': 'https://linuxacademy.com/cp/modules/view/id/39',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '39',
|
||||||
|
'title': 'Red Hat Certified Systems Administrator - RHCSA (EX200) Exam Prep (legacy)',
|
||||||
|
'description': 'md5:0f1d3369e90c3fb14a79813b863c902f',
|
||||||
|
'duration': 89280,
|
||||||
|
},
|
||||||
|
'playlist_count': 73,
|
||||||
|
'skip': 'Requires Linux Academy account credentials',
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_AUTHORIZE_URL = 'https://login.linuxacademy.com/authorize'
|
_AUTHORIZE_URL = 'https://login.linuxacademy.com/authorize'
|
||||||
|
@ -162,7 +172,7 @@ class LinuxAcademyIE(InfoExtractor):
|
||||||
if course_id:
|
if course_id:
|
||||||
module = self._parse_json(
|
module = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'window\.module\s*=\s*({.+?})\s*;', webpage, 'module'),
|
r'window\.module\s*=\s*({(?:(?!};)[^"]|"([^"]|\\")*")+})\s*;', webpage, 'module'),
|
||||||
item_id)
|
item_id)
|
||||||
entries = []
|
entries = []
|
||||||
chapter_number = None
|
chapter_number = None
|
||||||
|
|
Loading…
Reference in a new issue