parent
aedaa455d9
commit
80e8493ee7
2 changed files with 21 additions and 7 deletions
|
@@ -2527,6 +2527,21 @@ class GenericIE(InfoExtractor):
|
|||
'upload_date': '20220504',
|
||||
},
|
||||
},
|
||||
{
|
||||
# Webpage contains double BOM
|
||||
'url': 'https://www.filmarkivet.se/movies/paris-d-moll/',
|
||||
'md5': 'df02cadc719dcc63d43288366f037754',
|
||||
'info_dict': {
|
||||
'id': 'paris-d-moll',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20220518',
|
||||
'title': 'Paris d-moll',
|
||||
'description': 'md5:319e37ea5542293db37e1e13072fe330',
|
||||
'thumbnail': 'https://www.filmarkivet.se/wp-content/uploads/parisdmoll2.jpg',
|
||||
'timestamp': 1652833414,
|
||||
'age_limit': 0,
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
def report_following_redirect(self, new_url):
|
||||
|
|
|
@@ -3290,14 +3290,13 @@ def is_html(first_bytes):
|
|||
(b'\xff\xfe', 'utf-16-le'),
|
||||
(b'\xfe\xff', 'utf-16-be'),
|
||||
]
|
||||
for bom, enc in BOMS:
|
||||
if first_bytes.startswith(bom):
|
||||
s = first_bytes[len(bom):].decode(enc, 'replace')
|
||||
break
|
||||
else:
|
||||
s = first_bytes.decode('utf-8', 'replace')
|
||||
|
||||
return re.match(r'^\s*<', s)
|
||||
encoding = 'utf-8'
|
||||
for bom, enc in BOMS:
|
||||
while first_bytes.startswith(bom):
|
||||
encoding, first_bytes = enc, first_bytes[len(bom):]
|
||||
|
||||
return re.match(r'^\s*<', first_bytes.decode(encoding, 'replace'))
|
||||
|
||||
|
||||
def determine_protocol(info_dict):
|
||||
|
|
Loading…
Reference in a new issue