[utils] `LenientJSONDecoder`: Parse unclosed objects
This commit is contained in:
parent
da8e2912b1
commit
cc09083636
1 changed file with 30 additions and 8 deletions
|
@ -593,21 +593,43 @@ def clean_html(html):
|
||||||
|
|
||||||
|
|
||||||
class LenientJSONDecoder(json.JSONDecoder):
    """A ``json.JSONDecoder`` that tolerates some kinds of malformed input.

    Keyword-only options (all other arguments go to ``json.JSONDecoder``):

    transform_source -- optional callable applied to the input string before
                        decoding; its return value is what gets parsed.
    ignore_extra     -- if true, leading whitespace is stripped and anything
                        following the first complete JSON value is ignored.
    close_objects    -- how many unclosed objects/arrays ``decode`` may try to
                        close when the document ends prematurely (0 disables
                        the repair).
    """
    # TODO: Write tests

    def __init__(self, *args, transform_source=None, ignore_extra=False, close_objects=0, **kwargs):
        self.transform_source, self.ignore_extra = transform_source, ignore_extra
        # Closing a single container may need two retries: one that appends a
        # comma to provoke a more specific error, and one that swaps that
        # comma for the matching closing bracket.
        self._close_attempts = 2 * close_objects
        super().__init__(*args, **kwargs)

    @staticmethod
    def _close_object(err):
        """Return a repaired copy of the failed document, or None.

        ``err`` is the ``json.JSONDecodeError`` from the failed attempt. The
        document is truncated at the error position and then either extended
        with a comma or, if it already ends with one, has that comma replaced
        by ``}`` or ``]`` depending on the error message. None is returned
        when none of the known repairs applies.
        """
        doc = err.doc[:err.pos]
        # We need to add comma first to get the correct error message
        if err.msg.startswith('Expecting \',\''):
            return doc + ','
        elif not doc.endswith(','):
            return None

        if err.msg.startswith('Expecting property name'):
            return doc[:-1] + '}'
        elif err.msg.startswith('Expecting value'):
            return doc[:-1] + ']'
        return None

    def decode(self, s):
        """Decode the JSON document ``s`` and return the resulting object.

        Raises ``json.JSONDecodeError`` (of the same type as the underlying
        error) whose message additionally contains up to 10 characters of
        the document on either side of the failure position.
        """
        if self.transform_source:
            s = self.transform_source(s)
        for attempt in range(self._close_attempts + 1):
            try:
                if self.ignore_extra:
                    return self.raw_decode(s.lstrip())[0]
                return super().decode(s)
            except json.JSONDecodeError as e:
                if e.pos is None:
                    raise
                if attempt < self._close_attempts:
                    # Keep the result in a separate name: overwriting `s`
                    # with None would break the error report below.
                    repaired = self._close_object(e)
                    if repaired is not None:
                        s = repaired
                        continue
                # Report against the document that actually failed to parse
                # (e.doc), and clamp the start so that an error near the
                # beginning does not produce a negative slice index.
                context = e.doc[max(0, e.pos - 10):e.pos + 10]
                raise type(e)(f'{e.msg} in {context!r}', e.doc, e.pos)
        # Only reachable if the loop body never ran (negative close_objects)
        raise AssertionError('Too many attempts to decode JSON')
|
||||||
|
|
||||||
|
|
||||||
def sanitize_open(filename, open_mode):
|
def sanitize_open(filename, open_mode):
|
||||||
|
|
Loading…
Reference in a new issue