From 8b7fb8b60da78b54a518246b251be3d1829fef38 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Mon, 3 Oct 2022 16:50:27 +0530
Subject: [PATCH] [extractor] Make search_json able to parse lists

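`contains_pattern` must now match the entire JSON value, including its
enclosing delimiters; `_search_json` no longer wraps it in `{...}`.
The default is therefore `{(?s:.+)}`, and it can now be set to
e.g. `\[.+\]` to parse a list.

Existing callers that pass `contains_pattern` are updated to include
the braces themselves.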
---
 yt_dlp/extractor/common.py      | 4 ++--
 yt_dlp/extractor/dropbox.py     | 2 +-
 yt_dlp/extractor/radiofrance.py | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 11e715871..caec0ccf6 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1227,7 +1227,7 @@ class InfoExtractor:
             return None
 
     def _search_json(self, start_pattern, string, name, video_id, *, end_pattern='',
-                     contains_pattern='(?s:.+)', fatal=True, default=NO_DEFAULT, **kwargs):
+                     contains_pattern=r'{(?s:.+)}', fatal=True, default=NO_DEFAULT, **kwargs):
         """Searches string for the JSON object specified by start_pattern"""
         # NB: end_pattern is only used to reduce the size of the initial match
         if default is NO_DEFAULT:
@@ -1236,7 +1236,7 @@ class InfoExtractor:
             fatal, has_default = False, True
 
         json_string = self._search_regex(
-            rf'(?:{start_pattern})\s*(?P<json>{{\s*(?:{contains_pattern})\s*}})\s*(?:{end_pattern})',
+            rf'(?:{start_pattern})\s*(?P<json>{contains_pattern})\s*(?:{end_pattern})',
             string, name, group='json', fatal=fatal, default=None if has_default else NO_DEFAULT)
         if not json_string:
             return default
diff --git a/yt_dlp/extractor/dropbox.py b/yt_dlp/extractor/dropbox.py
index 0d12513b2..54d97a25d 100644
--- a/yt_dlp/extractor/dropbox.py
+++ b/yt_dlp/extractor/dropbox.py
@@ -54,7 +54,7 @@ class DropboxIE(InfoExtractor):
                 raise ExtractorError('Password protected video, use --video-password <password>', expected=True)
 
         info_json = self._search_json(r'InitReact\.mountComponent\(.*?,', webpage, 'mountComponent', video_id,
-                                      contains_pattern=r'.+?"preview".+?', end_pattern=r'\)')['props']
+                                      contains_pattern=r'{.+?"preview".+?}', end_pattern=r'\)')['props']
         transcode_url = traverse_obj(info_json, ((None, 'preview'), 'file', 'preview', 'content', 'transcode_url'), get_all=False)
         formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id)
 
diff --git a/yt_dlp/extractor/radiofrance.py b/yt_dlp/extractor/radiofrance.py
index 7b60b2617..38420a15d 100644
--- a/yt_dlp/extractor/radiofrance.py
+++ b/yt_dlp/extractor/radiofrance.py
@@ -84,7 +84,7 @@ class FranceCultureIE(InfoExtractor):
         webpage = self._download_webpage(url, display_id)
 
         # _search_json_ld doesn't correctly handle this. See https://github.com/yt-dlp/yt-dlp/pull/3874#discussion_r891903846
-        video_data = self._search_json('', webpage, 'audio data', display_id, contains_pattern=r'\s*"@type"\s*:\s*"AudioObject"\s*.+')
+        video_data = self._search_json('', webpage, 'audio data', display_id, contains_pattern=r'{\s*"@type"\s*:\s*"AudioObject".+}')
 
         return {
             'id': video_id,