Implement --add-header
without modifying std_headers
Closes #2526, #1614
This commit is contained in:
parent
e48b3875ec
commit
8b7539d27c
10 changed files with 28 additions and 29 deletions
|
@ -737,9 +737,6 @@ You can also fork the project on github and run your fork's [build workflow](.gi
|
|||
--prefer-insecure Use an unencrypted connection to retrieve
|
||||
information about the video (Currently
|
||||
supported only for YouTube)
|
||||
--user-agent UA Specify a custom user agent
|
||||
--referer URL Specify a custom referer, use if the video
|
||||
access is restricted to one domain
|
||||
--add-header FIELD:VALUE Specify a custom HTTP header and its value,
|
||||
separated by a colon ":". You can use this
|
||||
option multiple times
|
||||
|
@ -1866,6 +1863,8 @@ While these options are redundant, they are still expected to be used due to the
|
|||
--reject-title REGEX --match-filter "title !~= (?i)REGEX"
|
||||
--min-views COUNT --match-filter "view_count >=? COUNT"
|
||||
--max-views COUNT --match-filter "view_count <=? COUNT"
|
||||
--user-agent UA --add-header "User-Agent:UA"
|
||||
--referer URL --add-header "Referer:URL"
|
||||
|
||||
|
||||
#### Not recommended
|
||||
|
|
|
@ -83,6 +83,7 @@ from .utils import (
|
|||
make_dir,
|
||||
make_HTTPS_handler,
|
||||
MaxDownloadsReached,
|
||||
merge_headers,
|
||||
network_exceptions,
|
||||
number_of_digits,
|
||||
orderedSet,
|
||||
|
@ -332,6 +333,7 @@ class YoutubeDL(object):
|
|||
nocheckcertificate: Do not verify SSL certificates
|
||||
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
|
||||
At the moment, this is only supported by YouTube.
|
||||
http_headers: A dictionary of custom headers to be used for all requests
|
||||
proxy: URL of the proxy server to use
|
||||
geo_verification_proxy: URL of the proxy to use for IP address verification
|
||||
on geo-restricted sites.
|
||||
|
@ -647,6 +649,9 @@ class YoutubeDL(object):
|
|||
else self.params['format'] if callable(self.params['format'])
|
||||
else self.build_format_selector(self.params['format']))
|
||||
|
||||
# Set http_headers defaults according to std_headers
|
||||
self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
|
||||
|
||||
self._setup_opener()
|
||||
|
||||
if auto_init:
|
||||
|
@ -2250,8 +2255,7 @@ class YoutubeDL(object):
|
|||
return _build_selector_function(parsed_selector)
|
||||
|
||||
def _calc_headers(self, info_dict):
|
||||
res = std_headers.copy()
|
||||
res.update(info_dict.get('http_headers') or {})
|
||||
res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
|
||||
|
||||
cookies = self._calc_cookies(info_dict)
|
||||
if cookies:
|
||||
|
|
|
@ -41,6 +41,7 @@ from .utils import (
|
|||
SameFileError,
|
||||
setproctitle,
|
||||
std_headers,
|
||||
traverse_obj,
|
||||
write_string,
|
||||
)
|
||||
from .update import run_update
|
||||
|
@ -75,20 +76,15 @@ def _real_main(argv=None):
|
|||
parser, opts, args = parseOpts(argv)
|
||||
warnings, deprecation_warnings = [], []
|
||||
|
||||
# Set user agent
|
||||
if opts.user_agent is not None:
|
||||
std_headers['User-Agent'] = opts.user_agent
|
||||
|
||||
# Set referer
|
||||
opts.headers.setdefault('User-Agent', opts.user_agent)
|
||||
if opts.referer is not None:
|
||||
std_headers['Referer'] = opts.referer
|
||||
|
||||
# Custom HTTP headers
|
||||
std_headers.update(opts.headers)
|
||||
opts.headers.setdefault('Referer', opts.referer)
|
||||
|
||||
# Dump user agent
|
||||
if opts.dump_user_agent:
|
||||
write_string(std_headers['User-Agent'] + '\n', out=sys.stdout)
|
||||
ua = traverse_obj(opts.headers, 'User-Agent', casesense=False, default=std_headers['User-Agent'])
|
||||
write_string(f'{ua}\n', out=sys.stdout)
|
||||
sys.exit(0)
|
||||
|
||||
# Batch file verification
|
||||
|
@ -767,6 +763,7 @@ def _real_main(argv=None):
|
|||
'legacyserverconnect': opts.legacy_server_connect,
|
||||
'nocheckcertificate': opts.no_check_certificate,
|
||||
'prefer_insecure': opts.prefer_insecure,
|
||||
'http_headers': opts.headers,
|
||||
'proxy': opts.proxy,
|
||||
'socket_timeout': opts.socket_timeout,
|
||||
'bidi_workaround': opts.bidi_workaround,
|
||||
|
|
|
@ -17,7 +17,6 @@ from ..utils import (
|
|||
get_element_by_attribute,
|
||||
int_or_none,
|
||||
lowercase_escape,
|
||||
std_headers,
|
||||
str_or_none,
|
||||
str_to_int,
|
||||
traverse_obj,
|
||||
|
@ -503,7 +502,7 @@ class InstagramPlaylistBaseIE(InstagramBaseIE):
|
|||
'%s' % rhx_gis,
|
||||
'',
|
||||
'%s:%s' % (rhx_gis, csrf_token),
|
||||
'%s:%s:%s' % (rhx_gis, csrf_token, std_headers['User-Agent']),
|
||||
'%s:%s:%s' % (rhx_gis, csrf_token, self.get_param('http_headers')['User-Agent']),
|
||||
]
|
||||
|
||||
# try all of the ways to generate a GIS query, and not only use the
|
||||
|
|
|
@ -8,7 +8,6 @@ import json
|
|||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
std_headers,
|
||||
update_url_query,
|
||||
random_uuidv4,
|
||||
try_get,
|
||||
|
@ -70,7 +69,7 @@ class MildomBaseIE(InfoExtractor):
|
|||
'clu': '',
|
||||
'wh': '1919*810',
|
||||
'rtm': self.iso_timestamp(),
|
||||
'ua': std_headers['User-Agent'],
|
||||
'ua': self.get_param('http_headers')['User-Agent'],
|
||||
}).encode('utf8')).decode('utf8').replace('\n', ''),
|
||||
}).encode('utf8'))
|
||||
self._DISPATCHER_CONFIG = self._parse_json(base64.b64decode(tmp['data']), 'initialization')
|
||||
|
|
|
@ -16,7 +16,6 @@ from ..utils import (
|
|||
ExtractorError,
|
||||
get_exe_version,
|
||||
is_outdated_version,
|
||||
std_headers,
|
||||
Popen,
|
||||
)
|
||||
|
||||
|
@ -208,7 +207,7 @@ class PhantomJSwrapper(object):
|
|||
|
||||
replaces = self.options
|
||||
replaces['url'] = url
|
||||
user_agent = headers.get('User-Agent') or std_headers['User-Agent']
|
||||
user_agent = headers.get('User-Agent') or self.get_param('http_headers')['User-Agent']
|
||||
replaces['ua'] = user_agent.replace('"', '\\"')
|
||||
replaces['jscode'] = jscode
|
||||
|
||||
|
|
|
@ -17,7 +17,6 @@ from ..utils import (
|
|||
qualities,
|
||||
remove_end,
|
||||
remove_start,
|
||||
std_headers,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
@ -71,7 +70,7 @@ class RTVEALaCartaIE(InfoExtractor):
|
|||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
user_agent_b64 = base64.b64encode(std_headers['User-Agent'].encode('utf-8')).decode('utf-8')
|
||||
user_agent_b64 = base64.b64encode(self.get_param('http_headers')['User-Agent'].encode('utf-8')).decode('utf-8')
|
||||
self._manager = self._download_json(
|
||||
'http://www.rtve.es/odin/loki/' + user_agent_b64,
|
||||
None, 'Fetching manager info')['manager']
|
||||
|
|
|
@ -28,7 +28,6 @@ from ..utils import (
|
|||
parse_qs,
|
||||
sanitized_Request,
|
||||
smuggle_url,
|
||||
std_headers,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
|
@ -758,7 +757,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
|||
|
||||
def _real_extract(self, url):
|
||||
url, data = unsmuggle_url(url, {})
|
||||
headers = std_headers.copy()
|
||||
headers = self.get_param('http_headers').copy()
|
||||
if 'http_headers' in data:
|
||||
headers.update(data['http_headers'])
|
||||
if 'Referer' not in headers:
|
||||
|
|
|
@ -860,17 +860,16 @@ def create_parser():
|
|||
workarounds.add_option(
|
||||
'--user-agent',
|
||||
metavar='UA', dest='user_agent',
|
||||
help='Specify a custom user agent')
|
||||
help=optparse.SUPPRESS_HELP)
|
||||
workarounds.add_option(
|
||||
'--referer',
|
||||
metavar='URL', dest='referer', default=None,
|
||||
help='Specify a custom referer, use if the video access is restricted to one domain',
|
||||
)
|
||||
help=optparse.SUPPRESS_HELP)
|
||||
workarounds.add_option(
|
||||
'--add-header',
|
||||
metavar='FIELD:VALUE', dest='headers', default={}, type='str',
|
||||
action='callback', callback=_dict_from_options_callback,
|
||||
callback_kwargs={'multiple_keys': False, 'process_key': None},
|
||||
callback_kwargs={'multiple_keys': False},
|
||||
help='Specify a custom HTTP header and its value, separated by a colon ":". You can use this option multiple times',
|
||||
)
|
||||
workarounds.add_option(
|
||||
|
|
|
@ -1372,7 +1372,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
|
|||
if url != url_escaped:
|
||||
req = update_Request(req, url=url_escaped)
|
||||
|
||||
for h, v in std_headers.items():
|
||||
for h, v in self._params.get('http_headers', std_headers).items():
|
||||
# Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
|
||||
# The dict keys are capitalized because of this bug by urllib
|
||||
if h.capitalize() not in req.headers:
|
||||
|
@ -5436,3 +5436,8 @@ class WebSocketsWrapper():
|
|||
|
||||
|
||||
has_websockets = bool(compat_websockets)
|
||||
|
||||
|
||||
def merge_headers(*dicts):
    """Merge dicts of network headers case insensitively, prioritizing the latter ones.

    Header names are normalized to HTTP-style title case (``user-agent`` ->
    ``User-Agent``), so spellings that differ only in letter case collapse
    into a single entry and the value from the last dict wins.

    Returns a new dict; none of the input dicts is modified.
    """
    # str.capitalize() lowercases everything after the first character
    # ('User-Agent' -> 'User-agent'), which breaks lookups by the conventional
    # spelling such as params['http_headers']['User-Agent'].
    # str.title() capitalizes every hyphen-separated word instead.
    return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}
|
||||
|
|
Loading…
Reference in a new issue