[networking] Ignore invalid proxies in env (#7704)

Authored by: coletdjnz
This commit is contained in:
coletdjnz 2023-07-28 02:56:02 +12:00 committed by GitHub
parent dae349da97
commit bbeacff7fc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 19 additions and 7 deletions

View file

@ -930,10 +930,10 @@ class TestRequestHandlerValidation:
run_validation(handler, False, Request('http://', proxies={'http': None})) run_validation(handler, False, Request('http://', proxies={'http': None}))
run_validation(handler, False, Request('http://'), proxies={'http': None}) run_validation(handler, False, Request('http://'), proxies={'http': None})
@pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1']) @pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1', '/a/b/c'])
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True) @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
def test_missing_proxy_scheme(self, handler, proxy_url): def test_invalid_proxy_url(self, handler, proxy_url):
run_validation(handler, UnsupportedRequest, Request('http://', proxies={'http': 'example.com'})) run_validation(handler, UnsupportedRequest, Request('http://', proxies={'http': proxy_url}))
@pytest.mark.parametrize('handler,extensions,fail', [ @pytest.mark.parametrize('handler,extensions,fail', [
(handler_tests[0], extensions, fail) (handler_tests[0], extensions, fail)
@ -1126,9 +1126,11 @@ class TestYoutubeDLNetworking:
('http', '__noproxy__', None), ('http', '__noproxy__', None),
('no', '127.0.0.1,foo.bar', '127.0.0.1,foo.bar'), ('no', '127.0.0.1,foo.bar', '127.0.0.1,foo.bar'),
('https', 'example.com', 'http://example.com'), ('https', 'example.com', 'http://example.com'),
('https', '//example.com', 'http://example.com'),
('https', 'socks5://example.com', 'socks5h://example.com'), ('https', 'socks5://example.com', 'socks5h://example.com'),
('http', 'socks://example.com', 'socks4://example.com'), ('http', 'socks://example.com', 'socks4://example.com'),
('http', 'socks4://example.com', 'socks4://example.com'), ('http', 'socks4://example.com', 'socks4://example.com'),
('unrelated', '/bad/proxy', '/bad/proxy'), # clean_proxies should ignore bad proxies
]) ])
def test_clean_proxy(self, proxy_key, proxy_url, expected): def test_clean_proxy(self, proxy_key, proxy_url, expected):
# proxies should be cleaned in urlopen() # proxies should be cleaned in urlopen()

View file

@ -262,9 +262,13 @@ class RequestHandler(abc.ABC):
# Skip proxy scheme checks # Skip proxy scheme checks
continue continue
# Scheme-less proxies are not supported try:
if urllib.request._parse_proxy(proxy_url)[0] is None: if urllib.request._parse_proxy(proxy_url)[0] is None:
raise UnsupportedRequest(f'Proxy "{proxy_url}" missing scheme') # Scheme-less proxies are not supported
raise UnsupportedRequest(f'Proxy "{proxy_url}" missing scheme')
except ValueError as e:
# _parse_proxy may raise ValueError on some invalid proxy URLs such as "/a/b/c"
raise UnsupportedRequest(f'Invalid proxy url "{proxy_url}": {e}')
scheme = urllib.parse.urlparse(proxy_url).scheme.lower() scheme = urllib.parse.urlparse(proxy_url).scheme.lower()
if scheme not in self._SUPPORTED_PROXY_SCHEMES: if scheme not in self._SUPPORTED_PROXY_SCHEMES:

View file

@ -98,7 +98,13 @@ def clean_proxies(proxies: dict, headers: HTTPHeaderDict):
continue continue
if proxy_url is not None: if proxy_url is not None:
# Ensure proxies without a scheme are http. # Ensure proxies without a scheme are http.
proxy_scheme = urllib.request._parse_proxy(proxy_url)[0] try:
proxy_scheme = urllib.request._parse_proxy(proxy_url)[0]
except ValueError:
# Ignore invalid proxy URLs. Sometimes these may be introduced through environment
# variables unrelated to proxy settings - e.g. Colab `COLAB_LANGUAGE_SERVER_PROXY`.
# If the proxy is going to be used, the Request Handler proxy validation will handle it.
continue
if proxy_scheme is None: if proxy_scheme is None:
proxies[proxy_key] = 'http://' + remove_start(proxy_url, '//') proxies[proxy_key] = 'http://' + remove_start(proxy_url, '//')