Index: Python-2.7.18/Lib/BaseHTTPServer.py
===================================================================
--- Python-2.7.18.orig/Lib/BaseHTTPServer.py
+++ Python-2.7.18/Lib/BaseHTTPServer.py
@@ -287,6 +287,14 @@ class BaseHTTPRequestHandler(SocketServe
             return False
         self.command, self.path, self.request_version = command, path, version
 
+        # CVE-2021-28861: Collapse any run of leading slashes to a single
+        # '/' to protect against open redirect attacks.  If the path starts
+        # with '//', http clients treat //path as an absolute URI without
+        # scheme (similar to http://path) rather than as a path on this
+        # server.
+        if self.path.startswith('//'):
+            self.path = '/' + self.path.lstrip('/')  # Reduce to a single /
+
         # Examine the headers and look for a Connection directive
         self.headers = self.MessageClass(self.rfile, 0)
 
Index: Python-2.7.18/Lib/test/test_httpservers.py
===================================================================
--- Python-2.7.18.orig/Lib/test/test_httpservers.py
+++ Python-2.7.18/Lib/test/test_httpservers.py
@@ -417,6 +417,52 @@ class SimpleHTTPServerTestCase(BaseTestC
         self.assertEqual(response.getheader("Location"),
                          self.tempdir_name + "/?hi=1")
 
+    def test_get_dir_redirect_location_domain_injection_bug(self):
+        """Ensure //evil.co/..%2f../../X does not put //evil.co/ in Location.
+        //netloc/ in a Location header is a redirect to a new host.
+        https://github.com/python/cpython/issues/87389
+        This checks that a path resolving to a directory on our server cannot
+        resolve into a redirect to another server.
+        """
+        os.mkdir(os.path.join(self.tempdir, 'existing_directory'))
+        url = '/python.org/..%2f..%2f..%2f..%2f..%2f../%0a%0d/../' + self.tempdir_name + '/existing_directory'
+        expected_location = url + '/'  # /python.org.../ single-slash prefix, trailing slash
+        # Canonicalizes to /tmp/tempdir_name/existing_directory which does
+        # exist and is a dir, triggering the 301 redirect logic.
+        response = self.request(url)
+        self.check_status_and_reason(response, 301)
+        location = response.getheader('Location')
+        self.assertEqual(location, expected_location, msg='non-attack failed!')
+
+        # //python.org... double-slash prefix, no trailing slash
+        attack_url = '/' + url
+        response = self.request(attack_url)
+        self.check_status_and_reason(response, 301)
+        location = response.getheader('Location')
+        self.assertFalse(location.startswith('//'), msg=location)
+        self.assertEqual(location, expected_location,
+                msg='Expected Location header to start with a single / and '
+                'end with a / as this is a directory redirect.')
+        # ///python.org... triple-slash prefix, no trailing slash
+        attack3_url = '//' + url
+        response = self.request(attack3_url)
+        self.check_status_and_reason(response, 301)
+        self.assertEqual(response.getheader('Location'), expected_location)
+
+        # If the Request-URI (the second word of the http request line) is a
+        # full URI we don't worry about it: it is parsed and reassembled as a
+        # full URI within SimpleHTTPRequestHandler.send_head, so no errant
+        # scheme-less //netloc//evil.co/ domain mixup can happen.
+        attack_scheme_netloc_2slash_url = 'https://pypi.org/' + url
+        expected_scheme_netloc_location = attack_scheme_netloc_2slash_url + '/'
+        response = self.request(attack_scheme_netloc_2slash_url)
+        self.check_status_and_reason(response, 301)
+        location = response.getheader('Location')
+        # We only ensure that the scheme and domain make it through; extra
+        # slashes at the start of the path that follows do not matter here.
+        # NOTE(review): expected_scheme_netloc_location is never asserted on.
+        self.assertTrue(location.startswith('https://pypi.org/'), msg=location)
+
 
 cgi_file1 = """\
 #!%s