changeset: 69366:495d12196487 branch: 3.1 parent: 69362:f5b9ad73157c user: Senthil Kumaran date: Fri Apr 15 18:20:24 2011 +0800 files: Lib/test/test_urlparse.py Lib/urllib/parse.py Misc/NEWS description: Issue #11467: Fix urlparse behavior when handling URLs whose scheme-specific part contains only digits. diff -r f5b9ad73157c -r 495d12196487 Lib/test/test_urlparse.py --- a/Lib/test/test_urlparse.py Fri Apr 15 08:25:16 2011 +0300 +++ b/Lib/test/test_urlparse.py Fri Apr 15 18:20:24 2011 +0800 @@ -197,6 +197,11 @@ #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g') #self.checkJoin(RFC1808_BASE, 'http:', 'http:') + def test_RFC2368(self): + # Issue 11467: path that starts with a number is not parsed correctly + self.assertEqual(urllib.parse.urlparse('mailto:1337@example.org'), + ('mailto', '', '1337@example.org', '', '', '')) + def test_RFC2396(self): # cases from RFC 2396 self.checkJoin(RFC2396_BASE, 'g:h', 'g:h') diff -r f5b9ad73157c -r 495d12196487 Lib/urllib/parse.py --- a/Lib/urllib/parse.py Fri Apr 15 08:25:16 2011 +0300 +++ b/Lib/urllib/parse.py Fri Apr 15 18:20:24 2011 +0800 @@ -184,12 +184,17 @@ v = SplitResult(scheme, netloc, url, query, fragment) _parse_cache[key] = v return v - if url.endswith(':') or not url[i+1].isdigit(): - for c in url[:i]: - if c not in scheme_chars: - break - else: + for c in url[:i]: + if c not in scheme_chars: + break + else: + try: + # make sure "url" is not actually a port number (in which case + # "scheme" is really part of the path + _testportnum = int(url[i+1:]) + except ValueError: scheme, url = url[:i].lower(), url[i+1:] + if url[:2] == '//': netloc, url = _splitnetloc(url, 2) if allow_fragments and scheme in uses_fragment and '#' in url: diff -r f5b9ad73157c -r 495d12196487 Misc/NEWS --- a/Misc/NEWS Fri Apr 15 08:25:16 2011 +0300 +++ b/Misc/NEWS Fri Apr 15 18:20:24 2011 +0800 @@ -51,6 +51,9 @@ Library ------- +- Issue #11467: Fix urlparse behavior when handling urls which contains scheme + specific part only digits. 
Patch by Santoso Wijaya. + - Issue #11474: Fix the bug with url2pathname() handling of '/C|/' on Windows. Patch by Santoso Wijaya.