You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
62 lines
1.5 KiB
62 lines
1.5 KiB
"""tldextract helpers for testing and fetching remote resources."""
|
|
|
|
import re
|
|
import socket
|
|
from urllib.parse import scheme_chars
|
|
|
|
# One decimal octet in the range 0-255, written without leading zeros.
_OCTET = r"([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])"

# Strict dotted-quad matcher: exactly three "<octet>." groups, then one octet.
IP_RE = re.compile(r"^(" + _OCTET + r"\.)" + r"{3}" + _OCTET + r"$")

# Set form of urllib's scheme alphabet, for set arithmetic on candidate
# scheme prefixes.
scheme_chars_set = set(scheme_chars)
|
|
|
|
|
|
def lenient_netloc(url: str) -> str:
    """Extract the netloc of a URL-like string.

    Similar to the netloc attribute returned by
    urllib.parse.{urlparse,urlsplit}, but extract more leniently, without
    raising errors.

    The scheme prefix, path, query, fragment, userinfo and port are dropped,
    surrounding whitespace is stripped, and trailing dot characters (ASCII
    "." and the Unicode full-stop variants listed below) are removed.

    A bracketed IPv6 literal host such as ``[::1]`` is returned with its
    brackets, instead of being cut at the first colon inside the address.
    """
    after_userinfo = (
        _schemeless_url(url)
        .partition("/")[0]
        .partition("?")[0]
        .partition("#")[0]
        .rpartition("@")[-1]
    )

    # In an IP-literal, colons belong to the address rather than introducing
    # a port, so keep everything up to and including the closing bracket.
    if after_userinfo.startswith("["):
        literal, closing, _ = after_userinfo.partition("]")
        if closing:
            return literal + closing

    # Anything after the first ":" is the port.
    hostname = after_userinfo.partition(":")[0].strip()
    return hostname.rstrip(".\u3002\uff0e\uff61")
|
|
|
|
|
|
def _schemeless_url(url: str) -> str:
    """Return ``url`` without its leading ``scheme://`` or bare ``//``.

    The string is returned unchanged when the first ``//`` is not directly
    preceded by ``:`` and a non-empty prefix made up solely of characters
    from ``scheme_chars_set``.
    """
    slashes_at = url.find("//")

    # Scheme-relative form ("//host/..."): only the slashes need to go.
    if slashes_at == 0:
        return url[2:]

    # A scheme needs at least one character plus the ":" ahead of the "//",
    # hence index >= 2. This also covers the not-found case (-1).
    has_scheme = (
        slashes_at >= 2
        and url[slashes_at - 1] == ":"
        and not set(url[: slashes_at - 1]) - scheme_chars_set
    )
    if not has_scheme:
        return url

    return url[slashes_at + 2 :]
|
|
|
|
|
|
def looks_like_ip(maybe_ip: str) -> bool:
    """Check whether the given str looks like an IP address.

    A string qualifies when it starts with a digit and ``socket.inet_aton``
    accepts it. If ``inet_aton`` cannot be used (``AttributeError``) or the
    string cannot be passed to it (``UnicodeError``), the stricter ``IP_RE``
    dotted-quad pattern decides instead.

    The empty string returns ``False``.
    """
    # Slice rather than index: "" has no first character, and
    # "".isdigit() is False, so empty input is rejected without IndexError.
    if not maybe_ip[:1].isdigit():
        return False

    try:
        socket.inet_aton(maybe_ip)
    except (AttributeError, UnicodeError):
        # inet_aton was unusable for this input; fall back to the regex.
        return IP_RE.match(maybe_ip) is not None
    except OSError:
        # inet_aton rejected the string as an invalid address.
        return False
    return True
|