Karim shoair committed on
Commit ·
7e18800
1
Parent(s): 574271a
fix: improve checking for valid proxy and valid CDP URL
Browse files
scrapling/engines/toolbelt/navigation.py
CHANGED
|
@@ -65,16 +65,24 @@ def construct_proxy_dict(
|
|
| 65 |
"""
|
| 66 |
if isinstance(proxy_string, str):
|
| 67 |
proxy = urlparse(proxy_string)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
try:
|
| 69 |
result = {
|
| 70 |
-
"server": f"{proxy.scheme}://{proxy.hostname}
|
| 71 |
"username": proxy.username or "",
|
| 72 |
"password": proxy.password or "",
|
| 73 |
}
|
|
|
|
|
|
|
| 74 |
return tuple(result.items()) if as_tuple else result
|
| 75 |
except ValueError:
|
| 76 |
# Urllib will say that one of the parameters above can't be casted to the correct type like `int` for port etc...
|
| 77 |
-
raise
|
| 78 |
|
| 79 |
elif isinstance(proxy_string, dict):
|
| 80 |
try:
|
|
@@ -106,6 +114,13 @@ def construct_cdp_url(cdp_url: str, query_params: Optional[Dict] = None) -> str:
|
|
| 106 |
if not parsed.netloc:
|
| 107 |
raise ValueError("Invalid hostname for the CDP URL")
|
| 108 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
# Ensure the path starts with /
|
| 110 |
path = parsed.path
|
| 111 |
if not path.startswith("/"):
|
|
|
|
| 65 |
"""
|
| 66 |
if isinstance(proxy_string, str):
|
| 67 |
proxy = urlparse(proxy_string)
|
| 68 |
+
if (
|
| 69 |
+
proxy.scheme not in ("http", "https", "socks4", "socks5")
|
| 70 |
+
or not proxy.hostname
|
| 71 |
+
):
|
| 72 |
+
raise ValueError("Invalid proxy string!")
|
| 73 |
+
|
| 74 |
try:
|
| 75 |
result = {
|
| 76 |
+
"server": f"{proxy.scheme}://{proxy.hostname}",
|
| 77 |
"username": proxy.username or "",
|
| 78 |
"password": proxy.password or "",
|
| 79 |
}
|
| 80 |
+
if proxy.port:
|
| 81 |
+
result["server"] += f":{proxy.port}"
|
| 82 |
return tuple(result.items()) if as_tuple else result
|
| 83 |
except ValueError:
|
| 84 |
# Urllib will say that one of the parameters above can't be casted to the correct type like `int` for port etc...
|
| 85 |
+
raise ValueError("The proxy argument's string is in invalid format!")
|
| 86 |
|
| 87 |
elif isinstance(proxy_string, dict):
|
| 88 |
try:
|
|
|
|
| 114 |
if not parsed.netloc:
|
| 115 |
raise ValueError("Invalid hostname for the CDP URL")
|
| 116 |
|
| 117 |
+
try:
|
| 118 |
+
# Checking if the port is valid (if available)
|
| 119 |
+
_ = parsed.port
|
| 120 |
+
except ValueError:
|
| 121 |
+
# urlparse will raise `ValueError` if the port can't be casted to integer
|
| 122 |
+
raise ValueError("Invalid port for the CDP URL")
|
| 123 |
+
|
| 124 |
# Ensure the path starts with /
|
| 125 |
path = parsed.path
|
| 126 |
if not path.startswith("/"):
|