Karim shoair committed on
Commit
7e18800
·
1 Parent(s): 574271a

fix: improve checking for valid proxy and valid CDP URL

Browse files
scrapling/engines/toolbelt/navigation.py CHANGED
@@ -65,16 +65,24 @@ def construct_proxy_dict(
65
  """
66
  if isinstance(proxy_string, str):
67
  proxy = urlparse(proxy_string)
 
 
 
 
 
 
68
  try:
69
  result = {
70
- "server": f"{proxy.scheme}://{proxy.hostname}:{proxy.port}",
71
  "username": proxy.username or "",
72
  "password": proxy.password or "",
73
  }
 
 
74
  return tuple(result.items()) if as_tuple else result
75
  except ValueError:
76
  # Urllib will say that one of the parameters above can't be cast to the correct type, like `int` for the port, etc.
77
- raise TypeError("The proxy argument's string is in invalid format!")
78
 
79
  elif isinstance(proxy_string, dict):
80
  try:
@@ -106,6 +114,13 @@ def construct_cdp_url(cdp_url: str, query_params: Optional[Dict] = None) -> str:
106
  if not parsed.netloc:
107
  raise ValueError("Invalid hostname for the CDP URL")
108
 
 
 
 
 
 
 
 
109
  # Ensure the path starts with /
110
  path = parsed.path
111
  if not path.startswith("/"):
 
65
  """
66
  if isinstance(proxy_string, str):
67
  proxy = urlparse(proxy_string)
68
+ if (
69
+ proxy.scheme not in ("http", "https", "socks4", "socks5")
70
+ or not proxy.hostname
71
+ ):
72
+ raise ValueError("Invalid proxy string!")
73
+
74
  try:
75
  result = {
76
+ "server": f"{proxy.scheme}://{proxy.hostname}",
77
  "username": proxy.username or "",
78
  "password": proxy.password or "",
79
  }
80
+ if proxy.port:
81
+ result["server"] += f":{proxy.port}"
82
  return tuple(result.items()) if as_tuple else result
83
  except ValueError:
84
  # Urllib will say that one of the parameters above can't be cast to the correct type, like `int` for the port, etc.
85
+ raise ValueError("The proxy argument's string is in invalid format!")
86
 
87
  elif isinstance(proxy_string, dict):
88
  try:
 
114
  if not parsed.netloc:
115
  raise ValueError("Invalid hostname for the CDP URL")
116
 
117
+ try:
118
+ # Checking if the port is valid (if available)
119
+ _ = parsed.port
120
+ except ValueError:
121
+ # Accessing `.port` on the parsed URL raises `ValueError` if the port can't be cast to an integer
122
+ raise ValueError("Invalid port for the CDP URL")
123
+
124
  # Ensure the path starts with /
125
  path = parsed.path
126
  if not path.startswith("/"):