frdel committed on
Commit
88069e2
·
1 Parent(s): 14104d3

browser headers polishing

Browse files
python/helpers/settings.py CHANGED
@@ -1290,13 +1290,7 @@ def convert_in(settings: dict) -> Settings:
1290
 
1291
  if not should_skip:
1292
  # Special handling for browser_http_headers
1293
- if field["id"] == "browser_http_headers":
1294
- headers_dict = _env_to_dict(field["value"])
1295
- # Validate headers before saving
1296
- validated_headers = _validate_http_headers(headers_dict)
1297
- current[field["id"]] = validated_headers
1298
- PrintStyle().info(f"Set browser_http_headers: {validated_headers}")
1299
- elif field["id"].endswith("_kwargs"):
1300
  current[field["id"]] = _env_to_dict(field["value"])
1301
  elif field["id"].startswith("api_key_"):
1302
  current["api_keys"][field["id"]] = field["value"]
@@ -1632,45 +1626,6 @@ def _dict_to_env(data_dict):
1632
  return "\n".join(lines)
1633
 
1634
 
1635
- def _validate_http_headers(headers: dict[str, str]) -> dict[str, str]:
1636
- """Validate and sanitize HTTP headers for browser requests"""
1637
- valid_headers = {}
1638
-
1639
- # Headers that should not be set manually as they're controlled by the browser
1640
- dangerous_headers = {
1641
- 'host', 'content-length', 'connection', 'upgrade', 'expect',
1642
- 'transfer-encoding', 'te', 'trailer', 'proxy-connection'
1643
- }
1644
-
1645
- for key, value in headers.items():
1646
- # Remove any leading/trailing whitespace
1647
- key = key.strip()
1648
- value = value.strip()
1649
-
1650
- # Skip empty keys or values
1651
- if not key or not value:
1652
- continue
1653
-
1654
- # Check for dangerous headers
1655
- if key.lower() in dangerous_headers:
1656
- PrintStyle().warning(f"Skipping potentially dangerous header: {key}")
1657
- continue
1658
-
1659
- # Basic header name validation (RFC 7230)
1660
- if not re.match(r'^[!#$%&\'*+\-.0-9A-Z^_`a-z|~]+$', key):
1661
- PrintStyle().warning(f"Invalid header name format: {key}")
1662
- continue
1663
-
1664
- # Header value validation - remove control characters except tab
1665
- cleaned_value = re.sub(r'[\x00-\x08\x0A-\x1F\x7F]', '', value)
1666
- if cleaned_value != value:
1667
- PrintStyle().warning(f"Cleaned invalid characters from header value: {key}")
1668
-
1669
- valid_headers[key] = cleaned_value
1670
-
1671
- return valid_headers
1672
-
1673
-
1674
  def set_root_password(password: str):
1675
  if not runtime.is_dockerized():
1676
  raise Exception("root password can only be set in dockerized environments")
 
1290
 
1291
  if not should_skip:
1292
  # Special handling for browser_http_headers
1293
+ if field["id"] == "browser_http_headers" or field["id"].endswith("_kwargs"):
 
 
 
 
 
 
1294
  current[field["id"]] = _env_to_dict(field["value"])
1295
  elif field["id"].startswith("api_key_"):
1296
  current["api_keys"][field["id"]] = field["value"]
 
1626
  return "\n".join(lines)
1627
 
1628
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1629
  def set_root_password(password: str):
1630
  if not runtime.is_dockerized():
1631
  raise Exception("root password can only be set in dockerized environments")
python/tools/browser_agent.py CHANGED
@@ -39,18 +39,7 @@ class State:
39
 
40
  # for some reason we need to provide exact path to headless shell, otherwise it looks for headed browser
41
  pw_binary = ensure_playwright_binary()
42
-
43
- # Prepare HTTP headers with error handling
44
- try:
45
- http_headers = self.agent.config.browser_http_headers or {}
46
- if http_headers:
47
- PrintStyle().info(f"Using HTTP headers: {list(http_headers.keys())}")
48
- else:
49
- PrintStyle().info("No custom HTTP headers configured")
50
- except Exception as e:
51
- PrintStyle().warning(f"Error processing HTTP headers, using defaults: {e}")
52
- http_headers = {}
53
-
54
  self.browser_session = browser_use.BrowserSession(
55
  browser_profile=browser_use.BrowserProfile(
56
  headless=True,
@@ -75,8 +64,8 @@ class State:
75
  / "profiles"
76
  / f"agent_{self.agent.context.id}"
77
  ),
78
- extra_http_headers=http_headers,
79
- )
80
  )
81
 
82
  await self.browser_session.start() if self.browser_session else None
 
39
 
40
  # for some reason we need to provide exact path to headless shell, otherwise it looks for headed browser
41
  pw_binary = ensure_playwright_binary()
42
+
 
 
 
 
 
 
 
 
 
 
 
43
  self.browser_session = browser_use.BrowserSession(
44
  browser_profile=browser_use.BrowserProfile(
45
  headless=True,
 
64
  / "profiles"
65
  / f"agent_{self.agent.context.id}"
66
  ),
67
+ extra_http_headers=self.agent.config.browser_http_headers or {},
68
+ )
69
  )
70
 
71
  await self.browser_session.start() if self.browser_session else None