Spaces:
Paused
Paused
frdel committed on
Commit ·
88069e2
1
Parent(s): 14104d3
browser headers polishing
Browse files
- python/helpers/settings.py +1 -46
- python/tools/browser_agent.py +3 -14
python/helpers/settings.py
CHANGED
|
@@ -1290,13 +1290,7 @@ def convert_in(settings: dict) -> Settings:
|
|
| 1290 |
|
| 1291 |
if not should_skip:
|
| 1292 |
# Special handling for browser_http_headers
|
| 1293 |
-
if field["id"] == "browser_http_headers":
|
| 1294 |
-
headers_dict = _env_to_dict(field["value"])
|
| 1295 |
-
# Validate headers before saving
|
| 1296 |
-
validated_headers = _validate_http_headers(headers_dict)
|
| 1297 |
-
current[field["id"]] = validated_headers
|
| 1298 |
-
PrintStyle().info(f"Set browser_http_headers: {validated_headers}")
|
| 1299 |
-
elif field["id"].endswith("_kwargs"):
|
| 1300 |
current[field["id"]] = _env_to_dict(field["value"])
|
| 1301 |
elif field["id"].startswith("api_key_"):
|
| 1302 |
current["api_keys"][field["id"]] = field["value"]
|
|
@@ -1632,45 +1626,6 @@ def _dict_to_env(data_dict):
|
|
| 1632 |
return "\n".join(lines)
|
| 1633 |
|
| 1634 |
|
| 1635 |
-
def _validate_http_headers(headers: dict[str, str]) -> dict[str, str]:
|
| 1636 |
-
"""Validate and sanitize HTTP headers for browser requests"""
|
| 1637 |
-
valid_headers = {}
|
| 1638 |
-
|
| 1639 |
-
# Headers that should not be set manually as they're controlled by the browser
|
| 1640 |
-
dangerous_headers = {
|
| 1641 |
-
'host', 'content-length', 'connection', 'upgrade', 'expect',
|
| 1642 |
-
'transfer-encoding', 'te', 'trailer', 'proxy-connection'
|
| 1643 |
-
}
|
| 1644 |
-
|
| 1645 |
-
for key, value in headers.items():
|
| 1646 |
-
# Remove any leading/trailing whitespace
|
| 1647 |
-
key = key.strip()
|
| 1648 |
-
value = value.strip()
|
| 1649 |
-
|
| 1650 |
-
# Skip empty keys or values
|
| 1651 |
-
if not key or not value:
|
| 1652 |
-
continue
|
| 1653 |
-
|
| 1654 |
-
# Check for dangerous headers
|
| 1655 |
-
if key.lower() in dangerous_headers:
|
| 1656 |
-
PrintStyle().warning(f"Skipping potentially dangerous header: {key}")
|
| 1657 |
-
continue
|
| 1658 |
-
|
| 1659 |
-
# Basic header name validation (RFC 7230)
|
| 1660 |
-
if not re.match(r'^[!#$%&\'*+\-.0-9A-Z^_`a-z|~]+$', key):
|
| 1661 |
-
PrintStyle().warning(f"Invalid header name format: {key}")
|
| 1662 |
-
continue
|
| 1663 |
-
|
| 1664 |
-
# Header value validation - remove control characters except tab
|
| 1665 |
-
cleaned_value = re.sub(r'[\x00-\x08\x0A-\x1F\x7F]', '', value)
|
| 1666 |
-
if cleaned_value != value:
|
| 1667 |
-
PrintStyle().warning(f"Cleaned invalid characters from header value: {key}")
|
| 1668 |
-
|
| 1669 |
-
valid_headers[key] = cleaned_value
|
| 1670 |
-
|
| 1671 |
-
return valid_headers
|
| 1672 |
-
|
| 1673 |
-
|
| 1674 |
def set_root_password(password: str):
|
| 1675 |
if not runtime.is_dockerized():
|
| 1676 |
raise Exception("root password can only be set in dockerized environments")
|
|
|
|
| 1290 |
|
| 1291 |
if not should_skip:
|
| 1292 |
# Special handling for browser_http_headers
|
| 1293 |
+
if field["id"] == "browser_http_headers" or field["id"].endswith("_kwargs"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1294 |
current[field["id"]] = _env_to_dict(field["value"])
|
| 1295 |
elif field["id"].startswith("api_key_"):
|
| 1296 |
current["api_keys"][field["id"]] = field["value"]
|
|
|
|
| 1626 |
return "\n".join(lines)
|
| 1627 |
|
| 1628 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1629 |
def set_root_password(password: str):
|
| 1630 |
if not runtime.is_dockerized():
|
| 1631 |
raise Exception("root password can only be set in dockerized environments")
|
python/tools/browser_agent.py
CHANGED
|
@@ -39,18 +39,7 @@ class State:
|
|
| 39 |
|
| 40 |
# for some reason we need to provide exact path to headless shell, otherwise it looks for headed browser
|
| 41 |
pw_binary = ensure_playwright_binary()
|
| 42 |
-
|
| 43 |
-
# Prepare HTTP headers with error handling
|
| 44 |
-
try:
|
| 45 |
-
http_headers = self.agent.config.browser_http_headers or {}
|
| 46 |
-
if http_headers:
|
| 47 |
-
PrintStyle().info(f"Using HTTP headers: {list(http_headers.keys())}")
|
| 48 |
-
else:
|
| 49 |
-
PrintStyle().info("No custom HTTP headers configured")
|
| 50 |
-
except Exception as e:
|
| 51 |
-
PrintStyle().warning(f"Error processing HTTP headers, using defaults: {e}")
|
| 52 |
-
http_headers = {}
|
| 53 |
-
|
| 54 |
self.browser_session = browser_use.BrowserSession(
|
| 55 |
browser_profile=browser_use.BrowserProfile(
|
| 56 |
headless=True,
|
|
@@ -75,8 +64,8 @@ class State:
|
|
| 75 |
/ "profiles"
|
| 76 |
/ f"agent_{self.agent.context.id}"
|
| 77 |
),
|
| 78 |
-
extra_http_headers=
|
| 79 |
-
|
| 80 |
)
|
| 81 |
|
| 82 |
await self.browser_session.start() if self.browser_session else None
|
|
|
|
| 39 |
|
| 40 |
# for some reason we need to provide exact path to headless shell, otherwise it looks for headed browser
|
| 41 |
pw_binary = ensure_playwright_binary()
|
| 42 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
self.browser_session = browser_use.BrowserSession(
|
| 44 |
browser_profile=browser_use.BrowserProfile(
|
| 45 |
headless=True,
|
|
|
|
| 64 |
/ "profiles"
|
| 65 |
/ f"agent_{self.agent.context.id}"
|
| 66 |
),
|
| 67 |
+
extra_http_headers=self.agent.config.browser_http_headers or {},
|
| 68 |
+
)
|
| 69 |
)
|
| 70 |
|
| 71 |
await self.browser_session.start() if self.browser_session else None
|