meirk-brd committed on
Commit
2c4cae4
·
1 Parent(s): ebf4777

handle gradio file wrapping

Browse files
Files changed (1) hide show
  1. tool.py +32 -2
tool.py CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations
2
 
3
  import json
4
  import os
 
5
 
6
  import requests
7
  from smolagents.tools import Tool
@@ -25,7 +26,19 @@ class BrightDataScraperTool(Tool):
25
  }
26
  super().__init__()
27
 
28
- def forward(self, url: str) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
29
  api_token = os.getenv("BRIGHT_DATA_API_TOKEN")
30
  unlocker_zone = os.getenv("BRIGHT_DATA_UNLOCKER_ZONE", "web_unlocker1")
31
 
@@ -39,7 +52,7 @@ class BrightDataScraperTool(Tool):
39
  }
40
 
41
  payload = {
42
- "url": url,
43
  "zone": unlocker_zone,
44
  "format": "raw",
45
  "data_format": "markdown",
@@ -52,3 +65,20 @@ class BrightDataScraperTool(Tool):
52
  except requests.exceptions.RequestException as exc:
53
  details = exc.response.text if getattr(exc, "response", None) is not None else ""
54
  return json.dumps({"error": str(exc), "details": details})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  import json
4
  import os
5
+ from typing import Optional, Tuple
6
 
7
  import requests
8
  from smolagents.tools import Tool
 
26
  }
27
  super().__init__()
28
 
29
+ def forward(self, url) -> str:
30
+ url_str, file_path = self._coerce_url_input(url)
31
+
32
+ if file_path:
33
+ try:
34
+ with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
35
+ return f.read()
36
+ except OSError as exc:
37
+ return json.dumps({"error": f"Failed to read uploaded file: {exc}"})
38
+
39
+ if not url_str:
40
+ return json.dumps({"error": "No valid URL provided"})
41
+
42
  api_token = os.getenv("BRIGHT_DATA_API_TOKEN")
43
  unlocker_zone = os.getenv("BRIGHT_DATA_UNLOCKER_ZONE", "web_unlocker1")
44
 
 
52
  }
53
 
54
  payload = {
55
+ "url": url_str,
56
  "zone": unlocker_zone,
57
  "format": "raw",
58
  "data_format": "markdown",
 
65
  except requests.exceptions.RequestException as exc:
66
  details = exc.response.text if getattr(exc, "response", None) is not None else ""
67
  return json.dumps({"error": str(exc), "details": details})
68
+
69
def _coerce_url_input(self, raw) -> Tuple[Optional[str], Optional[str]]:
    """Normalize the tool input into a ``(url, file_path)`` pair.

    At most one element of the returned pair is set:

    * A plain string is passed through unchanged as the URL.
    * A dict (presumably a Gradio file-wrapper payload -- confirm against
      the caller) is inspected for a ``"path"`` or ``"name"`` entry that
      points at an existing file; if found, it is returned as the file path.
    * Otherwise a non-empty string ``"orig_name"`` entry is treated as the
      URL, with ``https://`` prepended when it carries no http(s) scheme.
    * Anything else yields ``(None, None)``.

    Args:
        raw: The value received by ``forward``; a string, a dict, or any
            other object.

    Returns:
        A ``(url, file_path)`` tuple as described above.
    """
    if isinstance(raw, str):
        return raw, None

    if not isinstance(raw, dict):
        return None, None

    candidate = raw.get("path") or raw.get("name")
    # Guard the type before touching the filesystem: os.path.isfile raises
    # TypeError for values such as lists/dicts and interprets ints as file
    # descriptors, neither of which is a usable upload path.
    if isinstance(candidate, (str, bytes, os.PathLike)) and os.path.isfile(candidate):
        return None, candidate

    orig_name = raw.get("orig_name")
    if isinstance(orig_name, str) and orig_name:
        # Compare the scheme case-insensitively so "HTTP://host" is not
        # turned into "https://HTTP://host".
        if orig_name.lower().startswith(("http://", "https://")):
            return orig_name, None
        return f"https://{orig_name}", None

    return None, None