Spaces:
Sleeping
Sleeping
meirk-brd
committed on
Commit
·
810f177
1
Parent(s):
0acfa35
add markdown to the payload
Browse files
tool.py
CHANGED
|
@@ -27,14 +27,7 @@ class BrightDataScraperTool(Tool):
|
|
| 27 |
super().__init__()
|
| 28 |
|
| 29 |
def forward(self, url) -> str:
|
| 30 |
-
url_str,
|
| 31 |
-
|
| 32 |
-
if file_path:
|
| 33 |
-
try:
|
| 34 |
-
with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
|
| 35 |
-
return f.read()
|
| 36 |
-
except OSError as exc:
|
| 37 |
-
return json.dumps({"error": f"Failed to read uploaded file: {exc}"})
|
| 38 |
|
| 39 |
if not url_str:
|
| 40 |
return json.dumps({"error": "No valid URL provided"})
|
|
@@ -68,25 +61,16 @@ class BrightDataScraperTool(Tool):
|
|
| 68 |
|
| 69 |
def _coerce_url_input(self, raw) -> Tuple[Optional[str], Optional[str]]:
|
| 70 |
if isinstance(raw, str):
|
| 71 |
-
|
| 72 |
-
parsed = self._parse_file_dict_string(raw)
|
| 73 |
-
if parsed:
|
| 74 |
-
raw = parsed
|
| 75 |
-
else:
|
| 76 |
-
return raw, None
|
| 77 |
-
else:
|
| 78 |
-
return raw, None
|
| 79 |
|
| 80 |
if isinstance(raw, dict):
|
| 81 |
-
file_path = raw.get("path") or raw.get("name")
|
| 82 |
-
if file_path and os.path.isfile(file_path):
|
| 83 |
-
return None, file_path
|
| 84 |
-
|
| 85 |
orig_name = raw.get("orig_name")
|
| 86 |
if isinstance(orig_name, str) and orig_name:
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
|
|
|
|
|
|
| 90 |
|
| 91 |
return None, None
|
| 92 |
|
|
|
|
| 27 |
super().__init__()
|
| 28 |
|
| 29 |
def forward(self, url) -> str:
|
| 30 |
+
url_str, _ = self._coerce_url_input(url)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
if not url_str:
|
| 33 |
return json.dumps({"error": "No valid URL provided"})
|
|
|
|
| 61 |
|
| 62 |
def _coerce_url_input(self, raw) -> Tuple[Optional[str], Optional[str]]:
|
| 63 |
if isinstance(raw, str):
|
| 64 |
+
return raw, None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
|
| 66 |
if isinstance(raw, dict):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
orig_name = raw.get("orig_name")
|
| 68 |
if isinstance(orig_name, str) and orig_name:
|
| 69 |
+
return self._ensure_scheme(orig_name), None
|
| 70 |
+
|
| 71 |
+
url_value = raw.get("url")
|
| 72 |
+
if isinstance(url_value, str):
|
| 73 |
+
return url_value, None
|
| 74 |
|
| 75 |
return None, None
|
| 76 |
|