Spaces:
Sleeping
Sleeping
meirk-brd
committed on
Commit
·
49cd6f4
1
Parent(s):
1cbc47e
remove gradio parse
Browse files
tool.py
CHANGED
|
@@ -222,32 +222,25 @@ class BrightDataDatasetTool(Tool):
|
|
| 222 |
return self._extract_url_from_text(raw)
|
| 223 |
|
| 224 |
if isinstance(raw, dict):
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
orig_name = raw.get("orig_name")
|
| 233 |
-
if isinstance(orig_name, str) and orig_name:
|
| 234 |
-
return self._ensure_scheme(orig_name)
|
| 235 |
|
| 236 |
url_value = raw.get("url")
|
| 237 |
if isinstance(url_value, str):
|
| 238 |
if url_value.startswith(("http://", "https://")):
|
| 239 |
return url_value
|
| 240 |
if url_value.startswith("/gradio_api/file="):
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
extracted = self._extract_url_from_text(url_value)
|
| 248 |
-
if extracted:
|
| 249 |
-
return extracted
|
| 250 |
-
return None
|
| 251 |
|
| 252 |
return None
|
| 253 |
|
|
|
|
| 222 |
return self._extract_url_from_text(raw)
|
| 223 |
|
| 224 |
if isinstance(raw, dict):
|
| 225 |
+
# Prefer original text name fields if present.
|
| 226 |
+
for key in ("orig_name", "name"):
|
| 227 |
+
candidate = raw.get(key)
|
| 228 |
+
if isinstance(candidate, str) and candidate:
|
| 229 |
+
extracted = self._extract_url_from_text(candidate)
|
| 230 |
+
if extracted:
|
| 231 |
+
return extracted
|
|
|
|
|
|
|
|
|
|
| 232 |
|
| 233 |
url_value = raw.get("url")
|
| 234 |
if isinstance(url_value, str):
|
| 235 |
if url_value.startswith(("http://", "https://")):
|
| 236 |
return url_value
|
| 237 |
if url_value.startswith("/gradio_api/file="):
|
| 238 |
+
# Do not parse HTML/CSS file contents; treat as no URL.
|
| 239 |
+
return None
|
| 240 |
+
extracted = self._extract_url_from_text(url_value)
|
| 241 |
+
if extracted:
|
| 242 |
+
return extracted
|
| 243 |
+
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
|
| 245 |
return None
|
| 246 |
|