Spaces:
Sleeping
Sleeping
meirk-brd
committed on
Commit
·
258dbf2
1
Parent(s):
49cd6f4
fix url parsing
Browse files
tool.py
CHANGED
|
@@ -222,14 +222,7 @@ class BrightDataDatasetTool(Tool):
|
|
| 222 |
return self._extract_url_from_text(raw)
|
| 223 |
|
| 224 |
if isinstance(raw, dict):
|
| 225 |
-
#
|
| 226 |
-
for key in ("orig_name", "name"):
|
| 227 |
-
candidate = raw.get(key)
|
| 228 |
-
if isinstance(candidate, str) and candidate:
|
| 229 |
-
extracted = self._extract_url_from_text(candidate)
|
| 230 |
-
if extracted:
|
| 231 |
-
return extracted
|
| 232 |
-
|
| 233 |
url_value = raw.get("url")
|
| 234 |
if isinstance(url_value, str):
|
| 235 |
if url_value.startswith(("http://", "https://")):
|
|
@@ -240,6 +233,15 @@ class BrightDataDatasetTool(Tool):
|
|
| 240 |
extracted = self._extract_url_from_text(url_value)
|
| 241 |
if extracted:
|
| 242 |
return extracted
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
return None
|
| 244 |
|
| 245 |
return None
|
|
|
|
| 222 |
return self._extract_url_from_text(raw)
|
| 223 |
|
| 224 |
if isinstance(raw, dict):
|
| 225 |
+
# Check for direct url field first (common in Gradio FileData from smolagents)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 226 |
url_value = raw.get("url")
|
| 227 |
if isinstance(url_value, str):
|
| 228 |
if url_value.startswith(("http://", "https://")):
|
|
|
|
| 233 |
extracted = self._extract_url_from_text(url_value)
|
| 234 |
if extracted:
|
| 235 |
return extracted
|
| 236 |
+
|
| 237 |
+
# Fallback: check original text name fields if present
|
| 238 |
+
for key in ("orig_name", "name"):
|
| 239 |
+
candidate = raw.get(key)
|
| 240 |
+
if isinstance(candidate, str) and candidate:
|
| 241 |
+
extracted = self._extract_url_from_text(candidate)
|
| 242 |
+
if extracted:
|
| 243 |
+
return extracted
|
| 244 |
+
|
| 245 |
return None
|
| 246 |
|
| 247 |
return None
|