meirk-brd committed on
Commit
258dbf2
·
1 Parent(s): 49cd6f4

fix url parsing

Browse files
Files changed (1) hide show
  1. tool.py +10 -8
tool.py CHANGED
@@ -222,14 +222,7 @@ class BrightDataDatasetTool(Tool):
222
  return self._extract_url_from_text(raw)
223
 
224
  if isinstance(raw, dict):
225
- # Prefer original text name fields if present.
226
- for key in ("orig_name", "name"):
227
- candidate = raw.get(key)
228
- if isinstance(candidate, str) and candidate:
229
- extracted = self._extract_url_from_text(candidate)
230
- if extracted:
231
- return extracted
232
-
233
  url_value = raw.get("url")
234
  if isinstance(url_value, str):
235
  if url_value.startswith(("http://", "https://")):
@@ -240,6 +233,15 @@ class BrightDataDatasetTool(Tool):
240
  extracted = self._extract_url_from_text(url_value)
241
  if extracted:
242
  return extracted
 
 
 
 
 
 
 
 
 
243
  return None
244
 
245
  return None
 
222
  return self._extract_url_from_text(raw)
223
 
224
  if isinstance(raw, dict):
225
+ # Check for direct url field first (common in Gradio FileData from smolagents)
 
 
 
 
 
 
 
226
  url_value = raw.get("url")
227
  if isinstance(url_value, str):
228
  if url_value.startswith(("http://", "https://")):
 
233
  extracted = self._extract_url_from_text(url_value)
234
  if extracted:
235
  return extracted
236
+
237
+ # Fallback: check original text name fields if present
238
+ for key in ("orig_name", "name"):
239
+ candidate = raw.get(key)
240
+ if isinstance(candidate, str) and candidate:
241
+ extracted = self._extract_url_from_text(candidate)
242
+ if extracted:
243
+ return extracted
244
+
245
  return None
246
 
247
  return None