meirk-brd committed on
Commit
49cd6f4
·
1 Parent(s): 1cbc47e

remove gradio parse

Browse files
Files changed (1) hide show
  1. tool.py +13 -20
tool.py CHANGED
@@ -222,32 +222,25 @@ class BrightDataDatasetTool(Tool):
222
  return self._extract_url_from_text(raw)
223
 
224
  if isinstance(raw, dict):
225
- file_path = raw.get("path")
226
- if isinstance(file_path, str) and os.path.isfile(file_path):
227
- content = self._read_text_file(file_path)
228
- url_from_file = self._extract_url_from_text(content or "")
229
- if url_from_file:
230
- return url_from_file
231
-
232
- orig_name = raw.get("orig_name")
233
- if isinstance(orig_name, str) and orig_name:
234
- return self._ensure_scheme(orig_name)
235
 
236
  url_value = raw.get("url")
237
  if isinstance(url_value, str):
238
  if url_value.startswith(("http://", "https://")):
239
  return url_value
240
  if url_value.startswith("/gradio_api/file="):
241
- file_path = url_value.split("=", 1)[-1]
242
- content = self._read_text_file(file_path)
243
- url_from_file = self._extract_url_from_text(content or "")
244
- if url_from_file:
245
- return url_from_file
246
- else:
247
- extracted = self._extract_url_from_text(url_value)
248
- if extracted:
249
- return extracted
250
- return None
251
 
252
  return None
253
 
 
222
  return self._extract_url_from_text(raw)
223
 
224
  if isinstance(raw, dict):
225
+ # Prefer original text name fields if present.
226
+ for key in ("orig_name", "name"):
227
+ candidate = raw.get(key)
228
+ if isinstance(candidate, str) and candidate:
229
+ extracted = self._extract_url_from_text(candidate)
230
+ if extracted:
231
+ return extracted
 
 
 
232
 
233
  url_value = raw.get("url")
234
  if isinstance(url_value, str):
235
  if url_value.startswith(("http://", "https://")):
236
  return url_value
237
  if url_value.startswith("/gradio_api/file="):
238
+ # Do not parse HTML/CSS file contents; treat as no URL.
239
+ return None
240
+ extracted = self._extract_url_from_text(url_value)
241
+ if extracted:
242
+ return extracted
243
+ return None
 
 
 
 
244
 
245
  return None
246