marcos-banik committed on
Commit
459f673
·
1 Parent(s): 79818ec

🚧 fetch_file

Browse files
Files changed (2) hide show
  1. app.py +2 -0
  2. tools.py +31 -0
app.py CHANGED
@@ -22,6 +22,7 @@ from tools import (
22
  fetch_raw_html,
23
  extract_links,
24
  extract_text_from_pdf,
 
25
  )
26
 
27
  # (Keep Constants as is)
@@ -84,6 +85,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
84
  fetch_raw_html,
85
  extract_links,
86
  extract_text_from_pdf,
 
87
  ],
88
  verbosity_level=2,
89
  additional_authorized_imports=authorized_imports,
 
22
  fetch_raw_html,
23
  extract_links,
24
  extract_text_from_pdf,
25
+ fetch_file,
26
  )
27
 
28
  # (Keep Constants as is)
 
85
  fetch_raw_html,
86
  extract_links,
87
  extract_text_from_pdf,
88
+ fetch_file,
89
  ],
90
  verbosity_level=2,
91
  additional_authorized_imports=authorized_imports,
tools.py CHANGED
@@ -6,6 +6,7 @@ import torch
6
  import spaces
7
  from transformers import pipeline
8
  from pdfminer.high_level import extract_text
 
9
 
10
 
11
  @tool
@@ -291,3 +292,33 @@ def extract_text_from_pdf(pdf_path: str) -> str:
291
  return text or ""
292
  except Exception:
293
  return ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  import spaces
7
  from transformers import pipeline
8
  from pdfminer.high_level import extract_text
9
+ import os
10
 
11
 
12
  @tool
 
292
  return text or ""
293
  except Exception:
294
  return ""
295
+
296
+
297
@tool
def fetch_file(url: str, save_path: str) -> str:
    """
    Download a file from a URL and save it locally, creating directories if needed.

    Args:
        url (str): Direct link to the file (e.g., a PDF).
        save_path (str): Local file path where content will be saved
            (e.g., "input/paper.pdf").

    Returns:
        str: The local save_path if download and save succeeded;
            empty string if an error occurred.
    """
    # Write to a sibling ".part" file first so that a failed or interrupted
    # transfer never leaves a truncated file at save_path (and never clobbers
    # a good file that was already there).
    partial_path = f"{save_path}.part"
    try:
        parent_dir = os.path.dirname(save_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        # stream=True keeps memory usage flat for large files; the context
        # manager releases the connection even if writing fails midway.
        with requests.get(url, timeout=20, stream=True) as resp:
            resp.raise_for_status()
            with open(partial_path, "wb") as f:
                for chunk in resp.iter_content(chunk_size=65536):
                    f.write(chunk)

        # Atomic on the same filesystem: save_path is either the complete
        # new file or left untouched.
        os.replace(partial_path, save_path)
        return save_path
    except Exception:
        # Deliberate best-effort contract: callers (the agent) get "" on any
        # failure instead of an exception. Clean up the partial download.
        try:
            os.remove(partial_path)
        except OSError:
            pass  # nothing was written, or it is already gone
        return ""