Spaces:
Runtime error
Runtime error
Commit
·
459f673
1
Parent(s):
79818ec
🚧 fetch_file
Browse files
app.py
CHANGED
|
@@ -22,6 +22,7 @@ from tools import (
|
|
| 22 |
fetch_raw_html,
|
| 23 |
extract_links,
|
| 24 |
extract_text_from_pdf,
|
|
|
|
| 25 |
)
|
| 26 |
|
| 27 |
# (Keep Constants as is)
|
|
@@ -84,6 +85,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
| 84 |
fetch_raw_html,
|
| 85 |
extract_links,
|
| 86 |
extract_text_from_pdf,
|
|
|
|
| 87 |
],
|
| 88 |
verbosity_level=2,
|
| 89 |
additional_authorized_imports=authorized_imports,
|
|
|
|
| 22 |
fetch_raw_html,
|
| 23 |
extract_links,
|
| 24 |
extract_text_from_pdf,
|
| 25 |
+
fetch_file,
|
| 26 |
)
|
| 27 |
|
| 28 |
# (Keep Constants as is)
|
|
|
|
| 85 |
fetch_raw_html,
|
| 86 |
extract_links,
|
| 87 |
extract_text_from_pdf,
|
| 88 |
+
fetch_file,
|
| 89 |
],
|
| 90 |
verbosity_level=2,
|
| 91 |
additional_authorized_imports=authorized_imports,
|
tools.py
CHANGED
|
@@ -6,6 +6,7 @@ import torch
|
|
| 6 |
import spaces
|
| 7 |
from transformers import pipeline
|
| 8 |
from pdfminer.high_level import extract_text
|
|
|
|
| 9 |
|
| 10 |
|
| 11 |
@tool
|
|
@@ -291,3 +292,33 @@ def extract_text_from_pdf(pdf_path: str) -> str:
|
|
| 291 |
return text or ""
|
| 292 |
except Exception:
|
| 293 |
return ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
import spaces
|
| 7 |
from transformers import pipeline
|
| 8 |
from pdfminer.high_level import extract_text
|
| 9 |
+
import os
|
| 10 |
|
| 11 |
|
| 12 |
@tool
|
|
|
|
| 292 |
return text or ""
|
| 293 |
except Exception:
|
| 294 |
return ""
|
| 295 |
+
|
| 296 |
+
|
| 297 |
+
@tool
def fetch_file(url: str, save_path: str) -> str:
    """
    Download a file from a URL and save it locally, creating directories if needed.

    The response body is streamed in fixed-size chunks to a temporary sibling
    file ("<save_path>.part") and moved into place only after the download has
    finished, so large files are never held fully in memory and a failed
    download does not leave a truncated file at save_path.

    Args:
        url (str): Direct link to the file (e.g., a PDF).
        save_path (str): Local file path where content will be saved
            (e.g., "input/paper.pdf").

    Returns:
        str: The local save_path if download and save succeeded;
            empty string if an error occurred.
    """
    # Nothing sensible can be fetched or written without both arguments.
    if not url or not save_path:
        return ""

    tmp_path = f"{save_path}.part"
    try:
        parent_dir = os.path.dirname(save_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        # stream=True defers reading the body; the context manager releases
        # the connection even if writing fails partway through.
        with requests.get(url, timeout=20, stream=True) as resp:
            resp.raise_for_status()
            with open(tmp_path, "wb") as f:
                for chunk in resp.iter_content(chunk_size=65536):
                    if chunk:  # skip keep-alive chunks
                        f.write(chunk)

        # Atomic on the same filesystem: save_path only ever holds a
        # complete download.
        os.replace(tmp_path, save_path)
        return save_path
    except Exception:
        # Deliberate best-effort contract: callers get "" on any failure
        # instead of an exception. Remove the partial download, if any.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        return ""
|