Spaces:
Sleeping
Sleeping
Update tools.py
Browse files
tools.py
CHANGED
|
@@ -22,10 +22,11 @@ def _download_file_for_task(task_id: str, ext: str) -> str:
|
|
| 22 |
os.makedirs("hf_files", exist_ok=True)
|
| 23 |
local_path = os.path.join("hf_files", f"{task_id}.{ext}")
|
| 24 |
url = f"{DEFAULT_API_URL}/files/{task_id}"
|
| 25 |
-
|
| 26 |
try:
|
| 27 |
resp = requests.get(url, timeout=10)
|
| 28 |
if resp.status_code == 200 and resp.content:
|
|
|
|
| 29 |
with open(local_path, "wb") as f:
|
| 30 |
f.write(resp.content)
|
| 31 |
return local_path
|
|
@@ -56,7 +57,7 @@ def web_search_tool(state: AgentState) -> AgentState:
|
|
| 56 |
break
|
| 57 |
if attempt < max_retries - 1:
|
| 58 |
print(f"web_search_result: rate limit error, retrying in 10 seconds")
|
| 59 |
-
time.sleep(
|
| 60 |
print(f"web_search_result reached ")
|
| 61 |
return {
|
| 62 |
"web_search_query": None,
|
|
@@ -102,7 +103,7 @@ def ocr_image_tool(state: AgentState) -> AgentState:
|
|
| 102 |
text = pytesseract.image_to_string(img).strip() or "(no visible text)"
|
| 103 |
except Exception as e:
|
| 104 |
text = f"Error during OCR: {e}"
|
| 105 |
-
|
| 106 |
return {
|
| 107 |
"ocr_path": None,
|
| 108 |
"ocr_result": text
|
|
@@ -179,7 +180,7 @@ def parse_excel_tool(state: AgentState) -> AgentState:
|
|
| 179 |
# 4) Strip out separator rows and return the table block
|
| 180 |
clean_rows = [row for row in table_lines if not re.match(r"^\s*\|\s*-+", row)]
|
| 181 |
table_block = "\n".join(clean_rows).strip()
|
| 182 |
-
|
| 183 |
return {
|
| 184 |
"excel_path": None,
|
| 185 |
"excel_sheet_name": None,
|
|
@@ -258,7 +259,7 @@ def audio_transcriber_tool(state: AgentState) -> AgentState:
|
|
| 258 |
text = response.get("text", "").strip()
|
| 259 |
except Exception as e:
|
| 260 |
text = f"Error during transcription: {e}"
|
| 261 |
-
|
| 262 |
return {
|
| 263 |
"audio_path": None,
|
| 264 |
"transcript": text
|
|
|
|
| 22 |
os.makedirs("hf_files", exist_ok=True)
|
| 23 |
local_path = os.path.join("hf_files", f"{task_id}.{ext}")
|
| 24 |
url = f"{DEFAULT_API_URL}/files/{task_id}"
|
| 25 |
+
|
| 26 |
try:
|
| 27 |
resp = requests.get(url, timeout=10)
|
| 28 |
if resp.status_code == 200 and resp.content:
|
| 29 |
+
print(f"Downloaded file from {url} to {local_path}")
|
| 30 |
with open(local_path, "wb") as f:
|
| 31 |
f.write(resp.content)
|
| 32 |
return local_path
|
|
|
|
| 57 |
break
|
| 58 |
if attempt < max_retries - 1:
|
| 59 |
print(f"web_search_result: rate limit error, retrying in 10 seconds")
|
| 60 |
+
time.sleep(4)
|
| 61 |
print(f"web_search_result reached ")
|
| 62 |
return {
|
| 63 |
"web_search_query": None,
|
|
|
|
| 103 |
text = pytesseract.image_to_string(img).strip() or "(no visible text)"
|
| 104 |
except Exception as e:
|
| 105 |
text = f"Error during OCR: {e}"
|
| 106 |
+
print(f"OCRed as ocr_result: {text}")
|
| 107 |
return {
|
| 108 |
"ocr_path": None,
|
| 109 |
"ocr_result": text
|
|
|
|
| 180 |
# 4) Strip out separator rows and return the table block
|
| 181 |
clean_rows = [row for row in table_lines if not re.match(r"^\s*\|\s*-+", row)]
|
| 182 |
table_block = "\n".join(clean_rows).strip()
|
| 183 |
+
print(f"Parsed excel as excel_result: {table_block}")
|
| 184 |
return {
|
| 185 |
"excel_path": None,
|
| 186 |
"excel_sheet_name": None,
|
|
|
|
| 259 |
text = response.get("text", "").strip()
|
| 260 |
except Exception as e:
|
| 261 |
text = f"Error during transcription: {e}"
|
| 262 |
+
print(f"Transcripted as transcript: {text}")
|
| 263 |
return {
|
| 264 |
"audio_path": None,
|
| 265 |
"transcript": text
|