New_Final_Assignment

Sleeping

App Files Files Community

naman1102 committed on Jun 2, 2025

Commit

7c5f7b3

1 Parent(s): 838224c

Update tools.py

Browse files

Files changed (1) hide show

tools.py +29 -43

tools.py CHANGED Viewed

@@ -41,7 +41,7 @@ def web_search_tool(state: AgentState) -> AgentState:
     """
     Expects: state["web_search_query"] is a non‐empty string.
     Returns: {"web_search_query": None, "web_search_result": <string>}
-    We also clear web_search_query so we don’t loop forever.
     If the result is a DuckDuckGo 202 Ratelimit error, retry up to 5 times with a 5 second sleep between attempts.
     """
     # print("reached web search tool")
@@ -54,11 +54,12 @@ def web_search_tool(state: AgentState) -> AgentState:
     for attempt in range(max_retries):
         result_text = ddg.run(query)
         if "202 Ratelimit" not in result_text:
             break
         if attempt < max_retries - 1:
             print(f"web_search_result: rate limit error, retrying in 10 seconds")
             time.sleep(4)
-    print(f"web_search_result reached ")
     return {
         "web_search_query": None,
         "web_search_result": result_text
@@ -73,31 +74,27 @@ def ocr_image_tool(state: AgentState) -> AgentState:
       • A Task ID string like "abc123", in which case we GET /files/abc123.
     Returns:
       { "ocr_path": None, "ocr_result": "<OCRed text or error string>" }
     """
     path_or_id = state.get("ocr_path", "")
     if not path_or_id:
         return {}
-    # 1) If local file exists, use it. Otherwise, treat "path_or_id" as task_id and download.
-    if os.path.exists(path_or_id):
-        local_img = path_or_id
-    else:
-        # Assume it's a task_id; try to download a PNG or JPG
-        # (We don’t know extension, so try common ones in order)
-        local_img = ""
-        for ext in ("png", "jpg", "jpeg"):
-            candidate = _download_file_for_task(path_or_id, ext)
-            if candidate:
-                local_img = candidate
-                break
     if not local_img or not os.path.exists(local_img):
         return {
             "ocr_path": None,
-            "ocr_result": "Error: No image file found (neither local nor downloadable)."
         }
-    # 2) Run OCR
     try:
         img = Image.open(local_img)
         text = pytesseract.image_to_string(img).strip() or "(no visible text)"
@@ -121,19 +118,17 @@ def parse_excel_tool(state: AgentState) -> AgentState:
         "excel_sheet_name": None,
         "excel_result": "<stringified records or Markdown table>"
       }
     """
     path_or_id = state.get("excel_path", "")
     sheet = state.get("excel_sheet_name", "")
     if not path_or_id:
         return {}
-    # 1) If local .xlsx exists, use it. Otherwise, try downloading.
-    if os.path.exists(path_or_id):
-        local_xlsx = path_or_id
-    else:
-        local_xlsx = _download_file_for_task(path_or_id, "xlsx")
-    # 2) If we finally have a real file, read it
     if local_xlsx and os.path.exists(local_xlsx):
         try:
             xls = pd.ExcelFile(local_xlsx)
@@ -152,7 +147,7 @@ def parse_excel_tool(state: AgentState) -> AgentState:
             print(f">>> parse_excel_tool: Error reading Excel file {local_xlsx}: {e}")
             # Fall back to scanning for Markdown below
-    # 3) Fallback: scan any HumanMessage for a Markdown‐style table
     messages = state.get("messages", [])
     table_lines = []
     collecting = False
@@ -177,7 +172,6 @@ def parse_excel_tool(state: AgentState) -> AgentState:
             "excel_result": "Error: No Excel file found and no Markdown table detected in prompt."
         }
-    # 4) Strip out separator rows and return the table block
     clean_rows = [row for row in table_lines if not re.match(r"^\s*\|\s*-+", row)]
     table_block = "\n".join(clean_rows).strip()
     print(f"Parsed excel as excel_result: {table_block}")
@@ -210,52 +204,44 @@ from state import AgentState
 def audio_transcriber_tool(state: AgentState) -> AgentState:
     """
-    LangGraph tool for transcribing audio via OpenAI’s Whisper API.
     Expects: state["audio_path"] to be either:
       • A local file path (e.g. "./hf_files/abc.mp3"), OR
       • A Task ID (e.g. "abc123"), in which case we try downloading
         GET {DEFAULT_API_URL}/files/{task_id} with .mp3, .wav, .m4a extensions.
     Returns:
       {
         "audio_path": None,
         "transcript": "<text or error message>"
       }
     """
     path_or_id = state.get("audio_path", "")
     if not path_or_id:
         return {}
-    # 1) If local file exists, use it. Otherwise, treat path_or_id as task_id and try downloads:
-    if os.path.exists(path_or_id):
-        local_audio = path_or_id
-    else:
-        local_audio = ""
-        for ext in ("mp3", "wav", "m4a"):
-            candidate = _download_file_for_task(path_or_id, ext)
-            if candidate:
-                local_audio = candidate
-                break
     if not local_audio or not os.path.exists(local_audio):
-        # Neither a real file nor a downloadable attachment
         return {
             "audio_path": None,
-            "transcript": "Error: No audio file found (neither local nor downloadable)."
         }
-    # 2) Send to OpenAI Whisper
     try:
         openai.api_key = os.getenv("OPENAI_API_KEY")
         if not openai.api_key:
             raise RuntimeError("OPENAI_API_KEY is not set in environment.")
         with open(local_audio, "rb") as audio_file:
-            # For OpenAI Python library v0.27.0+:
             response = openai.Audio.transcribe("whisper-1", audio_file)
-            # If you’re on an older library:
-            # response = openai.Audio.create_transcription(file=audio_file, model="whisper-1")
         text = response.get("text", "").strip()
     except Exception as e:
         text = f"Error during transcription: {e}"

     """
     Expects: state["web_search_query"] is a non‐empty string.
     Returns: {"web_search_query": None, "web_search_result": <string>}
+    We also clear web_search_query so we don't loop forever.
     If the result is a DuckDuckGo 202 Ratelimit error, retry up to 5 times with a 5 second sleep between attempts.
     """
     # print("reached web search tool")
     for attempt in range(max_retries):
         result_text = ddg.run(query)
         if "202 Ratelimit" not in result_text:
+            print(f"web_search_result success:")
             break
         if attempt < max_retries - 1:
             print(f"web_search_result: rate limit error, retrying in 10 seconds")
             time.sleep(4)
+    # print(f"web_search_result reached ")
     return {
         "web_search_query": None,
         "web_search_result": result_text
       • A Task ID string like "abc123", in which case we GET /files/abc123.
     Returns:
       { "ocr_path": None, "ocr_result": "<OCRed text or error string>" }
+    Always attempts to download the file for the given path or task ID.
     """
     path_or_id = state.get("ocr_path", "")
     if not path_or_id:
         return {}
+    # Always attempt to download the file, regardless of local existence
+    local_img = ""
+    for ext in ("png", "jpg", "jpeg"):
+        candidate = _download_file_for_task(path_or_id, ext)
+        if candidate:
+            local_img = candidate
+            break
     if not local_img or not os.path.exists(local_img):
         return {
             "ocr_path": None,
+            "ocr_result": "Error: No image file found (download failed)."
         }
+    # Run OCR
     try:
         img = Image.open(local_img)
         text = pytesseract.image_to_string(img).strip() or "(no visible text)"
         "excel_sheet_name": None,
         "excel_result": "<stringified records or Markdown table>"
       }
+    Always attempts to download the file for the given path or task ID.
     """
     path_or_id = state.get("excel_path", "")
     sheet = state.get("excel_sheet_name", "")
     if not path_or_id:
         return {}
+    # Always attempt to download the file, regardless of local existence
+    local_xlsx = _download_file_for_task(path_or_id, "xlsx")
+    # If we finally have a real file, read it
     if local_xlsx and os.path.exists(local_xlsx):
         try:
             xls = pd.ExcelFile(local_xlsx)
             print(f">>> parse_excel_tool: Error reading Excel file {local_xlsx}: {e}")
             # Fall back to scanning for Markdown below
+    # Fallback: scan any HumanMessage for a Markdown‐style table
     messages = state.get("messages", [])
     table_lines = []
     collecting = False
             "excel_result": "Error: No Excel file found and no Markdown table detected in prompt."
         }
     clean_rows = [row for row in table_lines if not re.match(r"^\s*\|\s*-+", row)]
     table_block = "\n".join(clean_rows).strip()
     print(f"Parsed excel as excel_result: {table_block}")
 def audio_transcriber_tool(state: AgentState) -> AgentState:
     """
+    LangGraph tool for transcribing audio via OpenAI's Whisper API.
     Expects: state["audio_path"] to be either:
       • A local file path (e.g. "./hf_files/abc.mp3"), OR
       • A Task ID (e.g. "abc123"), in which case we try downloading
         GET {DEFAULT_API_URL}/files/{task_id} with .mp3, .wav, .m4a extensions.
     Returns:
       {
         "audio_path": None,
         "transcript": "<text or error message>"
       }
+    Always attempts to download the file for the given path or task ID.
     """
     path_or_id = state.get("audio_path", "")
     if not path_or_id:
         return {}
+    # Always attempt to download the file, regardless of local existence
+    local_audio = ""
+    for ext in ("mp3", "wav", "m4a"):
+        candidate = _download_file_for_task(path_or_id, ext)
+        if candidate:
+            local_audio = candidate
+            break
     if not local_audio or not os.path.exists(local_audio):
         return {
             "audio_path": None,
+            "transcript": "Error: No audio file found (download failed)."
         }
+    # Send to OpenAI Whisper
     try:
         openai.api_key = os.getenv("OPENAI_API_KEY")
         if not openai.api_key:
             raise RuntimeError("OPENAI_API_KEY is not set in environment.")
         with open(local_audio, "rb") as audio_file:
             response = openai.Audio.transcribe("whisper-1", audio_file)
         text = response.get("text", "").strip()
     except Exception as e:
         text = f"Error during transcription: {e}"