Final_Assignment_Template

Sleeping

App Files Files Community

Ghisalbertifederico committed on Apr 10

Commit

a0b70c8

verified ·

1 Parent(s): 3853928

Update app.py

Browse files

Files changed (1) hide show

app.py +142 -150

app.py CHANGED Viewed

@@ -1,4 +1,3 @@
-import io
 import os
 import re
 import sys
@@ -9,11 +8,8 @@ if sys.platform == "win32":
     sys.stdout.reconfigure(encoding="utf-8", errors="replace")
     sys.stderr.reconfigure(encoding="utf-8", errors="replace")
 import gradio as gr
-import pypdf
 import requests
-import inspect
 import pandas as pd
-import markdownify
 from typing import Literal, TypedDict, get_args
 from langchain_core.messages import HumanMessage, SystemMessage
 from langchain_openai import ChatOpenAI
@@ -22,22 +18,27 @@ from config import DEFAULT_API_URL, HF_TOKEN, GROQ_API_KEY, OPENROUTER_API_KEY,
 from tools import (
     web_search,
     wikipedia_search,
     get_youtube_transcript,
     describe_image,
     transcribe_audio,
     run_python_file,
-    read_task_file
 )
 # ---------------------------------------------------------------------------
 # Model fallback chain (primary → backup → last-resort)
 # ---------------------------------------------------------------------------
-MODEL_CONFIGS = [
-    {"model_id": "llama-3.3-70b-versatile"},
-    {"model_id": "meta-llama/llama-4-scout-17b-16e-instruct"},
-    {"model_id": "moonshotai/kimi-k2-instruct"},
-    {"model_id": "openai/gpt-oss-120b"},
-    {"model_id": "llama-3.1-8b-instant"},
 ]
 _LABELS = Literal[
@@ -53,23 +54,18 @@ _LABELS = Literal[
 def _download_task_file(task_id: str, api_url: str = DEFAULT_API_URL) -> tuple[bytes, str]:
     """Download a file attached to a GAIA task."""
     url = f"{api_url}/files/{task_id}"
-    # local_path = os.path.join(_DOWNLOAD_DIR, f"task_{task_id}_{file_name}")
-    # Try with auth first, then without (some endpoints don't require it)
-    # for headers in [
-    #     {"Authorization": f"Bearer {HF_TOKEN}"},
-    #     {},
-    # ]:
     try:
         headers = {"Authorization": f"Bearer {HF_TOKEN}"}
         resp = requests.get(url, headers=headers, timeout=30)
-    except requests.exceptions.HTTPError as e:
-        status = e.response.status_code if e.response is not None else "?"
-        print(f"Download attempt for {task_id} returned {status}")
     if resp.status_code != 200:
         print(f"[DEBUG] GET {url} → {resp.status_code}")
         return b"", ""
-    return resp.content, resp.headers.get("content-type", "").lower()
 class AgentState(TypedDict):
     question: str
@@ -80,72 +76,28 @@ class AgentState(TypedDict):
     file_name: str | None
-# class WebSearchAgent:
-#     def __init__(self, model_id: str = None):
-#         model_id = model_id or MODEL_CONFIGS[0]["model_id"]
-#         print(f"Initializing WebSearchAgent with {model_id}...")
-#         self.agent = CodeAgent(
-#             model=OpenAIServerModel(
-#                 model_id=model_id,
-#                 api_base="https://api.groq.com/openai/v1",
-#                 api_key=GROQ_API_KEY,
-#                 timeout=60,
-#             ),
-#             tools=[
-#                 web_search,
-#                 visit_webpage,
-#                 wikipedia_search,
-#                 get_youtube_transcript,
-#                 describe_image,
-#                 read_task_file,
-#                 transcribe_audio,
-#                 run_python_file,
-#             ],
-#             name="fast_agent",
-#             description="Answers questions using web search, Wikipedia, or attached files as appropriate.",
-#             additional_authorized_imports=[
-#                 "re", "math", "datetime", "collections", "itertools",
-#                 "statistics", "random", "unicodedata", "json", "string",
-#                 "pandas", "csv", "os", "subprocess",
-#             ],
-#             verbosity_level=1,
-#             max_steps=10,
-#         )
-#         # Prepend guidance so the LLM knows which tools exist
-#         self.agent.prompt_templates["system_prompt"] = self.agent.prompt_templates["system_prompt"] + "\n\n" + SYSTEM_PROMPT_ADDITION
-#         print("WebSearchAgent initialized.")
-#     def __call__(self, question: str) -> str:
-#         print(f"\nAgent received question: {question[:50]}...")
-#         try:
-#             result = self.agent.run(question)
-#             print("Agent final answer:", result)
-#             return result
-#         except Exception as e:
-#             print("Agent error:", e)
-#             msg = str(e)
-#             # Re-raise rate-limit errors so _answer_question can fall back to the next model
-#             if "rate_limit_exceeded" in msg or "429" in msg or "413" in msg or "Request too large" in msg or "model_decommissioned" in msg or "decommissioned" in msg:
-#                 raise
-#             return f"AGENT ERROR: {e}"
-MAX_WORKERS = 1        # sequential to stay within Groq's 12K tokens/min limit
 QUESTION_TIMEOUT = 300  # seconds before a single question is abandoned
-_exhausted_models: set[str] = set()  # models that hit daily rate limits are skipped for remaining questions
 # --------------------------------------------------------------------------- #
 #                         NODES  (LangGraph  functions)                        #
 # --------------------------------------------------------------------------- #
 _llm_router = ChatOpenAI(
-    model=MODEL_CONFIGS[0]["model_id"],
     base_url="https://api.groq.com/openai/v1",
     api_key=GROQ_API_KEY,
     timeout=60,
 )
-_llm_answer = _llm_router
 def route_question(state: AgentState) -> AgentState:
     """Label the task so we know which toolchain to invoke."""
@@ -168,65 +120,120 @@ def call_tools(state: AgentState) -> AgentState:
     matched_obj = re.search(r"https?://\S+", question)
     # ---- attachment (only when a file is actually attached to this task) -----
-    file_fetched = False
     if task_id and file_name:
         blob, ctype = _download_task_file(api_url=DEFAULT_API_URL, task_id=task_id)
-        if any([blob, ctype]):
-            file_fetched = True
-            print(f"[DEBUG] attachment type={ctype}")
-            if "python" in ctype:
                 print("[DEBUG] Working with a Python attachment file")
-                state["answer"] = run_python_file.invoke({"code": blob.decode("utf-8")})
                 state["label"] = "python_script"
                 return state
-            if "audio" in ctype:
                 print("[DEBUG] Working with an audio attachment file")
                 state["context"] = transcribe_audio.invoke({"audio_bytes": blob})
                 state["label"] = "audio"
                 return state
-            if "image" in ctype:
                 print("[DEBUG] Working with an image attachment file")
                 state["answer"] = describe_image.invoke({"img_bytes": blob, "question": question})
                 state["label"] = "image"
                 return state
-            # Excel / CSV / other binary
-            print("[DEBUG] Working with an Excel/CSV attachment file")
-            state["answer"] = read_task_file.invoke({"xls_bytes": blob})
             state["label"] = "other_ext"
             return state
-    # ---- label-based routing (always runs when no file was fetched) ----------
     if label == "youtube":
         print("[TOOL] youtube_transcript")
         if matched_obj:
             url = re.sub(r'[.,;:!?")\]]+$', '', matched_obj.group(0))
             print(f"[TOOL] fetching transcript for: {url}")
-            state["context"] = get_youtube_transcript.invoke({"video_url": url})
         else:
             print("[TOOL] youtube label but no URL found — falling back to web search")
             state["context"] = web_search.invoke({"query": question})
     elif label == "research":
-        print("[TOOL] web search")
         search_query_prompt = (
-            "Write a short Google search query (max 10 words) to answer this question. "
             "Output ONLY the query, nothing else.\n\nQuestion: " + question
         )
-        focused_query = _llm_router.invoke(search_query_prompt).content.strip().strip('"')
         print(f"[TOOL] search query: {focused_query}")
         search_json = web_search.invoke({"query": focused_query})
         wiki_text = wikipedia_search.invoke({"query": focused_query})
-        state["context"] = f"{search_json}\n\n{wiki_text}"
     else:
         print("[TOOL] reasoning only (no search)")
         state["context"] = ""
     return state
 def synthesize_response(state: AgentState) -> AgentState:
-    # Skip if a tool already produced a direct answer (image / python / excel paths)
-    if state.get("answer"):
-        print(f"[SYNTHESIZE] skipped — answer already set by tool")
         return state
     # Pass 1: chain-of-thought reasoning
     reasoning_prompt = [
         SystemMessage(content=get_prompt("reasoning_system")),
@@ -295,16 +302,24 @@ def build_graph() -> StateGraph:
 class LGAgent:
     """Callable wrapper used by run_and_submit_all."""
-    def __init__(self, model_id: str | None = None) -> None:
         global _llm_router, _llm_answer
-        mid = model_id or MODEL_CONFIGS[0]["model_id"]
         _llm_router = ChatOpenAI(
-            model=mid,
             base_url="https://api.groq.com/openai/v1",
             api_key=GROQ_API_KEY,
             timeout=60,
         )
-        _llm_answer = _llm_router
         self.graph = build_graph()
     def __call__(self, question: str, task_id: str | None = None, file_name: str | None = None) -> str:
@@ -348,70 +363,47 @@ def _to_str(val) -> str:
 def _answer_question(item: dict) -> str:
-    """Instantiate a fresh agent and answer one question, retrying on 429."""
     question_text = item["question"]
     task_id = item.get("task_id", "")
     file_name = item.get("file_name") or ""
-    # Download attached file (if any) and inject its path into the question
     augmented_question = question_text
-    # if file_name:
-    #     local_path = _download_task_file(task_id, file_name)
-    #     if local_path:
-    #         ext = os.path.splitext(file_name)[1].lower()
-    #         if ext in ('.png', '.jpg', '.jpeg', '.gif', '.webp'):
-    #             augmented_question = (
-    #                 f"{question_text}\n\n"
-    #                 f"[Task context: an image file is available at local path '{local_path}'. "
-    #                 f"Use the describe_image tool with this path and a focused question to analyze it.]"
-    #             )
-    #         elif ext == '.py':
-    #             augmented_question = (
-    #                 f"{question_text}\n\n"
-    #                 f"[Task context: a Python file is available at local path '{local_path}'. "
-    #                 f"Use run_python_file to execute it and/or read_task_file to read its source.]"
-    #             )
-    #         else:
-    #             augmented_question = (
-    #                 f"{question_text}\n\n"
-    #                 f"[Task context: an attached file is available at local path '{local_path}'. "
-    #                 f"Use the read_task_file tool with this path to read its contents.]"
-    #             )
-    for cfg in MODEL_CONFIGS:
-        model_id = cfg["model_id"]
-        if model_id in _exhausted_models:
-            print(f"[{model_id}] Skipped (previously rate-limited)")
             continue
         for attempt in range(2):
             try:
-                result = LGAgent(model_id=model_id)(augmented_question, task_id=task_id, file_name=file_name)
-                # Pause between questions to respect Groq's tokens/min limit
-                time.sleep(5)
                 return result
             except Exception as e:
                 msg = str(e)
-                # Model permanently removed by provider — skip forever
                 if "model_decommissioned" in msg or "decommissioned" in msg:
-                    _exhausted_models.add(model_id)
-                    print(f"[{model_id}] Model decommissioned — skipping permanently")
                     break
                 if "rate_limit_exceeded" in msg or "429" in msg or "413" in msg or "Request too large" in msg:
-                    # Check if it's a daily (TPD) limit — skip model for all remaining questions
                     if "on tokens per day" in msg or "TPD" in msg:
-                        _exhausted_models.add(model_id)
-                        print(f"[{model_id}] Daily token limit hit — skipping for remaining questions")
-                        break  # move to next model immediately
-                    # TPM or 413 — skip to next model for THIS question only
-                    # (don't add to _exhausted_models so it's retried on the next question)
-                    print(f"[{model_id}] TPM rate limit or request too large — trying next model for this question")
-                    break  # move to next model immediately
                 else:
-                    return f"AGENT ERROR: {e}"
-        else:
-            # Only reached if inner loop didn't break (both retries used on non-TPD limits)
-            print(f"[{model_id}] Exhausted retries, falling back to next model...")
-    return "AGENT ERROR: all models rate-limited"
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """

 import os
 import re
 import sys
     sys.stdout.reconfigure(encoding="utf-8", errors="replace")
     sys.stderr.reconfigure(encoding="utf-8", errors="replace")
 import gradio as gr
 import requests
 import pandas as pd
 from typing import Literal, TypedDict, get_args
 from langchain_core.messages import HumanMessage, SystemMessage
 from langchain_openai import ChatOpenAI
 from tools import (
     web_search,
     wikipedia_search,
+    visit_webpage,
     get_youtube_transcript,
     describe_image,
     transcribe_audio,
     run_python_file,
+    read_task_file,
 )
 # ---------------------------------------------------------------------------
 # Model fallback chain (primary → backup → last-resort)
 # ---------------------------------------------------------------------------
+# Use OpenRouter for the main reasoning model (better quality) and Groq for routing (fast)
+GROQ_MODELS = [
+    {"model_id": "llama-3.3-70b-versatile"},
+    {"model_id": "llama-3.1-8b-instant"},
+]
+OPENROUTER_MODELS = [
+    {"model_id": "google/gemini-2.0-flash-001"},
+    {"model_id": "qwen/qwen-2.5-72b-instruct"},
+    {"model_id": "meta-llama/llama-3.3-70b-instruct"},
 ]
 _LABELS = Literal[
 def _download_task_file(task_id: str, api_url: str = DEFAULT_API_URL) -> tuple[bytes, str]:
     """Fetch the file attached to a GAIA task.

     Sends a GET to ``{api_url}/files/{task_id}`` with a bearer
     ``Authorization`` header built from ``HF_TOKEN`` and a 30 second timeout.

     Returns:
         ``(content, content_type)`` on HTTP 200, where ``content_type`` is
         the lower-cased ``content-type`` response header (``""`` if absent).
         ``(b"", "")`` when the request raises a ``requests`` exception or
         the status code is not 200; both cases are logged to stdout.
     """
     url = f"{api_url}/files/{task_id}"

     try:
         resp = requests.get(
             url,
             headers={"Authorization": f"Bearer {HF_TOKEN}"},
             timeout=30,
         )
     except requests.exceptions.RequestException as e:
         # Network-level failure (connection, timeout, ...): report "no file".
         print(f"[DEBUG] Download error for {task_id}: {e}")
         return b"", ""

     if resp.status_code != 200:
         print(f"[DEBUG] GET {url} → {resp.status_code}")
         return b"", ""

     ctype = resp.headers.get("content-type", "").lower()
     print(f"[DEBUG] Downloaded file for {task_id}: {len(resp.content)} bytes, type={ctype}")
     return resp.content, ctype
 class AgentState(TypedDict):
     question: str
     file_name: str | None
+MAX_WORKERS = 1        # sequential to stay within rate limits
 QUESTION_TIMEOUT = 300  # seconds before a single question is abandoned
+_exhausted_models: set[str] = set()
 # --------------------------------------------------------------------------- #
 #                         NODES  (LangGraph  functions)                        #
 # --------------------------------------------------------------------------- #
+# Router uses Groq (fast, cheap)
 _llm_router = ChatOpenAI(
+    model=GROQ_MODELS[0]["model_id"],
     base_url="https://api.groq.com/openai/v1",
     api_key=GROQ_API_KEY,
     timeout=60,
 )
+# Reasoning uses OpenRouter (higher quality)
+_llm_answer = ChatOpenAI(
+    model=OPENROUTER_MODELS[0]["model_id"],
+    base_url="https://openrouter.ai/api/v1",
+    api_key=OPENROUTER_API_KEY,
+    timeout=120,
+)
 def route_question(state: AgentState) -> AgentState:
     """Label the task so we know which toolchain to invoke."""
     matched_obj = re.search(r"https?://\S+", question)
     # ---- attachment (only when a file is actually attached to this task) -----
     if task_id and file_name:
         blob, ctype = _download_task_file(api_url=DEFAULT_API_URL, task_id=task_id)
+        if blob:
+            print(f"[DEBUG] attachment type={ctype}, size={len(blob)} bytes")
+            if "python" in ctype or file_name.endswith(".py"):
                 print("[DEBUG] Working with a Python attachment file")
+                state["answer"] = run_python_file.invoke({"code": blob.decode("utf-8", errors="replace")})
                 state["label"] = "python_script"
                 return state
+            if "audio" in ctype or any(file_name.endswith(ext) for ext in [".mp3", ".wav", ".m4a", ".flac"]):
                 print("[DEBUG] Working with an audio attachment file")
                 state["context"] = transcribe_audio.invoke({"audio_bytes": blob})
                 state["label"] = "audio"
                 return state
+            if "image" in ctype or any(file_name.endswith(ext) for ext in [".png", ".jpg", ".jpeg", ".gif", ".webp"]):
                 print("[DEBUG] Working with an image attachment file")
                 state["answer"] = describe_image.invoke({"img_bytes": blob, "question": question})
                 state["label"] = "image"
                 return state
+            # Excel / CSV / PDF / other binary
+            print("[DEBUG] Working with a data file attachment")
+            state["context"] = read_task_file.invoke({"xls_bytes": blob})
             state["label"] = "other_ext"
             return state
+    # ---- label-based routing (when no file was fetched) ----------
     if label == "youtube":
         print("[TOOL] youtube_transcript")
         if matched_obj:
             url = re.sub(r'[.,;:!?")\]]+$', '', matched_obj.group(0))
             print(f"[TOOL] fetching transcript for: {url}")
+            transcript = get_youtube_transcript.invoke({"video_url": url})
+            if transcript and transcript != "TRANSCRIPT_UNAVAILABLE":
+                state["context"] = transcript
+            else:
+                # Fallback: search for info about the video
+                print("[TOOL] Transcript unavailable — searching web for video info")
+                search_json = web_search.invoke({"query": f"youtube {url} transcript content"})
+                state["context"] = f"TRANSCRIPT_UNAVAILABLE. Web search results about the video:\n{search_json}"
         else:
             print("[TOOL] youtube label but no URL found — falling back to web search")
             state["context"] = web_search.invoke({"query": question})
     elif label == "research":
+        print("[TOOL] research — multi-step search")
+        # Step 1: Generate a focused search query
         search_query_prompt = (
+            "Write a short, precise search query (max 10 words) to answer this question. "
+            "Include key proper nouns, dates, and specific terms. "
             "Output ONLY the query, nothing else.\n\nQuestion: " + question
         )
+        focused_query = _llm_router.invoke(search_query_prompt).content.strip().strip('"').strip("'")
         print(f"[TOOL] search query: {focused_query}")
+        # Step 2: Run web search, then Wikipedia (two sequential calls)
         search_json = web_search.invoke({"query": focused_query})
         wiki_text = wikipedia_search.invoke({"query": focused_query})
+        context_parts = []
+        # Step 3: Visit top search result URLs to get full page content
+        if search_json and search_json != "No search results found.":
+            context_parts.append(f"WEB SEARCH RESULTS:\n{search_json}")
+            try:
+                import json as _json
+                hits = _json.loads(search_json)
+                # Visit top 2 result URLs for detailed content
+                visited = 0
+                for hit in hits[:4]:
+                    link = hit.get("link", "")
+                    if link and visited < 2:
+                        page_content = visit_webpage.invoke({"url": link})
+                        if page_content and "Could not fetch" not in page_content:
+                            context_parts.append(f"\nPAGE CONTENT ({link}):\n{page_content[:15000]}")
+                            visited += 1
+            except Exception as e:
+                print(f"[TOOL] Error visiting search results: {e}")
+        if wiki_text and "No Wikipedia results found" not in wiki_text and "failed" not in wiki_text.lower():
+            context_parts.append(f"\nWIKIPEDIA:\n{wiki_text}")
+        # Step 4: If initial results are thin, try an alternative query
+        if not context_parts or all("No " in p or "error" in p.lower() for p in context_parts):
+            print("[TOOL] Initial search thin — trying alternative query")
+            alt_query = focused_query.replace('"', '').replace("'", "")
+            if alt_query != focused_query:
+                alt_results = web_search.invoke({"query": alt_query})
+                if alt_results and alt_results != "No search results found.":
+                    context_parts.append(f"\nALTERNATIVE SEARCH:\n{alt_results}")
+        state["context"] = "\n\n".join(context_parts) if context_parts else "No information found from web search or Wikipedia."
     else:
+        # Logic / pure reasoning — no search needed
         print("[TOOL] reasoning only (no search)")
         state["context"] = ""
     return state
 def synthesize_response(state: AgentState) -> AgentState:
+    # If a tool produced a direct final answer (python execution), skip reasoning
+    if state.get("answer") and state["label"] == "python_script":
+        print(f"[SYNTHESIZE] skipped — python output: {state['answer'][:200]}")
         return state
+    # For image: the vision model already answered, but wrap it through reasoning
+    # to extract the precise answer from the description
+    if state.get("answer") and state["label"] == "image":
+        state["context"] = f"VISION MODEL OUTPUT:\n{state['answer']}"
+        state["answer"] = ""  # clear so reasoning runs
+    # For other_ext with context (file data), make sure reasoning runs
+    if state["label"] == "other_ext" and state.get("context") and not state.get("answer"):
+        pass  # context is set, reasoning will run below
     # Pass 1: chain-of-thought reasoning
     reasoning_prompt = [
         SystemMessage(content=get_prompt("reasoning_system")),
 class LGAgent:
     """Callable wrapper used by run_and_submit_all."""
+    def __init__(self, model_id: str | None = None, answer_model_id: str | None = None) -> None:
         global _llm_router, _llm_answer
+        # Router: fast Groq model
+        router_mid = model_id or GROQ_MODELS[0]["model_id"]
         _llm_router = ChatOpenAI(
+            model=router_mid,
             base_url="https://api.groq.com/openai/v1",
             api_key=GROQ_API_KEY,
             timeout=60,
         )
+        # Answering: higher quality OpenRouter model
+        answer_mid = answer_model_id or OPENROUTER_MODELS[0]["model_id"]
+        _llm_answer = ChatOpenAI(
+            model=answer_mid,
+            base_url="https://openrouter.ai/api/v1",
+            api_key=OPENROUTER_API_KEY,
+            timeout=120,
+        )
         self.graph = build_graph()
     def __call__(self, question: str, task_id: str | None = None, file_name: str | None = None) -> str:
 def _answer_question(item: dict) -> str:
     """Answer one question, falling back through the OpenRouter answer models.

     For every entry of ``OPENROUTER_MODELS`` that is not in
     ``_exhausted_models`` a fresh ``LGAgent`` is built (router fixed to the
     first ``GROQ_MODELS`` entry) and invoked, with up to two attempts per
     model.

     Args:
         item: Question record. Must contain ``"question"``; ``"task_id"``
             and ``"file_name"`` are optional.

     Returns:
         The agent's answer, or ``"AGENT ERROR: all models exhausted"`` when
         every answer model failed or was skipped.
     """
     question_text = item["question"]
     task_id = item.get("task_id", "")
     file_name = item.get("file_name") or ""
     max_attempts = 2

     for answer_cfg in OPENROUTER_MODELS:
         answer_model_id = answer_cfg["model_id"]
         if answer_model_id in _exhausted_models:
             print(f"[{answer_model_id}] Skipped (previously rate-limited)")
             continue

         for attempt in range(max_attempts):
             try:
                 agent = LGAgent(
                     model_id=GROQ_MODELS[0]["model_id"],
                     answer_model_id=answer_model_id,
                 )
                 result = agent(question_text, task_id=task_id, file_name=file_name)
             except Exception as e:
                 msg = str(e)

                 # "decommissioned" also matches "model_decommissioned".
                 if "decommissioned" in msg:
                     _exhausted_models.add(answer_model_id)
                     print(f"[{answer_model_id}] Model decommissioned — skipping permanently")
                     break

                 too_large = "413" in msg or "Request too large" in msg
                 rate_limited = "rate_limit_exceeded" in msg or "429" in msg
                 if not (too_large or rate_limited):
                     print(f"[{answer_model_id}] Error: {msg[:200]}")
                     break  # try next model

                 if "on tokens per day" in msg or "TPD" in msg:
                     _exhausted_models.add(answer_model_id)
                     print(f"[{answer_model_id}] Daily token limit hit — skipping for remaining questions")
                     break

                 if too_large:
                     # Re-sending the identical oversized request cannot
                     # succeed, so do not wait and retry on this model.
                     print(f"[{answer_model_id}] Request too large — trying next model")
                     break

                 if attempt + 1 >= max_attempts:
                     # No retry left: sleeping here would only delay the fallback.
                     print(f"[{answer_model_id}] Rate limited — retries exhausted, trying next model")
                     break

                 # Cap the wait at 30s and log the wait actually applied.
                 wait = min(_parse_retry_after(msg), 30)
                 print(f"[{answer_model_id}] Rate limited — waiting {wait:.0f}s then retry")
                 time.sleep(wait)
             else:
                 # Pause between questions to respect rate limits
                 time.sleep(3)
                 return result

     return "AGENT ERROR: all models exhausted"
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """