Final_Assignment_debug

Sleeping

App Files Files Community

shan gao committed on Sep 20, 2025

Commit

a53e629

1 Parent(s): 15f210e

change

Browse files

Files changed (3) hide show

agent.py +19 -395
app.py +0 -5
requirements.txt +1 -8

agent.py CHANGED Viewed

@@ -1,3 +1,4 @@
 # Develop an AI agent with LangGraph and LangChain
 # to answer the questions in the "gaia-benchmark/GAIA" dataset.
@@ -13,24 +14,7 @@ from langchain_core.tools import tool
 from langchain_core.messages import HumanMessage, SystemMessage
 from langchain_openai import ChatOpenAI
 from langgraph.graph import StateGraph, START, END
-from tavily import TavilyClient
-import serpapi
-import trafilatura
-from readability import Document
-import html as _html
-import wikipedia
-from urllib.parse import parse_qs
-from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
-import yt_dlp
-# ==== NEW: (optional) tiny helpers used by browsing nodes ====
-def _has_search_key() -> bool:
-    """Return True if any supported search backend is configured."""
-    return bool(
-        os.getenv("TAVILY_API_KEY")
-        or os.getenv("SERPAPI_API_KEY")
-        or (os.getenv("GOOGLE_API_KEY") and os.getenv("GOOGLE_CSE_ID"))
-    )
 # Optional: pdf parsing if GAIA sometimes includes PDFs
 try:
@@ -42,8 +26,7 @@ except Exception:
 # -------------- State -------------
 class EvidenceItem(TypedDict):
-    # ==== CHANGED: expanded allowed kinds to match actual usage paths ====
-    kind: Literal["audio_transcript","image_ocr","image_vqa","doc_text","unknown_file","preprocess_error"]
     text: str
     path: Optional[str]
     meta: Dict[str, Any]
@@ -57,12 +40,6 @@ class AgentState(TypedDict):
     answer: Optional[str]
     parsed_final_answer: Optional[str]
     emit_final_answer: bool  # <<< add this (default True if you want old behavior)
-    # ==== NEW: state used by browse pipeline (optional) ====
-    use_browsing: Optional[bool]
-    web_hits: Optional[List[Dict[str, str]]]
-    # ==== NEW: urls found directly in the question ====
-    question_urls: Optional[List[str]]
-    question_youtube_urls: Optional[List[str]]
 # -------------- helpers ---------------
 def _filename_from_cd(cd: str) -> str | None:
@@ -98,10 +75,6 @@ def _summarize_evidence(evidence: List[Dict[str, Any]], limit_chars: int = 6000)
         tag = f"{e.get('kind','?')}"
         if meta.get("mime"):
             tag += f"({meta['mime']})"
-        if meta.get("title"):
-            tag += f"[{meta['title']}]"
-        if meta.get("url"):
-            tag += f"<{meta['url']}>"
         chunks.append(f"[{i}:{tag}] {t}")
     out = "\n".join(chunks)
     return out if len(out) <= limit_chars else out[:limit_chars] + " …"
@@ -156,13 +129,6 @@ def _convert_to_wav_mono16k(src_path: str) -> str:
         raise RuntimeError(f"ffmpeg failed: {p.stderr[-500:]}")
     return out
-# ==== NEW: URL helpers ====
-_URL_RE = re.compile(r'https?://\S+')
-def _extract_urls(text: str) -> List[str]:
-    return _URL_RE.findall(text or "")
 # ----------------------Tools ----------------------
 @tool
 def download_file(url: str, headers: dict | None = None, auth_token: str | None = None) -> str:
@@ -197,6 +163,10 @@ def download_file(url: str, headers: dict | None = None, auth_token: str | None
         out_dir = tempfile.mkdtemp(prefix="gaia_tmpdl_")
         out_path = os.path.join(out_dir, fname)
         print("out_path:", out_path)
         with open(out_path, "wb") as f:
@@ -207,9 +177,6 @@ def download_file(url: str, headers: dict | None = None, auth_token: str | None
         return out_path
-# ==== NEW: cache Whisper model so we don't reload each call ====
-_WHISPER = None
 @tool
 def transcribe_audio(path: str, model_size: str = "base") -> str:
     """
@@ -217,15 +184,13 @@ def transcribe_audio(path: str, model_size: str = "base") -> str:
     Returns the transcript text; raises on failure (caller handles).
     """
     print("running transcribe_audio")
-    global _WHISPER
     try:
-        if _WHISPER is None:
-            _WHISPER = whisper.load_model(model_size)
-        result = _WHISPER.transcribe(path)
         return (result.get("text") or "").strip()
     except Exception as e:
         raise RuntimeError(f"Whisper error: {e}")
 @tool
 def ocr_image(path: str) -> str:
@@ -237,194 +202,6 @@ def ocr_image(path: str) -> str:
     return text.strip()
-# ==== NEW: WEB / WIKI / YOUTUBE TOOLS =========================================
-# Choose your search backend (Tavily simplest). Set env var before use.
-_USE_TAVILY = False  # True -> Tavily-backed web_search; False (current value) -> SerpAPI-backed web_search
-if _USE_TAVILY:
-    @tool
-    def web_search(query: str, k: int = 6) -> List[Dict[str, str]]:
-        """
-        Web search via Tavily. Returns a list of {title, url, snippet}.
-        Requires TAVILY_API_KEY.
-        """
-        try:
-            tv = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))
-            res = tv.search(
-                query=query,
-                search_depth="advanced",
-                max_results=k,
-                include_answer=False,
-                include_images=False,
-            )
-            out = []
-            for r in res.get("results", []):
-                out.append({
-                    "title": r.get("title",""),
-                    "url": r.get("url",""),
-                    "snippet": (r.get("content","") or "")[:400]
-                })
-            return out
-        except Exception as e:
-            return [{"title":"", "url":"", "snippet": f"[search error: {e}]"}]
-else:
-    @tool
-    def web_search(query: str, k: int = 6) -> List[Dict[str, str]]:
-        """
-        Web search via SerpAPI. Returns a list of {title, url, snippet}.
-        Requires SERPAPI_API_KEY.
-        """
-        try:
-            params = {"engine":"google", "q":query, "num":k, "api_key":os.getenv("SERPAPI_API_KEY")}
-            search = serpapi.search(params)
-            # results = search.get_dict()
-            results = search
-            items = results.get("organic_results", [])
-            out = []
-            for it in items[:k]:
-                out.append({
-                    "title": it.get("title",""),
-                    "url": it.get("link",""),
-                    "snippet": (it.get("snippet","") or "")[:400]
-                })
-            return out
-        except Exception as e:
-            return [{"title":"", "url":"", "snippet": f"[search error: {e}]"}]
-@tool
-def fetch_url_text(url: str, max_chars: int = 12000, timeout: int = 30) -> Dict[str, Any]:
-    """
-    Download a web page and extract main article text using trafilatura,
-    with a readability-lxml fallback. Returns {url, title, text}.
-    """
-    sess = requests.Session()
-    headers = {
-        "User-Agent": "gaia-agent/1.0 (+https://example.org)",
-        "Accept": "text/html,*/*;q=0.8",
-    }
-    try:
-        r = sess.get(url, headers=headers, timeout=timeout)
-        r.raise_for_status()
-        html_content = r.text
-    except Exception as e:
-        return {"url": url, "title": "", "text": f"[fetch error: {e}]"}
-    # 1) try trafilatura (best for boilerplate removal)
-    try:
-        downloaded = trafilatura.extract(html_content, include_comments=False, include_tables=False, url=url)
-        if downloaded and len(downloaded) > 200:
-            text = downloaded
-            title = ""
-        else:
-            raise ValueError("trafilatura extraction too short")
-    except Exception:
-        # 2) fallback: readability
-        try:
-            doc = Document(html_content)
-            title = doc.short_title() or ""
-            text = doc.summary(html_partial=False)
-            # rudimentary HTML strip
-            text = re.sub(r"<[^>]+>", " ", text)
-            text = re.sub(r"\s+", " ", text).strip()
-        except Exception as e2:
-            return {"url": url, "title": "", "text": f"[extraction error: {e2}]"}
-    if len(text) > max_chars:
-        text = text[:max_chars] + " …"
-    # Try to fill title if empty
-    if not title:
-        m = re.search(r"<title[^>]*>(.*?)</title>", html_content, flags=re.I|re.S)
-        if m:
-            title = _html.unescape(m.group(1).strip())
-    return {"url": url, "title": title or "", "text": text}
-@tool
-def wikipedia_lookup(query: str, sentences: int = 4) -> Dict[str, Any]:
-    """
-    Simple Wikipedia lookup. Returns {title, url, summary}.
-    """
-    try:
-        wikipedia.set_lang("en")
-        try:
-            title = wikipedia.search(query, results=1)[0]
-        except Exception as e:
-            return {"title":"", "url":"", "summary": f"[wikipedia search error: {e}]"}
-        try:
-            summary = wikipedia.summary(title, sentences=sentences, auto_suggest=False)
-            page = wikipedia.page(title, auto_suggest=False, preload=False)
-            return {"title": page.title, "url": page.url, "summary": summary}
-        except Exception as e:
-            return {"title": title, "url":"", "summary": f"[wikipedia fetch error: {e}]"}
-    except Exception as e:
-        return {"title":"", "url":"", "summary": f"[wikipedia import error: {e}]"}
-@tool
-def youtube_get_transcript(url_or_id: str, prefer_langs: List[str] | None = None) -> str:
-    """
-    Get YouTube transcript via API (no download). Returns plain text.
-    """
-    print('try to get youtube video transcript')
-    try:
-        prefer_langs = prefer_langs or ["en", "en-US", "en-GB", "auto"]
-        vid = url_or_id
-        print("vid: ", vid)
-        if "youtube.com" in url_or_id or "youtu.be" in url_or_id:
-            u = urlparse(url_or_id)
-            if u.netloc.endswith("youtu.be"):
-                vid = u.path.lstrip("/")
-            else:
-                vid = parse_qs(u.query).get("v", [""])[0]
-        trs_list = YouTubeTranscriptApi.list_transcripts(vid)
-        # choose first matching language
-        for lang in prefer_langs:
-            try:
-                trs = trs_list.find_transcript([lang])
-                chunks = trs.fetch()
-                print("transcript from youtube website?")
-                print(" ".join([c["text"] for c in chunks if c.get("text")]).strip())
-                return " ".join([c["text"] for c in chunks if c.get("text")]).strip()
-            except Exception:
-                continue
-        # fallback: first any transcript
-        trs = list(trs_list)[0]
-        chunks = trs.fetch()
-        print("transcript from youtube website?")
-        print(" ".join([c["text"] for c in chunks if c.get("text")]).strip())
-        return " ".join([c["text"] for c in chunks if c.get("text")]).strip()
-    except (TranscriptsDisabled, NoTranscriptFound):
-        return "[no captions available]"
-    except Exception as e:
-        return f"[youtube transcript error: {e}]"
-@tool
-def youtube_transcribe_audio(url: str, model_size: str = "base") -> str:
-    """
-    Download YouTube audio (yt-dlp) and transcribe with Whisper.
-    """
-    tmpdir = tempfile.mkdtemp(prefix="gaia_yt_")
-    outfile = os.path.join(tmpdir, "%(id)s.%(ext)s")
-    ydl_opts = {
-        "format": "bestaudio/best",
-        "outtmpl": outfile,
-        "quiet": True,
-        "no_warnings": True,
-        "noplaylist": True,
-    }
-    try:
-        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-            info = ydl.extract_info(url, download=True)
-            path = ydl.prepare_filename(info)
-        # convert & transcribe
-        wav = _convert_to_wav_mono16k(path)
-        txt = transcribe_audio.invoke({"path": wav, "model_size": model_size})
-        return txt
-    except Exception as e:
-        return f"[youtube download/transcribe error: {e}]"
 # ------------------------------- Nodes ------------------------------
 def check_attachment_node(state: AgentState) -> AgentState:
     """Check if there is attachment."""
@@ -506,6 +283,7 @@ def preprocess_node(state: AgentState) -> AgentState:
         try:
             if mime and mime.startswith("audio"):
                 print("mime start with audio")
                 # --- ASR ---
                 try:
                     wav = _convert_to_wav_mono16k(path)
@@ -574,7 +352,7 @@ def solve_multimodal_node(state: AgentState) -> AgentState:
     vision_llm = ChatOpenAI(model="gpt-4o", temperature=0)  # vision-capable
     sys = SystemMessage(content=(
         "You solve GAIA tasks using the provided evidence and attached images.\n"
-        "Be precise, quote numbers/strings exactly. If uncertain, say so.\n"
         "Your answer to the GAIA tasks should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string. If your answer only include a single word, make the first letter capital.\n" + end_instr
     ))
@@ -623,7 +401,7 @@ def solve_text_only_node(state: "AgentState") -> "AgentState":
     sys = SystemMessage(content=(
         "You solve GAIA tasks. Use careful step-by-step reasoning but keep it concise.\n"
-        "You can use the provided textual evidence if there is any. \n"
         "Your answer to the GAIA tasks should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string. If your answer only include a single word, make the first letter capital.\n" + end_instr
     ))
@@ -649,7 +427,7 @@ def validate_format_node(state: AgentState) -> AgentState:
     emit = bool(state.get("emit_final_answer", True))
     txt = (state.get("answer") or "").strip()
     if not txt:
         if emit:
             state["answer"] = "No answer generated.\n\nfinal_answer: [NO_ANSWER]"
@@ -690,151 +468,13 @@ def has_images(state: AgentState) -> bool:
             return True
     return False
-# ==== CHANGED: fix return type Literal to match actual branch key ====
 def route_after_preprocess(state: AgentState) -> Literal["vision","text"]:
     return "vision" if has_images(state) else "text"
-# ==== NEW: Browsing router ====
-def needs_browsing(q: str) -> bool:
-    q = (q or "").lower()
-    hot = ["today","current","latest","price","How","who","where","what","How many",
-           "2023","2024","2025","news","wins","Which",
-           "http://","https://","wikipedia","youtube.com"]
-    # Only browse if we *also* have a search key, so the sample runs without keys.
-    return _has_search_key() and any(w in q for w in hot)
-# ==== NEW: Decide browse node ====
-def decide_browse_node(state: AgentState) -> AgentState:
-    print("enter decide_browse_node")
-    q = state.get("question", "")
-    urls = _extract_urls(q)
-    yt_urls = [u for u in urls if _is_youtube(u)]
-    # Save for later stages
-    state["question_urls"] = urls
-    state["question_youtube_urls"] = yt_urls
-    # Browse if:
-    #  - we have any YouTube links in the question (can handle w/o search key), OR
-    #  - the normal heuristic says we should browse (requires a search key)
-    state["use_browsing"] = bool(yt_urls) or needs_browsing(q)
-    return state
-def route_browse(state: AgentState) -> Literal["browse","skip"]:
-    return "browse" if state.get("use_browsing") else "skip"
-# ==== NEW: Search node ====
-def search_node(state: AgentState) -> AgentState:
-    print("enter search_node")
-    q = state.get("question","")
-    # Start with YouTube links found in the question
-    preseed = [{"title": "(from question)", "url": u, "snippet": ""}
-               for u in (state.get("question_youtube_urls") + state.get("question_urls") or [])]
-    # Do a web search only if keys are configured
-    hits = []
-    if _has_search_key():
-        hits = web_search.invoke({"query": q, "k": 6}) or []
-    # Optionally seed Wikipedia for short queries
-    if len(q.split()) <= 30:  #8
-        wiki = wikipedia_lookup.invoke({"query": q, "sentences": 4})
-        if (wiki.get("summary") or "").strip():
-            state.setdefault("evidence", []).append({
-                "kind": "doc_text",
-                "text": wiki["summary"],
-                "path": None,
-                "meta": {"source": "wikipedia", "title": wiki.get("title",""),
-                         "url": wiki.get("url",""), "mime":"text/plain"}
-            })
-    # Combine: question YouTube links first, then search hits
-    state["web_hits"] = preseed + hits
-    return state
-def _is_youtube(u: str) -> bool:
-    try:
-        net = urlparse(u).netloc.lower()
-        return ("youtube.com" in net) or ("youtu.be" in net)
-    except Exception:
-        return False
-def crawl_node(state: AgentState) -> AgentState:
-    print("enter crawl_node")
-    ev = list(state.get("evidence", []))
-    hits: List[Dict[str,str]] = state.get("web_hits", []) or []
-    print("hits: ", hits)
-    # choose top M distinct domains
-    def _domain(u: str) -> str:
-        try: return urlparse(u).netloc.lower().lstrip("www.")
-        except: return ""
-    seen_domains = set()
-    picked = []
-    for h in hits:
-        u = h.get("url","")
-        d = _domain(u)
-        if not u or not d:
-            continue
-        if d in seen_domains:
-            continue
-        seen_domains.add(d)
-        picked.append(h)
-        if len(picked) >= 4:
-            break
-    print("picked: ", picked)
-    # Fetch & extract
-    for h in picked:
-        u = h["url"]
-        print("url: ", u)
-        title = h.get("title","")
-        # Special-case YouTube
-        if _is_youtube(u):
-            print("is_youtube? ", _is_youtube(u))
-            cap = youtube_get_transcript.invoke({"url_or_id": u})
-            print('cap: ', cap)
-            if cap and not cap.startswith("[no captions"):
-                ev.append({"kind":"doc_text","text":cap,"path":None,
-                           "meta":{"source":"youtube","title": title, "url":u,"mime":"text/plain"}})
-                continue
-            # fallback: download+ASR (heavier)
-            cap2 = youtube_transcribe_audio.invoke({"url": u, "model_size":"base"})
-            ev.append({"kind":"audio_transcript","text":cap2,"path":None,
-                       "meta":{"source":"youtube","title": title, "url":u,"mime":"audio"}})
-            continue
-        out = fetch_url_text.invoke({"url": u, "max_chars": 12000})
-        text = out.get("text","") or ""
-        page_title = out.get("title","") or title
-        if not text:
-            continue
-        ev.append({
-            "kind": "doc_text",
-            "text": text,
-            "path": None,
-            "meta": {"source":"web", "title": page_title, "url": u, "mime":"text/html"}
-        })
-    state["evidence"] = ev
-    return state
 # ---------- Graph ----------
 # Build graph function
 def build_graph():
     g = StateGraph(AgentState)
-    # ==== NEW: browsing nodes ====
-    g.add_node("decide_browse", decide_browse_node)
-    g.add_node("search", search_node)
-    g.add_node("crawl", crawl_node)
-    # Existing nodes
     g.add_node("check_attachment", check_attachment_node)
     g.add_node("fetch", fetch_node)
     g.add_node("preprocess", preprocess_node)
@@ -843,15 +483,7 @@ def build_graph():
     g.add_node("validate", validate_format_node)
     # Start the edges
-    g.add_edge(START, "decide_browse")
-    # Browse or skip
-    g.add_conditional_edges("decide_browse", route_browse, {
-        "browse": "search",
-        "skip": "check_attachment"
-    })
-    g.add_edge("search", "crawl")
-    g.add_edge("crawl", "check_attachment")
     # Add conditional branching from check_attachment
     g.add_conditional_edges(
@@ -889,26 +521,18 @@ def build_graph():
 if __name__ == "__main__":
     task_id = '0001'
     task_q = 'Who is the current president of France'
-    # ==== CHANGED: make it a flat empty list (not `[[]]`)
-    attachment_urls: List[str] = []
-    sample: AgentState = {
         "task_id": task_id,
         "question": task_q,
-        "attachment_urls": attachment_urls,  # from GAIA sample
         "local_files": [],
         "evidence": [],
         "answer": None,
         "parsed_final_answer": None,
-        # Tip: set True to force a final_answer line for scoring
         "emit_final_answer": False,   # <<< pure output mode
-        # new optional fields:
-        "use_browsing": None,
-        "web_hits": None,
-        "question_urls": None,
-        "question_youtube_urls": None
     }
     agent_GAIA = build_graph()
     out = agent_GAIA.invoke(sample)
     print("---------------------------")
-    print(out["answer"])

+# agent_v6.py
 # Develop an AI agent with LangGraph and LangChain
 # to answer the questions in the "gaia-benchmark/GAIA" dataset.
 from langchain_core.messages import HumanMessage, SystemMessage
 from langchain_openai import ChatOpenAI
 from langgraph.graph import StateGraph, START, END
 # Optional: pdf parsing if GAIA sometimes includes PDFs
 try:
 # -------------- State -------------
 class EvidenceItem(TypedDict):
+    kind: Literal["audio_transcript","image_ocr","image_vqa","doc_text"]
     text: str
     path: Optional[str]
     meta: Dict[str, Any]
     answer: Optional[str]
     parsed_final_answer: Optional[str]
     emit_final_answer: bool  # <<< add this (default True if you want old behavior)
 # -------------- helpers ---------------
 def _filename_from_cd(cd: str) -> str | None:
         tag = f"{e.get('kind','?')}"
         if meta.get("mime"):
             tag += f"({meta['mime']})"
         chunks.append(f"[{i}:{tag}] {t}")
     out = "\n".join(chunks)
     return out if len(out) <= limit_chars else out[:limit_chars] + " …"
         raise RuntimeError(f"ffmpeg failed: {p.stderr[-500:]}")
     return out
 # ----------------------Tools ----------------------
 @tool
 def download_file(url: str, headers: dict | None = None, auth_token: str | None = None) -> str:
         out_dir = tempfile.mkdtemp(prefix="gaia_tmpdl_")
         out_path = os.path.join(out_dir, fname)
+        # # Write to colab folder
+        # out_dir: str | Path = "."
+        # out_path = Path(out_dir) / fname
         print("out_path:", out_path)
         with open(out_path, "wb") as f:
         return out_path
 @tool
 def transcribe_audio(path: str, model_size: str = "base") -> str:
     """
     Returns the transcript text; raises on failure (caller handles).
     """
     print("running transcribe_audio")
     try:
+        model = whisper.load_model(model_size)
+        result = model.transcribe(path)
         return (result.get("text") or "").strip()
     except Exception as e:
         raise RuntimeError(f"Whisper error: {e}")
 @tool
 def ocr_image(path: str) -> str:
     return text.strip()
 # ------------------------------- Nodes ------------------------------
 def check_attachment_node(state: AgentState) -> AgentState:
     """Check if there is attachment."""
         try:
             if mime and mime.startswith("audio"):
                 print("mime start with audio")
+                # print("path: ", path)
                 # --- ASR ---
                 try:
                     wav = _convert_to_wav_mono16k(path)
     vision_llm = ChatOpenAI(model="gpt-4o", temperature=0)  # vision-capable
     sys = SystemMessage(content=(
         "You solve GAIA tasks using the provided evidence and attached images.\n"
+        "Be precise, quote numbers/strings exactly. If uncertain, say so.\n"
         "Your answer to the GAIA tasks should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string. If your answer only include a single word, make the first letter capital.\n" + end_instr
     ))
     sys = SystemMessage(content=(
         "You solve GAIA tasks. Use careful step-by-step reasoning but keep it concise.\n"
+        "You can use the provided textual evidence if there is any. \n"
         "Your answer to the GAIA tasks should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string. If your answer only include a single word, make the first letter capital.\n" + end_instr
     ))
     emit = bool(state.get("emit_final_answer", True))
     txt = (state.get("answer") or "").strip()
     if not txt:
         if emit:
             state["answer"] = "No answer generated.\n\nfinal_answer: [NO_ANSWER]"
             return True
     return False
 def route_after_preprocess(state: AgentState) -> Literal["vision","text"]:
     return "vision" if has_images(state) else "text"
 # ---------- Graph ----------
 # Build graph function
 def build_graph():
     g = StateGraph(AgentState)
     g.add_node("check_attachment", check_attachment_node)
     g.add_node("fetch", fetch_node)
     g.add_node("preprocess", preprocess_node)
     g.add_node("validate", validate_format_node)
     # Start the edges
+    g.add_edge(START, "check_attachment")
     # Add conditional branching from check_attachment
     g.add_conditional_edges(
 if __name__ == "__main__":
     task_id = '0001'
     task_q = 'Who is the current president of France'
+    task_url = []
+    sample = {
         "task_id": task_id,
         "question": task_q,
+        "attachment_urls": [task_url],  # from GAIA sample
         "local_files": [],
         "evidence": [],
         "answer": None,
         "parsed_final_answer": None,
         "emit_final_answer": False,   # <<< pure output mode
     }
     agent_GAIA = build_graph()
     out = agent_GAIA.invoke(sample)
     print("---------------------------")
+    print(out["answer"])

app.py CHANGED Viewed

@@ -77,11 +77,6 @@ def run_and_submit_all( profile: bool = True):
             "answer": None,
             "parsed_final_answer": None,
             "emit_final_answer": False,   # <<< pure output mode
-            # new optional fields:
-            "use_browsing": None,
-            "web_hits": None,
-            "question_urls": None,
-            "question_youtube_urls": None
         }
         if not task_id or question_text is None:

             "answer": None,
             "parsed_final_answer": None,
             "emit_final_answer": False,   # <<< pure output mode
         }
         if not task_id or question_text is None:

requirements.txt CHANGED Viewed

@@ -8,11 +8,4 @@ langchain-community
 ddgs
 openai-whisper
 pytesseract
-ffmpeg
-tavily-python
-trafilatura
-readability-lxml
-youtube-transcript-api
-yt-dlp
-wikipedia
-serpapi

 ddgs
 openai-whisper
 pytesseract
+ffmpeg