Final_Assignment_debug

Sleeping

App Files Files Community

shan gao committed on Sep 20, 2025

Commit

15f210e

1 Parent(s): b9de22e

change

Browse files

Files changed (3) hide show

agent.py +395 -19
app.py +5 -0
requirements.txt +8 -1

agent.py CHANGED Viewed

@@ -1,4 +1,3 @@
-# agent_v6.py
 # Develop an AI agent with LangGraph and LangChain
 # to answer the questions in the "gaia-benchmark/GAIA" dataset.
@@ -14,7 +13,24 @@ from langchain_core.tools import tool
 from langchain_core.messages import HumanMessage, SystemMessage
 from langchain_openai import ChatOpenAI
 from langgraph.graph import StateGraph, START, END
 # Optional: pdf parsing if GAIA sometimes includes PDFs
 try:
@@ -26,7 +42,8 @@ except Exception:
 # -------------- State -------------
 class EvidenceItem(TypedDict):
-    kind: Literal["audio_transcript","image_ocr","image_vqa","doc_text"]
     text: str
     path: Optional[str]
     meta: Dict[str, Any]
@@ -40,6 +57,12 @@ class AgentState(TypedDict):
     answer: Optional[str]
     parsed_final_answer: Optional[str]
     emit_final_answer: bool  # <<< add this (default True if you want old behavior)
 # -------------- helpers ---------------
 def _filename_from_cd(cd: str) -> str | None:
@@ -75,6 +98,10 @@ def _summarize_evidence(evidence: List[Dict[str, Any]], limit_chars: int = 6000)
         tag = f"{e.get('kind','?')}"
         if meta.get("mime"):
             tag += f"({meta['mime']})"
         chunks.append(f"[{i}:{tag}] {t}")
     out = "\n".join(chunks)
     return out if len(out) <= limit_chars else out[:limit_chars] + " …"
@@ -129,6 +156,13 @@ def _convert_to_wav_mono16k(src_path: str) -> str:
         raise RuntimeError(f"ffmpeg failed: {p.stderr[-500:]}")
     return out
 # ----------------------Tools ----------------------
 @tool
 def download_file(url: str, headers: dict | None = None, auth_token: str | None = None) -> str:
@@ -163,10 +197,6 @@ def download_file(url: str, headers: dict | None = None, auth_token: str | None
         out_dir = tempfile.mkdtemp(prefix="gaia_tmpdl_")
         out_path = os.path.join(out_dir, fname)
-        # # Write to colab folder
-        # out_dir: str | Path = "."
-        # out_path = Path(out_dir) / fname
         print("out_path:", out_path)
         with open(out_path, "wb") as f:
@@ -177,6 +207,9 @@ def download_file(url: str, headers: dict | None = None, auth_token: str | None
         return out_path
 @tool
 def transcribe_audio(path: str, model_size: str = "base") -> str:
     """
@@ -184,13 +217,15 @@ def transcribe_audio(path: str, model_size: str = "base") -> str:
     Returns the transcript text; raises on failure (caller handles).
     """
     print("running transcribe_audio")
     try:
-        model = whisper.load_model(model_size)
-        result = model.transcribe(path)
         return (result.get("text") or "").strip()
     except Exception as e:
         raise RuntimeError(f"Whisper error: {e}")
 @tool
 def ocr_image(path: str) -> str:
@@ -202,6 +237,194 @@ def ocr_image(path: str) -> str:
     return text.strip()
 # ------------------------------- Nodes ------------------------------
 def check_attachment_node(state: AgentState) -> AgentState:
     """Check if there is attachment."""
@@ -283,7 +506,6 @@ def preprocess_node(state: AgentState) -> AgentState:
         try:
             if mime and mime.startswith("audio"):
                 print("mime start with audio")
-                # print("path: ", path)
                 # --- ASR ---
                 try:
                     wav = _convert_to_wav_mono16k(path)
@@ -352,7 +574,7 @@ def solve_multimodal_node(state: AgentState) -> AgentState:
     vision_llm = ChatOpenAI(model="gpt-4o", temperature=0)  # vision-capable
     sys = SystemMessage(content=(
         "You solve GAIA tasks using the provided evidence and attached images.\n"
-        "Be precise, quote numbers/strings exactly. If uncertain, say so.\n"
         "Your answer to the GAIA tasks should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string. If your answer only include a single word, make the first letter capital.\n" + end_instr
     ))
@@ -401,7 +623,7 @@ def solve_text_only_node(state: "AgentState") -> "AgentState":
     sys = SystemMessage(content=(
         "You solve GAIA tasks. Use careful step-by-step reasoning but keep it concise.\n"
-        "You can use the provided textual evidence if there is any. \n"
         "Your answer to the GAIA tasks should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string. If your answer only include a single word, make the first letter capital.\n" + end_instr
     ))
@@ -427,7 +649,7 @@ def validate_format_node(state: AgentState) -> AgentState:
     emit = bool(state.get("emit_final_answer", True))
     txt = (state.get("answer") or "").strip()
     if not txt:
         if emit:
             state["answer"] = "No answer generated.\n\nfinal_answer: [NO_ANSWER]"
@@ -468,13 +690,151 @@ def has_images(state: AgentState) -> bool:
             return True
     return False
 def route_after_preprocess(state: AgentState) -> Literal["vision","text"]:
     return "vision" if has_images(state) else "text"
 # ---------- Graph ----------
 # Build graph function
 def build_graph():
     g = StateGraph(AgentState)
     g.add_node("check_attachment", check_attachment_node)
     g.add_node("fetch", fetch_node)
     g.add_node("preprocess", preprocess_node)
@@ -483,7 +843,15 @@ def build_graph():
     g.add_node("validate", validate_format_node)
     # Start the edges
-    g.add_edge(START, "check_attachment")
     # Add conditional branching from check_attachment
     g.add_conditional_edges(
@@ -521,18 +889,26 @@ def build_graph():
 if __name__ == "__main__":
     task_id = '0001'
     task_q = 'Who is the current president of France'
-    task_url = []
-    sample = {
         "task_id": task_id,
         "question": task_q,
-        "attachment_urls": [task_url],  # from GAIA sample
         "local_files": [],
         "evidence": [],
         "answer": None,
         "parsed_final_answer": None,
         "emit_final_answer": False,   # <<< pure output mode
     }
     agent_GAIA = build_graph()
     out = agent_GAIA.invoke(sample)
     print("---------------------------")
-    print(out["answer"])

 # Develop an AI agent with LangGraph and LangChain
 # to answer the questions in the "gaia-benchmark/GAIA" dataset.
 from langchain_core.messages import HumanMessage, SystemMessage
 from langchain_openai import ChatOpenAI
 from langgraph.graph import StateGraph, START, END
+from tavily import TavilyClient
+import serpapi
+import trafilatura
+from readability import Document
+import html as _html
+import wikipedia
+from urllib.parse import parse_qs
+from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
+import yt_dlp
+# ==== NEW: (optional) tiny helpers used by browsing nodes ====
def _has_search_key() -> bool:
    """Report whether credentials for at least one search backend are set.

    A backend counts as configured when ``TAVILY_API_KEY`` or
    ``SERPAPI_API_KEY`` is non-empty, or when both ``GOOGLE_API_KEY`` and
    ``GOOGLE_CSE_ID`` are non-empty.
    """
    env = os.environ
    if env.get("TAVILY_API_KEY") or env.get("SERPAPI_API_KEY"):
        return True
    # Google needs the API key *and* the search-engine id to count.
    return bool(env.get("GOOGLE_API_KEY") and env.get("GOOGLE_CSE_ID"))
 # Optional: pdf parsing if GAIA sometimes includes PDFs
 try:
 # -------------- State -------------
 class EvidenceItem(TypedDict):
+    # ==== CHANGED: expanded allowed kinds to match actual usage paths ====
+    kind: Literal["audio_transcript","image_ocr","image_vqa","doc_text","unknown_file","preprocess_error"]
     text: str
     path: Optional[str]
     meta: Dict[str, Any]
     answer: Optional[str]
     parsed_final_answer: Optional[str]
     emit_final_answer: bool  # <<< add this (default True if you want old behavior)
+    # ==== NEW: state used by browse pipeline (optional) ====
+    use_browsing: Optional[bool]
+    web_hits: Optional[List[Dict[str, str]]]
+    # ==== NEW: urls found directly in the question ====
+    question_urls: Optional[List[str]]
+    question_youtube_urls: Optional[List[str]]
 # -------------- helpers ---------------
 def _filename_from_cd(cd: str) -> str | None:
         tag = f"{e.get('kind','?')}"
         if meta.get("mime"):
             tag += f"({meta['mime']})"
+        if meta.get("title"):
+            tag += f"[{meta['title']}]"
+        if meta.get("url"):
+            tag += f"<{meta['url']}>"
         chunks.append(f"[{i}:{tag}] {t}")
     out = "\n".join(chunks)
     return out if len(out) <= limit_chars else out[:limit_chars] + " …"
         raise RuntimeError(f"ffmpeg failed: {p.stderr[-500:]}")
     return out
+# ==== NEW: URL helpers ====
# Candidate URL: scheme followed by everything up to the next whitespace.
_URL_RE = re.compile(r'https?://\S+')
# Characters that routinely follow a URL in prose without belonging to it.
_URL_TRAILING_PUNCT = ".,;:!?\"'"
# Closing brackets are only trimmed when they have no opener inside the URL,
# so links such as .../wiki/Mercury_(planet) survive intact.
_URL_CLOSERS = {")": "(", "]": "[", "}": "{", ">": "<"}


def _strip_url_tail(url: str) -> str:
    """Remove sentence punctuation and unbalanced closers from the end of *url*."""
    while url:
        last = url[-1]
        if last in _URL_TRAILING_PUNCT:
            url = url[:-1]
        elif last in _URL_CLOSERS and url.count(last) > url.count(_URL_CLOSERS[last]):
            url = url[:-1]
        else:
            break
    return url


def _extract_urls(text: str) -> List[str]:
    """Return the http(s) URLs found in *text*, in order of appearance.

    Trailing prose punctuation ("see https://x.org/a.") and unbalanced closing
    brackets ("(https://x.org/a)") are trimmed so the result can be fetched or
    parsed for a video id as-is. ``None`` or empty input yields ``[]``.
    Duplicates are kept.
    """
    urls = []
    for raw in _URL_RE.findall(text or ""):
        cleaned = _strip_url_tail(raw)
        # Drop degenerate matches that were nothing but scheme + punctuation.
        if _URL_RE.fullmatch(cleaned):
            urls.append(cleaned)
    return urls
 # ----------------------Tools ----------------------
 @tool
 def download_file(url: str, headers: dict | None = None, auth_token: str | None = None) -> str:
         out_dir = tempfile.mkdtemp(prefix="gaia_tmpdl_")
         out_path = os.path.join(out_dir, fname)
         print("out_path:", out_path)
         with open(out_path, "wb") as f:
         return out_path
+# ==== NEW: cache Whisper model so we don't reload each call ====
+_WHISPER = None
 @tool
 def transcribe_audio(path: str, model_size: str = "base") -> str:
     """
     Returns the transcript text; raises on failure (caller handles).
     """
     print("running transcribe_audio")
+    global _WHISPER
     try:
+        if _WHISPER is None:
+            _WHISPER = whisper.load_model(model_size)
+        result = _WHISPER.transcribe(path)
         return (result.get("text") or "").strip()
     except Exception as e:
         raise RuntimeError(f"Whisper error: {e}")
 @tool
 def ocr_image(path: str) -> str:
     return text.strip()
+# ==== NEW: WEB / WIKI / YOUTUBE TOOLS =========================================
+# Choose your search backend (Tavily simplest). Set env var before use.
# Preferred search backend: True -> Tavily, False -> SerpAPI.
# The other backend is used automatically when only its API key is set, so a
# deployment with a single key works regardless of this flag.
_USE_TAVILY = False


def _search_error_hit(message: str) -> List[Dict[str, str]]:
    """Build the single placeholder hit returned when a search cannot run."""
    return [{"title": "", "url": "", "snippet": f"[search error: {message}]"}]


def _tavily_search(query: str, k: int, api_key: str) -> List[Dict[str, str]]:
    """Run a Tavily search and normalise hits to {title, url, snippet}."""
    tv = TavilyClient(api_key=api_key)
    res = tv.search(
        query=query,
        search_depth="advanced",
        max_results=k,
        include_answer=False,
        include_images=False,
    )
    return [
        {
            "title": r.get("title", ""),
            "url": r.get("url", ""),
            "snippet": (r.get("content", "") or "")[:400],
        }
        for r in res.get("results", [])
    ]


def _serpapi_search(query: str, k: int, api_key: str) -> List[Dict[str, str]]:
    """Run a SerpAPI Google search and normalise hits to {title, url, snippet}."""
    params = {"engine": "google", "q": query, "num": k, "api_key": api_key}
    results = serpapi.search(params)
    items = results.get("organic_results", [])
    return [
        {
            "title": it.get("title", ""),
            "url": it.get("link", ""),
            "snippet": (it.get("snippet", "") or "")[:400],
        }
        for it in items[:k]
    ]


@tool
def web_search(query: str, k: int = 6) -> List[Dict[str, str]]:
    """
    Web search via Tavily or SerpAPI. Returns a list of {title, url, snippet}.

    The backend is chosen per call: the one preferred by `_USE_TAVILY` when
    its key (TAVILY_API_KEY / SERPAPI_API_KEY) is set, otherwise the other
    one. A blank query returns []. Failures never raise; they are reported as
    a single hit with an empty url and a "[search error: ...]" snippet.
    """
    if not (query or "").strip():
        return []

    tavily_key = os.getenv("TAVILY_API_KEY")
    serpapi_key = os.getenv("SERPAPI_API_KEY")
    if tavily_key and (_USE_TAVILY or not serpapi_key):
        backend, api_key = _tavily_search, tavily_key
    elif serpapi_key:
        backend, api_key = _serpapi_search, serpapi_key
    else:
        return _search_error_hit("no TAVILY_API_KEY or SERPAPI_API_KEY configured")

    try:
        return backend(query, k, api_key)
    except Exception as e:
        # Best-effort tool: surface the failure as data so the graph keeps running.
        return _search_error_hit(str(e))
@tool
def fetch_url_text(url: str, max_chars: int = 12000, timeout: int = 30) -> Dict[str, Any]:
    """
    Download a web page and extract main article text using trafilatura,
    with a readability-lxml fallback. Returns {url, title, text}.

    `text` is truncated to `max_chars` (plus an ellipsis marker). Download
    failures are reported as text "[fetch error: ...]" and extractor failures
    as "[extraction error: ...]" instead of being raised.
    """
    headers = {
        "User-Agent": "gaia-agent/1.0 (+https://example.org)",
        "Accept": "text/html,*/*;q=0.8",
    }
    try:
        # The context manager releases the session's pooled connections.
        with requests.Session() as sess:
            r = sess.get(url, headers=headers, timeout=timeout)
            r.raise_for_status()
            html_content = r.text
    except Exception as e:
        return {"url": url, "title": "", "text": f"[fetch error: {e}]"}

    title = ""
    # 1) try trafilatura (best for boilerplate removal)
    try:
        extracted = trafilatura.extract(
            html_content, include_comments=False, include_tables=False, url=url
        )
    except Exception:
        extracted = None

    if extracted and len(extracted) > 200:
        text = extracted
    else:
        # 2) fallback: readability (also used when trafilatura's result is too short)
        try:
            doc = Document(html_content)
            title = doc.short_title() or ""
            text = doc.summary(html_partial=False)
            # rudimentary HTML strip, then decode entities (&amp;, &#160;, ...)
            # before collapsing whitespace so decoded non-breaking spaces fold too
            text = re.sub(r"<[^>]+>", " ", text)
            text = _html.unescape(text)
            text = re.sub(r"\s+", " ", text).strip()
        except Exception as e2:
            return {"url": url, "title": "", "text": f"[extraction error: {e2}]"}

    if len(text) > max_chars:
        text = text[:max_chars] + " …"

    # Try to fill title if empty
    if not title:
        m = re.search(r"<title[^>]*>(.*?)</title>", html_content, flags=re.I | re.S)
        if m:
            title = _html.unescape(m.group(1).strip())
    return {"url": url, "title": title or "", "text": text}
@tool
def wikipedia_lookup(query: str, sentences: int = 4) -> Dict[str, Any]:
    """
    Simple Wikipedia lookup. Returns {title, url, summary}.

    Takes the top English search result for `query` and returns its first
    `sentences` sentences. Never raises: failures are reported in `summary`
    as "[wikipedia search error: ...]" (search failed or found nothing) or
    "[wikipedia fetch error: ...]" (the matched page could not be loaded).
    """
    try:
        wikipedia.set_lang("en")
        matches = wikipedia.search(query, results=1)
    except Exception as e:
        return {"title": "", "url": "", "summary": f"[wikipedia search error: {e}]"}

    if not matches:
        # Explicit message instead of an opaque "list index out of range".
        return {"title": "", "url": "",
                "summary": f"[wikipedia search error: no results for {query!r}]"}

    title = matches[0]
    try:
        # auto_suggest=False: use the matched title verbatim rather than
        # letting the library substitute a suggested one.
        summary = wikipedia.summary(title, sentences=sentences, auto_suggest=False)
        page = wikipedia.page(title, auto_suggest=False, preload=False)
    except Exception as e:
        return {"title": title, "url": "", "summary": f"[wikipedia fetch error: {e}]"}
    return {"title": page.title, "url": page.url, "summary": summary}
def _youtube_video_id(url_or_id: str) -> str:
    """Return the video id for a YouTube URL; anything else is returned unchanged.

    Handles watch?v=<id>, youtu.be/<id> and /shorts|embed|live|v/<id> links.
    Returns "" when the URL is a YouTube link without a recognisable id.
    """
    if "youtube.com" not in url_or_id and "youtu.be" not in url_or_id:
        return url_or_id  # already a bare id
    # urlparse only fills netloc when a scheme is present.
    u = urlparse(url_or_id if "://" in url_or_id else "https://" + url_or_id)
    parts = [p for p in u.path.split("/") if p]
    if u.netloc.endswith("youtu.be"):
        return parts[0] if parts else ""
    vid = parse_qs(u.query).get("v", [""])[0]
    if vid:
        return vid
    if len(parts) >= 2 and parts[0] in ("shorts", "embed", "live", "v"):
        return parts[1]
    return ""


def _transcript_text(chunks) -> str:
    """Join snippet texts; accepts dict snippets and objects exposing `.text`."""
    texts = []
    for c in chunks:
        t = c.get("text") if isinstance(c, dict) else getattr(c, "text", None)
        if t:
            texts.append(t)
    return " ".join(texts).strip()


@tool
def youtube_get_transcript(url_or_id: str, prefer_langs: List[str] | None = None) -> str:
    """
    Get YouTube transcript via API (no download). Returns plain text.

    `url_or_id` may be a video id or a YouTube URL. Languages in
    `prefer_langs` are tried in order; if none yields text, the first
    available transcript is used. Never raises: returns
    "[no captions available]" when the video has no transcript and
    "[youtube transcript error: ...]" for any other failure.
    """
    print('try to get youtube video transcript')
    try:
        prefer_langs = prefer_langs or ["en", "en-US", "en-GB", "auto"]
        vid = _youtube_video_id(url_or_id)
        print("vid: ", vid)
        if not vid:
            return "[youtube transcript error: could not determine video id]"

        # Prefer the static list_transcripts() where the installed library
        # still has it; otherwise fall back to the instance-level list().
        legacy_list = getattr(YouTubeTranscriptApi, "list_transcripts", None)
        if legacy_list is not None:
            trs_list = legacy_list(vid)
        else:
            trs_list = YouTubeTranscriptApi().list(vid)

        # choose first matching language that actually yields text
        for lang in prefer_langs:
            try:
                text = _transcript_text(trs_list.find_transcript([lang]).fetch())
            except Exception:
                continue
            if text:
                return text

        # fallback: first any transcript
        first = next(iter(trs_list), None)
        if first is None:
            return "[no captions available]"
        return _transcript_text(first.fetch())
    except (TranscriptsDisabled, NoTranscriptFound):
        return "[no captions available]"
    except Exception as e:
        return f"[youtube transcript error: {e}]"
@tool
def youtube_transcribe_audio(url: str, model_size: str = "base") -> str:
    """
    Download YouTube audio (yt-dlp) and transcribe with Whisper.

    Returns the transcript text, or "[youtube download/transcribe error: ...]"
    if the download, conversion or transcription fails (never raises). The
    temporary download directory is removed before returning.
    """
    import shutil  # local import: only needed for temp-dir cleanup

    tmpdir = tempfile.mkdtemp(prefix="gaia_yt_")
    outfile = os.path.join(tmpdir, "%(id)s.%(ext)s")
    ydl_opts = {
        "format": "bestaudio/best",
        "outtmpl": outfile,
        "quiet": True,
        "no_warnings": True,
        "noplaylist": True,
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=True)
            path = ydl.prepare_filename(info)
        # convert & transcribe
        wav = _convert_to_wav_mono16k(path)
        return transcribe_audio.invoke({"path": wav, "model_size": model_size})
    except Exception as e:
        return f"[youtube download/transcribe error: {e}]"
    finally:
        # Transcription has finished (or failed) by now, so the downloaded
        # audio is no longer needed; without this every call leaks a directory.
        shutil.rmtree(tmpdir, ignore_errors=True)
 # ------------------------------- Nodes ------------------------------
 def check_attachment_node(state: AgentState) -> AgentState:
     """Check if there is attachment."""
         try:
             if mime and mime.startswith("audio"):
                 print("mime start with audio")
                 # --- ASR ---
                 try:
                     wav = _convert_to_wav_mono16k(path)
     vision_llm = ChatOpenAI(model="gpt-4o", temperature=0)  # vision-capable
     sys = SystemMessage(content=(
         "You solve GAIA tasks using the provided evidence and attached images.\n"
+        "Be precise, quote numbers/strings exactly. If uncertain, say so.\n"
         "Your answer to the GAIA tasks should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string. If your answer only include a single word, make the first letter capital.\n" + end_instr
     ))
     sys = SystemMessage(content=(
         "You solve GAIA tasks. Use careful step-by-step reasoning but keep it concise.\n"
+        "You can use the provided textual evidence if there is any. \n"
         "Your answer to the GAIA tasks should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string. If your answer only include a single word, make the first letter capital.\n" + end_instr
     ))
     emit = bool(state.get("emit_final_answer", True))
     txt = (state.get("answer") or "").strip()
     if not txt:
         if emit:
             state["answer"] = "No answer generated.\n\nfinal_answer: [NO_ANSWER]"
             return True
     return False
+# ==== CHANGED: fix return type Literal to match actual branch key ====
 def route_after_preprocess(state: AgentState) -> Literal["vision","text"]:
     return "vision" if has_images(state) else "text"
+# ==== NEW: Browsing router ====
def needs_browsing(q: str) -> bool:
    """Heuristically decide whether question *q* should trigger web browsing.

    Returns True only when a search backend key is configured (so the sample
    runs without keys) AND the lower-cased question contains one of the
    trigger substrings below. Matching is by substring, not whole word.
    """
    q = (q or "").lower()
    # Every trigger must be lower-case because the question is lower-cased
    # above; mixed-case entries ("How", "Which") can never match.
    hot = ("today", "current", "latest", "price", "how", "who", "where", "what",
           "which", "2023", "2024", "2025", "news", "wins",
           "http://", "https://", "wikipedia", "youtube.com")
    return _has_search_key() and any(w in q for w in hot)
+# ==== NEW: Decide browse node ====
def decide_browse_node(state: AgentState) -> AgentState:
    """Record the URLs found in the question and decide whether to browse.

    Writes ``question_urls`` (every URL in the question),
    ``question_youtube_urls`` (the YouTube subset) and ``use_browsing`` into
    *state* and returns it. Browsing is enabled when the question contains a
    YouTube link, otherwise when ``needs_browsing`` says so.
    """
    print("enter decide_browse_node")
    question = state.get("question", "")
    found = _extract_urls(question)
    youtube_links = list(filter(_is_youtube, found))

    # Save for later stages
    state["question_urls"] = found
    state["question_youtube_urls"] = youtube_links

    if youtube_links:
        # YouTube links are handled without a search key, so browse regardless.
        state["use_browsing"] = True
    else:
        state["use_browsing"] = needs_browsing(question)
    return state
def route_browse(state: AgentState) -> Literal["browse","skip"]:
    """Pick the graph branch: "browse" if ``use_browsing`` is truthy, else "skip"."""
    if state.get("use_browsing"):
        return "browse"
    return "skip"
+# ==== NEW: Search node ====
def search_node(state: AgentState) -> AgentState:
    """Collect candidate pages for the question and seed Wikipedia evidence.

    Writes ``state["web_hits"]``: URLs taken from the question first (YouTube
    links ahead of the rest, de-duplicated), followed by web-search hits when
    a search key is configured. For questions of at most 30 words, a
    Wikipedia summary is appended to ``state["evidence"]`` unless the lookup
    returned an error placeholder. Returns the mutated state.
    """
    print("enter search_node")
    q = state.get("question") or ""

    # Either list may be missing or None. Guard each one separately:
    # `a + b or []` would add first and raise TypeError on None.
    yt_urls = state.get("question_youtube_urls") or []
    all_urls = state.get("question_urls") or []
    # YouTube links are also present in all_urls; dict.fromkeys de-duplicates
    # while keeping first-seen order.
    seeded = list(dict.fromkeys([*yt_urls, *all_urls]))
    preseed = [{"title": "(from question)", "url": u, "snippet": ""} for u in seeded]

    # Do a web search only if keys are configured
    hits = []
    if _has_search_key():
        hits = web_search.invoke({"query": q, "k": 6}) or []

    # Optionally seed Wikipedia for short queries
    if len(q.split()) <= 30:
        wiki = wikipedia_lookup.invoke({"query": q, "sentences": 4})
        summary = (wiki.get("summary") or "").strip()
        # wikipedia_lookup reports failures as "[wikipedia ... error: ...]";
        # those placeholders are not evidence.
        if summary and not summary.startswith("[wikipedia"):
            if state.get("evidence") is None:
                state["evidence"] = []
            state["evidence"].append({
                "kind": "doc_text",
                "text": summary,
                "path": None,
                "meta": {"source": "wikipedia", "title": wiki.get("title", ""),
                         "url": wiki.get("url", ""), "mime": "text/plain"}
            })

    # Combine: question links first, then search hits
    state["web_hits"] = preseed + hits
    return state
def _is_youtube(u: str) -> bool:
    """Return True if URL *u* points at youtube.com, youtu.be or a subdomain.

    The host is compared exactly or as a dot-separated suffix, so look-alike
    hosts such as ``youtube.com.evil.example`` or ``notyoutube.com`` are
    rejected. Ports and userinfo are ignored. Unparseable or empty input
    yields False.
    """
    try:
        host = (urlparse(u).hostname or "").lower().rstrip(".")
    except (ValueError, TypeError, AttributeError):
        # ValueError: malformed netloc; the others: non-string input.
        return False
    return any(host == d or host.endswith("." + d) for d in ("youtube.com", "youtu.be"))
def crawl_node(state: AgentState) -> AgentState:
    """Fetch the top web hits and append their text to ``state["evidence"]``.

    Picks at most four hits from ``state["web_hits"]``, one per distinct
    domain (a leading "www." is ignored), in their given order. YouTube links
    are resolved to captions, falling back to download + Whisper when captions
    are unavailable or the caption lookup failed; other links go through
    ``fetch_url_text``. Results that are only error placeholders are skipped
    rather than stored as evidence. Returns the mutated state.
    """
    print("enter crawl_node")
    max_pages = 4
    # Placeholder prefixes the tools return instead of raising.
    caption_failures = ("[no captions", "[youtube transcript error")
    asr_failure = "[youtube download/transcribe error"
    page_failures = ("[fetch error", "[extraction error")

    ev = list(state.get("evidence") or [])
    hits: List[Dict[str, str]] = state.get("web_hits") or []
    print("hits: ", hits)

    def _domain(u: str) -> str:
        try:
            d = urlparse(u).netloc.lower()
        except (ValueError, TypeError, AttributeError):
            return ""
        # Remove the literal "www." prefix only. str.lstrip("www.") strips any
        # leading 'w' or '.' characters ("web.archive.org" -> "eb.archive.org").
        return d[4:] if d.startswith("www.") else d

    # choose top M distinct domains
    seen_domains = set()
    picked = []
    for h in hits:
        u = h.get("url") or ""
        if not u:
            continue
        d = _domain(u)
        if not d or d in seen_domains:
            continue
        seen_domains.add(d)
        picked.append(h)
        if len(picked) >= max_pages:
            break
    print("picked: ", picked)

    # Fetch & extract
    for h in picked:
        u = h["url"]
        print("url: ", u)
        title = h.get("title", "")

        # Special-case YouTube
        if _is_youtube(u):
            cap = youtube_get_transcript.invoke({"url_or_id": u})
            print("caption chars: ", len(cap or ""))
            if cap and not cap.startswith(caption_failures):
                ev.append({"kind": "doc_text", "text": cap, "path": None,
                           "meta": {"source": "youtube", "title": title, "url": u, "mime": "text/plain"}})
                continue
            # fallback: download+ASR (heavier)
            cap2 = youtube_transcribe_audio.invoke({"url": u, "model_size": "base"})
            if cap2 and not cap2.startswith(asr_failure):
                ev.append({"kind": "audio_transcript", "text": cap2, "path": None,
                           "meta": {"source": "youtube", "title": title, "url": u, "mime": "audio"}})
            continue

        out = fetch_url_text.invoke({"url": u, "max_chars": 12000})
        text = out.get("text", "") or ""
        page_title = out.get("title", "") or title
        if not text or text.startswith(page_failures):
            continue
        ev.append({
            "kind": "doc_text",
            "text": text,
            "path": None,
            "meta": {"source": "web", "title": page_title, "url": u, "mime": "text/html"}
        })

    state["evidence"] = ev
    return state
 # ---------- Graph ----------
 # Build graph function
 def build_graph():
     g = StateGraph(AgentState)
+    # ==== NEW: browsing nodes ====
+    g.add_node("decide_browse", decide_browse_node)
+    g.add_node("search", search_node)
+    g.add_node("crawl", crawl_node)
+    # Existing nodes
     g.add_node("check_attachment", check_attachment_node)
     g.add_node("fetch", fetch_node)
     g.add_node("preprocess", preprocess_node)
     g.add_node("validate", validate_format_node)
     # Start the edges
+    g.add_edge(START, "decide_browse")
+    # Browse or skip
+    g.add_conditional_edges("decide_browse", route_browse, {
+        "browse": "search",
+        "skip": "check_attachment"
+    })
+    g.add_edge("search", "crawl")
+    g.add_edge("crawl", "check_attachment")
     # Add conditional branching from check_attachment
     g.add_conditional_edges(
 if __name__ == "__main__":
     task_id = '0001'
     task_q = 'Who is the current president of France'
+    # ==== CHANGED: make it a flat empty list (not `[[]]`)
+    attachment_urls: List[str] = []
+    sample: AgentState = {
         "task_id": task_id,
         "question": task_q,
+        "attachment_urls": attachment_urls,  # from GAIA sample
         "local_files": [],
         "evidence": [],
         "answer": None,
         "parsed_final_answer": None,
+        # Tip: set True to force a final_answer line for scoring
         "emit_final_answer": False,   # <<< pure output mode
+        # new optional fields:
+        "use_browsing": None,
+        "web_hits": None,
+        "question_urls": None,
+        "question_youtube_urls": None
     }
     agent_GAIA = build_graph()
     out = agent_GAIA.invoke(sample)
     print("---------------------------")
+    print(out["answer"])

app.py CHANGED Viewed

@@ -77,6 +77,11 @@ def run_and_submit_all( profile: bool = True):
             "answer": None,
             "parsed_final_answer": None,
             "emit_final_answer": False,   # <<< pure output mode
         }
         if not task_id or question_text is None:

             "answer": None,
             "parsed_final_answer": None,
             "emit_final_answer": False,   # <<< pure output mode
+            # new optional fields:
+            "use_browsing": None,
+            "web_hits": None,
+            "question_urls": None,
+            "question_youtube_urls": None
         }
         if not task_id or question_text is None:

requirements.txt CHANGED Viewed

@@ -8,4 +8,11 @@ langchain-community
 ddgs
 openai-whisper
 pytesseract
-ffmpeg

 ddgs
 openai-whisper
 pytesseract
+ffmpeg
+tavily-python
+trafilatura
+readability-lxml
+youtube-transcript-api
+yt-dlp
+wikipedia
+serpapi