Spaces:
Sleeping
Sleeping
shan gao
committed on
Commit
·
e9b8f0b
1
Parent(s):
5c7c966
change
Browse files
agent.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
# v8
|
| 2 |
# Develop an AI agent with LangGraph and LangChain
|
| 3 |
# to answer the questions in the "gaia-benchmark/GAIA" dataset.
|
| 4 |
|
|
@@ -366,6 +365,7 @@ def wikipedia_lookup(query: str, sentences: int = 4) -> Dict[str, Any]:
|
|
| 366 |
def youtube_get_transcript(url_or_id: str, prefer_langs: List[str] = ["en"] ) -> str:
|
| 367 |
"""
|
| 368 |
Get YouTube transcript via API (no download). Returns plain text.
|
|
|
|
| 369 |
"""
|
| 370 |
print('try to get youtube video transcript')
|
| 371 |
try:
|
|
@@ -718,10 +718,17 @@ def decide_browse_node(state: AgentState) -> AgentState:
|
|
| 718 |
state["use_browsing"] = bool(yt_urls) or needs_browsing(q)
|
| 719 |
return state
|
| 720 |
|
| 721 |
-
|
| 722 |
def route_browse(state: AgentState) -> Literal["browse","skip"]:
|
| 723 |
return "browse" if state.get("use_browsing") else "skip"
|
| 724 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 725 |
# ==== NEW: Search node ====
|
| 726 |
def search_node(state: AgentState) -> AgentState:
|
| 727 |
print("enter search_node")
|
|
@@ -736,6 +743,10 @@ def search_node(state: AgentState) -> AgentState:
|
|
| 736 |
if _has_search_key():
|
| 737 |
hits = web_search.invoke({"query": q, "k": 6}) or []
|
| 738 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 739 |
# Optionally seed Wikipedia for short queries
|
| 740 |
if len(q.split()) <= 30: #8
|
| 741 |
wiki = wikipedia_lookup.invoke({"query": q, "sentences": 4})
|
|
@@ -752,14 +763,6 @@ def search_node(state: AgentState) -> AgentState:
|
|
| 752 |
state["web_hits"] = preseed + hits
|
| 753 |
return state
|
| 754 |
|
| 755 |
-
|
| 756 |
-
def _is_youtube(u: str) -> bool:
|
| 757 |
-
try:
|
| 758 |
-
net = urlparse(u).netloc.lower()
|
| 759 |
-
return ("youtube.com" in net) or ("youtu.be" in net)
|
| 760 |
-
except Exception:
|
| 761 |
-
return False
|
| 762 |
-
|
| 763 |
def crawl_node(state: AgentState) -> AgentState:
|
| 764 |
print("enter crawl_node")
|
| 765 |
ev = list(state.get("evidence", []))
|
|
@@ -795,7 +798,8 @@ def crawl_node(state: AgentState) -> AgentState:
|
|
| 795 |
# Special-case YouTube
|
| 796 |
if _is_youtube(u):
|
| 797 |
print("is_youtube? ", _is_youtube(u))
|
| 798 |
-
cap = youtube_get_transcript.invoke({"url_or_id": u})
|
|
|
|
| 799 |
print('caption: ', cap)
|
| 800 |
if cap and not cap.startswith("[no captions"):
|
| 801 |
ev.append({"kind":"doc_text","text":cap,"path":None,
|
|
|
|
|
|
|
| 1 |
# Develop an AI agent with LangGraph and LangChain
|
| 2 |
# to answer the questions in the "gaia-benchmark/GAIA" dataset.
|
| 3 |
|
|
|
|
| 365 |
def youtube_get_transcript(url_or_id: str, prefer_langs: List[str] = ["en"] ) -> str:
|
| 366 |
"""
|
| 367 |
Get YouTube transcript via API (no download). Returns plain text.
|
| 368 |
+
If request too many times, will be blocked by youtube and lead to Agent error.
|
| 369 |
"""
|
| 370 |
print('try to get youtube video transcript')
|
| 371 |
try:
|
|
|
|
| 718 |
state["use_browsing"] = bool(yt_urls) or needs_browsing(q)
|
| 719 |
return state
|
| 720 |
|
|
|
|
| 721 |
def route_browse(state: AgentState) -> Literal["browse","skip"]:
|
| 722 |
return "browse" if state.get("use_browsing") else "skip"
|
| 723 |
|
| 724 |
+
|
| 725 |
+
def _is_youtube(u: str) -> bool:
|
| 726 |
+
try:
|
| 727 |
+
net = urlparse(u).netloc.lower()
|
| 728 |
+
return ("youtube.com" in net) or ("youtu.be" in net)
|
| 729 |
+
except Exception:
|
| 730 |
+
return False
|
| 731 |
+
|
| 732 |
# ==== NEW: Search node ====
|
| 733 |
def search_node(state: AgentState) -> AgentState:
|
| 734 |
print("enter search_node")
|
|
|
|
| 743 |
if _has_search_key():
|
| 744 |
hits = web_search.invoke({"query": q, "k": 6}) or []
|
| 745 |
|
| 746 |
+
# Create a new list with non-YouTube links in the search results
|
| 747 |
+
if len(hits) > 0:
|
| 748 |
+
hits = [hit for hit in hits if not _is_youtube(hit["url"])]
|
| 749 |
+
|
| 750 |
# Optionally seed Wikipedia for short queries
|
| 751 |
if len(q.split()) <= 30: #8
|
| 752 |
wiki = wikipedia_lookup.invoke({"query": q, "sentences": 4})
|
|
|
|
| 763 |
state["web_hits"] = preseed + hits
|
| 764 |
return state
|
| 765 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 766 |
def crawl_node(state: AgentState) -> AgentState:
|
| 767 |
print("enter crawl_node")
|
| 768 |
ev = list(state.get("evidence", []))
|
|
|
|
| 798 |
# Special-case YouTube
|
| 799 |
if _is_youtube(u):
|
| 800 |
print("is_youtube? ", _is_youtube(u))
|
| 801 |
+
# cap = youtube_get_transcript.invoke({"url_or_id": u}) # blocked by youtube
|
| 802 |
+
cap = "[no captions available]"
|
| 803 |
print('caption: ', cap)
|
| 804 |
if cap and not cap.startswith("[no captions"):
|
| 805 |
ev.append({"kind":"doc_text","text":cap,"path":None,
|