Final_Assignment

Sleeping

App Files Community

BiGuan committed 27 days ago

Commit

951a5f7

verified ·

1 Parent(s): 72161cf

Update app.py

Browse files

Files changed (1) hide show

app.py +79 -18

app.py CHANGED Viewed

@@ -35,6 +35,7 @@ QWEN_MODEL = "qwen3.5-35b-a3b"
 # 进度监控器
 # =============================================================================
 class ProgressMonitor:
     def __init__(self):
         self.current = 0
         self.total = 0
@@ -81,6 +82,7 @@ class ProgressMonitor:
 # Qwen LLM 封装
 # =============================================================================
 class QwenLLM:
     def __init__(self, model=QWEN_MODEL):
         self.model = model
         self.api_key = AGICTO_API_KEY
@@ -185,7 +187,7 @@ class QwenLLM:
         return formatted
 # =============================================================================
-# 工具定义（所有工具均附带 description）
 # =============================================================================
 api_url_tasks = DEFAULT_API_URL
@@ -195,6 +197,7 @@ def _get_api_base():
         base = base[:-3]
     return base
 @tool(description="搜索互联网信息，返回相关摘要。")
 def web_search(query: str) -> str:
     try:
@@ -312,10 +315,54 @@ def download_file_for_task(task_id: str) -> str:
             os.unlink(temp_path)
             return result
         else:
             return resp.text[:4000]
     except Exception as e:
         return f"文件下载失败: {e}"
 # =============================================================================
 # LangGraph 状态与节点
 # =============================================================================
@@ -323,9 +370,20 @@ class AgentState(TypedDict):
     messages: Annotated[Sequence[BaseMessage], operator.add]
     final_answer: str
     task_id: str
-    tool_attempts: int   # 已执行工具调用次数
-tools = [web_search, web_scraper, calculator, analyze_image, transcribe_audio, get_youtube_transcript, download_file_for_task]
 tool_node = ToolNode(tools)
 llm = QwenLLM()
 functions = [convert_to_openai_function(t) for t in tools]
@@ -334,9 +392,13 @@ llm_with_tools = llm.bind_functions(functions)
 def agent_node(state: AgentState) -> dict:
     messages = state["messages"]
     task_id = state.get("task_id", "")
-    sys_prompt = f"""You are a helpful assistant answering GAIA Level 1 questions.
-IMPORTANT: You MUST use at least one tool (e.g., web_search, web_scraper, download_file_for_task) to verify or retrieve information, even if you think you already know the answer.
-When you have the final answer, output only the answer string, without any extra text or "FINAL ANSWER:".
 Current task ID: {task_id}. If the question requires a file, use download_file_for_task with task_id="{task_id}"."""
     full = [SystemMessage(content=sys_prompt)] + list(messages)
     response = llm_with_tools.invoke(full)
@@ -346,28 +408,28 @@ def should_continue(state: AgentState) -> str:
     messages = state["messages"]
     last = messages[-1]
     tool_attempts = state.get("tool_attempts", 0)
-    MAX_TOOL_CALLS = 5
-    # 超过最大调用次数，强制结束
     if tool_attempts >= MAX_TOOL_CALLS:
         return "finish"
-    # 如果 LLM 请求了工具调用，允许执行
     if hasattr(last, "additional_kwargs") and "function_call" in last.additional_kwargs:
         return "tools"
-    # 尚未调用过任何工具？强制要求使用工具
     tool_msg_count = sum(1 for m in messages if isinstance(m, ToolMessage))
     if tool_msg_count == 0:
         return "force_tool"
-    # 已经用过工具，可以结束
     return "finish"
 def force_tool_node(state: AgentState) -> dict:
     new_msg = HumanMessage(
-        content="You have not used any tools yet. Please use at least one tool to find or verify the answer. "
-                "Search the web, download a file, or analyze an image if provided."
     )
     return {"messages": [new_msg]}
@@ -381,17 +443,15 @@ def finish_node(state: AgentState) -> dict:
     if "FINAL ANSWER:" in answer:
         answer = answer.split("FINAL ANSWER:")[-1].strip()
-    # 若答案仍为空，尝试从历史消息中提取最后一条有内容的 AI 消息
     if not answer:
         for m in reversed(state["messages"]):
             if isinstance(m, AIMessage) and m.content.strip():
                 answer = m.content.strip().split("\n")[-1].strip()
                 break
-    # 依然无答案时，输出原因
     if not answer:
-        if state.get("tool_attempts", 0) >= 5:
-            answer = "Unable to determine answer: reached maximum tool calls without conclusion."
         else:
             answer = "Unable to determine answer: insufficient information."
@@ -521,7 +581,8 @@ with gr.Blocks(title="GAIA Agent") as demo:
     gr.Markdown("""
     # 🤖 GAIA Level 1 Agent (LangGraph + Qwen)
     **模型:** Qwen3.5-35B-A3B | **API:** agicto.com
-    点击按钮获取题目，Agent 自动调用工具并回答，最后提交评分。
     """)
     gr.LoginButton()
     run_btn = gr.Button("🚀 运行评测并提交", variant="primary")

 # 进度监控器
 # =============================================================================
 class ProgressMonitor:
+    # ... 保持不变 ...
     def __init__(self):
         self.current = 0
         self.total = 0
 # Qwen LLM 封装
 # =============================================================================
 class QwenLLM:
+    # ... 保持不变 ...
     def __init__(self, model=QWEN_MODEL):
         self.model = model
         self.api_key = AGICTO_API_KEY
         return formatted
 # =============================================================================
+# 工具定义
 # =============================================================================
 api_url_tasks = DEFAULT_API_URL
         base = base[:-3]
     return base
+# --- 原有工具 ---
 @tool(description="搜索互联网信息，返回相关摘要。")
 def web_search(query: str) -> str:
     try:
             os.unlink(temp_path)
             return result
         else:
+            # 对于文本文件（包括 .py, .txt 等），直接返回文本内容
             return resp.text[:4000]
     except Exception as e:
         return f"文件下载失败: {e}"
+# --- New: Wikipedia search tool ---
+@tool(description="在维基百科中搜索关键词，返回页面摘要或详细信息。")
+def search_wikipedia(query: str) -> str:
+    """
+    使用维基百科 API 搜索关键词。
+    首先尝试 opensearch 获取页面标题，然后用 extract 获取摘要。
+    """
+    try:
+        # Step 1: look up a matching page title with the opensearch action (limit 1)
+        search_url = "https://en.wikipedia.org/w/api.php"
+        params = {
+            "action": "opensearch",
+            "search": query,
+            "limit": 1,
+            "format": "json"
+        }
+        resp = requests.get(search_url, params=params, timeout=10)
+        data = resp.json()
+        titles = data[1]  # list of matching titles
+        if not titles:
+            return "维基百科未找到相关页面。"
+        title = titles[0]
+        # Step 2: fetch the plain-text intro extract for that title
+        extract_params = {
+            "action": "query",
+            "prop": "extracts",
+            "exintro": True,
+            "explaintext": True,
+            "titles": title,
+            "format": "json"
+        }
+        resp2 = requests.get(search_url, params=extract_params, timeout=10)
+        data2 = resp2.json()
+        pages = data2.get("query", {}).get("pages", {})
+        for page_id, page_info in pages.items():
+            extract = page_info.get("extract", "")
+            if extract:
+                # Return only the first 2000 characters so the result is not too long
+                return f"Wikipedia - {title}:\n{extract[:2000]}"
+        return f"维基百科页面 '{title}' 未提供摘要。"
+    except Exception as e:
+        return f"维基百科搜索失败: {e}"
 # =============================================================================
 # LangGraph 状态与节点
 # =============================================================================
     messages: Annotated[Sequence[BaseMessage], operator.add]
     final_answer: str
     task_id: str
+    tool_attempts: int
+# All tools (including the newly added search_wikipedia)
+tools = [
+    search_wikipedia,       # Wikipedia search, listed first as the preferred lookup
+    web_search,             # fallback web search
+    web_scraper,
+    calculator,
+    analyze_image,
+    transcribe_audio,
+    get_youtube_transcript,
+    download_file_for_task
+]
 tool_node = ToolNode(tools)
 llm = QwenLLM()
 functions = [convert_to_openai_function(t) for t in tools]
 def agent_node(state: AgentState) -> dict:
     messages = state["messages"]
     task_id = state.get("task_id", "")
+    # 更新系统提示，强调维基百科、文件处理和 YouTube 工具的使用
+    sys_prompt = f"""You are a helpful assistant answering GAIA Level 1 questions.
+IMPORTANT GUIDELINES:
+- For fact-based questions, first try to find the answer using the `search_wikipedia` tool. Only if Wikipedia fails, use `web_search` or other tools.
+- If the question provides a file (image, audio, or code), use `download_file_for_task` with the given task_id to retrieve it. The tool will automatically analyze images, transcribe audio, or return text for Python/text files.
+- For YouTube links, use `get_youtube_transcript` to obtain the captions.
+- When you have the final answer, output ONLY the answer string (a word, number, short phrase, or letter). Do NOT include any extra text, explanations, or "FINAL ANSWER:".
 Current task ID: {task_id}. If the question requires a file, use download_file_for_task with task_id="{task_id}"."""
     full = [SystemMessage(content=sys_prompt)] + list(messages)
     response = llm_with_tools.invoke(full)
     messages = state["messages"]
     last = messages[-1]
     tool_attempts = state.get("tool_attempts", 0)
+    MAX_TOOL_CALLS = 3   # 限制最多3次工具调用，避免循环
     if tool_attempts >= MAX_TOOL_CALLS:
         return "finish"
     if hasattr(last, "additional_kwargs") and "function_call" in last.additional_kwargs:
         return "tools"
     tool_msg_count = sum(1 for m in messages if isinstance(m, ToolMessage))
     if tool_msg_count == 0:
         return "force_tool"
+    # 如果 LLM 已经给出了一个简洁答案，结束
+    content = last.content
+    if "?" not in content and len(content.strip()) < 100:
+        return "finish"
     return "finish"
 def force_tool_node(state: AgentState) -> dict:
     new_msg = HumanMessage(
+        content="You haven't used any tool yet. Please use an appropriate tool (e.g., search_wikipedia, download_file_for_task) to find the answer."
     )
     return {"messages": [new_msg]}
     if "FINAL ANSWER:" in answer:
         answer = answer.split("FINAL ANSWER:")[-1].strip()
     if not answer:
         for m in reversed(state["messages"]):
             if isinstance(m, AIMessage) and m.content.strip():
                 answer = m.content.strip().split("\n")[-1].strip()
                 break
     if not answer:
+        if state.get("tool_attempts", 0) >= 3:
+            answer = "Unable to determine answer: max tool calls reached."
         else:
             answer = "Unable to determine answer: insufficient information."
     gr.Markdown("""
     # 🤖 GAIA Level 1 Agent (LangGraph + Qwen)
     **模型:** Qwen3.5-35B-A3B | **API:** agicto.com
+    点击按钮获取题目，Agent 自动调用工具并回答，最后提交评分。
+    **新增维基百科搜索、文件处理（图片/音频/代码）、YouTube 字幕提取。**
     """)
     gr.LoginButton()
     run_btn = gr.Button("🚀 运行评测并提交", variant="primary")