Final_Assignment

Sleeping

App Files Files Community

BiGuan committed 29 days ago

Commit

055e7ec

verified ·

1 Parent(s): 81917a3

Update app.py

Browse files

Files changed (1) hide show

app.py +486 -158

app.py CHANGED Viewed

@@ -1,196 +1,524 @@
 import os
 import gradio as gr
 import requests
-import inspect
-import pandas as pd
-# (Keep Constants as is)
-# --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# --- Basic Agent Definition ---
-# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
-class BasicAgent:
     def __init__(self):
-        print("BasicAgent initialized.")
-    def __call__(self, question: str) -> str:
-        print(f"Agent received question (first 50 chars): {question[:50]}...")
-        fixed_answer = "This is a default answer."
-        print(f"Agent returning fixed answer: {fixed_answer}")
-        return fixed_answer
-def run_and_submit_all( profile: gr.OAuthProfile | None):
-    """
-    Fetches all questions, runs the BasicAgent on them, submits all answers,
-    and displays the results.
-    """
-    # --- Determine HF Space Runtime URL and Repo URL ---
-    space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
-    if profile:
-        username= f"{profile.username}"
-        print(f"User logged in: {username}")
-    else:
-        print("User not logged in.")
-        return "Please Login to Hugging Face with the button.", None
-    api_url = DEFAULT_API_URL
-    questions_url = f"{api_url}/questions"
-    submit_url = f"{api_url}/submit"
-    # 1. Instantiate Agent ( modify this part to create your agent)
     try:
-        agent = BasicAgent()
     except Exception as e:
-        print(f"Error instantiating agent: {e}")
-        return f"Error initializing agent: {e}", None
-    # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-    print(agent_code)
-    # 2. Fetch Questions
-    print(f"Fetching questions from: {questions_url}")
     try:
-        response = requests.get(questions_url, timeout=15)
-        response.raise_for_status()
-        questions_data = response.json()
-        if not questions_data:
-             print("Fetched questions list is empty.")
-             return "Fetched questions list is empty or invalid format.", None
-        print(f"Fetched {len(questions_data)} questions.")
-    except requests.exceptions.RequestException as e:
-        print(f"Error fetching questions: {e}")
-        return f"Error fetching questions: {e}", None
-    except requests.exceptions.JSONDecodeError as e:
-         print(f"Error decoding JSON response from questions endpoint: {e}")
-         print(f"Response text: {response.text[:500]}")
-         return f"Error decoding server response for questions: {e}", None
     except Exception as e:
-        print(f"An unexpected error occurred fetching questions: {e}")
-        return f"An unexpected error occurred fetching questions: {e}", None
-    # 3. Run your Agent
     results_log = []
     answers_payload = []
-    print(f"Running agent on {len(questions_data)} questions...")
-    for item in questions_data:
         task_id = item.get("task_id")
-        question_text = item.get("question")
-        if not task_id or question_text is None:
-            print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
-            submitted_answer = agent(question_text)
-            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
-            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
-             print(f"Error running agent on task {task_id}: {e}")
-             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
-    if not answers_payload:
-        print("Agent did not produce any answers to submit.")
-        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
-    # 4. Prepare Submission
-    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
-    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
-    print(status_update)
-    # 5. Submit
-    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
-        response = requests.post(submit_url, json=submission_data, timeout=60)
-        response.raise_for_status()
-        result_data = response.json()
         final_status = (
-            f"Submission Successful!\n"
-            f"User: {result_data.get('username')}\n"
-            f"Overall Score: {result_data.get('score', 'N/A')}% "
-            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
-            f"Message: {result_data.get('message', 'No message received.')}"
         )
-        print("Submission successful.")
-        results_df = pd.DataFrame(results_log)
-        return final_status, results_df
-    except requests.exceptions.HTTPError as e:
-        error_detail = f"Server responded with status {e.response.status_code}."
-        try:
-            error_json = e.response.json()
-            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
-        except requests.exceptions.JSONDecodeError:
-            error_detail += f" Response: {e.response.text[:500]}"
-        status_message = f"Submission Failed: {error_detail}"
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
-    except requests.exceptions.Timeout:
-        status_message = "Submission Failed: The request timed out."
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
-    except requests.exceptions.RequestException as e:
-        status_message = f"Submission Failed: Network error - {e}"
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
     except Exception as e:
-        status_message = f"An unexpected error occurred during submission: {e}"
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
-# --- Build Gradio Interface using Blocks ---
-with gr.Blocks() as demo:
-    gr.Markdown("# Basic Agent Evaluation Runner")
-    gr.Markdown(
-        """
-        **Instructions:**
-        1.  Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
-        2.  Log in to your Hugging Face account using the button below. This uses your HF username for submission.
-        3.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
-        ---
-        **Disclaimers:**
-        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
-        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
-        """
-    )
     gr.LoginButton()
-    run_button = gr.Button("Run Evaluation & Submit All Answers")
-    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
-    # Removed max_rows=10 from DataFrame constructor
-    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
-    run_button.click(
         fn=run_and_submit_all,
-        outputs=[status_output, results_table]
     )
 if __name__ == "__main__":
-    print("\n" + "-"*30 + " App Starting " + "-"*30)
-    # Check for SPACE_HOST and SPACE_ID at startup for information
-    space_host_startup = os.getenv("SPACE_HOST")
-    space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
-    if space_host_startup:
-        print(f"✅ SPACE_HOST found: {space_host_startup}")
-        print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
-    else:
-        print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
-    if space_id_startup: # Print repo URLs if SPACE_ID is found
-        print(f"✅ SPACE_ID found: {space_id_startup}")
-        print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
-        print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
-    else:
-        print("ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
-    print("-"*(60 + len(" App Starting ")) + "\n")
-    print("Launching Gradio Interface for Basic Agent Evaluation...")
-    demo.launch(debug=True, share=False)

 import os
+import base64
+import html
+import io
+import json
+import operator
+import re
+import tempfile
+import threading
+import time
+from datetime import datetime
+from typing import TypedDict, Annotated, Sequence, List, Dict, Any, Generator
+from urllib.parse import parse_qs, urlparse
+# Third-party
 import gradio as gr
+import pandas as pd
 import requests
+from bs4 import BeautifulSoup
+from youtube_transcript_api import YouTubeTranscriptApi
+# LangChain / LangGraph
+from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, ToolMessage, SystemMessage
+from langchain_core.tools import tool
+from langchain_core.utils.function_calling import convert_to_openai_function
+from langgraph.graph import StateGraph, END
+from langgraph.prebuilt import ToolExecutor
+# =============================================================================
+# 配置常量
+# =============================================================================
+# Base URL of the scoring service; the code appends /questions, /submit and /files/<task_id>.
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+# Prefix for the model endpoints (/v1/chat/completions, /v1/audio/transcriptions);
+# overridable through the AGICTO_BASE_URL environment variable.
+AGICTO_BASE_URL = os.getenv("AGICTO_BASE_URL", "https://agicto.com/model")
+# Sent as a Bearer token on every model request; empty when the variable is unset.
+AGICTO_API_KEY = os.getenv("AGICTO_API_KEY", "")
+# Value of the "model" field in chat-completion requests.
+QWEN_MODEL = "qwen3.5-35b-a3b"
+# =============================================================================
+# 进度监控器（仅用于 UI，不参与评分）
+# =============================================================================
+class ProgressMonitor:
+    """Thread-safe progress tracker whose state is rendered as an HTML snippet.
+    It only feeds the UI. Question, answer and log text is HTML-escaped on output
+    because it comes from the scoring API and the model, not from trusted markup.
+    """
     def __init__(self):
+        self.current = 0  # questions finished so far
+        self.total = 0  # questions in the current run
+        self.last_question = ""
+        self.last_answer = ""
+        self.logs = []  # one line per finished question
+        self._lock = threading.Lock()
+    def start(self, total: int):
+        """Begin a run of ``total`` questions, resetting the counter and the log."""
+        with self._lock:
+            self.total = total
+            self.current = 0
+            self.logs = []
+    def step(self, question: str, answer: str):
+        """Record one finished question together with its answer."""
+        with self._lock:
+            self.current += 1
+            self.last_question = question
+            self.last_answer = answer
+            self.logs.append(f"✅ 第 {self.current}/{self.total} 题完成：{answer[:50]}...")
+    def get_html(self) -> str:
+        """Return the progress bar, the latest question/answer and the log as HTML."""
+        with self._lock:
+            pct = int(self.current / self.total * 100) if self.total > 0 else 0
+            # Escape all free text before interpolating it into the markup so that
+            # characters such as "<" cannot break the layout or inject elements.
+            question = html.escape(self.last_question[:100])
+            ellipsis = "..." if len(self.last_question) > 100 else ""
+            answer = html.escape(self.last_answer)
+            log_text = html.escape("\n".join(self.logs))
+            return f"""
+            <div style="border:1px solid #ddd; padding:10px; border-radius:8px; background:#fafafa;">
+                <h3>📊 实时进度</h3>
+                <div style="background:#eee; height:20px; border-radius:10px; margin-bottom:10px;">
+                    <div style="width:{pct}%; background:#4CAF50; height:100%; border-radius:10px; text-align:center; color:white; font-size:12px; line-height:20px;">
+                        {pct}% ({self.current}/{self.total})
+                    </div>
+                </div>
+                <p><b>最新题目：</b> {question}{ellipsis}</p>
+                <p><b>答案：</b> <span style="color:#2e7d32;">{answer}</span></p>
+                <details>
+                    <summary>详细日志</summary>
+                    <pre style="background:#f5f5f5; padding:10px; border-radius:4px; max-height:200px; overflow:auto;">{log_text}</pre>
+                </details>
+            </div>
+            """
+# =============================================================================
+# Qwen LLM 封装（通过 agicto.com API）
+# =============================================================================
+class QwenLLM:
+    """Small chat client for the OpenAI-style endpoint at AGICTO_BASE_URL.
+    It converts LangChain messages to the wire format, posts them to
+    /v1/chat/completions and turns the reply back into an AIMessage. When the
+    model requests tools, only the first tool call is forwarded, stored under
+    ``additional_kwargs["function_call"]``.
+    """
+    def __init__(self, model=QWEN_MODEL):
+        self.model = model
+        self.api_key = AGICTO_API_KEY
+        self.base_url = AGICTO_BASE_URL
+        if not self.api_key:
+            print("⚠️ 未设置 AGICTO_API_KEY，请检查环境变量")
+    def _call_api(self, messages: list, functions: list = None, max_tokens=2000):
+        """POST one chat-completion request; return the decoded JSON or None on failure."""
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {self.api_key}"
+        }
+        body = {
+            "model": self.model,
+            "messages": messages,
+            "temperature": 0.0,
+            "max_tokens": max_tokens
+        }
+        if functions:
+            body["tools"] = [{"type": "function", "function": f} for f in functions]
+            body["tool_choice"] = "auto"
+        try:
+            resp = requests.post(f"{self.base_url}/v1/chat/completions", headers=headers, json=body, timeout=60)
+            resp.raise_for_status()
+            return resp.json()
+        except Exception as e:
+            # Best effort by design: callers translate None into a failure message.
+            print(f"API 调用失败: {e}")
+            return None
+    @staticmethod
+    def _to_ai_message(result) -> AIMessage:
+        """Convert a chat-completion payload (or None) into an AIMessage."""
+        if not result:
+            return AIMessage(content="模型调用失败")
+        try:
+            msg = result["choices"][0]["message"]
+        except (KeyError, IndexError, TypeError):
+            # A 200 response without the expected shape is treated like a failed call.
+            return AIMessage(content="模型调用失败")
+        # "content" is commonly null on tool-call replies; AIMessage needs a string.
+        content = msg.get("content") or ""
+        tool_calls = msg.get("tool_calls")
+        if tool_calls:
+            call = tool_calls[0]["function"]
+            return AIMessage(
+                content=content,
+                additional_kwargs={
+                    "function_call": {
+                        "name": call["name"],
+                        "arguments": call["arguments"]
+                    }
+                }
+            )
+        return AIMessage(content=content)
+    def invoke(self, messages: list) -> AIMessage:
+        """Send ``messages`` without tool definitions and return the model reply."""
+        return self._to_ai_message(self._call_api(self._format_messages(messages)))
+    def bind_functions(self, functions: list):
+        """Return an object whose ``invoke`` always sends ``functions`` as tools."""
+        # Lightweight stand-in for LangChain's bind behaviour.
+        class BoundLLM:
+            def __init__(self, llm, funcs):
+                self.llm = llm
+                self.functions = funcs
+            def invoke(self, messages: list) -> AIMessage:
+                formatted = self.llm._format_messages(messages)
+                return self.llm._to_ai_message(self.llm._call_api(formatted, functions=self.functions))
+        return BoundLLM(self, functions)
+    def _format_messages(self, messages: list) -> list:
+        """Translate LangChain messages into chat-completion message dicts."""
+        formatted = []
+        for m in messages:
+            if isinstance(m, SystemMessage):
+                formatted.append({"role": "system", "content": m.content})
+            elif isinstance(m, HumanMessage):
+                formatted.append({"role": "user", "content": m.content})
+            elif isinstance(m, AIMessage):
+                entry = {"role": "assistant", "content": m.content}
+                if hasattr(m, "additional_kwargs") and "function_call" in m.additional_kwargs:
+                    # A fixed id is enough because only one tool call is kept per turn.
+                    entry["tool_calls"] = [{
+                        "id": "call_1",
+                        "type": "function",
+                        "function": m.additional_kwargs["function_call"]
+                    }]
+                formatted.append(entry)
+            elif isinstance(m, ToolMessage):
+                formatted.append({
+                    "role": "tool",
+                    "tool_call_id": m.tool_call_id if hasattr(m, "tool_call_id") else "call_1",
+                    "content": m.content
+                })
+        return formatted
+# =============================================================================
+# 工具定义
+# =============================================================================
+api_url_tasks = DEFAULT_API_URL  # base URL used for task file downloads
+@tool
+def web_search(query: str) -> str:
+    """搜索互联网信息"""
+    # The docstring above is the tool description handed to the model, so it is
+    # kept verbatim. Queries the DuckDuckGo JSON endpoint and returns the
+    # abstract plus up to three related-topic texts, one per line.
     try:
+        payload = requests.get(
+            "https://api.duckduckgo.com/",
+            params={"q": query, "format": "json", "no_html": 1},
+            timeout=10,
+        ).json()
+        abstract = payload.get("AbstractText")
+        snippets = [f"摘要: {abstract}"] if abstract else []
+        snippets.extend(
+            entry["Text"]
+            for entry in payload.get("RelatedTopics", [])[:3]
+            if isinstance(entry, dict) and "Text" in entry
+        )
+        if not snippets:
+            return "未找到相关信息"
+        return "\n".join(snippets)
     except Exception as e:
+        return f"搜索失败: {e}"
+@tool
+def web_scraper(url: str) -> str:
+    """抓取网页文本内容"""
+    # The docstring above is the tool description handed to the model, so it is
+    # kept verbatim. Fetches the page, drops script/style/nav/footer elements and
+    # returns the remaining text joined by spaces, capped at 5000 characters.
+    try:
+        page = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=15)
+        dom = BeautifulSoup(page.text, "html.parser")
+        for node in dom(["script", "style", "nav", "footer"]):
+            node.decompose()
+        stripped = (raw.strip() for raw in dom.get_text().splitlines())
+        flattened = " ".join(piece for piece in stripped if piece)
+        return flattened[:5000]
+    except Exception as e:
+        return f"抓取失败: {e}"
+@tool
+def calculator(expression: str) -> str:
+    """计算数学表达式"""
+    # The docstring above is the tool description handed to the model, so it is
+    # kept verbatim.
+    # SECURITY: the expression is written by the model, which also reads scraped
+    # web pages. eval() with an empty __builtins__ is not a sandbox (object
+    # introspection can still reach arbitrary classes), so the expression is
+    # parsed and only arithmetic, numeric literals and names from the math
+    # module are evaluated. Anything else is reported as a calculation failure.
+    import ast
+    import math
+    import operator as op
+    names = {k: v for k, v in vars(math).items() if not k.startswith("__")}
+    binary_ops = {
+        ast.Add: op.add, ast.Sub: op.sub, ast.Mult: op.mul, ast.Div: op.truediv,
+        ast.FloorDiv: op.floordiv, ast.Mod: op.mod, ast.Pow: op.pow,
+    }
+    unary_ops = {ast.UAdd: op.pos, ast.USub: op.neg}
+    def evaluate(node):
+        """Recursively evaluate one whitelisted AST node."""
+        if isinstance(node, ast.Expression):
+            return evaluate(node.body)
+        if isinstance(node, ast.Constant):
+            # bool is a subclass of int; reject it together with strings etc.
+            if isinstance(node.value, (int, float)) and not isinstance(node.value, bool):
+                return node.value
+        elif isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
+            return binary_ops[type(node.op)](evaluate(node.left), evaluate(node.right))
+        elif isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
+            return unary_ops[type(node.op)](evaluate(node.operand))
+        elif isinstance(node, ast.Name) and node.id in names and not callable(names[node.id]):
+            return names[node.id]  # constants such as pi, e, tau, inf
+        elif isinstance(node, ast.Call) and isinstance(node.func, ast.Name) and not node.keywords:
+            func = names.get(node.func.id)
+            if callable(func):
+                return func(*[evaluate(arg) for arg in node.args])
+        elif isinstance(node, (ast.Tuple, ast.List)):
+            # Sequences are allowed as arguments, e.g. fsum([0.1, 0.2]).
+            return [evaluate(element) for element in node.elts]
+        raise ValueError(f"unsupported expression element: {type(node).__name__}")
+    try:
+        # strip() mirrors eval(), which ignores leading whitespace.
+        return str(evaluate(ast.parse(expression.strip(), mode="eval")))
+    except Exception as e:
+        return f"计算失败: {e}"
+@tool
+def analyze_image(image_data: str) -> str:
+    """分析图片内容（URL 或 base64）"""
+    # The docstring above is the tool description handed to the model, so it is
+    # kept verbatim. Input that does not start with "http" is treated as base64
+    # and wrapped in a JPEG data URL before being sent to the chat endpoint.
+    try:
+        if image_data.startswith("http"):
+            image_ref = image_data
+        else:
+            image_ref = f"data:image/jpeg;base64,{image_data}"
+        prompt_parts = [
+            {"type": "text", "text": "请详细描述这张图片的内容，包括文字、数字等信息。"},
+            {"type": "image_url", "image_url": {"url": image_ref}},
+        ]
+        reply = requests.post(
+            f"{AGICTO_BASE_URL}/v1/chat/completions",
+            headers={"Authorization": f"Bearer {AGICTO_API_KEY}", "Content-Type": "application/json"},
+            json={
+                "model": QWEN_MODEL,
+                "messages": [{"role": "user", "content": prompt_parts}],
+                "max_tokens": 800,
+            },
+            timeout=30,
+        )
+        if reply.status_code != 200:
+            return f"图片分析失败: {reply.status_code}"
+        return reply.json()["choices"][0]["message"]["content"]
+    except Exception as e:
+        return f"图片分析失败: {e}"
+@tool
+def transcribe_audio(audio_path: str) -> str:
+    """转录音频文件（路径或 URL）"""
+    # The docstring above is the tool description handed to the model, so it is
+    # kept verbatim. Accepts an http(s) URL or a local path, uploads the audio to
+    # the /v1/audio/transcriptions endpoint and returns the "text" field.
+    try:
+        headers = {"Authorization": f"Bearer {AGICTO_API_KEY}"}
+        if audio_path.startswith("http"):
+            download = requests.get(audio_path, timeout=30)
+            # Do not forward an error page to the transcription service as audio.
+            download.raise_for_status()
+            filename, payload = "audio.mp3", download.content
+        else:
+            # Read inside a context manager so the file handle is always released.
+            with open(audio_path, "rb") as audio_file:
+                payload = audio_file.read()
+            filename = os.path.basename(audio_path)
+        files = {"file": (filename, payload), "model": (None, "whisper-1")}
+        resp = requests.post(f"{AGICTO_BASE_URL}/v1/audio/transcriptions", headers=headers, files=files, timeout=60)
+        if resp.status_code == 200:
+            return resp.json()["text"]
+        return f"转录失败: {resp.status_code}"
+    except Exception as e:
+        return f"转录失败: {e}"
+def _youtube_video_id(video_url: str) -> str:
+    """Return the video ID contained in a YouTube URL, or "" when none is found."""
+    url = video_url.strip()
+    # urlparse only recognises the host when a scheme is present.
+    parsed = urlparse(url if "://" in url else f"https://{url}")
+    host = (parsed.hostname or "").lower()
+    segments = [part for part in parsed.path.split("/") if part]
+    if host == "youtu.be" or host.endswith(".youtu.be"):
+        return segments[0] if segments else ""
+    # Works wherever "v" sits in the query string, not only directly after "watch?".
+    from_query = parse_qs(parsed.query).get("v")
+    if from_query:
+        return from_query[0]
+    if len(segments) >= 2 and segments[0] in ("shorts", "embed", "live", "v"):
+        return segments[1]
+    # Fallback: the substring rules the tool used before, for text that is not a clean URL.
+    if "watch?v=" in url:
+        return url.split("v=")[1].split("&")[0]
+    if "youtu.be/" in url:
+        return url.split("youtu.be/")[1].split("?")[0]
+    return ""
+@tool
+def get_youtube_transcript(video_url: str) -> str:
+    """获取 YouTube 视频字幕"""
+    # The docstring above is the tool description handed to the model, so it is
+    # kept verbatim. Returns the English or Chinese transcript joined into one
+    # string, capped at 4000 characters.
+    try:
+        vid = _youtube_video_id(video_url)
+        if not vid:
+            return "无法提取视频 ID"
+        # NOTE(review): newer youtube-transcript-api releases appear to replace the
+        # static get_transcript() with an instance fetch() - confirm the pinned version.
+        transcript = YouTubeTranscriptApi.get_transcript(vid, languages=['en', 'zh'])
+        return " ".join([t['text'] for t in transcript])[:4000]
+    except Exception as e:
+        return f"获取字幕失败: {e}"
+@tool
+def download_file_for_task(task_id: str) -> str:
+    """下载 GAIA 任务关联的文件（图片、音频等）并返回内容或描述"""
+    # The docstring above is the tool description handed to the model, so it is
+    # kept verbatim. Downloads /files/<task_id>; images go to analyze_image, audio
+    # goes to transcribe_audio, anything else is returned as text (4000 chars max).
+    try:
+        resp = requests.get(f"{api_url_tasks}/files/{task_id}", timeout=20)
+        if resp.status_code != 200:
+            return f"文件不存在 (HTTP {resp.status_code})"
+        content_type = resp.headers.get("content-type", "")
+        if "image" in content_type:
+            b64 = base64.b64encode(resp.content).decode()
+            # analyze_image is a tool object, so it is run through invoke() with
+            # named arguments rather than called like a plain function.
+            return analyze_image.invoke({"image_data": b64})
+        if "audio" in content_type:
+            with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
+                f.write(resp.content)
+                temp_path = f.name
+            try:
+                return transcribe_audio.invoke({"audio_path": temp_path})
+            finally:
+                # Remove the temporary file even when transcription raises.
+                os.unlink(temp_path)
+        return resp.text[:4000]
+    except Exception as e:
+        return f"文件下载失败: {e}"
+# =============================================================================
+# LangGraph Agent 状态与图构建
+# =============================================================================
+class AgentState(TypedDict):
+    # State dictionary passed between the graph nodes.
+    # Conversation so far; annotated with operator.add so that a list returned by
+    # a node is concatenated onto the existing history.
+    messages: Annotated[Sequence[BaseMessage], operator.add]
+    # Label each node writes for the following step ("decide", "agent", "end").
+    next_step: str
+    # Answer text extracted by finish_node; empty until then.
+    final_answer: str
+    task_id: str  # ID of the current task, made available to the tools
+# Every tool the model may call.
+tools = [web_search, web_scraper, calculator, analyze_image, transcribe_audio, get_youtube_transcript, download_file_for_task]
+tool_executor = ToolExecutor(tools)
+# Module-level client; constructing it prints a warning when no API key is set.
+llm = QwenLLM()
+# Function schemas derived from the tools, sent along with every agent request.
+functions = [convert_to_openai_function(t) for t in tools]
+llm_with_tools = llm.bind_functions(functions)
+def agent_node(state: AgentState) -> AgentState:
+    """Ask the tool-enabled model for the next message.
+    A system prompt carrying the current task ID is placed in front of the
+    conversation; the reply is returned as a one-element ``messages`` list.
+    """
+    history = state["messages"]
+    task_id = state.get("task_id", "")
+    system_message = SystemMessage(content=f"""You are a helpful assistant answering GAIA Level 1 questions. Use tools if needed.
+When you know the answer, output only the answer string, without any extra text or "FINAL ANSWER:".
+Current task ID: {task_id}. If you need the file for this task, use download_file_for_task with task_id="{task_id}".""")
+    reply = llm_with_tools.invoke([system_message, *history])
+    return dict(
+        messages=[reply],
+        next_step="decide",
+        final_answer=state.get("final_answer", ""),
+        task_id=task_id,
+    )
+def decide_node(state: AgentState) -> str:
+    """Choose the edge to follow after the agent node.
+    Returns "use_tool" when the latest message requests a tool call and "finish"
+    otherwise. Once the conversation holds more than 12 messages the run is
+    finished regardless, so the agent/tool loop cannot continue indefinitely.
+    """
+    max_messages = 12
+    messages = state["messages"]
+    # The cap is checked first; checked after the tool test it could never stop
+    # a model that keeps requesting tools.
+    if len(messages) > max_messages:
+        return "finish"
+    last = messages[-1]
+    if "function_call" in getattr(last, "additional_kwargs", {}):
+        return "use_tool"
+    return "finish"
+def tool_node(state: AgentState) -> AgentState:
+    """Run the tool requested by the latest message and return its output.
+    The output, or a description of the failure, is wrapped in a ToolMessage so
+    the model sees it on the next turn. Failures are reported to the model
+    instead of being raised, which would abort the whole question.
+    """
+    func_call = state["messages"][-1].additional_kwargs["function_call"]
+    name = func_call["name"]
+    raw_args = func_call.get("arguments") or "{}"
+    try:
+        args = json.loads(raw_args) if isinstance(raw_args, str) else dict(raw_args)
+        if not isinstance(args, dict):
+            raise ValueError("tool arguments must be a JSON object")
+        # Supply the current task ID when the model left it out.
+        if name == "download_file_for_task" and "task_id" in state:
+            args.setdefault("task_id", state["task_id"])
+        # Look the tool up by name and run it directly; each tool object exposes
+        # invoke(), which takes the argument dict.
+        registry = {t.name: t for t in tools}
+        if name in registry:
+            result = registry[name].invoke(args)
+        else:
+            result = f"Unknown tool: {name}"
+    except Exception as e:
+        result = f"Tool call failed: {e}"
+    tool_msg = ToolMessage(content=str(result), tool_call_id="call_1")
+    return {
+        "messages": [tool_msg],
+        "next_step": "agent",
+        "final_answer": state.get("final_answer", ""),
+        "task_id": state.get("task_id", "")
+    }
+def finish_node(state: AgentState) -> AgentState:
+    """Extract the final answer from the latest message.
+    The answer is the last line of the message text, with anything up to and
+    including a "FINAL ANSWER:" marker removed.
+    """
+    # Content can be missing on a tool-call message; treat that as empty text.
+    content = state["messages"][-1].content or ""
+    answer = content.strip().split("\n")[-1].strip()
+    if "FINAL ANSWER:" in answer:
+        answer = answer.split("FINAL ANSWER:")[-1].strip()
+    return {
+        # Nothing new to add: "messages" is merged with operator.add, so returning
+        # the existing list here would append a second copy of the whole history.
+        "messages": [],
+        "next_step": "end",
+        "final_answer": answer,
+        "task_id": state.get("task_id", "")
+    }
+def build_graph():
+    """Assemble and compile the agent graph.
+    Flow: "agent" is the entry point; decide_node routes it to "tools" or
+    "finish"; "tools" loops back to "agent"; "finish" leads to END.
+    """
+    builder = StateGraph(AgentState)
+    node_table = (("agent", agent_node), ("tools", tool_node), ("finish", finish_node))
+    for node_name, handler in node_table:
+        builder.add_node(node_name, handler)
+    builder.set_entry_point("agent")
+    routes = {"use_tool": "tools", "finish": "finish"}
+    builder.add_conditional_edges("agent", decide_node, routes)
+    builder.add_edge("tools", "agent")
+    builder.add_edge("finish", END)
+    return builder.compile()
+# =============================================================================
+# 真正的 Agent 类（替换 BasicAgent）
+# =============================================================================
+class LangGraphAgent:
+    """Callable wrapper that answers one question by running the compiled graph."""
+    def __init__(self):
+        self.graph = build_graph()
+        print("LangGraphAgent 初始化完成，使用模型:", QWEN_MODEL)
+    def __call__(self, question: str, task_id: str = "") -> str:
+        """Return the graph's final answer, or "Error: ..." when the run raises."""
+        initial_state = dict(
+            messages=[HumanMessage(content=question)],
+            next_step="agent",
+            final_answer="",
+            task_id=task_id,
+        )
+        try:
+            outcome = self.graph.invoke(initial_state)
+            return outcome["final_answer"]
+        except Exception as e:
+            print(f"Agent 运行失败: {e}")
+            return f"Error: {e}"
+# =============================================================================
+# 主运行函数（改为生成器以支持实时进度）
+# =============================================================================
+def run_and_submit_all(profile: gr.OAuthProfile | None) -> Generator:
+    """Fetch the questions, answer each one with the agent and submit the answers.
+    This is a generator: every yield is a ``(progress_html, status_text,
+    results_dataframe)`` tuple, emitted once before the first question, once
+    after each answer and once after submission.
+    """
+    space_id = os.getenv("SPACE_ID")
+    if not profile:
+        yield "<div>请先登录</div>", "", pd.DataFrame()
+        return
+    username = profile.username
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
+    api_url = DEFAULT_API_URL
+    # Create the agent and the progress monitor.
     try:
+        agent = LangGraphAgent()
+        monitor = ProgressMonitor()
     except Exception as e:
+        # Exception text goes into markup, so it is escaped for the HTML output only.
+        yield f"<div>Agent 初始化失败: {html.escape(str(e))}</div>", f"Agent 初始化失败: {e}", pd.DataFrame()
+        return
+    # Fetch the questions.
+    try:
+        resp = requests.get(f"{api_url}/questions", timeout=15)
+        resp.raise_for_status()
+        questions = resp.json()
+        if not questions:
+            yield "<div>没有题目</div>", "没有题目", pd.DataFrame()
+            return
+    except Exception as e:
+        yield f"<div>获取题目失败: {html.escape(str(e))}</div>", f"获取题目失败: {e}", pd.DataFrame()
+        return
+    monitor.start(len(questions))
     results_log = []
     answers_payload = []
+    # First progress update: nothing answered yet.
+    yield monitor.get_html(), "", pd.DataFrame()
+    for item in questions:
         task_id = item.get("task_id")
+        question = item.get("question", "")
+        if not task_id or not question:
             continue
         try:
+            answer = agent(question, task_id=task_id)
         except Exception as e:
+            answer = f"ERROR: {e}"
+        answers_payload.append({"task_id": task_id, "submitted_answer": answer})
+        results_log.append({"Task ID": task_id, "Question": question, "Submitted Answer": answer})
+        monitor.step(question, answer)
+        # Push the progress and the table so far after every question.
+        yield monitor.get_html(), "", pd.DataFrame(results_log)
+    # Submit.
+    if not answers_payload:
+        yield monitor.get_html(), "没有答案可提交", pd.DataFrame(results_log)
+        return
+    submission = {
+        "username": username.strip(),
+        "agent_code": agent_code,
+        "answers": answers_payload
+    }
     try:
+        resp = requests.post(f"{api_url}/submit", json=submission, timeout=60)
+        resp.raise_for_status()
+        result = resp.json()
         final_status = (
+            f"✅ 提交成功！\n"
+            f"用户：{username}\n"
+            f"总分：{result.get('score', 'N/A')}% "
+            f"({result.get('correct_count', 0)}/{result.get('total_attempted', 0)} 正确)\n"
+            f"消息：{result.get('message', '')}"
         )
     except Exception as e:
+        final_status = f"提交失败: {e}"
+    # Final update: progress, score summary and the full table.
+    yield monitor.get_html(), final_status, pd.DataFrame(results_log)
+# =============================================================================
+# Gradio 界面
+# =============================================================================
+# UI definition; components are created in the order they are declared here.
+with gr.Blocks(title="GAIA Agent") as demo:
+    gr.Markdown("""
+    # 🤖 GAIA Level 1 Agent (LangGraph + Qwen)
+    **模型:** Qwen3.5-35B-A3B | **API:** agicto.com
+    点击按钮获取题目，Agent 自动调用工具并回答，最后提交评分。
+    """)
     gr.LoginButton()
+    run_btn = gr.Button("🚀 运行评测并提交", variant="primary")
+    progress_html = gr.HTML(label="实时进度")
+    status_output = gr.Textbox(label="提交结果 / 总分", lines=5, interactive=False)
+    results_table = gr.DataFrame(label="题目与 Agent 答案", wrap=True)
+    # run_and_submit_all yields 3-tuples; each element maps onto one output below,
+    # in this order.
+    run_btn.click(
         fn=run_and_submit_all,
+        outputs=[progress_html, status_output, results_table]
     )
 if __name__ == "__main__":
+    # Script entry point: enable the request queue, then serve on all interfaces
+    # at port 7860.
+    print("启动 Gradio App...")
+    queued_app = demo.queue()
+    queued_app.launch(server_name="0.0.0.0", server_port=7860)