BiGuan commited on
Commit
055e7ec
·
verified ·
1 Parent(s): 81917a3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +486 -158
app.py CHANGED
@@ -1,196 +1,524 @@
1
  import os
2
  import gradio as gr
3
  import requests
4
- import inspect
5
- import pandas as pd
 
 
 
 
 
 
 
 
6
 
7
- # (Keep Constants as is)
8
- # --- Constants ---
 
 
 
 
 
 
 
 
 
 
 
 
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
 
 
10
 
11
- # --- Basic Agent Definition ---
12
- # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
13
- class BasicAgent:
 
14
  def __init__(self):
15
- print("BasicAgent initialized.")
16
- def __call__(self, question: str) -> str:
17
- print(f"Agent received question (first 50 chars): {question[:50]}...")
18
- fixed_answer = "This is a default answer."
19
- print(f"Agent returning fixed answer: {fixed_answer}")
20
- return fixed_answer
21
-
22
- def run_and_submit_all( profile: gr.OAuthProfile | None):
23
- """
24
- Fetches all questions, runs the BasicAgent on them, submits all answers,
25
- and displays the results.
26
- """
27
- # --- Determine HF Space Runtime URL and Repo URL ---
28
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
29
-
30
- if profile:
31
- username= f"{profile.username}"
32
- print(f"User logged in: {username}")
33
- else:
34
- print("User not logged in.")
35
- return "Please Login to Hugging Face with the button.", None
36
 
37
- api_url = DEFAULT_API_URL
38
- questions_url = f"{api_url}/questions"
39
- submit_url = f"{api_url}/submit"
 
 
 
 
 
 
 
 
 
40
 
41
- # 1. Instantiate Agent ( modify this part to create your agent)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  try:
43
- agent = BasicAgent()
 
 
 
 
 
 
 
 
 
 
44
  except Exception as e:
45
- print(f"Error instantiating agent: {e}")
46
- return f"Error initializing agent: {e}", None
47
- # In the case of an app running as a Hugging Face Space, this link points toward your codebase (useful for others, so please keep it public)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
49
- print(agent_code)
50
 
51
- # 2. Fetch Questions
52
- print(f"Fetching questions from: {questions_url}")
53
  try:
54
- response = requests.get(questions_url, timeout=15)
55
- response.raise_for_status()
56
- questions_data = response.json()
57
- if not questions_data:
58
- print("Fetched questions list is empty.")
59
- return "Fetched questions list is empty or invalid format.", None
60
- print(f"Fetched {len(questions_data)} questions.")
61
- except requests.exceptions.RequestException as e:
62
- print(f"Error fetching questions: {e}")
63
- return f"Error fetching questions: {e}", None
64
- except requests.exceptions.JSONDecodeError as e:
65
- print(f"Error decoding JSON response from questions endpoint: {e}")
66
- print(f"Response text: {response.text[:500]}")
67
- return f"Error decoding server response for questions: {e}", None
68
  except Exception as e:
69
- print(f"An unexpected error occurred fetching questions: {e}")
70
- return f"An unexpected error occurred fetching questions: {e}", None
71
 
72
- # 3. Run your Agent
 
 
 
 
 
 
 
 
 
 
 
 
73
  results_log = []
74
  answers_payload = []
75
- print(f"Running agent on {len(questions_data)} questions...")
76
- for item in questions_data:
 
 
 
77
  task_id = item.get("task_id")
78
- question_text = item.get("question")
79
- if not task_id or question_text is None:
80
- print(f"Skipping item with missing task_id or question: {item}")
81
  continue
82
  try:
83
- submitted_answer = agent(question_text)
84
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
85
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
86
  except Exception as e:
87
- print(f"Error running agent on task {task_id}: {e}")
88
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
89
 
90
- if not answers_payload:
91
- print("Agent did not produce any answers to submit.")
92
- return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
93
 
94
- # 4. Prepare Submission
95
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
96
- status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
97
- print(status_update)
98
 
99
- # 5. Submit
100
- print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
 
 
 
101
  try:
102
- response = requests.post(submit_url, json=submission_data, timeout=60)
103
- response.raise_for_status()
104
- result_data = response.json()
105
  final_status = (
106
- f"Submission Successful!\n"
107
- f"User: {result_data.get('username')}\n"
108
- f"Overall Score: {result_data.get('score', 'N/A')}% "
109
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
110
- f"Message: {result_data.get('message', 'No message received.')}"
111
  )
112
- print("Submission successful.")
113
- results_df = pd.DataFrame(results_log)
114
- return final_status, results_df
115
- except requests.exceptions.HTTPError as e:
116
- error_detail = f"Server responded with status {e.response.status_code}."
117
- try:
118
- error_json = e.response.json()
119
- error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
120
- except requests.exceptions.JSONDecodeError:
121
- error_detail += f" Response: {e.response.text[:500]}"
122
- status_message = f"Submission Failed: {error_detail}"
123
- print(status_message)
124
- results_df = pd.DataFrame(results_log)
125
- return status_message, results_df
126
- except requests.exceptions.Timeout:
127
- status_message = "Submission Failed: The request timed out."
128
- print(status_message)
129
- results_df = pd.DataFrame(results_log)
130
- return status_message, results_df
131
- except requests.exceptions.RequestException as e:
132
- status_message = f"Submission Failed: Network error - {e}"
133
- print(status_message)
134
- results_df = pd.DataFrame(results_log)
135
- return status_message, results_df
136
  except Exception as e:
137
- status_message = f"An unexpected error occurred during submission: {e}"
138
- print(status_message)
139
- results_df = pd.DataFrame(results_log)
140
- return status_message, results_df
141
-
142
-
143
- # --- Build Gradio Interface using Blocks ---
144
- with gr.Blocks() as demo:
145
- gr.Markdown("# Basic Agent Evaluation Runner")
146
- gr.Markdown(
147
- """
148
- **Instructions:**
149
-
150
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
151
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
152
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
153
-
154
- ---
155
- **Disclaimers:**
156
- Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
157
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
158
- """
159
- )
160
 
161
  gr.LoginButton()
162
 
163
- run_button = gr.Button("Run Evaluation & Submit All Answers")
164
 
165
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
166
- # Removed max_rows=10 from DataFrame constructor
167
- results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
168
 
169
- run_button.click(
170
  fn=run_and_submit_all,
171
- outputs=[status_output, results_table]
172
  )
173
 
174
  if __name__ == "__main__":
175
- print("\n" + "-"*30 + " App Starting " + "-"*30)
176
- # Check for SPACE_HOST and SPACE_ID at startup for information
177
- space_host_startup = os.getenv("SPACE_HOST")
178
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
179
-
180
- if space_host_startup:
181
- print(f"✅ SPACE_HOST found: {space_host_startup}")
182
- print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
183
- else:
184
- print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
185
-
186
- if space_id_startup: # Print repo URLs if SPACE_ID is found
187
- print(f"✅ SPACE_ID found: {space_id_startup}")
188
- print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
189
- print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
190
- else:
191
- print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
192
-
193
- print("-"*(60 + len(" App Starting ")) + "\n")
194
-
195
- print("Launching Gradio Interface for Basic Agent Evaluation...")
196
- demo.launch(debug=True, share=False)
 
1
import base64
import html
import io
import json
import operator
import os
import re
import tempfile
import threading
import time
from datetime import datetime
from typing import TypedDict, Annotated, Sequence, List, Dict, Any, Generator

import gradio as gr
import pandas as pd
import requests

# LangChain / LangGraph
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, ToolMessage, SystemMessage
from langchain_core.tools import tool
from langchain_core.utils.function_calling import convert_to_openai_function
from langgraph.graph import StateGraph, END
from langgraph.prebuilt import ToolExecutor

# Other tool dependencies
from bs4 import BeautifulSoup
from youtube_transcript_api import YouTubeTranscriptApi
25
+
26
+ # =============================================================================
27
+ # 配置常量
28
+ # =============================================================================
29
# Base URL of the scoring service (questions, files and submit endpoints).
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Base URL of the chat/audio API; overridable through the environment.
AGICTO_BASE_URL = os.getenv("AGICTO_BASE_URL", "https://agicto.com/model")
# API key sent as a Bearer token; empty when the variable is not set.
AGICTO_API_KEY = os.getenv("AGICTO_API_KEY", "")
# Model identifier sent in every chat-completions request.
QWEN_MODEL = "qwen3.5-35b-a3b"
33
 
34
+ # =============================================================================
35
+ # 进度监控器(仅用于 UI,不参与评分)
36
+ # =============================================================================
37
class ProgressMonitor:
    """Lock-protected progress tracker that renders an HTML status panel.

    Used only to drive the UI; it does not take part in scoring.
    """

    def __init__(self):
        self.current = 0            # questions completed so far
        self.total = 0              # questions expected in this run
        self.last_question = ""
        self.last_answer = ""
        self.logs = []              # one line per completed question
        self._lock = threading.Lock()

    def start(self, total: int):
        """Begin a new run of ``total`` questions and clear the log."""
        with self._lock:
            self.total = total
            self.current = 0
            self.logs = []

    def step(self, question: str, answer: str):
        """Record one finished question together with its answer."""
        with self._lock:
            self.current += 1
            self.last_question = question
            self.last_answer = answer
            self.logs.append(f"✅ 第 {self.current}/{self.total} 题完成:{answer[:50]}...")

    def get_html(self) -> str:
        """Return the progress panel as an HTML fragment.

        Question, answer and log text come from a remote API and from the
        model, so they are HTML-escaped before being embedded; otherwise any
        markup they contain would be rendered by the page.
        """
        with self._lock:
            pct = int(self.current / self.total * 100) if self.total > 0 else 0
            question = html.escape(self.last_question[:100])
            if len(self.last_question) > 100:
                question += "..."
            answer = html.escape(self.last_answer)
            log_text = html.escape("\n".join(self.logs))
            panel = f"""
            <div style="border:1px solid #ddd; padding:10px; border-radius:8px; background:#fafafa;">
            <h3>📊 实时进度</h3>
            <div style="background:#eee; height:20px; border-radius:10px; margin-bottom:10px;">
            <div style="width:{pct}%; background:#4CAF50; height:100%; border-radius:10px; text-align:center; color:white; font-size:12px; line-height:20px;">
            {pct}% ({self.current}/{self.total})
            </div>
            </div>
            <p><b>最新题目:</b> {question}</p>
            <p><b>答案:</b> <span style="color:#2e7d32;">{answer}</span></p>
            <details>
            <summary>详细日志</summary>
            <pre style="background:#f5f5f5; padding:10px; border-radius:4px; max-height:200px; overflow:auto;">{log_text}</pre>
            </details>
            </div>
            """
            return panel
79
+
80
+ # =============================================================================
81
+ # Qwen LLM 封装(通过 agicto.com API)
82
+ # =============================================================================
83
class QwenLLM:
    """Thin chat-completions client for the Qwen model served via the agicto.com API.

    ``invoke`` sends a plain chat request; ``bind_functions`` returns an object
    whose ``invoke`` also attaches tool schemas. Both return an ``AIMessage``.
    When the model requests a tool, the first requested call is exposed through
    ``additional_kwargs["function_call"]`` (name plus JSON-encoded arguments).
    """

    def __init__(self, model=QWEN_MODEL):
        self.model = model
        self.api_key = AGICTO_API_KEY
        self.base_url = AGICTO_BASE_URL
        if not self.api_key:
            print("⚠️ 未设置 AGICTO_API_KEY,请检查环境变量")

    def _call_api(self, messages: list, functions: list = None, max_tokens=2000):
        """POST a chat-completions request.

        Returns the decoded JSON body, or ``None`` on any failure (the error
        is printed; callers turn ``None`` into a failure message).
        """
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.api_key}"
        }
        body = {
            "model": self.model,
            "messages": messages,
            "temperature": 0.0,
            "max_tokens": max_tokens
        }
        if functions:
            body["tools"] = [{"type": "function", "function": f} for f in functions]
            body["tool_choice"] = "auto"
        try:
            resp = requests.post(f"{self.base_url}/v1/chat/completions", headers=headers, json=body, timeout=60)
            resp.raise_for_status()
            return resp.json()
        except Exception as e:
            print(f"API 调用失败: {e}")
            return None

    @staticmethod
    def _to_ai_message(result) -> AIMessage:
        """Convert a decoded chat-completions payload into an ``AIMessage``."""
        if not result:
            return AIMessage(content="模型调用失败")
        try:
            msg = result["choices"][0]["message"]
        except (KeyError, IndexError, TypeError):
            # A payload without a usable choice is treated like a failed call.
            print(f"API 调用失败: 响应格式异常 {result!r}")
            return AIMessage(content="模型调用失败")
        # "content" is commonly null on tool-call responses; AIMessage needs a string.
        content = msg.get("content") or ""
        tool_calls = msg.get("tool_calls")
        if tool_calls:
            requested = tool_calls[0]["function"]  # only the first call is honoured
            return AIMessage(
                content=content,
                additional_kwargs={
                    "function_call": {
                        "name": requested["name"],
                        "arguments": requested["arguments"]
                    }
                }
            )
        return AIMessage(content=content)

    def _complete(self, messages: list, functions: list = None) -> AIMessage:
        """Format ``messages``, call the API and parse the reply."""
        formatted = self._format_messages(messages)
        return self._to_ai_message(self._call_api(formatted, functions=functions))

    def invoke(self, messages: list) -> AIMessage:
        """Run a chat completion without tool schemas."""
        return self._complete(messages)

    def bind_functions(self, functions: list):
        """Return an object whose ``invoke`` always sends ``functions`` as tools."""
        # Lightweight stand-in that mimics LangChain's bind behaviour.
        class BoundLLM:
            def __init__(self, llm, funcs):
                self.llm = llm
                self.functions = funcs

            def invoke(self, messages: list) -> AIMessage:
                return self.llm._complete(messages, functions=self.functions)

        return BoundLLM(self, functions)

    def _format_messages(self, messages: list) -> list:
        """Translate LangChain messages into chat-completions message dicts.

        Messages of any other type are dropped. Tool calls and tool results
        are paired through the fixed id ``"call_1"`` unless the tool message
        carries its own ``tool_call_id``.
        """
        formatted = []
        for m in messages:
            if isinstance(m, SystemMessage):
                formatted.append({"role": "system", "content": m.content})
            elif isinstance(m, HumanMessage):
                formatted.append({"role": "user", "content": m.content})
            elif isinstance(m, AIMessage):
                entry = {"role": "assistant", "content": m.content}
                if hasattr(m, "additional_kwargs") and "function_call" in m.additional_kwargs:
                    entry["tool_calls"] = [{
                        "id": "call_1",
                        "type": "function",
                        "function": m.additional_kwargs["function_call"]
                    }]
                formatted.append(entry)
            elif isinstance(m, ToolMessage):
                formatted.append({
                    "role": "tool",
                    "tool_call_id": m.tool_call_id if hasattr(m, "tool_call_id") else "call_1",
                    "content": m.content
                })
        return formatted
183
+
184
+ # =============================================================================
185
+ # 工具定义
186
+ # =============================================================================
187
api_url_tasks = DEFAULT_API_URL  # base URL used for task file downloads
188
+
189
@tool
def web_search(query: str) -> str:
    """搜索互联网信息"""
    # The docstring above doubles as the tool description given to the model,
    # so it is kept verbatim ("search the internet for information").
    # Queries the DuckDuckGo JSON endpoint and returns the abstract plus up to
    # three related-topic texts, one per line.
    try:
        payload = requests.get(
            "https://api.duckduckgo.com/",
            params={"q": query, "format": "json", "no_html": 1},
            timeout=10,
        ).json()
        abstract = payload.get("AbstractText")
        found = [f"摘要: {abstract}"] if abstract else []
        found.extend(
            entry["Text"]
            for entry in payload.get("RelatedTopics", [])[:3]
            if isinstance(entry, dict) and "Text" in entry
        )
        if not found:
            return "未找到相关信息"
        return "\n".join(found)
    except Exception as e:
        return f"搜索失败: {e}"
206
+
207
@tool
def web_scraper(url: str) -> str:
    """抓取网页文本内容"""
    # The docstring above doubles as the tool description given to the model,
    # so it is kept verbatim ("scrape the text content of a web page").
    # Returns at most 5000 characters of visible text with script, style, nav
    # and footer elements removed.
    try:
        page = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=15)
        soup = BeautifulSoup(page.text, "html.parser")
        for node in soup(["script", "style", "nav", "footer"]):
            node.decompose()
        stripped = (raw.strip() for raw in soup.get_text().splitlines())
        return " ".join(piece for piece in stripped if piece)[:5000]
    except Exception as e:
        return f"抓取失败: {e}"
221
+
222
@tool
def calculator(expression: str) -> str:
    """计算数学表达式"""
    # The docstring above doubles as the tool description given to the model,
    # so it is kept verbatim ("evaluate a math expression").
    # NOTE(security): the expression is produced by the model and is passed to
    # eval(). Emptying __builtins__ is not a real sandbox; consider replacing
    # this with an AST-based evaluator.
    try:
        import math
        namespace = {
            name: value
            for name, value in vars(math).items()
            if not name.startswith("__")
        }
        return str(eval(expression, {"__builtins__": {}}, namespace))
    except Exception as e:
        return f"计算失败: {e}"
232
+
233
@tool
def analyze_image(image_data: str) -> str:
    """分析图片内容(URL 或 base64)"""
    # The docstring above doubles as the tool description given to the model,
    # so it is kept verbatim ("analyse image content, URL or base64").
    # Anything that does not start with "http" is treated as base64 data and
    # wrapped in a JPEG data URL before being sent to the chat endpoint.
    try:
        if image_data.startswith("http"):
            image_ref = image_data
        else:
            image_ref = f"data:image/jpeg;base64,{image_data}"
        prompt_parts = [
            {"type": "text", "text": "请详细描述这张图片的内容,包括文字、数字等信息。"},
            {"type": "image_url", "image_url": {"url": image_ref}},
        ]
        payload = {
            "model": QWEN_MODEL,
            "messages": [{"role": "user", "content": prompt_parts}],
            "max_tokens": 800,
        }
        reply = requests.post(
            f"{AGICTO_BASE_URL}/v1/chat/completions",
            headers={"Authorization": f"Bearer {AGICTO_API_KEY}", "Content-Type": "application/json"},
            json=payload,
            timeout=30,
        )
        if reply.status_code != 200:
            return f"图片分析失败: {reply.status_code}"
        return reply.json()["choices"][0]["message"]["content"]
    except Exception as e:
        return f"图片分析失败: {e}"
254
+
255
@tool
def transcribe_audio(audio_path: str) -> str:
    """转录音频文件(路径或 URL)"""
    # The docstring above doubles as the tool description given to the model,
    # so it is kept verbatim ("transcribe an audio file, path or URL").
    # Returns the transcript text, or a "转录失败: ..." message on any failure.
    try:
        headers = {"Authorization": f"Bearer {AGICTO_API_KEY}"}
        if audio_path.startswith("http"):
            download = requests.get(audio_path, timeout=30)
            # Do not forward an HTTP error page to the transcription endpoint.
            download.raise_for_status()
            audio_data = io.BytesIO(download.content)
            audio_data.name = "audio.mp3"
        else:
            audio_data = open(audio_path, "rb")
        # Both BytesIO and file objects are context managers, so the handle is
        # always released once the upload finishes or fails.
        with audio_data:
            files = {"file": audio_data, "model": (None, "whisper-1")}
            resp = requests.post(f"{AGICTO_BASE_URL}/v1/audio/transcriptions", headers=headers, files=files, timeout=60)
        if resp.status_code == 200:
            return resp.json()["text"]
        return f"转录失败: {resp.status_code}"
    except Exception as e:
        return f"转录失败: {e}"
273
+
274
@tool
def get_youtube_transcript(video_url: str) -> str:
    """获取 YouTube 视频字幕"""
    # The docstring above doubles as the tool description given to the model,
    # so it is kept verbatim ("fetch the subtitles of a YouTube video").
    # Accepts watch?v=... and youtu.be/... links and returns at most 4000
    # characters of the English or Chinese transcript.
    try:
        if "watch?v=" in video_url:
            after_marker = video_url.split("v=")[1]
            vid = after_marker.partition("&")[0]
        elif "youtu.be/" in video_url:
            after_host = video_url.split("youtu.be/")[1]
            vid = after_host.partition("?")[0]
        else:
            return "无法提取视频 ID"
        segments = YouTubeTranscriptApi.get_transcript(vid, languages=['en', 'zh'])
        joined = " ".join(segment['text'] for segment in segments)
        return joined[:4000]
    except Exception as e:
        return f"获取字幕失败: {e}"
288
+
289
@tool
def download_file_for_task(task_id: str) -> str:
    """下载 GAIA 任务关联的文件(图片、音频等)并返回内容或描述"""
    # The docstring above doubles as the tool description given to the model,
    # so it is kept verbatim ("download the file attached to a GAIA task and
    # return its content or a description").
    # Images are described via analyze_image, audio is transcribed via
    # transcribe_audio, anything else is returned as the first 4000 characters
    # of the response text.
    try:
        resp = requests.get(f"{api_url_tasks}/files/{task_id}", timeout=20)
        if resp.status_code != 200:
            return f"文件不存在 (HTTP {resp.status_code})"
        content_type = resp.headers.get("content-type", "")
        if "image" in content_type:
            b64 = base64.b64encode(resp.content).decode()
            # analyze_image is a tool object after @tool; run it through
            # .invoke() instead of the deprecated direct call.
            return analyze_image.invoke({"image_data": b64})
        if "audio" in content_type:
            with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
                f.write(resp.content)
                temp_path = f.name
            try:
                return transcribe_audio.invoke({"audio_path": temp_path})
            finally:
                # Remove the temp file even when transcription raises.
                os.unlink(temp_path)
        return resp.text[:4000]
    except Exception as e:
        return f"文件下载失败: {e}"
312
+
313
+ # =============================================================================
314
+ # LangGraph Agent 状态与图构建
315
+ # =============================================================================
316
class AgentState(TypedDict):
    # State carried between the graph nodes defined in this file.
    # Conversation so far; annotated with operator.add as its reducer.
    messages: Annotated[Sequence[BaseMessage], operator.add]
    # Marker written by each node ("decide", "agent", "end").
    next_step: str
    # Answer extracted by finish_node; empty until then.
    final_answer: str
    task_id: str  # current task ID, made available to the tools
321
+
322
# Tools offered to the model.
tools = [web_search, web_scraper, calculator, analyze_image, transcribe_audio, get_youtube_transcript, download_file_for_task]
tool_executor = ToolExecutor(tools)
llm = QwenLLM()
# OpenAI-style function schemas derived from the tools above.
functions = [convert_to_openai_function(t) for t in tools]
# LLM wrapper that sends those schemas with every request.
llm_with_tools = llm.bind_functions(functions)
327
+
328
def agent_node(state: AgentState) -> AgentState:
    """Ask the tool-enabled model for its next message.

    A system prompt naming the current task ID is put in front of the
    conversation; the model's reply is returned as the only new message.
    """
    task_id = state.get("task_id", "")
    sys_prompt = f"""You are a helpful assistant answering GAIA Level 1 questions. Use tools if needed.
When you know the answer, output only the answer string, without any extra text or "FINAL ANSWER:".
Current task ID: {task_id}. If you need the file for this task, use download_file_for_task with task_id="{task_id}"."""
    conversation = [SystemMessage(content=sys_prompt), *state["messages"]]
    reply = llm_with_tools.invoke(conversation)
    return dict(
        messages=[reply],
        next_step="decide",
        final_answer=state.get("final_answer", ""),
        task_id=task_id,
    )
342
+
343
def decide_node(state: AgentState) -> str:
    """Choose the edge to follow after the agent node.

    Returns "use_tool" when the latest message requests a tool call and
    "finish" otherwise. Once the conversation holds more than 12 messages the
    result is always "finish"; the cap is checked first so that a model which
    keeps requesting tools cannot loop indefinitely.
    """
    history = state["messages"]
    if len(history) > 12:
        return "finish"
    last = history[-1]
    if "function_call" in getattr(last, "additional_kwargs", {}):
        return "use_tool"
    return "finish"
350
+
351
def tool_node(state: AgentState) -> AgentState:
    """Run the tool requested by the latest message and return its result.

    The result, or a description of what went wrong, is returned as a single
    ToolMessage so the model can react to it. Bad arguments, unknown tool
    names and tool errors therefore no longer abort the whole question.
    """
    last = state["messages"][-1]
    func_call = last.additional_kwargs["function_call"]
    name = func_call["name"]
    raw_args = func_call.get("arguments") or "{}"
    try:
        # Arguments normally arrive as a JSON string; tolerate a ready-made dict.
        args = json.loads(raw_args) if isinstance(raw_args, str) else dict(raw_args)
        if not isinstance(args, dict):
            raise ValueError("arguments must be a JSON object")
    except (ValueError, TypeError) as exc:
        result = f"工具参数解析失败: {exc}"
    else:
        # Inject the current task_id for download_file_for_task when omitted.
        if name == "download_file_for_task" and "task_id" in state:
            args.setdefault("task_id", state["task_id"])
        # Dispatch on the tool's own name and call it through .invoke();
        # ToolExecutor expects an invocation object, not a plain dict.
        selected = {t.name: t for t in tools}.get(name)
        if selected is None:
            result = f"未知工具: {name}"
        else:
            try:
                result = selected.invoke(args)
            except Exception as exc:
                result = f"工具调用失败: {exc}"
    tool_msg = ToolMessage(content=str(result), tool_call_id="call_1")
    return {
        "messages": [tool_msg],
        "next_step": "agent",
        "final_answer": state.get("final_answer", ""),
        "task_id": state.get("task_id", "")
    }
367
+
368
def finish_node(state: AgentState) -> AgentState:
    """Extract the final answer from the latest message.

    The answer is the last line of the message content with surrounding
    whitespace removed; if that line contains "FINAL ANSWER:", only the text
    after the last occurrence is kept. Missing or non-string content yields
    an empty or stringified answer instead of raising.
    """
    content = state["messages"][-1].content
    text = content if isinstance(content, str) else str(content or "")
    answer = text.strip().split("\n")[-1].strip()
    if "FINAL ANSWER:" in answer:
        answer = answer.split("FINAL ANSWER:")[-1].strip()
    return {
        # messages uses an operator.add reducer, so returning the existing
        # history here would append a second copy of every message.
        "messages": [],
        "next_step": "end",
        "final_answer": answer,
        "task_id": state.get("task_id", "")
    }
382
+
383
def build_graph():
    """Assemble and compile the agent graph.

    Flow: "agent" is the entry point; decide_node routes it to "tools" or to
    "finish"; "tools" always returns to "agent"; "finish" ends the run.
    """
    graph = StateGraph(AgentState)
    node_table = (
        ("agent", agent_node),
        ("tools", tool_node),
        ("finish", finish_node),
    )
    for label, handler in node_table:
        graph.add_node(label, handler)
    graph.set_entry_point("agent")
    routes = {"use_tool": "tools", "finish": "finish"}
    graph.add_conditional_edges("agent", decide_node, routes)
    graph.add_edge("tools", "agent")
    graph.add_edge("finish", END)
    return graph.compile()
393
+
394
+ # =============================================================================
395
+ # 真正的 Agent 类(替换 BasicAgent)
396
+ # =============================================================================
397
class LangGraphAgent:
    """Callable agent that answers one question per call by running the graph."""

    def __init__(self):
        self.graph = build_graph()
        print("LangGraphAgent 初始化完成,使用模型:", QWEN_MODEL)

    def __call__(self, question: str, task_id: str = "") -> str:
        """Return the graph's final answer, or "Error: ..." if the run raises."""
        initial = dict(
            messages=[HumanMessage(content=question)],
            next_step="agent",
            final_answer="",
            task_id=task_id,
        )
        try:
            return self.graph.invoke(initial)["final_answer"]
        except Exception as e:
            print(f"Agent 运行失败: {e}")
            return f"Error: {e}"
415
+
416
+ # =============================================================================
417
+ # 主运行函数(改为生成器以支持实时进度)
418
+ # =============================================================================
419
def run_and_submit_all(profile: gr.OAuthProfile | None) -> Generator:
    """Fetch the questions, answer each with the agent, then submit the answers.

    Written as a generator so the UI can refresh during the run. Every yield
    is a 3-tuple ``(progress_html, status_text, results_dataframe)`` matching
    the three Gradio outputs wired to the button.

    Relies on the module-level ``pandas`` (``pd``) and ``html`` imports.
    """
    space_id = os.getenv("SPACE_ID")
    if not profile:
        yield "<div>请先登录</div>", "", pd.DataFrame()
        return

    username = profile.username
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    api_url = DEFAULT_API_URL

    # Set up the agent and the progress monitor.
    try:
        agent = LangGraphAgent()
        monitor = ProgressMonitor()
    except Exception as e:
        # Exception text is escaped before it is placed inside HTML.
        yield f"<div>Agent 初始化失败: {html.escape(str(e))}</div>", f"Agent 初始化失败: {e}", pd.DataFrame()
        return

    # Fetch the questions.
    try:
        resp = requests.get(f"{api_url}/questions", timeout=15)
        resp.raise_for_status()
        questions = resp.json()
    except Exception as e:
        yield f"<div>获取题目失败: {html.escape(str(e))}</div>", f"获取题目失败: {e}", pd.DataFrame()
        return
    # A non-list payload (e.g. an error object) is treated like an empty list.
    if not isinstance(questions, list) or not questions:
        yield "<div>没有题目</div>", "没有题目", pd.DataFrame()
        return

    # Keep only well-formed items so the progress total matches the work done.
    runnable = [
        item for item in questions
        if isinstance(item, dict) and item.get("task_id") and item.get("question")
    ]

    monitor.start(len(runnable))
    results_log = []
    answers_payload = []

    # First yield: the initial (empty) progress panel.
    yield monitor.get_html(), "", pd.DataFrame()

    for item in runnable:
        task_id = item["task_id"]
        question = item["question"]
        try:
            answer = agent(question, task_id=task_id)
        except Exception as e:
            answer = f"ERROR: {e}"
        answers_payload.append({"task_id": task_id, "submitted_answer": answer})
        results_log.append({"Task ID": task_id, "Question": question, "Submitted Answer": answer})
        monitor.step(question, answer)

        # Refresh progress and the results table after every question.
        yield monitor.get_html(), "", pd.DataFrame(results_log)

    # Submit.
    if not answers_payload:
        yield monitor.get_html(), "没有答案可提交", pd.DataFrame(results_log)
        return

    submission = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload
    }
    try:
        resp = requests.post(f"{api_url}/submit", json=submission, timeout=60)
        resp.raise_for_status()
        result = resp.json()
        final_status = (
            f" 提交成功!\n"
            f"用户:{username}\n"
            f"总分:{result.get('score', 'N/A')}% "
            f"({result.get('correct_count', 0)}/{result.get('total_attempted', 0)} 正确)\n"
            f"消息:{result.get('message', '')}"
        )
    except Exception as e:
        final_status = f"提交失败: {e}"

    # Final yield: progress, score summary and the full table.
    yield monitor.get_html(), final_status, pd.DataFrame(results_log)
498
+
499
+ # =============================================================================
500
+ # Gradio 界面
501
+ # =============================================================================
502
# Gradio interface: login button, run button and three output widgets.
with gr.Blocks(title="GAIA Agent") as demo:
    gr.Markdown("""
    # 🤖 GAIA Level 1 Agent (LangGraph + Qwen)
    **模型:** Qwen3.5-35B-A3B | **API:** agicto.com
    点击按钮获取题目,Agent 自动调用工具并回答,最后提交评分。
    """)

    gr.LoginButton()

    run_btn = gr.Button("🚀 运行评测并提交", variant="primary")

    progress_html = gr.HTML(label="实时进度")
    status_output = gr.Textbox(label="提交结果 / 总分", lines=5, interactive=False)
    results_table = gr.DataFrame(label="题目与 Agent 答案", wrap=True)

    # The handler is a generator; each yielded 3-tuple fills these outputs in order.
    run_btn.click(
        fn=run_and_submit_all,
        outputs=[progress_html, status_output, results_table]
    )
521
 
522
# Script entry point: enable the queue and serve on all interfaces, port 7860.
if __name__ == "__main__":
    print("启动 Gradio App...")
    demo.queue().launch(server_name="0.0.0.0", server_port=7860)