johnnychiang commited on
Commit
aa85f48
·
verified ·
1 Parent(s): 63bad53

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -121
app.py CHANGED
@@ -1,73 +1,72 @@
1
  import os
 
2
  import gradio as gr
3
  import requests
4
- import inspect
5
  import pandas as pd
6
- import re
7
  from huggingface_hub import InferenceClient
8
 
9
-
10
- # (Keep Constants as is)
11
  # --- Constants ---
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
 
14
- # --- Basic Agent Definition ---
15
- # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
16
  # --- Basic Agent Definition ---
17
  class BasicAgent:
18
  """
19
  Minimal LLM-based agent for GAIA level-1 style questions.
20
- Goal: >=30% (at least 6/20 exact match).
21
  """
 
22
  def __init__(self):
23
  print("BasicAgent initialized (LLM mode).")
24
 
25
- # 必須先在 Space 設定 Secret:HF_TOKEN(你的 Hugging Face access token)
26
  self.hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
27
  if not self.hf_token:
28
- raise RuntimeError("Missing HF_TOKEN. Please set it in Space Settings → Secrets.")
29
 
30
- # 先用 7B 最穩最容易跑完;不夠分再升 14B/32B
31
  self.model_id = os.getenv("MODEL_ID", "Qwen/Qwen2.5-7B-Instruct")
32
 
33
- # 重要:用 router,不要用 api-inference(你之前 410 就是這個)
 
34
  self.client = InferenceClient(
35
  model=self.model_id,
36
  token=self.hf_token,
37
- base_url="https://router.huggingface.co",
38
  timeout=120,
39
  )
40
 
 
 
 
 
 
 
 
 
41
  def _sanitize(self, text: str) -> str:
42
  if not text:
43
  return ""
44
- t = text.strip()
45
 
46
- # 移除 FINAL ANSWER 這種字眼(課程有說不要加)
47
  t = re.sub(r"(?i)\bFINAL ANSWER\b\s*[:\-]*\s*", "", t).strip()
 
48
 
49
- # 如果模型分行,取最後一行(通常答案會在最後)
50
  lines = [ln.strip() for ln in t.splitlines() if ln.strip()]
51
  if lines:
52
  t = lines[-1]
53
 
54
- # 去掉引號
55
  t = t.strip().strip('"').strip("'").strip()
56
  return t
57
 
58
  def __call__(self, question: str) -> str:
59
- print(f"Agent received question (first 50 chars): {question[:50]}...")
60
 
61
- system = (
62
- "You are a precise question-answering assistant.\n"
63
- "Return ONLY the final answer, nothing else.\n"
64
- "No explanations. No extra words. No punctuation unless required.\n"
65
- "If the answer is a number/date/name, output it exactly.\n"
66
- )
67
 
68
- prompt = f"{system}\nQuestion: {question}\nAnswer:"
69
-
70
- # 用 chat completion 風格(InferenceClient 會依模型支援)
71
  try:
72
  out = self.client.text_generation(
73
  prompt,
@@ -77,31 +76,30 @@ class BasicAgent:
77
  return_full_text=False,
78
  )
79
  except Exception as e:
80
- # 如果 text_generation 因模型接口差異出錯,退回 chat_completion
81
  print("text_generation failed, fallback to chat_completion:", e)
82
  out = self.client.chat_completion(
83
  messages=[
84
- {"role": "system", "content": system},
85
  {"role": "user", "content": question},
86
  ],
87
  max_tokens=128,
88
  temperature=0.0,
89
  ).choices[0].message.content
90
 
91
- ans = self._sanitize(str(out))
92
  print(f"Agent answer: {ans}")
93
  return ans
94
 
95
- def run_and_submit_all( profile: gr.OAuthProfile | None):
 
96
  """
97
- Fetches all questions, runs the BasicAgent on them, submits all answers,
98
  and displays the results.
99
  """
100
- # --- Determine HF Space Runtime URL and Repo URL ---
101
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
102
 
103
  if profile:
104
- username= f"{profile.username}"
105
  print(f"User logged in: {username}")
106
  else:
107
  print("User not logged in.")
@@ -111,70 +109,71 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
111
  questions_url = f"{api_url}/questions"
112
  submit_url = f"{api_url}/submit"
113
 
114
- # 1. Instantiate Agent ( modify this part to create your agent)
115
  try:
116
  agent = BasicAgent()
117
  except Exception as e:
118
  print(f"Error instantiating agent: {e}")
119
  return f"Error initializing agent: {e}", None
120
- # In the case of an app running as a Hugging Face Space, this link points toward your codebase (useful for others, so please keep it public)
121
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
122
- print(agent_code)
123
 
124
- # 2. Fetch Questions
 
 
 
 
125
  print(f"Fetching questions from: {questions_url}")
126
  try:
127
- response = requests.get(questions_url, timeout=15)
128
  response.raise_for_status()
129
  questions_data = response.json()
130
  if not questions_data:
131
- print("Fetched questions list is empty.")
132
- return "Fetched questions list is empty or invalid format.", None
133
  print(f"Fetched {len(questions_data)} questions.")
134
- except requests.exceptions.RequestException as e:
135
  print(f"Error fetching questions: {e}")
136
  return f"Error fetching questions: {e}", None
137
- except requests.exceptions.JSONDecodeError as e:
138
- print(f"Error decoding JSON response from questions endpoint: {e}")
139
- print(f"Response text: {response.text[:500]}")
140
- return f"Error decoding server response for questions: {e}", None
141
- except Exception as e:
142
- print(f"An unexpected error occurred fetching questions: {e}")
143
- return f"An unexpected error occurred fetching questions: {e}", None
144
 
145
- # 3. Run your Agent
146
  results_log = []
147
  answers_payload = []
148
  print(f"Running agent on {len(questions_data)} questions...")
 
149
  for item in questions_data:
150
  task_id = item.get("task_id")
151
  question_text = item.get("question")
 
152
  if not task_id or question_text is None:
153
- print(f"Skipping item with missing task_id or question: {item}")
154
  continue
 
155
  try:
156
  submitted_answer = agent(question_text)
157
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
158
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
 
159
  except Exception as e:
160
- print(f"Error running agent on task {task_id}: {e}")
161
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
162
 
163
  if not answers_payload:
164
- print("Agent did not produce any answers to submit.")
165
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
166
 
167
- # 4. Prepare Submission
168
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
169
- status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
170
- print(status_update)
 
 
171
 
172
- # 5. Submit
173
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
174
  try:
175
- response = requests.post(submit_url, json=submission_data, timeout=60)
176
  response.raise_for_status()
177
  result_data = response.json()
 
178
  final_status = (
179
  f"Submission Successful!\n"
180
  f"User: {result_data.get('username')}\n"
@@ -182,88 +181,53 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
182
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
183
  f"Message: {result_data.get('message', 'No message received.')}"
184
  )
185
- print("Submission successful.")
186
  results_df = pd.DataFrame(results_log)
187
  return final_status, results_df
 
188
  except requests.exceptions.HTTPError as e:
189
- error_detail = f"Server responded with status {e.response.status_code}."
190
  try:
191
- error_json = e.response.json()
192
- error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
193
- except requests.exceptions.JSONDecodeError:
194
- error_detail += f" Response: {e.response.text[:500]}"
195
- status_message = f"Submission Failed: {error_detail}"
196
- print(status_message)
197
  results_df = pd.DataFrame(results_log)
198
- return status_message, results_df
 
199
  except requests.exceptions.Timeout:
200
- status_message = "Submission Failed: The request timed out."
201
- print(status_message)
202
- results_df = pd.DataFrame(results_log)
203
- return status_message, results_df
204
- except requests.exceptions.RequestException as e:
205
- status_message = f"Submission Failed: Network error - {e}"
206
- print(status_message)
207
  results_df = pd.DataFrame(results_log)
208
- return status_message, results_df
 
209
  except Exception as e:
210
- status_message = f"An unexpected error occurred during submission: {e}"
211
- print(status_message)
212
  results_df = pd.DataFrame(results_log)
213
- return status_message, results_df
214
 
215
 
216
- # --- Build Gradio Interface using Blocks ---
217
  with gr.Blocks() as demo:
218
  gr.Markdown("# Basic Agent Evaluation Runner")
219
- gr.Markdown(
220
- """
221
- **Instructions:**
222
 
223
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
224
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
225
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
226
-
227
- ---
228
- **Disclaimers:**
229
- Once you click the "Submit" button, it can take quite some time (this is the time it takes the agent to go through all the questions).
230
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance, to avoid the long wait on the submit button, you could cache the answers and submit them in a separate action, or even answer the questions asynchronously.
231
  """
 
 
 
 
 
 
 
 
232
  )
233
 
234
  gr.LoginButton()
235
-
236
  run_button = gr.Button("Run Evaluation & Submit All Answers")
237
-
238
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
239
- # Removed max_rows=10 from DataFrame constructor
240
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
241
 
242
  run_button.click(
243
  fn=run_and_submit_all,
244
- outputs=[status_output, results_table]
245
  )
246
 
247
  if __name__ == "__main__":
248
- print("\n" + "-"*30 + " App Starting " + "-"*30)
249
- # Check for SPACE_HOST and SPACE_ID at startup for information
250
- space_host_startup = os.getenv("SPACE_HOST")
251
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
252
-
253
- if space_host_startup:
254
- print(f"✅ SPACE_HOST found: {space_host_startup}")
255
- print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
256
- else:
257
- print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
258
-
259
- if space_id_startup: # Print repo URLs if SPACE_ID is found
260
- print(f"✅ SPACE_ID found: {space_id_startup}")
261
- print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
262
- print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
263
- else:
264
- print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
265
-
266
- print("-"*(60 + len(" App Starting ")) + "\n")
267
-
268
- print("Launching Gradio Interface for Basic Agent Evaluation...")
269
- demo.launch(debug=True, share=False)
 
1
  import os
2
+ import re
3
  import gradio as gr
4
  import requests
 
5
  import pandas as pd
 
6
  from huggingface_hub import InferenceClient
7
 
 
 
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
+
 
12
  # --- Basic Agent Definition ---
13
  class BasicAgent:
14
  """
15
  Minimal LLM-based agent for GAIA level-1 style questions.
16
+ Target: >=30% (>=6/20 exact match) by keeping output clean.
17
  """
18
+
19
  def __init__(self):
20
  print("BasicAgent initialized (LLM mode).")
21
 
22
+ # 必須在 Space 設定 Secret:HF_TOKEN
23
  self.hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
24
  if not self.hf_token:
25
+ raise RuntimeError("Missing HF_TOKEN. Set it in Space Settings → Variables and secrets → New secret.")
26
 
27
+ # 先用 7B 穩定跑完;不夠分再用 Settings 加 MODEL_ID 升到 14B/32B
28
  self.model_id = os.getenv("MODEL_ID", "Qwen/Qwen2.5-7B-Instruct")
29
 
30
+ # 不要傳 base_url(會跟 model 參數衝突,造成你剛剛那個錯誤)
31
+ # 新版 huggingface_hub 會走新的推理路由;只要給 model + token 就行
32
  self.client = InferenceClient(
33
  model=self.model_id,
34
  token=self.hf_token,
 
35
  timeout=120,
36
  )
37
 
38
+ # 強制乾淨輸出(exact match)
39
+ self.system = (
40
+ "You are a precise question-answering assistant.\n"
41
+ "Return ONLY the final answer, nothing else.\n"
42
+ "No explanation. No extra words.\n"
43
+ "No surrounding quotes unless the answer itself includes them.\n"
44
+ )
45
+
46
  def _sanitize(self, text: str) -> str:
47
  if not text:
48
  return ""
49
+ t = str(text).strip()
50
 
51
+ # 移除 FINAL ANSWER / Answer: 之類常見包裝
52
  t = re.sub(r"(?i)\bFINAL ANSWER\b\s*[:\-]*\s*", "", t).strip()
53
+ t = re.sub(r"(?i)^\s*answer\s*[:\-]\s*", "", t).strip()
54
 
55
+ # 取最後一個非空行(模型如果亂輸出多行,答案常在最後)
56
  lines = [ln.strip() for ln in t.splitlines() if ln.strip()]
57
  if lines:
58
  t = lines[-1]
59
 
60
+ # 去掉外層引號
61
  t = t.strip().strip('"').strip("'").strip()
62
  return t
63
 
64
  def __call__(self, question: str) -> str:
65
+ print(f"Agent received question (first 80 chars): {question[:80]}...")
66
 
67
+ prompt = f"{self.system}\nQuestion: {question}\nAnswer:"
 
 
 
 
 
68
 
69
+ # 優先用 text_generation(快),失敗再用 chat_completion
 
 
70
  try:
71
  out = self.client.text_generation(
72
  prompt,
 
76
  return_full_text=False,
77
  )
78
  except Exception as e:
 
79
  print("text_generation failed, fallback to chat_completion:", e)
80
  out = self.client.chat_completion(
81
  messages=[
82
+ {"role": "system", "content": self.system},
83
  {"role": "user", "content": question},
84
  ],
85
  max_tokens=128,
86
  temperature=0.0,
87
  ).choices[0].message.content
88
 
89
+ ans = self._sanitize(out)
90
  print(f"Agent answer: {ans}")
91
  return ans
92
 
93
+
94
+ def run_and_submit_all(profile: gr.OAuthProfile | None):
95
  """
96
+ Fetches all questions, runs the agent on them, submits all answers,
97
  and displays the results.
98
  """
99
+ space_id = os.getenv("SPACE_ID") # used for code link
 
100
 
101
  if profile:
102
+ username = f"{profile.username}"
103
  print(f"User logged in: {username}")
104
  else:
105
  print("User not logged in.")
 
109
  questions_url = f"{api_url}/questions"
110
  submit_url = f"{api_url}/submit"
111
 
112
+ # 1) Instantiate agent
113
  try:
114
  agent = BasicAgent()
115
  except Exception as e:
116
  print(f"Error instantiating agent: {e}")
117
  return f"Error initializing agent: {e}", None
 
 
 
118
 
119
+ # code link for verification
120
+ agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
121
+ print("agent_code:", agent_code)
122
+
123
+ # 2) Fetch questions
124
  print(f"Fetching questions from: {questions_url}")
125
  try:
126
+ response = requests.get(questions_url, timeout=30)
127
  response.raise_for_status()
128
  questions_data = response.json()
129
  if not questions_data:
130
+ return "Fetched questions list is empty or invalid format.", None
 
131
  print(f"Fetched {len(questions_data)} questions.")
132
+ except Exception as e:
133
  print(f"Error fetching questions: {e}")
134
  return f"Error fetching questions: {e}", None
 
 
 
 
 
 
 
135
 
136
+ # 3) Run agent
137
  results_log = []
138
  answers_payload = []
139
  print(f"Running agent on {len(questions_data)} questions...")
140
+
141
  for item in questions_data:
142
  task_id = item.get("task_id")
143
  question_text = item.get("question")
144
+
145
  if not task_id or question_text is None:
 
146
  continue
147
+
148
  try:
149
  submitted_answer = agent(question_text)
150
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
151
+ results_log.append(
152
+ {"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer}
153
+ )
154
  except Exception as e:
155
+ print(f"Error running agent on task {task_id}: {e}")
156
+ results_log.append(
157
+ {"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"}
158
+ )
159
 
160
  if not answers_payload:
 
161
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
162
 
163
+ # 4) Prepare submission
164
+ submission_data = {
165
+ "username": username.strip(),
166
+ "agent_code": agent_code,
167
+ "answers": answers_payload,
168
+ }
169
 
170
+ # 5) Submit
171
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
172
  try:
173
+ response = requests.post(submit_url, json=submission_data, timeout=120)
174
  response.raise_for_status()
175
  result_data = response.json()
176
+
177
  final_status = (
178
  f"Submission Successful!\n"
179
  f"User: {result_data.get('username')}\n"
 
181
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
182
  f"Message: {result_data.get('message', 'No message received.')}"
183
  )
 
184
  results_df = pd.DataFrame(results_log)
185
  return final_status, results_df
186
+
187
  except requests.exceptions.HTTPError as e:
188
+ detail = f"Server responded with status {e.response.status_code}."
189
  try:
190
+ detail_json = e.response.json()
191
+ detail += f" Detail: {detail_json.get('detail', e.response.text)}"
192
+ except Exception:
193
+ detail += f" Response: {e.response.text[:500]}"
 
 
194
  results_df = pd.DataFrame(results_log)
195
+ return f"Submission Failed: {detail}", results_df
196
+
197
  except requests.exceptions.Timeout:
 
 
 
 
 
 
 
198
  results_df = pd.DataFrame(results_log)
199
+ return "Submission Failed: The request timed out.", results_df
200
+
201
  except Exception as e:
 
 
202
  results_df = pd.DataFrame(results_log)
203
+ return f"An unexpected error occurred during submission: {e}", results_df
204
 
205
 
206
+ # --- Gradio UI ---
207
  with gr.Blocks() as demo:
208
  gr.Markdown("# Basic Agent Evaluation Runner")
 
 
 
209
 
210
+ gr.Markdown(
 
 
 
 
 
 
 
211
  """
212
+ **Instructions:**
213
+ 1. Log in to your Hugging Face account using the button below.
214
+ 2. Click **Run Evaluation & Submit All Answers**.
215
+
216
+ **Notes:**
217
+ - This will run 20 questions and submit answers for scoring.
218
+ - Exact match: the agent must output ONLY the answer (no extra words).
219
+ """
220
  )
221
 
222
  gr.LoginButton()
 
223
  run_button = gr.Button("Run Evaluation & Submit All Answers")
224
+ status_output = gr.Textbox(label="Run Status / Submission Result", lines=6, interactive=False)
 
 
225
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
226
 
227
  run_button.click(
228
  fn=run_and_submit_all,
229
+ outputs=[status_output, results_table],
230
  )
231
 
232
  if __name__ == "__main__":
233
+ demo.launch(debug=True, share=False)