johnnychiang committed on
Commit
4491259
·
verified ·
1 Parent(s): bfea800

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -145
app.py CHANGED
@@ -1,233 +1,190 @@
1
  import os
2
- import re
3
  import gradio as gr
4
  import requests
5
  import pandas as pd
 
6
  from huggingface_hub import InferenceClient
7
 
8
- # --- Constants ---
 
 
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
-
12
- # --- Basic Agent Definition ---
 
13
  class BasicAgent:
14
  """
15
- Minimal LLM-based agent for GAIA level-1 style questions.
16
- Target: >=30% (>=6/20 exact match) by keeping output clean.
17
  """
18
 
19
  def __init__(self):
20
- print("BasicAgent initialized (LLM mode).")
21
 
22
- # 必須在 Space 設定 Secret:HF_TOKEN
23
  self.hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
24
  if not self.hf_token:
25
- raise RuntimeError("Missing HF_TOKEN. Set it in Space Settings → Variables and secrets → New secret.")
26
 
27
- # 先用 7B 穩定跑完;不夠分再用 Settings 加 MODEL_ID 升到 14B/32B
28
  self.model_id = os.getenv("MODEL_ID", "Qwen/Qwen2.5-7B-Instruct")
29
 
30
- # 不要傳 base_url(會跟 model 參數衝突,造成你剛剛那個錯誤)
31
- # 新版 huggingface_hub 會走新的推理路由;只要給 model + token 就行
32
  self.client = InferenceClient(
33
  model=self.model_id,
34
  token=self.hf_token,
 
35
  timeout=120,
36
  )
37
 
38
- # 強制乾淨輸出(exact match)
39
  self.system = (
40
- "You are a precise question-answering assistant.\n"
41
- "Return ONLY the final answer, nothing else.\n"
42
- "No explanation. No extra words.\n"
43
- "No surrounding quotes unless the answer itself includes them.\n"
 
 
 
 
44
  )
45
 
46
  def _sanitize(self, text: str) -> str:
47
  if not text:
48
  return ""
 
49
  t = str(text).strip()
50
 
51
- # 移除 FINAL ANSWER / Answer: 之類常見包裝
52
- t = re.sub(r"(?i)\bFINAL ANSWER\b\s*[:\-]*\s*", "", t).strip()
53
- t = re.sub(r"(?i)^\s*answer\s*[:\-]\s*", "", t).strip()
54
 
55
- # 取最後一個非空行(模型如果亂輸出多行,答案常在最後)
56
  lines = [ln.strip() for ln in t.splitlines() if ln.strip()]
57
  if lines:
58
  t = lines[-1]
59
 
60
- # 去掉外層引號
61
- t = t.strip().strip('"').strip("'").strip()
 
 
 
 
62
  return t
63
 
64
  def __call__(self, question: str) -> str:
65
- print(f"Agent received question (first 80 chars): {question[:80]}...")
66
 
67
  prompt = f"{self.system}\nQuestion: {question}\nAnswer:"
68
 
69
- # 優先用 text_generation(快),失敗再用 chat_completion
70
  try:
71
  out = self.client.text_generation(
72
  prompt,
73
- max_new_tokens=128,
74
  temperature=0.0,
75
  do_sample=False,
76
  return_full_text=False,
77
  )
78
- except Exception as e:
79
- print("text_generation failed, fallback to chat_completion:", e)
80
  out = self.client.chat_completion(
81
  messages=[
82
  {"role": "system", "content": self.system},
83
  {"role": "user", "content": question},
84
  ],
85
- max_tokens=128,
86
  temperature=0.0,
87
  ).choices[0].message.content
88
 
89
  ans = self._sanitize(out)
90
- print(f"Agent answer: {ans}")
91
  return ans
92
 
93
 
 
 
 
94
  def run_and_submit_all(profile: gr.OAuthProfile | None):
95
- """
96
- Fetches all questions, runs the agent on them, submits all answers,
97
- and displays the results.
98
- """
99
- space_id = os.getenv("SPACE_ID") # used for code link
100
 
101
- if profile:
102
- username = f"{profile.username}"
103
- print(f"User logged in: {username}")
104
- else:
105
- print("User not logged in.")
106
- return "Please Login to Hugging Face with the button.", None
107
 
108
- api_url = DEFAULT_API_URL
109
- questions_url = f"{api_url}/questions"
110
- submit_url = f"{api_url}/submit"
111
 
112
- # 1) Instantiate agent
113
- try:
114
- agent = BasicAgent()
115
- except Exception as e:
116
- print(f"Error instantiating agent: {e}")
117
- return f"Error initializing agent: {e}", None
118
 
119
- # code link for verification
120
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
121
- print("agent_code:", agent_code)
122
 
123
- # 2) Fetch questions
124
- print(f"Fetching questions from: {questions_url}")
125
  try:
126
- response = requests.get(questions_url, timeout=30)
127
- response.raise_for_status()
128
- questions_data = response.json()
129
- if not questions_data:
130
- return "Fetched questions list is empty or invalid format.", None
131
- print(f"Fetched {len(questions_data)} questions.")
132
  except Exception as e:
133
- print(f"Error fetching questions: {e}")
134
- return f"Error fetching questions: {e}", None
135
 
136
- # 3) Run agent
137
- results_log = []
138
- answers_payload = []
139
- print(f"Running agent on {len(questions_data)} questions...")
140
 
141
- for item in questions_data:
142
- task_id = item.get("task_id")
143
- question_text = item.get("question")
 
144
 
145
- if not task_id or question_text is None:
146
- continue
147
 
 
 
 
148
  try:
149
- submitted_answer = agent(question_text)
150
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
151
- results_log.append(
152
- {"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer}
153
- )
154
  except Exception as e:
155
- print(f"Error running agent on task {task_id}: {e}")
156
- results_log.append(
157
- {"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"}
158
- )
159
-
160
- if not answers_payload:
161
- return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
162
-
163
- # 4) Prepare submission
164
- submission_data = {
165
- "username": username.strip(),
 
 
 
 
 
166
  "agent_code": agent_code,
167
- "answers": answers_payload,
168
  }
169
 
170
- # 5) Submit
171
- print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
172
- try:
173
- response = requests.post(submit_url, json=submission_data, timeout=120)
174
- response.raise_for_status()
175
- result_data = response.json()
176
-
177
- final_status = (
178
- f"Submission Successful!\n"
179
- f"User: {result_data.get('username')}\n"
180
- f"Overall Score: {result_data.get('score', 'N/A')}% "
181
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
182
- f"Message: {result_data.get('message', 'No message received.')}"
183
- )
184
- results_df = pd.DataFrame(results_log)
185
- return final_status, results_df
186
 
187
- except requests.exceptions.HTTPError as e:
188
- detail = f"Server responded with status {e.response.status_code}."
189
- try:
190
- detail_json = e.response.json()
191
- detail += f" Detail: {detail_json.get('detail', e.response.text)}"
192
- except Exception:
193
- detail += f" Response: {e.response.text[:500]}"
194
- results_df = pd.DataFrame(results_log)
195
- return f"Submission Failed: {detail}", results_df
196
-
197
- except requests.exceptions.Timeout:
198
- results_df = pd.DataFrame(results_log)
199
- return "Submission Failed: The request timed out.", results_df
200
 
201
- except Exception as e:
202
- results_df = pd.DataFrame(results_log)
203
- return f"An unexpected error occurred during submission: {e}", results_df
204
 
205
 
206
- # --- Gradio UI ---
 
 
207
  with gr.Blocks() as demo:
208
- gr.Markdown("# Basic Agent Evaluation Runner")
209
-
210
- gr.Markdown(
211
- """
212
- **Instructions:**
213
- 1. Log in to your Hugging Face account using the button below.
214
- 2. Click **Run Evaluation & Submit All Answers**.
215
-
216
- **Notes:**
217
- - This will run 20 questions and submit answers for scoring.
218
- - Exact match: the agent must output ONLY the answer (no extra words).
219
- """
220
- )
221
-
222
  gr.LoginButton()
223
- run_button = gr.Button("Run Evaluation & Submit All Answers")
224
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=6, interactive=False)
225
- results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
 
 
226
 
227
- run_button.click(
228
- fn=run_and_submit_all,
229
- outputs=[status_output, results_table],
230
- )
231
- ""
232
  if __name__ == "__main__":
233
- demo.launch(debug=True, share=False)
 
1
  import os
 
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
5
+ import re
6
  from huggingface_hub import InferenceClient
7
 
8
+ # ===============================
9
+ # Constants (do not change)
10
+ # ===============================
11
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
12
 
13
+ # ===============================
14
+ # Basic Agent (PASS VERSION)
15
+ # ===============================
16
  class BasicAgent:
17
  """
18
+ Minimal GAIA Level-1 agent.
19
+ Goal: >=30% (>=6/20 exact match)
20
  """
21
 
22
  def __init__(self):
23
+ print("BasicAgent initialized (PASS MODE).")
24
 
25
+ # 必須在 Space → Settings → Secrets 設定 HF_TOKEN
26
  self.hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
27
  if not self.hf_token:
28
+ raise RuntimeError("HF_TOKEN missing. Set it in Space Settings → Secrets.")
29
 
30
+ # 可在 Space Variables 改模型,不用動 code
31
  self.model_id = os.getenv("MODEL_ID", "Qwen/Qwen2.5-7B-Instruct")
32
 
33
+ # ⚠️ 一定要用 router(避免 410)
 
34
  self.client = InferenceClient(
35
  model=self.model_id,
36
  token=self.hf_token,
37
+ base_url="https://router.huggingface.co",
38
  timeout=120,
39
  )
40
 
41
+ # 超嚴格 system prompt(EXACT MATCH 核心)
42
  self.system = (
43
+ "You answer questions with EXACT MATCH.\n"
44
+ "Return ONLY the final answer.\n"
45
+ "No explanation.\n"
46
+ "No extra words.\n"
47
+ "No punctuation unless required.\n"
48
+ "No quotes.\n"
49
+ "If the answer is a name, output the name only.\n"
50
+ "If the answer is a number or date, output it exactly.\n"
51
  )
52
 
53
  def _sanitize(self, text: str) -> str:
54
  if not text:
55
  return ""
56
+
57
  t = str(text).strip()
58
 
59
+ # 移除常見前綴
60
+ t = re.sub(r"(?i)final answer\s*[:\-]*", "", t)
61
+ t = re.sub(r"(?i)answer\s*[:\-]*", "", t)
62
 
63
+ # 只留最後一行
64
  lines = [ln.strip() for ln in t.splitlines() if ln.strip()]
65
  if lines:
66
  t = lines[-1]
67
 
68
+ # 去掉引號
69
+ t = t.strip().strip('"').strip("'")
70
+
71
+ # 🔥 關鍵:移除句尾標點(GAIA 最常死在這)
72
+ t = re.sub(r"[.,;:!?]$", "", t)
73
+
74
  return t
75
 
76
  def __call__(self, question: str) -> str:
77
+ print(f"Q: {question[:60]}")
78
 
79
  prompt = f"{self.system}\nQuestion: {question}\nAnswer:"
80
 
 
81
  try:
82
  out = self.client.text_generation(
83
  prompt,
84
+ max_new_tokens=64,
85
  temperature=0.0,
86
  do_sample=False,
87
  return_full_text=False,
88
  )
89
+ except Exception:
90
+ # fallback(保險)
91
  out = self.client.chat_completion(
92
  messages=[
93
  {"role": "system", "content": self.system},
94
  {"role": "user", "content": question},
95
  ],
96
+ max_tokens=64,
97
  temperature=0.0,
98
  ).choices[0].message.content
99
 
100
  ans = self._sanitize(out)
101
+ print(f"A: {ans}")
102
  return ans
103
 
104
 
105
+ # ===============================
106
+ # Run & Submit
107
+ # ===============================
108
  def run_and_submit_all(profile: gr.OAuthProfile | None):
 
 
 
 
 
109
 
110
+ space_id = os.getenv("SPACE_ID")
 
 
 
 
 
111
 
112
+ if not profile:
113
+ return "Please login with Hugging Face.", None
 
114
 
115
+ username = profile.username
116
+ print(f"User: {username}")
 
 
 
 
117
 
118
+ questions_url = f"{DEFAULT_API_URL}/questions"
119
+ submit_url = f"{DEFAULT_API_URL}/submit"
 
120
 
 
 
121
  try:
122
+ agent = BasicAgent()
 
 
 
 
 
123
  except Exception as e:
124
+ return f"Agent init error: {e}", None
 
125
 
126
+ agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
 
 
 
127
 
128
+ # Fetch questions
129
+ resp = requests.get(questions_url, timeout=20)
130
+ resp.raise_for_status()
131
+ questions = resp.json()
132
 
133
+ answers_payload = []
134
+ log_rows = []
135
 
136
+ for q in questions:
137
+ task_id = q["task_id"]
138
+ question = q["question"]
139
  try:
140
+ ans = agent(question)
 
 
 
 
141
  except Exception as e:
142
+ ans = ""
143
+ print("Agent error:", e)
144
+
145
+ answers_payload.append({
146
+ "task_id": task_id,
147
+ "submitted_answer": ans
148
+ })
149
+
150
+ log_rows.append({
151
+ "Task ID": task_id,
152
+ "Question": question,
153
+ "Submitted Answer": ans
154
+ })
155
+
156
+ submission = {
157
+ "username": username,
158
  "agent_code": agent_code,
159
+ "answers": answers_payload
160
  }
161
 
162
+ resp = requests.post(submit_url, json=submission, timeout=60)
163
+ resp.raise_for_status()
164
+ result = resp.json()
 
 
 
 
 
 
 
 
 
 
 
 
 
165
 
166
+ status = (
167
+ f"Submission Successful!\n"
168
+ f"User: {result.get('username')}\n"
169
+ f"Score: {result.get('score')}% "
170
+ f"({result.get('correct_count')}/{result.get('total_attempted')})\n"
171
+ f"{result.get('message')}"
172
+ )
 
 
 
 
 
 
173
 
174
+ return status, pd.DataFrame(log_rows)
 
 
175
 
176
 
177
+ # ===============================
178
+ # Gradio UI
179
+ # ===============================
180
  with gr.Blocks() as demo:
181
+ gr.Markdown("# Basic Agent Evaluation Runner (PASS MODE)")
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  gr.LoginButton()
183
+ run_btn = gr.Button("Run Evaluation & Submit All Answers")
184
+ status = gr.Textbox(label="Result", lines=6)
185
+ table = gr.DataFrame(label="Answers", wrap=True)
186
+
187
+ run_btn.click(fn=run_and_submit_all, outputs=[status, table])
188
 
 
 
 
 
 
189
  if __name__ == "__main__":
190
+ demo.launch()