ahnhs2k commited on
Commit
fdc623e
·
1 Parent(s): 904626c
Files changed (1) hide show
  1. app.py +100 -275
app.py CHANGED
@@ -1,330 +1,155 @@
1
  import os
2
  import re
3
- import math
4
  import gradio as gr
5
  import requests
6
- import inspect
7
  import pandas as pd
8
  from typing import TypedDict
9
  from langgraph.graph import StateGraph, START, END
10
- from langchain_core.messages import HumanMessage
11
  from langchain_openai import ChatOpenAI
 
12
 
13
- # (Keep Constants as is)
14
- # --- Constants ---
 
15
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
16
 
 
17
  # Utils
18
- def _clean_answer(text: str) -> str:
 
19
  if not text:
20
  return ""
21
  s = text.strip()
22
- s = s.replace("Final answer:", "").replace("Answer:", "").strip()
23
  s = s.splitlines()[0].strip()
24
- s = s.strip('"\'`')
25
- if len(s) > 1 and s.endswith("."):
26
- s = s[:-1].strip()
27
  return s
28
 
29
-
30
- def try_simple_math(question: str) -> str | None:
31
- """
32
- 매우 보수적인 산수 처리
33
- - 질문에 노골적인 수식이 있을 때만 eval
34
- """
35
- m = re.search(r"([-+/*().\d\s]{3,})", question)
36
- if not m:
37
- return None
38
- expr = m.group(1).strip()
39
- if re.fullmatch(r"[0-9+\-*/().\s]+", expr) is None:
40
- return None
41
- try:
42
- val = eval(expr, {"__builtins__": {}}, {})
43
- if isinstance(val, (int, float)):
44
- if abs(val - round(val)) < 1e-12:
45
- return str(int(round(val)))
46
- return str(val)
47
- except:
48
- return None
49
- return None
50
-
51
- # 1. State 정의
52
  class AgentState(TypedDict):
53
  question: str
54
  answer: str
55
 
56
- # 2. LLM 초기화
57
- os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
58
-
59
  llm = ChatOpenAI(
60
- model="gpt-4o-mini", # Space 환경에 맞게 조정 가능
61
  temperature=0,
62
- max_tokens=256
63
  )
64
 
65
- # 3. Node 정의
66
- def solve_question(state: AgentState) -> dict:
67
- question = state["question"]
68
-
69
- # 0) 초간단 산수 선처리
70
- math_ans = try_simple_math(question)
71
- if math_ans is not None:
72
- return {"answer": math_ans}
73
 
74
- # 1) 1-pass
75
- prompt1 = f"""
76
- You are solving a GAIA benchmark question.
77
-
78
- Rules:
79
- - Think step-by-step internally (do NOT reveal reasoning).
80
  - Output ONLY the final answer.
81
  - No explanation.
82
  - No formatting.
83
- - No extra text.
84
-
85
- Question:
86
- {question}
87
- """.strip()
88
-
89
- r1 = llm.invoke([HumanMessage(content=prompt1)])
90
- a1 = _clean_answer(r1.content)
91
-
92
- # 2) 2-pass (검증/수정)
93
- if (not a1) or (len(a1) > 80):
94
- prompt2 = f"""
95
- You are verifying a candidate answer for a GAIA question.
96
-
97
- Rules:
98
- - Think internally.
99
- - Output ONLY the corrected final answer.
100
- - No explanation.
101
- - No formatting.
102
- - No extra text.
103
 
104
- Question:
105
- {question}
 
 
 
106
 
107
- Candidate answer:
108
- {a1}
109
- """.strip()
 
110
 
111
- r2 = llm.invoke([HumanMessage(content=prompt2)])
112
- a2 = _clean_answer(r2.content)
113
- final = a2 if a2 else a1
114
- else:
115
- final = a1
116
 
117
- return {"answer": final}
118
 
119
- # 4. Graph 구성
 
 
120
  builder = StateGraph(AgentState)
121
  builder.add_node("solve", solve_question)
122
-
123
  builder.add_edge(START, "solve")
124
  builder.add_edge("solve", END)
 
125
 
126
- langgraph_agent = builder.compile()
127
-
128
-
129
- # 6. 기존 BasicAgent를 LangGraph Agent로 교체
130
- """
131
- 중요 포인트
132
- - run_and_submit_all() 함수는 agent(question) 형태를 기대함
133
- - 따라서 LangGraph Agent를 감싸는 Wrapper 클래스를 유지
134
- """
135
  class BasicAgent:
136
- def __init__(self):
137
- print("LangGraphAgent initialized.")
138
-
139
  def __call__(self, question: str) -> str:
140
- """
141
- question: GAIA 질문 (string)
142
- return: 정답만 포함된 string
143
- """
144
- print(f"Agent received question (first 50 chars): {question[:50]}...")
145
 
146
- result = langgraph_agent.invoke({
147
- "question": question,
148
- "answer": ""
149
- })
 
 
150
 
151
- final_answer = result["answer"]
152
- print(f"Agent returning answer: {final_answer}")
 
153
 
154
- return final_answer
155
 
156
- def run_and_submit_all( profile: gr.OAuthProfile | None):
157
- """
158
- Fetches all questions, runs the BasicAgent on them, submits all answers,
159
- and displays the results.
160
- """
161
- # --- Determine HF Space Runtime URL and Repo URL ---
162
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
163
 
164
- if profile:
165
- username= f"{profile.username}"
166
- print(f"User logged in: {username}")
167
- else:
168
- print("User not logged in.")
169
- return "Please Login to Hugging Face with the button.", None
170
-
171
- api_url = DEFAULT_API_URL
172
- questions_url = f"{api_url}/questions"
173
- submit_url = f"{api_url}/submit"
174
-
175
- # 1. Instantiate Agent ( modify this part to create your agent)
176
- try:
177
- agent = BasicAgent()
178
- except Exception as e:
179
- print(f"Error instantiating agent: {e}")
180
- return f"Error initializing agent: {e}", None
181
- # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
182
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
183
- print(agent_code)
184
-
185
- # 2. Fetch Questions
186
- print(f"Fetching questions from: {questions_url}")
187
- try:
188
- response = requests.get(questions_url, timeout=15)
189
- response.raise_for_status()
190
- questions_data = response.json()
191
- if not questions_data:
192
- print("Fetched questions list is empty.")
193
- return "Fetched questions list is empty or invalid format.", None
194
- print(f"Fetched {len(questions_data)} questions.")
195
- except requests.exceptions.RequestException as e:
196
- print(f"Error fetching questions: {e}")
197
- return f"Error fetching questions: {e}", None
198
- except requests.exceptions.JSONDecodeError as e:
199
- print(f"Error decoding JSON response from questions endpoint: {e}")
200
- print(f"Response text: {response.text[:500]}")
201
- return f"Error decoding server response for questions: {e}", None
202
- except Exception as e:
203
- print(f"An unexpected error occurred fetching questions: {e}")
204
- return f"An unexpected error occurred fetching questions: {e}", None
205
-
206
- # 3. Run your Agent
207
- results_log = []
208
  answers_payload = []
209
- print(f"Running agent on {len(questions_data)} questions...")
210
- for item in questions_data:
211
- task_id = item.get("task_id")
212
- question_text = item.get("question")
213
- if not task_id or question_text is None:
214
- print(f"Skipping item with missing task_id or question: {item}")
215
- continue
216
- try:
217
- submitted_answer = agent(question_text)
218
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
219
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
220
- except Exception as e:
221
- print(f"Error running agent on task {task_id}: {e}")
222
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
223
 
224
- if not answers_payload:
225
- print("Agent did not produce any answers to submit.")
226
- return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
227
-
228
- # 4. Prepare Submission
229
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
230
- status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
231
- print(status_update)
232
-
233
- # 5. Submit
234
- print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
235
- try:
236
- response = requests.post(submit_url, json=submission_data, timeout=60)
237
- response.raise_for_status()
238
- result_data = response.json()
239
- final_status = (
240
- f"Submission Successful!\n"
241
- f"User: {result_data.get('username')}\n"
242
- f"Overall Score: {result_data.get('score', 'N/A')}% "
243
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
244
- f"Message: {result_data.get('message', 'No message received.')}"
245
- )
246
- print("Submission successful.")
247
- results_df = pd.DataFrame(results_log)
248
- return final_status, results_df
249
- except requests.exceptions.HTTPError as e:
250
- error_detail = f"Server responded with status {e.response.status_code}."
251
  try:
252
- error_json = e.response.json()
253
- error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
254
- except requests.exceptions.JSONDecodeError:
255
- error_detail += f" Response: {e.response.text[:500]}"
256
- status_message = f"Submission Failed: {error_detail}"
257
- print(status_message)
258
- results_df = pd.DataFrame(results_log)
259
- return status_message, results_df
260
- except requests.exceptions.Timeout:
261
- status_message = "Submission Failed: The request timed out."
262
- print(status_message)
263
- results_df = pd.DataFrame(results_log)
264
- return status_message, results_df
265
- except requests.exceptions.RequestException as e:
266
- status_message = f"Submission Failed: Network error - {e}"
267
- print(status_message)
268
- results_df = pd.DataFrame(results_log)
269
- return status_message, results_df
270
- except Exception as e:
271
- status_message = f"An unexpected error occurred during submission: {e}"
272
- print(status_message)
273
- results_df = pd.DataFrame(results_log)
274
- return status_message, results_df
 
 
 
 
 
275
 
 
276
 
277
- # --- Build Gradio Interface using Blocks ---
 
 
278
  with gr.Blocks() as demo:
279
- gr.Markdown("# Basic Agent Evaluation Runner")
280
- gr.Markdown(
281
- """
282
- **Instructions:**
283
-
284
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
285
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
286
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
287
-
288
- ---
289
- **Disclaimers:**
290
- Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
291
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
292
- """
293
- )
294
-
295
  gr.LoginButton()
 
 
 
 
296
 
297
- run_button = gr.Button("Run Evaluation & Submit All Answers")
298
-
299
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
300
- # Removed max_rows=10 from DataFrame constructor
301
- results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
302
-
303
- run_button.click(
304
- fn=run_and_submit_all,
305
- outputs=[status_output, results_table]
306
- )
307
-
308
- if __name__ == "__main__":
309
- print("\n" + "-"*30 + " App Starting " + "-"*30)
310
- # Check for SPACE_HOST and SPACE_ID at startup for information
311
- space_host_startup = os.getenv("SPACE_HOST")
312
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
313
-
314
- if space_host_startup:
315
- print(f"✅ SPACE_HOST found: {space_host_startup}")
316
- print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
317
- else:
318
- print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
319
-
320
- if space_id_startup: # Print repo URLs if SPACE_ID is found
321
- print(f"✅ SPACE_ID found: {space_id_startup}")
322
- print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
323
- print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
324
- else:
325
- print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
326
-
327
- print("-"*(60 + len(" App Starting ")) + "\n")
328
-
329
- print("Launching Gradio Interface for Basic Agent Evaluation...")
330
- demo.launch(debug=True, share=False)
 
1
  import os
2
  import re
3
+ import time
4
  import gradio as gr
5
  import requests
 
6
  import pandas as pd
7
  from typing import TypedDict
8
  from langgraph.graph import StateGraph, START, END
 
9
  from langchain_openai import ChatOpenAI
10
+ from langchain_core.messages import SystemMessage, HumanMessage
11
 
12
+ # -------------------------
13
+ # Constants
14
+ # -------------------------
15
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
16
 
17
+ # -------------------------
18
  # Utils
19
+ # -------------------------
20
def clean_answer(text: str) -> str:
    """Normalize a raw LLM reply into a bare GAIA answer string.

    Steps: strip whitespace, drop a leading "Final answer:"/"Answer:"
    label (case-insensitive), keep only the first line, remove wrapping
    quotes/backticks, and drop a single trailing period (kept when the
    answer is just ".").

    Args:
        text: Raw model output; may be None-ish/empty.

    Returns:
        The cleaned answer, or "" for empty/whitespace-only input.
    """
    if not text:
        return ""
    s = text.strip()
    s = re.sub(r"^(Final answer:|Answer:)\s*", "", s, flags=re.I)
    # BUGFIX: use partition instead of splitlines()[0]. If the input was
    # whitespace-only, or consisted solely of the "Answer:" label, s is ""
    # here and "".splitlines() is [] — indexing [0] raised IndexError.
    # "".partition("\n")[0] is safely "".
    s = s.partition("\n")[0].strip()
    s = s.strip("\"'`")
    if s.endswith(".") and len(s) > 1:
        s = s[:-1]
    return s
30
 
31
+ # -------------------------
32
+ # State
33
+ # -------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
# State carried through the LangGraph pipeline: the incoming GAIA question
# and the answer produced by the "solve" node. Functional TypedDict form —
# runtime-identical to the class-based declaration.
AgentState = TypedDict("AgentState", {"question": str, "answer": str})
37
 
38
+ # -------------------------
39
+ # LLM
40
+ # -------------------------
41
# Deterministic (temperature=0) chat model; max_tokens kept small because the
# system prompt forces a bare final answer with no visible reasoning.
# NOTE(review): ChatOpenAI reads OPENAI_API_KEY from the environment
# implicitly — confirm the Space secret is configured, since this is
# constructed at import time.
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0,
    max_tokens=128
)
46
 
47
# Shared system prompt for every question. GAIA scoring is exact-match on
# the submitted string, so the prompt forbids any text beyond the answer
# itself; clean_answer() strips whatever leaks through anyway.
SYSTEM_PROMPT = """
You are solving GAIA benchmark questions.

Rules (VERY IMPORTANT):
- You MUST reason step by step internally.
- NEVER show your reasoning.
- Output ONLY the final answer.
- No explanation.
- No formatting.
- No extra words.
"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
+ # -------------------------
60
+ # Node
61
+ # -------------------------
62
def solve_question(state: AgentState) -> dict:
    """Graph node: send the question to the LLM and return the cleaned answer.

    Args:
        state: Current AgentState; only "question" is read.

    Returns:
        Partial state update: {"answer": <cleaned final answer>}.
    """
    reply = llm.invoke([
        SystemMessage(content=SYSTEM_PROMPT),
        HumanMessage(content=state["question"]),
    ])
    return {"answer": clean_answer(reply.content)}
74
 
75
+ # -------------------------
76
+ # Graph
77
+ # -------------------------
78
# Minimal linear graph: START -> solve -> END. A single node is enough here;
# the StateGraph scaffolding leaves room to add tool/verification nodes later.
builder = StateGraph(AgentState)
builder.add_node("solve", solve_question)
builder.add_edge(START, "solve")
builder.add_edge("solve", END)
agent_graph = builder.compile()  # runnable: agent_graph.invoke(state_dict)
83
 
84
+ # -------------------------
85
+ # Wrapper
86
+ # -------------------------
 
 
 
 
 
 
87
class BasicAgent:
    """Callable wrapper around the compiled LangGraph.

    run_and_submit_all() expects an ``agent(question) -> str`` interface,
    so this adapts agent_graph.invoke() to a plain callable.
    """

    def __call__(self, question: str) -> str:
        initial_state = {"question": question, "answer": ""}
        return agent_graph.invoke(initial_state)["answer"]
 
 
 
91
 
92
+ # -------------------------
93
+ # Runner
94
+ # -------------------------
95
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all questions, run BasicAgent on each, submit, and report the score.

    Args:
        profile: OAuth profile injected by gr.LoginButton; None when not
            logged in.

    Returns:
        Tuple of (status message, per-question DataFrame or None).
    """
    if not profile:
        return "Please login first.", None

    username = profile.username
    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"

    agent = BasicAgent()

    # Fetch once. BUGFIX: add a timeout and status check, and fail
    # gracefully instead of crashing the Gradio callback with a raw
    # traceback on network errors.
    try:
        resp = requests.get(questions_url, timeout=15)
        resp.raise_for_status()
        questions = resp.json()
    except requests.exceptions.RequestException as e:
        return f"Error fetching questions: {e}", None
    except ValueError as e:  # invalid JSON body
        return f"Error decoding questions response: {e}", None
    if not questions:
        return "Fetched questions list is empty.", None

    answers_payload = []
    log = []

    for q in questions:
        # Guard against malformed items instead of raising KeyError.
        task_id = q.get("task_id")
        question_text = q.get("question")
        if not task_id or question_text is None:
            continue
        try:
            ans = agent(question_text)
            answers_payload.append({
                "task_id": task_id,
                "submitted_answer": ans
            })
            log.append({
                "task_id": task_id,
                "answer": ans
            })
        except Exception as e:
            # Best-effort per question: record the failure and keep going.
            log.append({
                "task_id": task_id,
                "answer": f"ERROR: {e}"
            })

    if not answers_payload:
        return "Agent produced no answers to submit.", pd.DataFrame(log)

    # Point at this Space's own code when running on HF; fall back to the
    # previously hard-coded repo URL when SPACE_ID is absent (local runs).
    space_id = os.getenv("SPACE_ID")
    agent_code = (
        f"https://huggingface.co/spaces/{space_id}/tree/main"
        if space_id
        else "https://huggingface.co/spaces/ahnhs2k/Agents_Final_Assignment_"
    )

    submission = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers_payload
    }

    # BUGFIX: timeout + status check + error handling on submission as well,
    # so a server error still shows the per-question log.
    try:
        resp = requests.post(submit_url, json=submission, timeout=60)
        resp.raise_for_status()
        res = resp.json()
    except requests.exceptions.RequestException as e:
        return f"Submission failed: {e}", pd.DataFrame(log)
    except ValueError as e:
        return f"Submission succeeded but response was not JSON: {e}", pd.DataFrame(log)

    status = (
        f"Score: {res.get('score')}% "
        f"({res.get('correct_count')}/{res.get('total_attempted')})\n"
        f"{res.get('message')}"
    )

    return status, pd.DataFrame(log)
143
 
144
+ # -------------------------
145
+ # UI
146
+ # -------------------------
147
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Level-1 Agent")
    # LoginButton supplies the gr.OAuthProfile that Gradio auto-injects into
    # run_and_submit_all (hence no inputs= on the click handler below).
    gr.LoginButton()
    btn = gr.Button("Run & Submit")
    out = gr.Textbox(lines=4)
    table = gr.DataFrame()
    btn.click(run_and_submit_all, outputs=[out, table])

# NOTE(review): launched unconditionally (no __main__ guard) — conventional
# for a HF Space entry point, but importing this module starts the server.
demo.launch()