DarrenDsa committed on
Commit
41d7f0b
·
verified ·
1 Parent(s): 7e35370

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +167 -49
app.py CHANGED
@@ -1,5 +1,3 @@
1
-
2
-
3
  import os
4
  import gradio as gr
5
  import requests
@@ -8,143 +6,263 @@ import pandas as pd
8
  from smolagents import LiteLLMModel, CodeAgent, DuckDuckGoSearchTool
9
  from gaia_tools import ReverseTextTool, RunPythonFileTool, download_server
10
 
11
- # System prompt for the agent
 
 
 
 
12
  SYSTEM_PROMPT = """You are a general AI assistant. I will ask you a question.
13
- Report your thoughts, and finish your answer with just the answer — no prefixes like "FINAL ANSWER:".
14
- Your answer should be a number OR as few words as possible OR a comma-separated list of numbers and/or strings.
15
- If you're asked for a number, don’t use commas or units like $ or %, unless specified.
16
- If you're asked for a string, don’t use articles or abbreviations (e.g. for cities), and write digits in plain text unless told otherwise.
17
-
18
- Tool Use Guidelines:
19
- 1. Do **not** use any tools outside of the provided tools list.
20
- 2. Always use **only one tool at a time** in each step of your execution.
21
- 3. If the question refers to a `.py` file or uploaded Python script, use **RunPythonFileTool** to execute it and base your answer on its output.
22
- 4. If the question looks reversed (starts with a period or reads backward), first use **ReverseTextTool** to reverse it, then process the question.
23
- 5. For logic or word puzzles, solve them directly unless they are reversed — in which case, decode first using **ReverseTextTool**.
24
- 6. When dealing with Excel files, prioritize using the **excel** tool over writing code in **terminal-controller**.
25
- 7. If you need to download a file, always use the **download_server** tool and save it to the correct path.
26
- 8. Even for complex tasks, assume a solution exists. If one method fails, try another approach using different tools.
27
- 9. Due to context length limits, keep browser-based tasks (e.g., searches) as short and efficient as possible.
 
 
 
 
 
 
 
 
 
 
 
 
28
  """
29
 
 
30
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
31
 
32
- # Agent wrapper using LiteLLMModel
 
 
 
 
33
  class MyAgent:
 
34
  def __init__(self):
35
- gemini_api_key = os.getenv("GEMINI_API_KEY")
36
- if not gemini_api_key:
37
- raise ValueError("GEMINI_API_KEY not set in environment variables.")
38
-
 
 
 
 
 
39
  self.model = LiteLLMModel(
40
- model_id="gemini/gemini-2.0-flash-lite",
41
- api_key=gemini_api_key,
 
 
 
 
 
42
  system_prompt=SYSTEM_PROMPT
 
43
  )
44
-
45
  self.agent = CodeAgent(
 
46
  tools=[
47
  DuckDuckGoSearchTool(),
48
  ReverseTextTool,
49
  RunPythonFileTool,
50
  download_server
51
  ],
 
52
  model=self.model,
 
53
  add_base_tools=True,
 
54
  )
55
 
56
  def __call__(self, question: str) -> str:
57
  return self.agent.run(question)
58
 
59
- # Main evaluation function
 
 
 
 
60
  def run_and_submit_all(profile: gr.OAuthProfile | None):
 
61
  space_id = os.getenv("SPACE_ID")
62
 
63
  if profile:
64
  username = profile.username
65
  print(f"User logged in: {username}")
66
  else:
67
- print("User not logged in.")
68
  return "Please login to Hugging Face.", None
69
 
 
70
  questions_url = f"{DEFAULT_API_URL}/questions"
71
  submit_url = f"{DEFAULT_API_URL}/submit"
72
 
 
73
  try:
74
  agent = MyAgent()
75
  except Exception as e:
76
  return f"Error initializing agent: {e}", None
77
 
 
 
 
78
  try:
79
- response = requests.get(questions_url, timeout=15)
 
 
 
 
 
80
  response.raise_for_status()
 
81
  questions_data = response.json()
 
82
  except Exception as e:
 
83
  return f"Error fetching questions: {e}", None
84
 
 
85
  results_log = []
86
  answers_payload = []
87
 
 
 
 
88
  for item in questions_data:
 
89
  task_id = item.get("task_id")
90
  question_text = item.get("question")
 
91
  if not task_id or question_text is None:
92
  continue
 
93
  try:
 
94
  submitted_answer = agent(question_text)
95
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
96
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
 
 
 
 
 
 
 
 
 
97
  except Exception as e:
 
98
  results_log.append({
99
  "Task ID": task_id,
100
  "Question": question_text,
101
  "Submitted Answer": f"AGENT ERROR: {e}"
102
  })
103
 
 
104
  if not answers_payload:
105
- return "Agent did not return any answers.", pd.DataFrame(results_log)
 
106
 
107
  submission_data = {
 
108
  "username": profile.username.strip(),
109
- "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
 
 
 
110
  "answers": answers_payload
 
111
  }
112
 
 
 
 
113
  try:
114
- response = requests.post(submit_url, json=submission_data, timeout=60)
 
 
 
 
 
 
115
  response.raise_for_status()
 
116
  result_data = response.json()
 
117
  final_status = (
118
  f"Submission Successful!\n"
119
  f"User: {result_data.get('username')}\n"
120
  f"Score: {result_data.get('score', 'N/A')}% "
121
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
 
122
  f"Message: {result_data.get('message', 'No message received.')}"
123
  )
 
124
  return final_status, pd.DataFrame(results_log)
 
125
  except Exception as e:
 
126
  return f"Submission failed: {e}", pd.DataFrame(results_log)
127
 
128
- # Gradio UI setup
 
 
 
 
129
  with gr.Blocks() as demo:
130
- gr.Markdown("# Basic Agent Evaluation Runner")
 
 
131
  gr.Markdown("""
132
- **Instructions:**
133
- 1. Clone this space and configure your Gemini API key.
134
- 2. Log in to Hugging Face.
135
- 3. Run your agent on evaluation tasks and submit answers.
136
- """)
 
137
 
138
  gr.LoginButton()
139
- run_button = gr.Button("Run Evaluation & Submit All Answers")
140
- status_output = gr.Textbox(label="Submission Result", lines=5, interactive=False)
141
- results_table = gr.DataFrame(label="Results", wrap=True)
142
 
143
- run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 
 
144
 
145
- if __name__ == "__main__":
146
- print("🔧 App starting...")
147
- demo.launch(debug=True, share=False)
 
 
148
 
 
 
 
 
149
 
 
 
 
 
 
 
 
 
 
150
 
 
 
 
 
 
 
 
1
  import os
2
  import gradio as gr
3
  import requests
 
6
  from smolagents import LiteLLMModel, CodeAgent, DuckDuckGoSearchTool
7
  from gaia_tools import ReverseTextTool, RunPythonFileTool, download_server
8
 
9
+
10
+ # ==============================
11
+ # System Prompt
12
+ # ==============================
13
+
14
  SYSTEM_PROMPT = """You are a general AI assistant. I will ask you a question.
15
+ Report your thoughts, and finish your answer with just the answer — no prefixes.
16
+
17
+ Your answer should be:
18
+ A number
19
+ OR
20
+ Few words
21
+ OR
22
+ Comma-separated list
23
+
24
+ Rules:
25
+
26
+ If number:
27
+ - No commas
28
+ - No units
29
+
30
+ If string:
31
+ - No articles
32
+ - No abbreviations
33
+ - Write digits as words
34
+
35
+ Tool Rules:
36
+
37
+ 1. Use only provided tools.
38
+ 2. Use one tool at a time.
39
+ 3. If reversed question → use ReverseTextTool.
40
+ 4. If .py file → use RunPythonFileTool.
41
+ 5. For downloads → use download_server.
42
  """
43
 
44
+
45
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
46
 
47
+
48
+ # ==============================
49
+ # Agent Class (NVIDIA Version)
50
+ # ==============================
51
+
52
  class MyAgent:
53
+
54
  def __init__(self):
55
+
56
+ nvidia_api_key = os.getenv("NVIDIA_API_KEY")
57
+
58
+ if not nvidia_api_key:
59
+ raise ValueError(
60
+ "NVIDIA_API_KEY not set in environment variables."
61
+ )
62
+
63
+ # NVIDIA MiniMax model via LiteLLM
64
  self.model = LiteLLMModel(
65
+
66
+ model_id="openai/minimaxai/minimax-m2.5",
67
+
68
+ api_key=nvidia_api_key,
69
+
70
+ api_base="https://integrate.api.nvidia.com/v1",
71
+
72
  system_prompt=SYSTEM_PROMPT
73
+
74
  )
75
+
76
  self.agent = CodeAgent(
77
+
78
  tools=[
79
  DuckDuckGoSearchTool(),
80
  ReverseTextTool,
81
  RunPythonFileTool,
82
  download_server
83
  ],
84
+
85
  model=self.model,
86
+
87
  add_base_tools=True,
88
+
89
  )
90
 
91
  def __call__(self, question: str) -> str:
92
  return self.agent.run(question)
93
 
94
+
95
+ # ==============================
96
+ # Main Evaluation Function
97
+ # ==============================
98
+
99
  def run_and_submit_all(profile: gr.OAuthProfile | None):
100
+
101
  space_id = os.getenv("SPACE_ID")
102
 
103
  if profile:
104
  username = profile.username
105
  print(f"User logged in: {username}")
106
  else:
 
107
  return "Please login to Hugging Face.", None
108
 
109
+
110
  questions_url = f"{DEFAULT_API_URL}/questions"
111
  submit_url = f"{DEFAULT_API_URL}/submit"
112
 
113
+
114
  try:
115
  agent = MyAgent()
116
  except Exception as e:
117
  return f"Error initializing agent: {e}", None
118
 
119
+
120
+ # Fetch questions
121
+
122
  try:
123
+
124
+ response = requests.get(
125
+ questions_url,
126
+ timeout=15
127
+ )
128
+
129
  response.raise_for_status()
130
+
131
  questions_data = response.json()
132
+
133
  except Exception as e:
134
+
135
  return f"Error fetching questions: {e}", None
136
 
137
+
138
  results_log = []
139
  answers_payload = []
140
 
141
+
142
+ # Run agent
143
+
144
  for item in questions_data:
145
+
146
  task_id = item.get("task_id")
147
  question_text = item.get("question")
148
+
149
  if not task_id or question_text is None:
150
  continue
151
+
152
  try:
153
+
154
  submitted_answer = agent(question_text)
155
+
156
+ answers_payload.append({
157
+ "task_id": task_id,
158
+ "submitted_answer": submitted_answer
159
+ })
160
+
161
+ results_log.append({
162
+ "Task ID": task_id,
163
+ "Question": question_text,
164
+ "Submitted Answer": submitted_answer
165
+ })
166
+
167
  except Exception as e:
168
+
169
  results_log.append({
170
  "Task ID": task_id,
171
  "Question": question_text,
172
  "Submitted Answer": f"AGENT ERROR: {e}"
173
  })
174
 
175
+
176
  if not answers_payload:
177
+ return "Agent did not return answers.", pd.DataFrame(results_log)
178
+
179
 
180
  submission_data = {
181
+
182
  "username": profile.username.strip(),
183
+
184
+ "agent_code":
185
+ f"https://huggingface.co/spaces/{space_id}/tree/main",
186
+
187
  "answers": answers_payload
188
+
189
  }
190
 
191
+
192
+ # Submit answers
193
+
194
  try:
195
+
196
+ response = requests.post(
197
+ submit_url,
198
+ json=submission_data,
199
+ timeout=60
200
+ )
201
+
202
  response.raise_for_status()
203
+
204
  result_data = response.json()
205
+
206
  final_status = (
207
  f"Submission Successful!\n"
208
  f"User: {result_data.get('username')}\n"
209
  f"Score: {result_data.get('score', 'N/A')}% "
210
+ f"({result_data.get('correct_count', '?')}/"
211
+ f"{result_data.get('total_attempted', '?')} correct)\n"
212
  f"Message: {result_data.get('message', 'No message received.')}"
213
  )
214
+
215
  return final_status, pd.DataFrame(results_log)
216
+
217
  except Exception as e:
218
+
219
  return f"Submission failed: {e}", pd.DataFrame(results_log)
220
 
221
+
222
+ # ==============================
223
+ # Gradio UI
224
+ # ==============================
225
+
226
  with gr.Blocks() as demo:
227
+
228
+ gr.Markdown("# NVIDIA MiniMax Agent Runner 🚀")
229
+
230
  gr.Markdown("""
231
+ **Instructions**
232
+
233
+ 1. Add NVIDIA API key in Secrets
234
+ 2. Login to HuggingFace
235
+ 3. Click Run
236
+ """)
237
 
238
  gr.LoginButton()
 
 
 
239
 
240
+ run_button = gr.Button(
241
+ "Run Evaluation & Submit All Answers"
242
+ )
243
 
244
+ status_output = gr.Textbox(
245
+ label="Submission Result",
246
+ lines=5,
247
+ interactive=False
248
+ )
249
 
250
+ results_table = gr.DataFrame(
251
+ label="Results",
252
+ wrap=True
253
+ )
254
 
255
+ run_button.click(
256
+ fn=run_and_submit_all,
257
+ outputs=[status_output, results_table]
258
+ )
259
+
260
+
261
+ if __name__ == "__main__":
262
+
263
+ print("🔧 App starting...")
264
 
265
+ demo.launch(
266
+ debug=True,
267
+ share=False
268
+ )