meghkaa committed on
Commit
beda03b
·
verified ·
1 Parent(s): 2e5c6ef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +128 -148
app.py CHANGED
@@ -1,266 +1,246 @@
1
  import os
2
  import gradio as gr
3
  import requests
4
- import inspect
5
  import pandas as pd
6
  from transformers import pipeline
7
  import re
8
- import math
9
 
10
- # (Keep Constants as is)
11
  # --- Constants ---
12
  # Base URL of the scoring API; the "/questions" and "/submit" endpoints are built from it.
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
 
14
- # --- Basic Agent Definition ---
15
- # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 
16
 
17
class BasicAgent:
    """Question-answering agent: an arithmetic shortcut backed by a text-generation model.

    Calling the instance with a question first tries `try_math`; when no
    arithmetic expression is found, the question is sent to the model through
    `ask_model` and the raw generation is normalised by `clean_answer`.
    """

    def __init__(self):
        """Load the text-generation pipeline and store it on `self.generator`."""
        print("Loading stronger instruction model...")
        self.generator = pipeline(
            "text-generation",
            model="mistralai/Mistral-7B-Instruct-v0.2",
            max_new_tokens=256,
            do_sample=False,
        )
        print("Model loaded.")

    # ---- Simple math tool ----
    def try_math(self, question: str):
        """Evaluate the first arithmetic expression found in `question`.

        Args:
            question: Free-form question text.

        Returns:
            The result as a string (`str(value)`), or None when the question
            contains no expression with at least one binary operation or the
            expression cannot be evaluated (syntax error, division by zero...).
        """
        # Local imports: the file's import block does not provide these names.
        import ast
        import operator

        # Question text is untrusted, so expressions are evaluated by walking
        # the parsed AST with a whitelist instead of calling eval().
        # Exponentiation is deliberately not supported (unbounded cost).
        binary_ops = {
            ast.Add: operator.add,
            ast.Sub: operator.sub,
            ast.Mult: operator.mul,
            ast.Div: operator.truediv,
            ast.FloorDiv: operator.floordiv,
        }
        unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

        def evaluate(node):
            """Recursively compute a whitelisted arithmetic AST node."""
            if isinstance(node, ast.Constant):
                value = node.value
                if isinstance(value, (int, float)) and not isinstance(value, bool):
                    return value
            elif isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
                return binary_ops[type(node.op)](
                    evaluate(node.left), evaluate(node.right)
                )
            elif isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
                return unary_ops[type(node.op)](evaluate(node.operand))
            raise ValueError("unsupported expression")

        # The character class also matches lone spaces, dots and bare numbers,
        # so every fragment is inspected instead of only the first one.
        for fragment in re.findall(r"[\d\.\+\-\*\/\(\) ]+", question):
            # Drop surrounding blanks and a sentence-ending period.
            candidate = fragment.strip().rstrip(".").strip()
            if not candidate:
                continue
            try:
                tree = ast.parse(candidate, mode="eval").body
                if not isinstance(tree, ast.BinOp):
                    # A bare number such as a year is not a calculation.
                    continue
                return str(evaluate(tree))
            except (SyntaxError, ValueError, ArithmeticError,
                    RecursionError, MemoryError):
                continue
        return None

    # ---- Clean output for EXACT MATCH ----
    def clean_answer(self, text: str) -> str:
        """Reduce raw model output to a single short answer line.

        Keeps the text after the last "Answer:" marker (if any), takes its
        first non-blank-prefixed line and removes one trailing period.
        """
        text = text.strip()

        if "Answer:" in text:
            text = text.split("Answer:")[-1]

        # Strip before selecting the first line; otherwise an answer placed on
        # the line after "Answer:" would be discarded as an empty first line.
        text = text.strip().split("\n")[0].strip()

        # Remove trailing punctuation
        text = re.sub(r"[\.]$", "", text)

        return text.strip()

    def ask_model(self, question: str):
        """Prompt the model with `question` and return the cleaned answer."""
        prompt = f"""
You are solving a benchmark evaluation problem.

Think step by step internally.
But output ONLY the final answer.
Do NOT explain.
Do NOT add extra words.

Question:
{question}

Final Answer:
"""
        output = self.generator(prompt)[0]["generated_text"]
        # The generated text echoes the prompt; keep only the continuation.
        answer = output.replace(prompt, "")
        return self.clean_answer(answer)

    def __call__(self, question: str) -> str:
        """Answer `question`, preferring the math tool over the model."""
        print(f"Processing question: {question[:60]}...")

        math_result = self.try_math(question)
        if math_result is not None:
            print("Used math tool.")
            return math_result

        # The pipeline decodes greedily (do_sample=False), so re-running the
        # same prompt for an over-long answer would reproduce the same text;
        # the model is therefore queried exactly once.
        answer = self.ask_model(question)

        print(f"Final Answer: {answer}")
        return answer
90
 
91
 
92
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetch all questions, run the BasicAgent on them, submit all answers,
    and report the results.

    Args:
        profile: The logged-in Hugging Face profile, or None when the user
            is not logged in.

    Returns:
        A `(status_message, results)` tuple. `results` is a pandas DataFrame
        with one row per processed question, or None when the run stopped
        before any question was processed.
    """
    # --- Determine HF Space Runtime URL and Repo URL ---
    space_id = os.getenv("SPACE_ID")  # Used to build the link to the code

    if not profile:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None
    username = f"{profile.username}"
    print(f"User logged in: {username}")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent
    try:
        agent = BasicAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None
    # Link to the Space's code. NOTE(review): when SPACE_ID is unset (local
    # run) this becomes ".../spaces/None/tree/main" - confirm that is acceptable.
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    # JSONDecodeError derives from RequestException, so it has to be listed
    # first or its handler can never run.
    except requests.exceptions.JSONDecodeError as e:
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # Anything other than a non-empty list cannot be iterated as questions.
    if not questions_data or not isinstance(questions_data, list):
        print("Fetched questions list is empty or has an invalid format.")
        return "Fetched questions list is empty or invalid format.", None
    print(f"Fetched {len(questions_data)} questions.")

    # 3. Run your Agent
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        if not isinstance(item, dict):
            # A malformed entry must not abort the whole run.
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            submitted_answer = agent(question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as e:
            # Failures are shown in the results table but not submitted.
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})

    # Every remaining exit path returns the same table.
    results_df = pd.DataFrame(results_log)

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", results_df

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        return final_status, results_df
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"

    print(status_message)
    return status_message, results_df
211
 
212
 
213
- # --- Build Gradio Interface using Blocks ---
214
with gr.Blocks() as demo:
    # Page title followed by the usage instructions.
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
**Instructions:**

1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.

---
**Disclaimers:**
Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
"""
    )

    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    # Read-only outputs: a status text box and the per-question answer table.
    status_output = gr.Textbox(
        label="Run Status / Submission Result",
        lines=5,
        interactive=False,
    )
    results_table = gr.DataFrame(
        label="Questions and Agent Answers",
        wrap=True,
    )

    # The handler returns (status, table), mapped onto the two outputs in order.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table],
    )

if __name__ == "__main__":
    # Startup banner reporting the Space-related environment variables.
    print("\n" + "-"*30 + " App Starting " + "-"*30)
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if not space_host_startup:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
    else:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        print(f" Runtime URL should be: https://{space_host_startup}.hf.space")

    if not space_id_startup:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
    else:
        # Repo URLs can only be printed when SPACE_ID is known.
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")

    print("-"*(60 + len(" App Starting ")) + "\n")

    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)
 
1
  import os
2
  import gradio as gr
3
  import requests
 
4
  import pandas as pd
5
  from transformers import pipeline
6
  import re
 
7
 
 
8
  # --- Constants ---
9
  # Base URL of the scoring API; the "/questions" and "/submit" endpoints are built from it.
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
+ # -----------------------------------------
12
+ # BASIC AGENT
13
+ # -----------------------------------------
14
 
15
class BasicAgent:
    """Lightweight question-answering agent with two shortcuts and a model fallback.

    Calling the instance with a question applies, in order: the reverse-text
    tool (`try_reverse`), the arithmetic tool (`try_math`), and finally the
    text-generation model (`ask_model`).
    """

    def __init__(self):
        """Load the text-generation pipeline and store it on `self.generator`."""
        print("Loading lightweight GAIA agent model...")

        # Lightweight model for HF CPU Spaces (stable).
        # `temperature` is not passed: decoding is greedy (do_sample=False),
        # where a sampling temperature has no effect.
        self.generator = pipeline(
            "text-generation",
            model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
            max_new_tokens=32,
            do_sample=False,
        )

        print("Model loaded successfully.")

    # -------------------------
    # TOOL 1: Reverse text
    # -------------------------
    def try_reverse(self, question: str):
        """Return `question` reversed when it looks like reversed text.

        A question is treated as reversed only when, after stripping
        whitespace, it starts with a period. Returns None otherwise.
        """
        q = question.strip()

        # Only reverse if clearly reversed (starts with dot)
        if q.startswith("."):
            return q[::-1]

        return None

    # -------------------------
    # TOOL 2: Safe arithmetic
    # -------------------------
    def try_math(self, question: str):
        """Evaluate the first `number <op> number` expression in `question`.

        Supported operators are +, -, * and /.

        Returns:
            The result as a string (whole values without a decimal part), or
            None when no expression is found or it cannot be evaluated.
        """
        # Local imports: the file's import block does not provide these names.
        import ast
        import operator

        binary_ops = {
            ast.Add: operator.add,
            ast.Sub: operator.sub,
            ast.Mult: operator.mul,
            ast.Div: operator.truediv,
        }

        match = re.search(r"\d+\.?\d*\s*[\+\-\*\/]\s*\d+\.?\d*", question)
        if match is None:
            return None

        try:
            # Parse instead of eval(): the text comes from an external
            # question, and only a single whitelisted operation is accepted.
            node = ast.parse(match.group(), mode="eval").body
            if not (
                isinstance(node, ast.BinOp)
                and type(node.op) in binary_ops
                and isinstance(node.left, ast.Constant)
                and isinstance(node.right, ast.Constant)
            ):
                return None
            result = binary_ops[type(node.op)](node.left.value, node.right.value)

            if float(result).is_integer():
                return str(int(result))
            return str(result)
        except (SyntaxError, ValueError, TypeError, ArithmeticError):
            # e.g. operands with leading zeros, division by zero, overflow.
            return None

    # -------------------------
    # STRICT CLEANING (Exact Match)
    # -------------------------
    def clean_answer(self, text: str) -> str:
        """Reduce raw model output to a single short answer line.

        Keeps the text after the last "Answer:" marker (if any), takes its
        first line, then removes surrounding quotes and one trailing period.
        """
        text = text.strip()

        if "Answer:" in text:
            text = text.split("Answer:")[-1]

        # Strip before selecting the first line; otherwise an answer placed on
        # the line after "Answer:" would be discarded as an empty first line.
        text = text.strip().split("\n")[0].strip()

        # Remove quotes and trailing punctuation
        text = text.strip('"').strip("'")
        text = re.sub(r"[\.]$", "", text)

        return text.strip()

    # -------------------------
    # MODEL CALL
    # -------------------------
    def ask_model(self, question: str):
        """Prompt the model with `question` and return the cleaned answer."""
        prompt = f"""You are answering a benchmark question.
Return ONLY the exact final answer.
No explanation.
No extra words.
If number return number only.
If word → return word only.

Question: {question}
Answer:"""

        output = self.generator(prompt)[0]["generated_text"]
        # The generated text echoes the prompt; keep only the continuation.
        answer = output.replace(prompt, "")

        return self.clean_answer(answer)

    # -------------------------
    # MAIN LOGIC
    # -------------------------
    def __call__(self, question: str) -> str:
        """Answer `question` using the tools first and the model last."""
        print(f"Processing: {question[:60]}...")

        # 1️⃣ Reverse tool: the un-reversed question is what the model sees.
        reversed_q = self.try_reverse(question)
        if reversed_q:
            print("Used reverse tool.")
            return self.ask_model(reversed_q)

        # 2️⃣ Math tool
        math_result = self.try_math(question)
        if math_result is not None:
            print("Used math tool.")
            return math_result

        # 3️⃣ LLM reasoning. Decoding is greedy (do_sample=False), so asking
        # again with the same prompt would return the same text; the model is
        # therefore queried exactly once.
        answer = self.ask_model(question)

        print(f"Final Answer: {answer}")
        return answer
125
 
126
 
127
+ # -----------------------------------------
128
+ # RUN + SUBMIT FUNCTION
129
+ # -----------------------------------------
130
+
131
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the questions, answer them with BasicAgent and submit the answers.

    Args:
        profile: The logged-in Hugging Face profile, or None when the user
            is not logged in.

    Returns:
        A `(status_message, results)` tuple. `results` is a pandas DataFrame
        with one row per processed question, or None when the run stopped
        before any question was processed.
    """
    space_id = os.getenv("SPACE_ID")

    if not profile:
        return "Please Login to Hugging Face with the button.", None
    username = profile.username
    print(f"User logged in: {username}")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # Instantiate agent
    try:
        agent = BasicAgent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    # Link to the Space's code. NOTE(review): when SPACE_ID is unset (local
    # run) this becomes ".../spaces/None/tree/main" - confirm that is acceptable.
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # Fetch Questions
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # A JSON object or scalar (e.g. an error payload) cannot be iterated as
    # a list of question records; report it instead of crashing below.
    if not isinstance(questions_data, list):
        return (
            "Error fetching questions: expected a list of questions, "
            f"got {type(questions_data).__name__}",
            None,
        )

    results_log = []
    answers_payload = []

    for item in questions_data:
        if not isinstance(item, dict):
            # A malformed entry must not abort the whole run.
            continue

        task_id = item.get("task_id")
        question_text = item.get("question")

        if not task_id or question_text is None:
            continue

        try:
            submitted_answer = agent(question_text)
        except Exception as e:
            # Failures are shown in the results table but not submitted.
            results_log.append(
                {
                    "Task ID": task_id,
                    "Question": question_text,
                    "Submitted Answer": f"AGENT ERROR: {e}",
                }
            )
            continue

        answers_payload.append(
            {"task_id": task_id, "submitted_answer": submitted_answer}
        )
        results_log.append(
            {
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": submitted_answer,
            }
        )

    if not answers_payload:
        return "Agent did not produce any answers.", pd.DataFrame(results_log)

    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }

    # Submit
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()

        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/"
            f"{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', '')}"
        )

        return final_status, pd.DataFrame(results_log)

    except Exception as e:
        return f"Submission Failed: {e}", pd.DataFrame(results_log)
222
+
 
 
223
 
224
 
 
225
with gr.Blocks() as demo:
    # Page title, login control and the button that starts a run.
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")

    # Read-only outputs: a status text box and the per-question answer table.
    status_output = gr.Textbox(
        interactive=False,
        lines=5,
        label="Run Status / Submission Result",
    )
    results_table = gr.DataFrame(
        wrap=True,
        label="Questions and Agent Answers",
    )

    # No explicit inputs are wired; presumably Gradio supplies the handler's
    # `profile` argument from its gr.OAuthProfile annotation - TODO confirm.
    run_button.click(
        run_and_submit_all,
        outputs=[status_output, results_table],
    )

if __name__ == "__main__":
    demo.launch(share=False, debug=True)