lethaq committed on
Commit
475d553
·
verified ·
1 Parent(s): 567d169

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +177 -130
app.py CHANGED
@@ -1,65 +1,105 @@
1
- """ Basic Agent Evaluation Runner"""
2
  import os
3
- import inspect
4
  import gradio as gr
5
  import requests
6
  import pandas as pd
7
- from langchain_core.messages import HumanMessage
8
- from agent import build_graph
9
 
 
 
10
 
 
 
11
 
12
- # (Keep Constants as is)
13
- # --- Constants ---
14
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
15
 
16
- # --- Basic Agent Definition ---
17
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
18
-
19
-
20
- class BasicAgent:
21
- """A langgraph agent."""
22
  def __init__(self):
23
- print("BasicAgent initialized.")
24
- self.graph = build_graph()
25
-
26
  def __call__(self, question: str) -> str:
 
27
  print(f"Agent received question (first 50 chars): {question[:50]}...")
28
- # Wrap the question in a HumanMessage from langchain_core
29
- messages = [HumanMessage(content=question)]
30
- messages = self.graph.invoke({"messages": messages})
31
- answer = messages['messages'][-1].content
32
- return answer[14:]
33
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
- def run_and_submit_all( profile: gr.OAuthProfile | None):
36
  """
37
- Fetches all questions, runs the BasicAgent on them, submits all answers,
38
  and displays the results.
39
  """
40
- # --- Determine HF Space Runtime URL and Repo URL ---
41
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
42
-
43
  if profile:
44
- username= f"{profile.username}"
45
  print(f"User logged in: {username}")
46
  else:
47
  print("User not logged in.")
48
  return "Please Login to Hugging Face with the button.", None
49
 
 
 
 
 
50
  api_url = DEFAULT_API_URL
51
  questions_url = f"{api_url}/questions"
52
  submit_url = f"{api_url}/submit"
53
 
54
- # 1. Instantiate Agent ( modify this part to create your agent)
55
  try:
56
- agent = BasicAgent()
57
  except Exception as e:
58
- print(f"Error instantiating agent: {e}")
59
  return f"Error initializing agent: {e}", None
60
- # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
61
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
62
- print(agent_code)
63
 
64
  # 2. Fetch Questions
65
  print(f"Fetching questions from: {questions_url}")
@@ -67,115 +107,120 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
67
  response = requests.get(questions_url, timeout=15)
68
  response.raise_for_status()
69
  questions_data = response.json()
 
70
  if not questions_data:
71
- print("Fetched questions list is empty.")
72
- return "Fetched questions list is empty or invalid format.", None
73
  print(f"Fetched {len(questions_data)} questions.")
74
- except requests.exceptions.RequestException as e:
 
75
  print(f"Error fetching questions: {e}")
76
  return f"Error fetching questions: {e}", None
77
- except requests.exceptions.JSONDecodeError as e:
78
- print(f"Error decoding JSON response from questions endpoint: {e}")
79
- print(f"Response text: {response.text[:500]}")
80
- return f"Error decoding server response for questions: {e}", None
81
- except Exception as e:
82
- print(f"An unexpected error occurred fetching questions: {e}")
83
- return f"An unexpected error occurred fetching questions: {e}", None
84
 
85
- # 3. Run your Agent
86
  results_log = []
87
  answers_payload = []
88
- print(f"Running agent on {len(questions_data)} questions...")
89
- for item in questions_data:
 
 
90
  task_id = item.get("task_id")
91
  question_text = item.get("question")
 
92
  if not task_id or question_text is None:
93
- print(f"Skipping item with missing task_id or question: {item}")
94
  continue
 
 
 
95
  try:
 
96
  submitted_answer = agent(question_text)
97
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
98
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
 
 
 
 
 
 
 
 
 
 
99
  except Exception as e:
100
- print(f"Error running agent on task {task_id}: {e}")
101
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
 
 
 
 
102
 
103
  if not answers_payload:
104
- print("Agent did not produce any answers to submit.")
105
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
106
 
107
- # 4. Prepare Submission
108
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
109
- status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
110
- print(status_update)
111
-
112
- # 5. Submit
113
- print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
 
 
114
  try:
115
  response = requests.post(submit_url, json=submission_data, timeout=60)
116
  response.raise_for_status()
117
  result_data = response.json()
 
 
118
  final_status = (
119
- f"Submission Successful!\n"
120
  f"User: {result_data.get('username')}\n"
121
- f"Overall Score: {result_data.get('score', 'N/A')}% "
122
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
123
- f"Message: {result_data.get('message', 'No message received.')}"
124
  )
125
- print("Submission successful.")
 
126
  results_df = pd.DataFrame(results_log)
127
  return final_status, results_df
128
- except requests.exceptions.HTTPError as e:
129
- error_detail = f"Server responded with status {e.response.status_code}."
130
- try:
131
- error_json = e.response.json()
132
- error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
133
- except requests.exceptions.JSONDecodeError:
134
- error_detail += f" Response: {e.response.text[:500]}"
135
- status_message = f"Submission Failed: {error_detail}"
136
- print(status_message)
137
- results_df = pd.DataFrame(results_log)
138
- return status_message, results_df
139
- except requests.exceptions.Timeout:
140
- status_message = "Submission Failed: The request timed out."
141
- print(status_message)
142
- results_df = pd.DataFrame(results_log)
143
- return status_message, results_df
144
- except requests.exceptions.RequestException as e:
145
- status_message = f"Submission Failed: Network error - {e}"
146
- print(status_message)
147
- results_df = pd.DataFrame(results_log)
148
- return status_message, results_df
149
  except Exception as e:
150
- status_message = f"An unexpected error occurred during submission: {e}"
151
- print(status_message)
152
  results_df = pd.DataFrame(results_log)
153
- return status_message, results_df
154
-
155
-
156
- # --- Build Gradio Interface using Blocks ---
157
- with gr.Blocks() as demo:
158
- gr.Markdown("# Basic Agent Evaluation Runner")
159
- gr.Markdown(
160
- """
161
- **Instructions:**
162
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
163
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
164
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
165
- ---
166
- **Disclaimers:**
167
- Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
168
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
169
- """
170
- )
171
 
172
  gr.LoginButton()
173
-
174
- run_button = gr.Button("Run Evaluation & Submit All Answers")
175
-
176
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
177
- # Removed max_rows=10 from DataFrame constructor
178
- results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
 
 
 
 
 
 
 
179
 
180
  run_button.click(
181
  fn=run_and_submit_all,
@@ -183,26 +228,28 @@ with gr.Blocks() as demo:
183
  )
184
 
185
  if __name__ == "__main__":
186
- print("\n" + "-"*30 + " App Starting " + "-"*30)
187
- # Check for SPACE_HOST and SPACE_ID at startup for information
188
- space_host_startup = os.getenv("SPACE_HOST")
189
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
190
-
191
- if space_host_startup:
192
- print(f" SPACE_HOST found: {space_host_startup}")
193
- print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
194
- else:
195
- print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
196
-
197
- if space_id_startup: # Print repo URLs if SPACE_ID is found
198
- print(f"✅ SPACE_ID found: {space_id_startup}")
199
- print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
200
- print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
201
  else:
202
- print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
203
-
204
- print("-"*(60 + len(" App Starting ")) + "\n")
205
-
206
- print("Launching Gradio Interface for Basic Agent Evaluation...")
 
 
 
 
 
 
 
 
 
207
  demo.launch(debug=True, share=False)
208
 
 
1
+ """Simple Agent Evaluation Runner"""
2
  import os
3
+ import re
4
  import gradio as gr
5
  import requests
6
  import pandas as pd
7
+ import google.generativeai as genai
8
+ from dotenv import load_dotenv
9
 
10
# Load variables from a .env file via python-dotenv before anything reads them.
load_dotenv()

# Hand the Gemini client its API key, taken from GOOGLE_API_KEY
# (None is passed through when the variable is not set).
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))

# Base URL of the API; "/questions" and "/submit" are appended to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
18
 
19
class SimpleAgent:
    """A simple agent using Google Gemini.

    An instance is callable: it sends the question, prefixed with
    ``SYSTEM_PROMPT``, to the model and returns a short answer string
    extracted from the model's reply.
    """

    # Instructions prepended to every question. Defined once on the class
    # instead of being rebuilt on each call.
    SYSTEM_PROMPT = (
        "You are a helpful assistant. Answer questions as accurately as possible.\n"
        "\n"
        "IMPORTANT: Your final answer should be:\n"
        "- A number (without commas, $ signs, or % signs unless specifically requested)\n"
        "- As few words as possible\n"
        "- A comma-separated list if multiple items are requested\n"
        "\n"
        "Always end your response with: FINAL ANSWER: [your answer]\n"
        "\n"
        "Examples:\n"
        "- For \"How many albums did X release?\" → FINAL ANSWER: 5\n"
        "- For \"What city is the capital?\" → FINAL ANSWER: Paris\n"
        "- For \"List the top 3 countries\" → FINAL ANSWER: USA, China, Japan\n"
    )

    # Captures the rest of the line that follows a "FINAL ANSWER:" marker.
    _FINAL_ANSWER_RE = re.compile(r'FINAL ANSWER:\s*(.+?)(?:\n|$)', re.IGNORECASE)

    def __init__(self):
        """Create the Gemini model handle used for every question."""
        print("SimpleAgent initialized.")
        self.model = genai.GenerativeModel('gemini-1.5-flash')

    @classmethod
    def _extract_answer(cls, text: str) -> str:
        """Pull the short final answer out of a raw model reply.

        Args:
            text: The full text returned by the model.

        Returns:
            The text after the LAST "FINAL ANSWER:" marker, with surrounding
            whitespace and markdown emphasis characters removed. When no
            usable marker is present: the last non-empty line shorter than
            100 characters that does not start with "FINAL", or failing that
            the first 100 characters of the stripped reply.
        """
        # The prompt asks for the marker at the END of the reply, so an
        # earlier occurrence (e.g. the model restating its instructions) must
        # not win; walk the matches from last to first.
        for candidate in reversed(cls._FINAL_ANSWER_RE.findall(text)):
            # Drop markdown wrappers such as "**FINAL ANSWER:** 5" or "`5`".
            cleaned = candidate.strip().strip("*`").strip()
            if cleaned:
                return cleaned

        # No marker: fall back to the last short line of the reply.
        for line in reversed(text.strip().split('\n')):
            line = line.strip()
            if line and not line.startswith('FINAL') and len(line) < 100:
                return line

        return text.strip()[:100]

    def __call__(self, question: str) -> str:
        """Process a question and return an answer.

        Args:
            question: The question text to send to the model.

        Returns:
            The extracted answer, or the string ``"Error: <message>"`` when
            the model call or the answer extraction raises.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")

        # Combine system prompt with question
        full_prompt = f"{self.SYSTEM_PROMPT}\n\nQuestion: {question}"

        try:
            # The try covers both the request and reading the reply text.
            response = self.model.generate_content(full_prompt)
            return self._extract_answer(response.text)
        except Exception as e:
            # Best-effort by design: report the failure as the answer string
            # rather than propagating it to the caller.
            print(f"Error calling Gemini API: {e}")
            return f"Error: {str(e)}"
75
 
76
def _shorten(text, limit=100):
    """Return ``text`` cut to ``limit`` characters, adding "..." when it was cut."""
    return text[:limit] + "..." if len(text) > limit else text


def _describe_submission_error(error):
    """Turn a failed submission exception into a readable detail string.

    For HTTP errors that carry a response, the status code and the server's
    own ``detail`` field (or the start of the raw body) are included, because
    ``str(error)`` alone does not show the response body.
    """
    response = getattr(error, "response", None)
    # Compare with None explicitly: a requests.Response for a 4xx/5xx status
    # evaluates as falsy.
    if not isinstance(error, requests.exceptions.HTTPError) or response is None:
        return str(error)

    detail = f"Server responded with status {response.status_code}."
    try:
        detail += f" Detail: {response.json().get('detail', response.text)}"
    except (ValueError, AttributeError):
        # Body is not JSON, or is JSON but not an object.
        detail += f" Response: {response.text[:500]}"
    return detail


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the SimpleAgent on them, submits all answers,
    and displays the results.

    Args:
        profile: The logged-in user's profile, or None when nobody is
            logged in. Only ``profile.username`` is read.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a DataFrame with
        one row per processed question ("Task ID", "Question",
        "Submitted Answer"), or None when the run stops before any question
        is processed.
    """
    # Check if user is logged in
    if not profile:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    username = f"{profile.username}"
    print(f"User logged in: {username}")

    # Get space info; the link to the code is sent along with the answers.
    space_id = os.getenv("SPACE_ID")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Unknown"

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Initialize Agent
    try:
        agent = SimpleAgent()
    except Exception as e:
        print(f"Error initializing agent: {e}")
        return f"Error initializing agent: {e}", None

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None

    if not questions_data:
        return "No questions received from server.", None

    # The loop below expects a list of dicts; anything else would otherwise
    # surface as an uncaught AttributeError on item.get().
    if not isinstance(questions_data, list):
        message = "Error fetching questions: unexpected response format (expected a list)."
        print(message)
        return message, None

    print(f"Fetched {len(questions_data)} questions.")

    # 3. Process Questions
    results_log = []
    answers_payload = []
    total = len(questions_data)

    print(f"Processing {total} questions...")

    for i, item in enumerate(questions_data):
        if not isinstance(item, dict):
            print(f"Skipping invalid item: {item}")
            continue

        task_id = item.get("task_id")
        question_text = item.get("question")

        if not task_id or question_text is None:
            print(f"Skipping invalid item: {item}")
            continue

        print(f"Processing question {i+1}/{total}: {task_id}")

        # str() keeps the display column safe if the question is not a string.
        shown_question = _shorten(str(question_text))

        try:
            # Get answer from agent
            submitted_answer = agent(question_text)
        except Exception as e:
            # A failed question is logged for display but not submitted.
            print(f"Error processing task {task_id}: {e}")
            results_log.append({
                "Task ID": task_id,
                "Question": shown_question,
                "Submitted Answer": f"ERROR: {str(e)}",
            })
            continue

        # Store results
        answers_payload.append({
            "task_id": task_id,
            "submitted_answer": submitted_answer,
        })
        results_log.append({
            "Task ID": task_id,
            "Question": shown_question,
            "Submitted Answer": submitted_answer,
        })

    if not answers_payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Submit Results
    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }

    print(f"Submitting {len(answers_payload)} answers...")

    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
    except Exception as e:
        # Include the server's own explanation when there is one.
        error_msg = f" Submission Failed: {_describe_submission_error(e)}"
        print(error_msg)
        return error_msg, pd.DataFrame(results_log)

    # Format success message
    final_status = (
        f"Submission Successful!\n"
        f"User: {result_data.get('username')}\n"
        f"Score: {result_data.get('score', 'N/A')}% "
        f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
        f"Message: {result_data.get('message', 'No additional message.')}"
    )

    print("Submission successful!")
    return final_status, pd.DataFrame(results_log)
197
+
198
+ # Build Gradio Interface
199
+ with gr.Blocks(title="Simple Agent Evaluation") as demo:
200
+ gr.Markdown("# Simple Agent Evaluation Runner")
201
+ gr.Markdown("""
202
+ **Instructions:**
203
+ 1. Make sure you have set up your `GOOGLE_API_KEY` in the environment variables
204
+ 2. Log in to your Hugging Face account using the button below
205
+ 3. Click 'Run Evaluation & Submit All Answers' to start the evaluation
206
+
207
+ **Note:** This is a simplified agent that uses Google Gemini to answer questions.
208
+ """)
 
 
 
 
 
209
 
210
  gr.LoginButton()
211
+
212
+ run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary")
213
+
214
+ status_output = gr.Textbox(
215
+ label="Status / Results",
216
+ lines=6,
217
+ interactive=False
218
+ )
219
+
220
+ results_table = gr.DataFrame(
221
+ label="Questions and Answers",
222
+ wrap=True
223
+ )
224
 
225
  run_button.click(
226
  fn=run_and_submit_all,
 
228
  )
229
 
230
  if __name__ == "__main__":
231
+ print("=" * 50)
232
+ print("🚀 Starting Simple Agent Evaluation Runner")
233
+ print("=" * 50)
234
+
235
+ # Check environment variables
236
+ if not os.getenv("GOOGLE_API_KEY"):
237
+ print("⚠️ WARNING: GOOGLE_API_KEY not found in environment variables!")
238
+ print(" Please set your Google API key to use Gemini.")
 
 
 
 
 
 
 
239
  else:
240
+ print(" GOOGLE_API_KEY found")
241
+
242
+ space_host = os.getenv("SPACE_HOST")
243
+ space_id = os.getenv("SPACE_ID")
244
+
245
+ if space_host:
246
+ print(f"✅ Running on Hugging Face Space")
247
+ print(f" URL: https://{space_host}.hf.space")
248
+
249
+ if space_id:
250
+ print(f"✅ Space ID: {space_id}")
251
+
252
+ print("=" * 50)
253
+
254
  demo.launch(debug=True, share=False)
255