lethaq committed on
Commit
bf4b516
·
verified ·
1 Parent(s): 82fad89

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +234 -52
app.py CHANGED
@@ -1,11 +1,14 @@
1
- """Simple Agent Evaluation Runner"""
2
  import os
3
  import re
 
4
  import gradio as gr
5
  import requests
6
  import pandas as pd
7
  import google.generativeai as genai
8
  from dotenv import load_dotenv
 
 
9
 
10
  # Load environment variables
11
  load_dotenv()
@@ -16,66 +19,237 @@ genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
16
  # Constants
17
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
18
 
19
- class SimpleAgent:
20
- """A simple agent using Google Gemini."""
21
 
22
  def __init__(self):
23
- print("SimpleAgent initialized.")
24
- self.model = genai.GenerativeModel('gemini-1.5-flash')
 
 
 
 
25
 
26
- def __call__(self, question: str) -> str:
27
- """Process a question and return an answer."""
28
- print(f"Agent received question (first 50 chars): {question[:50]}...")
29
 
30
- # Simple system prompt
31
- system_prompt = """You are a helpful assistant. Answer questions as accurately as possible.
32
-
33
- IMPORTANT: Your final answer should be:
34
- - A number (without commas, $ signs, or % signs unless specifically requested)
35
- - A few words as possible
36
- - A comma-separated list if multiple items are requested
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
- Always end your response with: FINAL ANSWER: [your answer]
 
 
 
 
 
39
 
40
- Examples:
41
- - For "How many albums did X release?" → FINAL ANSWER: 5
42
- - For "What city is the capital?" → FINAL ANSWER: Paris
43
- - For "List the top 3 countries" → FINAL ANSWER: USA, China, Japan
44
  """
45
 
46
- # Combine system prompt with question
47
- full_prompt = f"{system_prompt}\n\nQuestion: {question}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
- try:
50
- # Generate response using Gemini
51
- response = self.model.generate_content(full_prompt)
52
- answer = response.text
53
-
54
- # Extract final answer if it exists
55
- final_answer_match = re.search(r'FINAL ANSWER:\s*(.+?)(?:\n|$)', answer, re.IGNORECASE)
56
- if final_answer_match:
57
- final_answer = final_answer_match.group(1).strip()
58
- return final_answer
59
- else:
60
- # If no "FINAL ANSWER:" format, try to extract a simple answer
61
- # Look for numbers, short phrases, or lists
62
- lines = answer.strip().split('\n')
63
- for line in reversed(lines): # Start from the end
64
- line = line.strip()
65
- if line and not line.startswith('FINAL'):
66
- # Simple heuristic: if it's short, likely an answer
67
- if len(line) < 100:
68
- return line
69
 
70
- return answer.strip()[:100] # Fallback to first 100 chars
 
 
 
 
 
 
 
71
 
72
- except Exception as e:
73
- print(f"Error calling Gemini API: {e}")
74
- return f"Error: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
  def run_and_submit_all(profile: gr.OAuthProfile | None):
77
  """
78
- Fetches all questions, runs the SimpleAgent on them, submits all answers,
79
  and displays the results.
80
  """
81
  # Check if user is logged in
@@ -96,7 +270,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
96
 
97
  # 1. Initialize Agent
98
  try:
99
- agent = SimpleAgent()
100
  except Exception as e:
101
  print(f"Error initializing agent: {e}")
102
  return f"Error initializing agent: {e}", None
@@ -149,6 +323,9 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
149
  "Submitted Answer": submitted_answer
150
  })
151
 
 
 
 
152
  except Exception as e:
153
  error_msg = f"ERROR: {str(e)}"
154
  print(f"Error processing task {task_id}: {e}")
@@ -196,15 +373,20 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
196
  return error_msg, results_df
197
 
198
  # Build Gradio Interface
199
- with gr.Blocks(title="Simple Agent Evaluation") as demo:
200
- gr.Markdown("# Simple Agent Evaluation Runner")
201
  gr.Markdown("""
202
  **Instructions:**
203
  1. Make sure you have set up your `GOOGLE_API_KEY` in the environment variables
204
  2. Log in to your Hugging Face account using the button below
205
  3. Click 'Run Evaluation & Submit All Answers' to start the evaluation
206
 
207
- **Note:** This is a simplified agent that uses Google Gemini to answer questions.
 
 
 
 
 
208
  """)
209
 
210
  gr.LoginButton()
@@ -229,7 +411,7 @@ with gr.Blocks(title="Simple Agent Evaluation") as demo:
229
 
230
  if __name__ == "__main__":
231
  print("=" * 50)
232
- print("🚀 Starting Simple Agent Evaluation Runner")
233
  print("=" * 50)
234
 
235
  # Check environment variables
 
1
+ """Enhanced Agent Evaluation Runner with improved capabilities"""
2
  import os
3
  import re
4
+ import time
5
  import gradio as gr
6
  import requests
7
  import pandas as pd
8
  import google.generativeai as genai
9
  from dotenv import load_dotenv
10
+ from urllib.parse import urlparse, parse_qs
11
+ import json
12
 
13
  # Load environment variables
14
  load_dotenv()
 
19
  # Constants
20
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
21
 
22
+ class EnhancedAgent:
23
+ """An enhanced agent using Google Gemini with improved capabilities."""
24
 
25
  def __init__(self):
26
+ print("EnhancedAgent initialized.")
27
+ # Use gemini-1.5-pro for better performance, fallback to flash
28
+ try:
29
+ self.model = genai.GenerativeModel('gemini-1.5-pro')
30
+ except:
31
+ self.model = genai.GenerativeModel('gemini-1.5-flash')
32
 
33
+ # Rate limiting
34
+ self.last_request_time = 0
35
+ self.min_request_interval = 1.0 # 1 second between requests
36
 
37
+ def _rate_limit(self):
38
+ """Simple rate limiting to avoid quota issues."""
39
+ current_time = time.time()
40
+ time_since_last = current_time - self.last_request_time
41
+ if time_since_last < self.min_request_interval:
42
+ time.sleep(self.min_request_interval - time_since_last)
43
+ self.last_request_time = time.time()
44
+
45
+ def _extract_youtube_info(self, question: str) -> str:
46
+ """Extract information about YouTube videos mentioned in questions."""
47
+ youtube_patterns = [
48
+ r'youtube\.com/watch\?v=([a-zA-Z0-9_-]+)',
49
+ r'youtu\.be/([a-zA-Z0-9_-]+)'
50
+ ]
51
+
52
+ for pattern in youtube_patterns:
53
+ match = re.search(pattern, question)
54
+ if match:
55
+ video_id = match.group(1)
56
+ return f"YouTube video ID: {video_id}. Note: Cannot access video content directly, but can make educated guesses based on context."
57
+ return ""
58
+
59
+ def _analyze_question_type(self, question: str) -> str:
60
+ """Analyze the type of question and provide specific guidance."""
61
+ question_lower = question.lower()
62
+
63
+ # Different question types and their handling strategies
64
+ if any(word in question_lower for word in ['youtube', 'video', 'watch']):
65
+ return "VIDEO_ANALYSIS"
66
+ elif any(word in question_lower for word in ['excel', 'spreadsheet', 'file', 'csv']):
67
+ return "FILE_ANALYSIS"
68
+ elif any(word in question_lower for word in ['how many', 'count', 'number of']):
69
+ return "COUNTING"
70
+ elif any(word in question_lower for word in ['who', 'what', 'where', 'when']):
71
+ return "FACTUAL"
72
+ elif any(word in question_lower for word in ['calculate', 'compute', 'math']):
73
+ return "CALCULATION"
74
+ elif any(word in question_lower for word in ['list', 'name', 'identify']):
75
+ return "LIST"
76
+ else:
77
+ return "GENERAL"
78
+
79
+ def _get_enhanced_prompt(self, question: str, question_type: str) -> str:
80
+ """Generate an enhanced system prompt based on question type."""
81
+
82
+ base_prompt = """You are an expert assistant with broad knowledge across many domains including:
83
+ - Music, entertainment, and media
84
+ - Sports statistics and history
85
+ - Science and mathematics
86
+ - Geography and world facts
87
+ - Technology and computing
88
+ - Literature and culture
89
 
90
+ CRITICAL INSTRUCTIONS:
91
+ 1. Always provide your best educated guess even if you're not 100% certain
92
+ 2. For numerical answers, provide ONLY the number (no commas, currency symbols, or units unless specified)
93
+ 3. For names/words, provide the exact spelling
94
+ 4. For lists, use comma-separated format
95
+ 5. End with: FINAL ANSWER: [your concise answer]
96
 
 
 
 
 
97
  """
98
 
99
+ if question_type == "VIDEO_ANALYSIS":
100
+ base_prompt += """
101
+ For video-related questions:
102
+ - If you cannot access the video content, make educated guesses based on:
103
+ - Video title/URL context
104
+ - Common knowledge about the topic
105
+ - Typical content patterns
106
+ - Provide your best estimate rather than saying "cannot access"
107
+ """
108
+ elif question_type == "FILE_ANALYSIS":
109
+ base_prompt += """
110
+ For file-related questions:
111
+ - If you cannot access files directly, make reasonable assumptions
112
+ - Use general knowledge about typical data in such contexts
113
+ - Provide educated estimates based on the question context
114
+ """
115
+ elif question_type == "COUNTING":
116
+ base_prompt += """
117
+ For counting questions:
118
+ - Provide specific numbers when possible
119
+ - If exact count unknown, provide reasonable estimates
120
+ - Consider historical data and typical ranges
121
+ """
122
+ elif question_type == "FACTUAL":
123
+ base_prompt += """
124
+ For factual questions:
125
+ - Use your knowledge base to provide accurate information
126
+ - If multiple possibilities exist, choose the most likely one
127
+ - Be specific with names, dates, and details
128
+ """
129
 
130
+ return base_prompt
131
+
132
+ def _make_api_call_with_retry(self, prompt: str, max_retries: int = 3) -> str:
133
+ """Make API call with retry logic and error handling."""
134
+
135
+ for attempt in range(max_retries):
136
+ try:
137
+ self._rate_limit() # Apply rate limiting
 
 
 
 
 
 
 
 
 
 
 
 
138
 
139
+ # Generate response using Gemini
140
+ response = self.model.generate_content(
141
+ prompt,
142
+ generation_config=genai.types.GenerationConfig(
143
+ temperature=0.1, # Lower temperature for more consistent answers
144
+ max_output_tokens=1000,
145
+ )
146
+ )
147
 
148
+ if response.text:
149
+ return response.text
150
+ else:
151
+ raise Exception("Empty response from API")
152
+
153
+ except Exception as e:
154
+ error_msg = str(e).lower()
155
+
156
+ if "quota" in error_msg or "429" in error_msg:
157
+ if attempt < max_retries - 1:
158
+ wait_time = (2 ** attempt) * 5 # Exponential backoff
159
+ print(f"Quota exceeded, waiting {wait_time} seconds...")
160
+ time.sleep(wait_time)
161
+ continue
162
+ else:
163
+ return "Error: API quota exceeded"
164
+ elif "safety" in error_msg:
165
+ return "Error: Content safety filter triggered"
166
+ else:
167
+ if attempt < max_retries - 1:
168
+ time.sleep(2) # Wait before retry
169
+ continue
170
+ else:
171
+ return f"Error: {str(e)}"
172
+
173
+ return "Error: Max retries exceeded"
174
+
175
+ def __call__(self, question: str) -> str:
176
+ """Process a question and return an answer."""
177
+ print(f"Agent processing: {question[:100]}...")
178
+
179
+ # Analyze question type
180
+ question_type = self._analyze_question_type(question)
181
+ print(f"Question type identified: {question_type}")
182
+
183
+ # Extract additional context
184
+ youtube_info = self._extract_youtube_info(question)
185
+
186
+ # Build enhanced prompt
187
+ system_prompt = self._get_enhanced_prompt(question, question_type)
188
+
189
+ # Add context if available
190
+ context = ""
191
+ if youtube_info:
192
+ context += f"\nContext: {youtube_info}\n"
193
+
194
+ # Combine everything
195
+ full_prompt = f"{system_prompt}\n{context}\nQuestion: {question}\n\nProvide your best answer:"
196
+
197
+ # Make API call with retry
198
+ response = self._make_api_call_with_retry(full_prompt)
199
+
200
+ # Extract final answer
201
+ return self._extract_final_answer(response, question_type)
202
+
203
+ def _extract_final_answer(self, response: str, question_type: str) -> str:
204
+ """Extract the final answer from the response."""
205
+ if response.startswith("Error:"):
206
+ return response
207
+
208
+ # Look for FINAL ANSWER: pattern
209
+ final_answer_match = re.search(r'FINAL ANSWER:\s*(.+?)(?:\n|$)', response, re.IGNORECASE)
210
+ if final_answer_match:
211
+ answer = final_answer_match.group(1).strip()
212
+ return self._clean_answer(answer, question_type)
213
+
214
+ # Fallback: extract from end of response
215
+ lines = response.strip().split('\n')
216
+ for line in reversed(lines):
217
+ line = line.strip()
218
+ if line and len(line) < 200: # Reasonable answer length
219
+ return self._clean_answer(line, question_type)
220
+
221
+ # Last resort: return first part of response
222
+ return self._clean_answer(response[:100], question_type)
223
+
224
+ def _clean_answer(self, answer: str, question_type: str) -> str:
225
+ """Clean and format the final answer."""
226
+ answer = answer.strip()
227
+
228
+ # Remove common prefixes
229
+ prefixes_to_remove = [
230
+ "the answer is", "answer:", "final answer:",
231
+ "result:", "solution:", "therefore",
232
+ "in conclusion", "to summarize"
233
+ ]
234
+
235
+ for prefix in prefixes_to_remove:
236
+ if answer.lower().startswith(prefix):
237
+ answer = answer[len(prefix):].strip()
238
+
239
+ # Clean punctuation from the end
240
+ answer = answer.rstrip('.,;:!')
241
+
242
+ # For counting questions, ensure we return just the number
243
+ if question_type == "COUNTING":
244
+ number_match = re.search(r'\b(\d+(?:,\d{3})*(?:\.\d+)?)\b', answer)
245
+ if number_match:
246
+ return number_match.group(1).replace(',', '')
247
+
248
+ return answer
249
 
250
  def run_and_submit_all(profile: gr.OAuthProfile | None):
251
  """
252
+ Fetches all questions, runs the EnhancedAgent on them, submits all answers,
253
  and displays the results.
254
  """
255
  # Check if user is logged in
 
270
 
271
  # 1. Initialize Agent
272
  try:
273
+ agent = EnhancedAgent()
274
  except Exception as e:
275
  print(f"Error initializing agent: {e}")
276
  return f"Error initializing agent: {e}", None
 
323
  "Submitted Answer": submitted_answer
324
  })
325
 
326
+ # Small delay between questions to avoid rate limiting
327
+ time.sleep(0.5)
328
+
329
  except Exception as e:
330
  error_msg = f"ERROR: {str(e)}"
331
  print(f"Error processing task {task_id}: {e}")
 
373
  return error_msg, results_df
374
 
375
  # Build Gradio Interface
376
+ with gr.Blocks(title="Enhanced Agent Evaluation") as demo:
377
+ gr.Markdown("# Enhanced Agent Evaluation Runner")
378
  gr.Markdown("""
379
  **Instructions:**
380
  1. Make sure you have set up your `GOOGLE_API_KEY` in the environment variables
381
  2. Log in to your Hugging Face account using the button below
382
  3. Click 'Run Evaluation & Submit All Answers' to start the evaluation
383
 
384
+ **Enhanced Features:**
385
+ - Improved question analysis and categorization
386
+ - Better handling of different question types
387
+ - Rate limiting to avoid API quota issues
388
+ - Retry logic for failed requests
389
+ - Enhanced prompting for better accuracy
390
  """)
391
 
392
  gr.LoginButton()
 
411
 
412
  if __name__ == "__main__":
413
  print("=" * 50)
414
+ print("🚀 Starting Enhanced Agent Evaluation Runner")
415
  print("=" * 50)
416
 
417
  # Check environment variables