ghanemfaouri committed on
Commit
26a1714
·
verified ·
1 Parent(s): 8205925

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -135
app.py CHANGED
@@ -5,83 +5,42 @@ import pandas as pd
5
  import re
6
  from openai import OpenAI
7
  from duckduckgo_search import DDGS
8
- import wikipediaapi
9
- from pytube import YouTube
10
- import whisper
11
 
12
  # --- Constants ---
13
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
14
  GAIA_SYSTEM_PROMPT = """
15
  You are an expert at solving GAIA benchmark questions. Follow these rules:
16
  1. Think step-by-step before answering
17
- 2. Format answers EXACTLY as required:
18
- - Numbers: Plain digits without commas/units
19
- - Strings: Minimal words, no articles/abbreviations
20
- - Lists: Comma-separated values without spaces
21
- - Multiple choice: Single uppercase letter
22
- 3. For calculations, show your work then box the final answer
23
- 4. When uncertain, search online for verification
24
- 5. ALWAYS end with: FINAL ANSWER: [Your Answer]
25
  """
26
 
27
- class EnhancedGaiaAgent:
28
  def __init__(self):
29
- print("Initializing Enhanced GAIA Agent")
30
  self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
31
- self.wiki = wikipediaapi.Wikipedia('en')
32
- self.model = whisper.load_model("base")
33
-
34
  self.answer_patterns = [
35
  r"FINAL ANSWER:\s*(.+)",
36
  r"Final Answer:\s*(.+)",
37
- r"\[ANSWER\]:\s*(.+)",
38
  r"Answer:\s*(.+)"
39
  ]
40
 
41
- def web_search(self, query: str, max_results: int = 3) -> str:
 
42
  try:
43
  with DDGS() as ddgs:
44
- results = [r for r in ddgs.text(query, max_results=max_results)]
45
  return "\n".join([f"{i+1}. {res['title']}: {res['body']}" for i, res in enumerate(results)])
46
  except Exception as e:
47
  print(f"Search error: {str(e)}")
48
  return ""
49
 
50
- def get_wikipedia(self, topic: str) -> str:
51
- try:
52
- page = self.wiki.page(topic)
53
- return page.summary[:2000] if page.exists() else ""
54
- except Exception:
55
- return ""
56
-
57
- def transcribe_audio(self, audio_path: str) -> str:
58
- try:
59
- result = self.model.transcribe(audio_path)
60
- return result["text"]
61
- except Exception as e:
62
- print(f"Transcription error: {str(e)}")
63
- return ""
64
-
65
- def extract_youtube_info(self, url: str) -> str:
66
- try:
67
- yt = YouTube(url)
68
- return f"Title: {yt.title}\nLength: {yt.length}s"
69
- except Exception:
70
- return ""
71
-
72
  def __call__(self, question: str) -> str:
 
73
  print(f"Processing: {question[:60]}...")
74
 
75
- if "youtube.com/watch" in question.lower():
76
- return self.handle_youtube_question(question)
77
- if "mp3" in question.lower() or "audio" in question.lower():
78
- return self.handle_audio_question(question)
79
- if "wikipedia" in question.lower():
80
- return self.handle_wikipedia_question(question)
81
-
82
- return self.handle_general_question(question)
83
-
84
- def handle_general_question(self, question: str) -> str:
85
  needs_search = any(word in question.lower() for word in
86
  ["current", "recent", "today", "latest", "who is", "what is"])
87
 
@@ -107,82 +66,22 @@ class EnhancedGaiaAgent:
107
  print(f"GPT error: {str(e)}")
108
  return "Error: Could not generate answer"
109
 
110
- def handle_youtube_question(self, question: str) -> str:
111
- try:
112
- url = re.search(r"(https?://[^\s]+)", question).group(1)
113
- video_info = self.extract_youtube_info(url)
114
-
115
- messages = [
116
- {"role": "system", "content": GAIA_SYSTEM_PROMPT},
117
- {"role": "system", "content": f"Video Info:\n{video_info}"},
118
- {"role": "user", "content": question}
119
- ]
120
-
121
- response = self.client.chat.completions.create(
122
- model="gpt-4-turbo",
123
- messages=messages,
124
- temperature=0.1
125
- )
126
- return self.extract_final_answer(response.choices[0].message.content)
127
- except Exception as e:
128
- print(f"YouTube processing error: {str(e)}")
129
- return "Error: Could not process video"
130
-
131
- def handle_audio_question(self, question: str) -> str:
132
- try:
133
- audio_path = "temp_audio.mp3" # Assume file is saved here
134
- transcript = self.transcribe_audio(audio_path)
135
-
136
- messages = [
137
- {"role": "system", "content": GAIA_SYSTEM_PROMPT},
138
- {"role": "system", "content": f"Transcript:\n{transcript}"},
139
- {"role": "user", "content": question}
140
- ]
141
-
142
- response = self.client.chat.completions.create(
143
- model="gpt-4-turbo",
144
- messages=messages,
145
- temperature=0.1
146
- )
147
- return self.extract_final_answer(response.choices[0].message.content)
148
- except Exception as e:
149
- print(f"Audio processing error: {str(e)}")
150
- return "Error: Could not process audio"
151
-
152
- def handle_wikipedia_question(self, question: str) -> str:
153
- try:
154
- topic_match = re.search(r"about (.*?)(?:that|which)", question, re.IGNORECASE)
155
- topic = topic_match.group(1) if topic_match else ""
156
- wiki_content = self.get_wikipedia(topic)
157
-
158
- messages = [
159
- {"role": "system", "content": GAIA_SYSTEM_PROMPT},
160
- {"role": "system", "content": f"Wikipedia Context:\n{wiki_content}"},
161
- {"role": "user", "content": question}
162
- ]
163
-
164
- response = self.client.chat.completions.create(
165
- model="gpt-4-turbo",
166
- messages=messages,
167
- temperature=0.1
168
- )
169
- return self.extract_final_answer(response.choices[0].message.content)
170
- except Exception as e:
171
- print(f"Wikipedia processing error: {str(e)}")
172
- return "Error: Could not process Wikipedia query"
173
-
174
  def extract_final_answer(self, response: str) -> str:
 
175
  for pattern in self.answer_patterns:
176
  match = re.search(pattern, response, re.IGNORECASE)
177
  if match:
178
  answer = match.group(1).strip()
179
- return re.sub(r"[^a-zA-Z0-9,. ]", "", answer)[:200]
 
 
180
 
 
181
  lines = response.strip().split('\n')
182
  return lines[-1].strip() if lines else "No answer found"
183
 
184
  def run_and_submit_all(profile: gr.OAuthProfile | None):
185
- """Original submission function with agent replaced"""
186
  space_id = os.getenv("SPACE_ID")
187
 
188
  if profile:
@@ -197,27 +96,25 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
197
  submit_url = f"{api_url}/submit"
198
 
199
  try:
200
- agent = EnhancedGaiaAgent() # Using our new agent
201
  except Exception as e:
202
  print(f"Error instantiating agent: {e}")
203
  return f"Error initializing agent: {e}", None
204
 
205
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
206
- print(agent_code)
207
 
208
  try:
209
  response = requests.get(questions_url, timeout=15)
210
  response.raise_for_status()
211
  questions_data = response.json()
212
  if not questions_data:
213
- return "Fetched questions list is empty or invalid format.", None
214
  print(f"Fetched {len(questions_data)} questions.")
215
  except Exception as e:
216
  return f"Error fetching questions: {e}", None
217
 
218
  results_log = []
219
  answers_payload = []
220
- print(f"Running agent on {len(questions_data)} questions...")
221
  for item in questions_data:
222
  task_id = item.get("task_id")
223
  question_text = item.get("question")
@@ -231,11 +128,13 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
231
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
232
 
233
  if not answers_payload:
234
- return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
235
 
236
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
237
- status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
238
- print(status_update)
 
 
239
 
240
  try:
241
  response = requests.post(submit_url, json=submission_data, timeout=60)
@@ -243,10 +142,8 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
243
  result_data = response.json()
244
  final_status = (
245
  f"Submission Successful!\n"
246
- f"User: {result_data.get('username')}\n"
247
- f"Overall Score: {result_data.get('score', 'N/A')}% "
248
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
249
- f"Message: {result_data.get('message', 'No message received.')}"
250
  )
251
  return final_status, pd.DataFrame(results_log)
252
  except Exception as e:
@@ -254,12 +151,12 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
254
 
255
  with gr.Blocks() as demo:
256
  gr.Markdown("# GAIA Benchmark Agent")
257
- gr.Markdown("Optimized agent for GAIA benchmark certification")
258
 
259
  gr.LoginButton()
260
- run_button = gr.Button("Run Evaluation & Submit All Answers")
261
- status_output = gr.Textbox(label="Run Status", lines=5)
262
- results_table = gr.DataFrame(label="Results", wrap=True)
263
 
264
  run_button.click(
265
  fn=run_and_submit_all,
@@ -267,5 +164,5 @@ with gr.Blocks() as demo:
267
  )
268
 
269
  if __name__ == "__main__":
270
- print("\n" + "-"*30 + " GAIA Agent Starting " + "-"*30)
271
- demo.launch(debug=True, share=False)
 
5
  import re
6
  from openai import OpenAI
7
  from duckduckgo_search import DDGS
 
 
 
8
 
9
  # --- Constants ---
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
  GAIA_SYSTEM_PROMPT = """
12
  You are an expert at solving GAIA benchmark questions. Follow these rules:
13
  1. Think step-by-step before answering
14
+ 2. Format answers EXACTLY as required
15
+ 3. Use web search when needed
16
+ 4. ALWAYS end with: FINAL ANSWER: [Your Answer]
 
 
 
 
 
17
  """
18
 
19
+ class GaiaAgent:
20
  def __init__(self):
21
+ print("Initializing GAIA Agent")
22
  self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
 
 
 
23
  self.answer_patterns = [
24
  r"FINAL ANSWER:\s*(.+)",
25
  r"Final Answer:\s*(.+)",
 
26
  r"Answer:\s*(.+)"
27
  ]
28
 
29
+ def web_search(self, query: str) -> str:
30
+ """Simple web search implementation"""
31
  try:
32
  with DDGS() as ddgs:
33
+ results = [r for r in ddgs.text(query, max_results=3)]
34
  return "\n".join([f"{i+1}. {res['title']}: {res['body']}" for i, res in enumerate(results)])
35
  except Exception as e:
36
  print(f"Search error: {str(e)}")
37
  return ""
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  def __call__(self, question: str) -> str:
40
+ """Handle question answering"""
41
  print(f"Processing: {question[:60]}...")
42
 
43
+ # Determine if we need web search
 
 
 
 
 
 
 
 
 
44
  needs_search = any(word in question.lower() for word in
45
  ["current", "recent", "today", "latest", "who is", "what is"])
46
 
 
66
  print(f"GPT error: {str(e)}")
67
  return "Error: Could not generate answer"
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  def extract_final_answer(self, response: str) -> str:
70
+ """Extract the final answer from the response"""
71
  for pattern in self.answer_patterns:
72
  match = re.search(pattern, response, re.IGNORECASE)
73
  if match:
74
  answer = match.group(1).strip()
75
+ # Clean up the answer
76
+ answer = re.sub(r"[^a-zA-Z0-9,. ]", "", answer)
77
+ return answer[:200] # Limit length
78
 
79
+ # Fallback: return the last line
80
  lines = response.strip().split('\n')
81
  return lines[-1].strip() if lines else "No answer found"
82
 
83
  def run_and_submit_all(profile: gr.OAuthProfile | None):
84
+ """Handle the full submission process"""
85
  space_id = os.getenv("SPACE_ID")
86
 
87
  if profile:
 
96
  submit_url = f"{api_url}/submit"
97
 
98
  try:
99
+ agent = GaiaAgent()
100
  except Exception as e:
101
  print(f"Error instantiating agent: {e}")
102
  return f"Error initializing agent: {e}", None
103
 
104
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
 
105
 
106
  try:
107
  response = requests.get(questions_url, timeout=15)
108
  response.raise_for_status()
109
  questions_data = response.json()
110
  if not questions_data:
111
+ return "Fetched questions list is empty.", None
112
  print(f"Fetched {len(questions_data)} questions.")
113
  except Exception as e:
114
  return f"Error fetching questions: {e}", None
115
 
116
  results_log = []
117
  answers_payload = []
 
118
  for item in questions_data:
119
  task_id = item.get("task_id")
120
  question_text = item.get("question")
 
128
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
129
 
130
  if not answers_payload:
131
+ return "Agent did not produce any answers.", pd.DataFrame(results_log)
132
 
133
+ submission_data = {
134
+ "username": username.strip(),
135
+ "agent_code": agent_code,
136
+ "answers": answers_payload
137
+ }
138
 
139
  try:
140
  response = requests.post(submit_url, json=submission_data, timeout=60)
 
142
  result_data = response.json()
143
  final_status = (
144
  f"Submission Successful!\n"
145
+ f"Score: {result_data.get('score', 'N/A')}% "
146
+ f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)"
 
 
147
  )
148
  return final_status, pd.DataFrame(results_log)
149
  except Exception as e:
 
151
 
152
  with gr.Blocks() as demo:
153
  gr.Markdown("# GAIA Benchmark Agent")
154
+ gr.Markdown("Run the agent to answer GAIA benchmark questions")
155
 
156
  gr.LoginButton()
157
+ run_button = gr.Button("Run Evaluation")
158
+ status_output = gr.Textbox(label="Status", lines=3)
159
+ results_table = gr.DataFrame(label="Results")
160
 
161
  run_button.click(
162
  fn=run_and_submit_all,
 
164
  )
165
 
166
  if __name__ == "__main__":
167
+ print("Starting GAIA Agent...")
168
+ demo.launch()