Vinsmart06 committed on
Commit
b93e474
·
verified ·
1 Parent(s): 2bf5b9b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -349
app.py CHANGED
@@ -98,382 +98,135 @@ def youtube_captions(self, url):
98
  from openai import OpenAI
99
 
100
  class BasicAgent:
101
-
102
-
103
- def read_audio(self, file):
104
-
105
- try:
106
- model = whisper.load_model("base")
107
-
108
- result = model.transcribe(file)
109
-
110
- return result["text"]
111
-
112
- except:
113
- return ""
114
- def execute_tool(self, tool, input_data, file_url):
115
-
116
- if tool == "wiki_search":
117
- return self.wiki_search(input_data)
118
-
119
- if tool == "scrape_page":
120
- return self.scrape_page(input_data)
121
-
122
- if tool == "read_excel":
123
- file = self.download_file(file_url)
124
- return self.read_excel(file)
125
-
126
- if tool == "read_image":
127
- file = self.download_file(file_url)
128
- return self.read_image(file)
129
-
130
- if tool == "calculator":
131
- try:
132
- return str(eval(input_data))
133
- except:
134
- return "error"
135
-
136
- return "unknown tool"
137
-
138
- def agent_loop(self, question, file_url):
139
-
140
- memory = ""
141
-
142
- for step in range(5):
143
-
144
- prompt = f"""
145
- You are a GAIA solving agent.
146
-
147
- Available tools:
148
-
149
- 1. read_excel(file_url)
150
- 2. read_image(file_url)
151
- 3. scrape_page(url)
152
- 4. youtube_captions(url)
153
- 5. calculator(expression)
154
- 6. wiki_search(query)
155
-
156
- Question:
157
- {question}
158
-
159
- Previous steps:
160
- {memory}
161
-
162
- Decide next action.
163
-
164
- Format:
165
-
166
- TOOL: tool_name
167
- INPUT: tool_input
168
-
169
- OR
170
-
171
- FINAL: answer
172
- """
173
-
174
- response = self.client.chat.completions.create(
175
- model="gpt-4o-mini",
176
- temperature=0,
177
- messages=[{"role": "user", "content": prompt}]
178
- )
179
-
180
- action = response.choices[0].message.content
181
-
182
- print("Agent step:", action)
183
-
184
- # FINAL ANSWER
185
- if "FINAL:" in action:
186
-
187
- return action.split("FINAL:")[-1].strip()
188
-
189
- # TOOL CALL
190
- if "TOOL:" in action:
191
-
192
- tool = action.split("TOOL:")[1].split("\n")[0].strip()
193
-
194
- input_data = action.split("INPUT:")[-1].strip()
195
-
196
- result = self.execute_tool(tool, input_data, file_url)
197
-
198
- memory += f"\nTool {tool} result:\n{result}\n"
199
-
200
- return "No answer found"
201
  def __init__(self):
202
  print("🚀 Super GAIA Agent initialized")
203
- self.client = OpenAI()
204
-
205
- # ------------------------------------------------
206
- # Download file
207
- # ------------------------------------------------
208
 
209
  def download_file(self, url):
210
-
 
211
  try:
212
-
213
  r = requests.get(url, timeout=20)
214
-
215
- file_name = url.split("/")[-1]
216
-
217
  with open(file_name, "wb") as f:
218
  f.write(r.content)
219
-
220
  return file_name
221
-
222
  except Exception as e:
223
-
224
- print("Download error:", e)
225
-
226
  return None
227
 
228
- # ------------------------------------------------
229
- # Excel parser
230
- # ------------------------------------------------
231
-
232
- def read_excel(self, file):
233
-
234
- try:
235
-
236
- df = pd.read_excel(file)
237
-
238
- return df.to_string()
239
-
240
- except Exception as e:
241
-
242
- print("Excel error:", e)
243
-
244
- return ""
245
-
246
- # ------------------------------------------------
247
- # Image OCR
248
- # ------------------------------------------------
249
-
250
- def read_image(self, file):
251
-
252
- try:
253
-
254
- img = Image.open(file)
255
-
256
- text = pytesseract.image_to_string(img)
257
-
258
- return text
259
-
260
- except Exception as e:
261
-
262
- print("OCR error:", e)
263
-
264
- return ""
265
-
266
- # ------------------------------------------------
267
- # Web scraper
268
- # ------------------------------------------------
269
-
270
- def scrape_page(self, url):
271
-
272
- try:
273
-
274
- r = requests.get(url, timeout=20)
275
-
276
- soup = BeautifulSoup(r.text, "html.parser")
277
-
278
- text = soup.get_text()
279
-
280
- return text[:6000]
281
-
282
- except Exception as e:
283
-
284
- print("Web error:", e)
285
-
286
- return ""
287
-
288
- # ------------------------------------------------
289
- # Wikipedia search
290
- # ------------------------------------------------
291
-
292
  def wiki_search(self, query):
293
-
294
  try:
295
-
296
- url = f"https://en.wikipedia.org/w/api.php?action=query&list=search&srsearch={query}&format=json"
297
-
298
  r = requests.get(url).json()
299
-
 
300
  title = r["query"]["search"][0]["title"]
301
-
302
- page = requests.get(
303
- f"https://en.wikipedia.org/api/rest_v1/page/summary/{title}"
304
- ).json()
305
-
306
- return page["extract"]
307
-
308
  except:
 
309
 
310
- return ""
311
-
312
- # ------------------------------------------------
313
- # Python calculator
314
- # ------------------------------------------------
315
-
316
- def calculator(self, question):
317
-
318
  try:
 
 
 
 
 
 
 
319
 
320
- expr = re.findall(r'[\d\.\+\-\*\/\(\)]+', question)
321
-
322
- for e in expr:
323
-
324
- if any(op in e for op in "+-*/"):
325
-
326
- result = eval(e, {"__builtins__": {}})
327
-
328
- return str(result)
329
-
330
- except:
331
- pass
332
-
333
- return None
334
-
335
- # ------------------------------------------------
336
- # File loader
337
- # ------------------------------------------------
338
-
339
- def load_file(self, url):
340
-
341
- if not url:
342
- return ""
343
-
344
- file = self.download_file(url)
345
-
346
- if not file:
347
- return ""
348
-
349
- if file.endswith(".xlsx") or file.endswith(".xls"):
350
-
351
- return self.read_excel(file)
352
-
353
- if file.endswith(".png") or file.endswith(".jpg"):
354
-
355
- return self.read_image(file)
356
- if file.endswith(".mp3"):
357
- return self.read_audio(file)
358
- if file.endswith(".py"):
359
-
360
- with open(file) as f:
361
- return f.read()
362
-
363
- try:
364
-
365
- with open(file) as f:
366
-
367
- return f.read()
368
-
369
- except:
370
-
371
- return ""
372
-
373
- # ------------------------------------------------
374
- # Clean answer
375
- # ------------------------------------------------
376
-
377
- def clean(self, text):
378
-
379
- text = text.replace("Answer:", "")
380
- text = text.replace("Final answer:", "")
381
-
382
- text = text.strip()
383
-
384
- text = text.replace("\n", " ")
385
-
386
- return text
387
-
388
- # ------------------------------------------------
389
- # LLM reasoning
390
- # ------------------------------------------------
391
-
392
- def reason(self, question, file_content):
393
-
394
- wiki_data = self.wiki_search(question)
395
-
396
- prompt = f"""
397
- You are a powerful GAIA benchmark agent.
398
-
399
- You can:
400
- - read tables
401
- - analyze files
402
- - solve math
403
- - search wikipedia
404
- - extract OCR text
405
-
406
- QUESTION:
407
- {question}
408
-
409
- FILE CONTENT:
410
- {file_content}
411
-
412
- WIKIPEDIA DATA:
413
- {wiki_data}
414
-
415
- Solve the task carefully.
416
-
417
- If a list is requested, return the FULL list.
418
-
419
- Return ONLY the final answer.
420
- """
421
-
422
- response = self.client.chat.completions.create(
423
-
424
- model="gpt-4o-mini",
425
- temperature=0,
426
-
427
- messages=[
428
- {"role": "system", "content": "You are a highly capable reasoning agent."},
429
- {"role": "user", "content": prompt}
430
- ]
431
- )
432
-
433
- return response.choices[0].message.content
434
-
435
- # ------------------------------------------------
436
- # Extract final answer
437
- # ------------------------------------------------
438
-
439
- def extract(self, reasoning):
440
 
441
- prompt = f"""
442
- Extract the final answer from the text.
443
 
444
- Text:
445
- {reasoning}
446
 
447
- Return only the answer.
448
- """
 
449
 
450
- response = self.client.chat.completions.create(
 
 
 
 
451
 
452
- model="gpt-4o-mini",
453
- temperature=0,
 
 
 
 
 
 
 
454
 
455
- messages=[{"role": "user", "content": prompt}]
456
- )
 
457
 
458
- return response.choices[0].message.content
459
-
460
- # ------------------------------------------------
461
- # Main agent call
462
- # ------------------------------------------------
463
-
464
- def __call__(self, question, file_url=None):
465
- if "youtube.com" in question or "youtu.be" in question:
466
- captions = self.youtube_captions(question)
467
- file_content = captions
468
- else:
469
- file_content = self.load_file(file_url)
470
- print("Question:", question)
471
-
472
- answer = self.agent_loop(question, file_url)
473
-
474
- print("Final:", answer)
475
-
476
- return answer
477
 
478
  def run_and_submit_all( profile: gr.OAuthProfile | None):
479
  """
 
98
  from openai import OpenAI
99
 
100
  class BasicAgent:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  def __init__(self):
102
  print("🚀 Super GAIA Agent initialized")
103
+ # Ensure your API Key is set in the Hugging Face Space Secrets
104
+ self.client = OpenAI()
 
 
 
105
 
106
  def download_file(self, url):
107
+ if not url or not url.startswith("http"):
108
+ return None
109
  try:
 
110
  r = requests.get(url, timeout=20)
111
+ file_name = url.split("/")[-1] or "temp_file"
 
 
112
  with open(file_name, "wb") as f:
113
  f.write(r.content)
 
114
  return file_name
 
115
  except Exception as e:
116
+ print(f"Download error: {e}")
 
 
117
  return None
118
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  def wiki_search(self, query):
 
120
  try:
121
+ # Clean query to avoid API errors
122
+ query = query.strip('"')
123
+ url = f"https://en.wikipedia.org{query}&format=json"
124
  r = requests.get(url).json()
125
+ if not r.get("query", {}).get("search"):
126
+ return "No wikipedia results found."
127
  title = r["query"]["search"][0]["title"]
128
+ page = requests.get(f"https://en.wikipedia.org{title}").json()
129
+ return page.get("extract", "No summary available.")
 
 
 
 
 
130
  except:
131
+ return "Wikipedia search failed."
132
 
133
+ def youtube_captions(self, url):
 
 
 
 
 
 
 
134
  try:
135
+ ydl_opts = {"skip_download": True, "writesubtitles": True, "writeautomaticsub": True}
136
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
137
+ info = ydl.extract_info(url, download=False)
138
+ # Return first available captions
139
+ return str(info.get("subtitles") or info.get("automatic_captions"))[:5000]
140
+ except Exception as e:
141
+ return f"YouTube error: {e}"
142
 
143
+ def execute_tool(self, tool, input_data, file_url):
144
+ # 1. Handle tools that need the provided file_url
145
+ if tool in ["read_excel", "read_image"]:
146
+ target_url = file_url if file_url else input_data
147
+ local_file = self.download_file(target_url)
148
+ if not local_file: return "Error: Could not download file. Check if file_url is valid."
149
+
150
+ if tool == "read_excel":
151
+ try:
152
+ return pd.read_excel(local_file).to_string()[:5000]
153
+ except: return "Excel read error."
154
+ if tool == "read_image":
155
+ try:
156
+ return pytesseract.image_to_string(Image.open(local_file))
157
+ except: return "OCR read error."
158
+
159
+ # 2. Handle web/search tools
160
+ if tool == "wiki_search":
161
+ return self.wiki_search(input_data)
162
+ if tool == "scrape_page":
163
+ try:
164
+ r = requests.get(input_data, timeout=15)
165
+ return BeautifulSoup(r.text, "html.parser").get_text()[:6000]
166
+ except: return "Web scraping failed."
167
+ if tool == "youtube_captions":
168
+ return self.youtube_captions(input_data)
169
+ if tool == "calculator":
170
+ try:
171
+ # Basic math security
172
+ clean = re.sub(r'[^\d\+\-\*\/\(\)\.]', '', input_data)
173
+ return str(eval(clean, {"__builtins__": {}}))
174
+ except: return "Math error."
175
+
176
+ return "Unknown tool"
177
+
178
+ def agent_loop(self, question, file_url=None):
179
+ memory = ""
180
+ # Provide the file_url explicitly in the prompt so the agent knows it exists
181
+ file_info = f"\nNote: A file is available for this task at: {file_url}" if file_url else ""
182
+
183
+ for step in range(5):
184
+ prompt = f"""You are a GAIA solving agent.
185
+ Available tools: read_excel, read_image, scrape_page, youtube_captions, calculator, wiki_search.
186
+ {file_info}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
 
188
+ Question: {question}
 
189
 
190
+ Current History:
191
+ {memory}
192
 
193
+ Instructions:
194
+ 1. If you need to read the provided file, use TOOL: read_excel or TOOL: read_image with INPUT: {file_url if file_url else 'none'}.
195
+ 2. Give a FINAL answer as soon as you have enough information.
196
 
197
+ Format:
198
+ TOOL: tool_name
199
+ INPUT: tool_input
200
+ OR
201
+ FINAL: your_precise_answer"""
202
 
203
+ response = self.client.chat.completions.create(
204
+ model="gpt-4o-mini",
205
+ temperature=0,
206
+ messages=[{"role": "system", "content": "You are a helpful assistant that uses tools to solve tasks."},
207
+ {"role": "user", "content": prompt}]
208
+ )
209
+
210
+ action = response.choices[0].message.content.strip()
211
+ print(f"Agent step: {action}")
212
 
213
+ if "FINAL:" in action:
214
+ # Extract only the content after FINAL:
215
+ return action.split("FINAL:")[-1].strip()
216
 
217
+ if "TOOL:" in action:
218
+ try:
219
+ tool_part = action.split("TOOL:")[1].split("\n")[0].strip()
220
+ input_part = action.split("INPUT:")[1].split("\n")[0].strip()
221
+ result = self.execute_tool(tool_part, input_part, file_url)
222
+ memory += f"\nStep {step}: Tool {tool_part} returned: {result[:1000]}"
223
+ except Exception as e:
224
+ memory += f"\nStep {step}: Tool call error: {str(e)}"
225
+ else:
226
+ # If the agent just talks without a tool or FINAL, treat as text
227
+ memory += f"\nStep {step}: {action}"
228
+
229
+ return "No answer found"
 
 
 
 
 
 
230
 
231
  def run_and_submit_all( profile: gr.OAuthProfile | None):
232
  """