Update app.py
Browse files
app.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
"""
|
| 2 |
-
GAIA Agent
|
| 3 |
-
Target: 6/20
|
| 4 |
"""
|
| 5 |
import os
|
| 6 |
import re
|
|
@@ -11,376 +11,795 @@ import gradio as gr
|
|
| 11 |
import requests
|
| 12 |
import pandas as pd
|
| 13 |
from bs4 import BeautifulSoup
|
|
|
|
| 14 |
|
| 15 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 16 |
GROQ_API = "https://api.groq.com/openai/v1/chat/completions"
|
| 17 |
|
| 18 |
-
|
| 19 |
# ==========================================
|
| 20 |
# TOOLS
|
| 21 |
# ==========================================
|
| 22 |
|
| 23 |
-
def fetch_webpage(url: str) -> str:
|
|
|
|
| 24 |
try:
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
resp.raise_for_status()
|
|
|
|
| 27 |
soup = BeautifulSoup(resp.text, "html.parser")
|
| 28 |
-
|
|
|
|
|
|
|
| 29 |
el.extract()
|
| 30 |
-
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
except Exception as e:
|
|
|
|
| 33 |
return ""
|
| 34 |
|
| 35 |
|
| 36 |
def fetch_youtube_transcript(url: str) -> str:
|
|
|
|
| 37 |
try:
|
| 38 |
from youtube_transcript_api import YouTubeTranscriptApi
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
try:
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
continue
|
| 48 |
-
|
|
|
|
| 49 |
try:
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
return ""
|
| 57 |
except Exception as e:
|
|
|
|
| 58 |
return ""
|
| 59 |
|
| 60 |
|
| 61 |
-
def fetch_task_file(task_id: str) ->
|
| 62 |
-
"""Returns (content_str, file_type)"""
|
| 63 |
try:
|
| 64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
if resp.status_code != 200:
|
|
|
|
| 66 |
return "", "none"
|
| 67 |
-
|
|
|
|
| 68 |
cd = resp.headers.get("Content-Disposition", "")
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
ext = filename.rsplit(".", 1)[-1].lower() if "." in filename else ""
|
| 71 |
-
|
| 72 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
if ext == "csv" or "csv" in ct:
|
| 74 |
try:
|
| 75 |
-
df = pd.read_csv(io.StringIO(
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
if "spreadsheet" in ct or "excel" in ct or ext in ["xlsx", "xls"]:
|
| 82 |
try:
|
| 83 |
df = pd.read_excel(io.BytesIO(resp.content), engine="openpyxl")
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
return
|
| 87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
if "pdf" in ct or ext == "pdf":
|
| 89 |
try:
|
| 90 |
import PyPDF2
|
| 91 |
reader = PyPDF2.PdfReader(io.BytesIO(resp.content))
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
try:
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
return "", "none"
|
| 108 |
|
| 109 |
|
| 110 |
-
def web_search(query: str, max_results: int = 5) ->
|
| 111 |
-
"""
|
|
|
|
|
|
|
|
|
|
| 112 |
try:
|
| 113 |
from ddgs import DDGS
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
with DDGS() as ddgs:
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
|
| 119 |
|
| 120 |
# ==========================================
|
| 121 |
-
# GROQ
|
| 122 |
# ==========================================
|
| 123 |
|
| 124 |
-
def ask_groq(messages:
|
|
|
|
| 125 |
for attempt in range(3):
|
| 126 |
try:
|
| 127 |
resp = requests.post(
|
| 128 |
GROQ_API,
|
| 129 |
-
headers={
|
|
|
|
|
|
|
|
|
|
| 130 |
json={
|
| 131 |
"model": "llama-3.3-70b-versatile",
|
| 132 |
"messages": messages,
|
| 133 |
-
"temperature":
|
| 134 |
"max_tokens": max_tokens,
|
| 135 |
},
|
| 136 |
-
timeout=
|
| 137 |
)
|
|
|
|
| 138 |
if resp.status_code == 200:
|
| 139 |
return resp.json()["choices"][0]["message"]["content"].strip()
|
| 140 |
elif resp.status_code == 429:
|
| 141 |
-
|
|
|
|
|
|
|
| 142 |
else:
|
| 143 |
-
print(f" Groq {resp.status_code}")
|
| 144 |
-
|
| 145 |
-
except
|
| 146 |
-
print(f" Groq
|
| 147 |
time.sleep(3)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
return ""
|
| 149 |
|
| 150 |
|
| 151 |
# ==========================================
|
| 152 |
-
#
|
| 153 |
# ==========================================
|
| 154 |
|
| 155 |
def preprocess_question(question: str) -> str:
|
|
|
|
| 156 |
stripped = question.strip()
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
|
| 164 |
|
| 165 |
def clean_answer(raw: str) -> str:
|
| 166 |
-
answer
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
for line in answer.split("\n"):
|
| 168 |
line = line.strip()
|
| 169 |
-
if line:
|
| 170 |
answer = line
|
| 171 |
break
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 182 |
|
| 183 |
|
| 184 |
-
def
|
|
|
|
| 185 |
if not answer or len(answer.strip()) < 1:
|
| 186 |
return False
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
|
| 191 |
|
| 192 |
# ==========================================
|
| 193 |
-
#
|
| 194 |
# ==========================================
|
| 195 |
|
| 196 |
-
|
| 197 |
|
| 198 |
-
RULES
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
|
| 209 |
-
def solve_question(question: str, task_id: str, groq_key: str) -> str:
|
| 210 |
-
print(f"\n[Q]: {question[:130]}")
|
| 211 |
|
| 212 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
context_parts = []
|
| 214 |
-
|
| 215 |
-
# 1.
|
| 216 |
file_content, file_type = fetch_task_file(task_id)
|
| 217 |
if file_content and file_type != "none":
|
| 218 |
-
context_parts.append(f"[ATTACHED FILE
|
| 219 |
-
print(f"
|
| 220 |
-
|
| 221 |
-
# 2. YouTube
|
| 222 |
-
yt_urls = re.findall(r'https?://(?:www\.)?(?:youtube\.com/watch\?v=|youtu\.be/)[^\s\)]+',
|
| 223 |
-
for yt_url in yt_urls:
|
| 224 |
-
|
|
|
|
|
|
|
| 225 |
if transcript:
|
| 226 |
-
context_parts.append(f"[YOUTUBE TRANSCRIPT]:\n{transcript}")
|
| 227 |
-
print(f" π¬ Transcript: {len(transcript)}ch")
|
| 228 |
else:
|
| 229 |
-
context_parts.append(f"[YOUTUBE]: Could not fetch transcript
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
other_urls =
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 252 |
results = web_search(search_query, max_results=5)
|
|
|
|
| 253 |
if results:
|
| 254 |
# Add search snippets
|
| 255 |
-
snippets = "\n".join([f"
|
| 256 |
-
context_parts.append(f"[SEARCH RESULTS
|
| 257 |
-
|
| 258 |
-
# Fetch top
|
| 259 |
-
|
| 260 |
for r in results:
|
| 261 |
-
if
|
| 262 |
break
|
| 263 |
href = r.get("href", "")
|
| 264 |
-
if href and "youtube.com" not in href:
|
| 265 |
page = fetch_webpage(href)
|
| 266 |
-
if page and len(page) >
|
| 267 |
-
context_parts.append(f"[PAGE
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
if
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
#
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 302 |
answer = "I don't know"
|
| 303 |
-
|
| 304 |
-
print(f"
|
| 305 |
return answer
|
| 306 |
|
| 307 |
|
| 308 |
# ==========================================
|
| 309 |
-
#
|
| 310 |
# ==========================================
|
| 311 |
|
| 312 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
| 313 |
-
|
|
|
|
|
|
|
| 314 |
if not profile:
|
| 315 |
-
return "
|
| 316 |
-
|
| 317 |
username = profile.username
|
| 318 |
groq_key = os.getenv("GROQ_API_KEY", "")
|
|
|
|
| 319 |
if not groq_key:
|
| 320 |
-
return "β GROQ_API_KEY
|
| 321 |
-
|
| 322 |
-
print(f"\n{'='*
|
| 323 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 324 |
try:
|
| 325 |
-
resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=
|
| 326 |
resp.raise_for_status()
|
| 327 |
questions = resp.json()
|
| 328 |
except Exception as e:
|
| 329 |
-
return f"Errore: {e}", None
|
| 330 |
-
|
| 331 |
-
print(f"π {len(questions)} domande\n")
|
| 332 |
-
|
| 333 |
results = []
|
| 334 |
answers = []
|
| 335 |
-
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
| 336 |
-
|
| 337 |
for i, item in enumerate(questions):
|
| 338 |
task_id = item.get("task_id", "")
|
| 339 |
q = item.get("question")
|
|
|
|
| 340 |
if not task_id or q is None:
|
|
|
|
| 341 |
continue
|
| 342 |
-
|
| 343 |
-
print(f"\n
|
|
|
|
|
|
|
| 344 |
try:
|
| 345 |
-
|
| 346 |
except Exception as e:
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 354 |
if not answers:
|
| 355 |
-
return "Nessuna risposta.", pd.DataFrame(results)
|
| 356 |
-
|
| 357 |
-
|
|
|
|
|
|
|
|
|
|
| 358 |
try:
|
| 359 |
-
|
| 360 |
f"{DEFAULT_API_URL}/submit",
|
| 361 |
-
json={
|
|
|
|
|
|
|
|
|
|
|
|
|
| 362 |
timeout=60,
|
| 363 |
)
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 371 |
print(f"\n{status}")
|
| 372 |
return status, pd.DataFrame(results)
|
|
|
|
| 373 |
except Exception as e:
|
| 374 |
-
|
| 375 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 376 |
|
| 377 |
-
with gr.Blocks() as demo:
|
| 378 |
-
gr.Markdown("# π GAIA Agent v3\nGroq Llama 3.3 70B β smart search + page fetch")
|
| 379 |
-
gr.LoginButton()
|
| 380 |
-
run_button = gr.Button("π₯ Avvia Valutazione", variant="primary")
|
| 381 |
-
status_output = gr.Textbox(label="Risultato", lines=5, interactive=False)
|
| 382 |
-
results_table = gr.DataFrame(label="Risposte", wrap=True)
|
| 383 |
-
run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
|
| 384 |
|
| 385 |
if __name__ == "__main__":
|
| 386 |
-
demo
|
|
|
|
|
|
| 1 |
"""
|
| 2 |
+
GAIA Agent v4 β Enhanced version with better error handling and tools
|
| 3 |
+
Target: 30%+ (6+/20)
|
| 4 |
"""
|
| 5 |
import os
|
| 6 |
import re
|
|
|
|
| 11 |
import requests
|
| 12 |
import pandas as pd
|
| 13 |
from bs4 import BeautifulSoup
|
| 14 |
+
from typing import Optional, Tuple, List, Dict, Any
|
| 15 |
|
| 16 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 17 |
GROQ_API = "https://api.groq.com/openai/v1/chat/completions"
|
| 18 |
|
|
|
|
| 19 |
# ==========================================
|
| 20 |
# TOOLS
|
| 21 |
# ==========================================
|
| 22 |
|
| 23 |
+
def fetch_webpage(url: str, timeout: int = 15) -> str:
    """Download a web page and return its visible text.

    Boilerplate elements (scripts, styles, navigation, forms, ...) are removed
    before text extraction. Text is taken from the first ``<main>``,
    ``<article>`` or content-like ``<div>``; if that region is nearly empty the
    whole page is used instead.

    Args:
        url: Address of the page to download.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        Up to 10000 characters of newline-separated text, or ``""`` when the
        request or the parsing fails (the error is printed, never raised).
    """
    # Below this many characters the "main content" match is assumed to be a
    # false positive (e.g. a div with class "main-menu" or "content-footer").
    min_main_chars = 200

    def _visible_text(node) -> str:
        """Return the node's text with blank and 1-2 character lines dropped."""
        raw = node.get_text("\n", strip=True)
        kept = [line.strip() for line in raw.splitlines() if len(line.strip()) > 2]
        return "\n".join(kept)

    try:
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.5",
        }
        resp = requests.get(url, headers=headers, timeout=timeout, allow_redirects=True)
        resp.raise_for_status()

        soup = BeautifulSoup(resp.text, "html.parser")

        # Remove elements that never carry article text.
        for el in soup(["script", "style", "nav", "footer", "header", "aside", "noscript", "iframe", "form"]):
            el.extract()

        # Prefer a dedicated content region when the page has one.
        main_content = (
            soup.find("main")
            or soup.find("article")
            or soup.find("div", {"class": re.compile(r"content|main|article", re.I)})
        )
        text = _visible_text(main_content) if main_content else ""

        # The class regex is loose; do not let a tiny match hide the real page.
        if len(text) < min_main_chars:
            text = _visible_text(soup)

        return text[:10000]
    except Exception as e:
        # Best-effort tool: callers treat "" as "nothing fetched".
        print(f" ⚠️ Webpage fetch error: {e}")
        return ""
|
| 52 |
|
| 53 |
|
| 54 |
def fetch_youtube_transcript(url: str) -> str:
    """Fetch the transcript of a YouTube video as plain text.

    The lookup order is: preferred language lists, then any manually created
    transcript, then any auto-generated one, then any transcript translated
    to English.

    Args:
        url: A YouTube URL (watch, youtu.be, embed or shorts form) or a bare
            11-character video ID.

    Returns:
        Up to 8000 characters of transcript text, or ``""`` when no video ID
        can be extracted, the ``youtube_transcript_api`` package is missing,
        or no transcript could be fetched. Errors are printed, never raised.
    """
    max_chars = 8000
    id_patterns = (
        r"(?:v=|/v/|youtu\.be/|embed/|shorts/)([a-zA-Z0-9_-]{11})",
        r"^([a-zA-Z0-9_-]{11})$",
    )
    # Each tuple is tried in turn as the `languages` preference list.
    lang_options = (
        ("en",),
        ("en", "en-US", "en-GB"),
        ("it", "it-IT"),
        ("en", "it", "fr", "de", "es", "pt"),
    )

    def _join_snippets(fetched) -> str:
        """Concatenate the text of every snippet in a fetched transcript."""
        return " ".join(snippet.text for snippet in fetched)

    try:
        # Validate the input before touching the optional dependency.
        vid = None
        for pattern in id_patterns:
            match = re.search(pattern, url)
            if match:
                vid = match.group(1)
                break

        if not vid:
            print(f" ⚠️ Could not extract video ID from: {url}")
            return ""

        try:
            from youtube_transcript_api import YouTubeTranscriptApi
        except ImportError:
            print(" ⚠️ youtube_transcript_api not installed")
            return ""

        print(f" 📺 Video ID: {vid}")

        # Instance-based API (fetch / list), as used by the original code.
        ytt_api = YouTubeTranscriptApi()

        for langs in lang_options:
            try:
                text = _join_snippets(ytt_api.fetch(vid, languages=langs))
            except Exception:
                # No transcript for this language set; try the next one.
                continue
            if text:
                print(f" ✅ Got transcript ({len(text)} chars, langs: {langs})")
                return text[:max_chars]

        try:
            transcripts = list(ytt_api.list(vid))
        except Exception as e:
            print(f" ⚠️ Transcript list error: {e}")
            return ""

        # (label, selector, translate-to-English?) in priority order.
        passes = (
            ("manual", lambda t: not t.is_generated, False),
            ("auto", lambda t: t.is_generated, False),
            ("translated", lambda t: True, True),
        )
        for label, wanted, translate in passes:
            for t in transcripts:
                if not wanted(t):
                    continue
                try:
                    source = t.translate("en") if translate else t
                    text = _join_snippets(source.fetch())
                except Exception:
                    # This transcript is unavailable; keep looking.
                    continue
                if text:
                    print(f" ✅ Got {label} transcript ({len(text)} chars)")
                    return text[:max_chars]

        return ""
    except Exception as e:
        print(f" ⚠️ YouTube error: {e}")
        return ""
|
| 149 |
|
| 150 |
|
| 151 |
+
def fetch_task_file(task_id: str) -> Tuple[str, str]:
    """Download the file attached to a task and turn it into prompt text.

    The file is requested from ``{DEFAULT_API_URL}/files/{task_id}`` and
    dispatched on its Content-Type header and file extension.

    Args:
        task_id: Identifier of the task whose attachment is requested.

    Returns:
        A ``(content, file_type)`` pair. ``file_type`` is one of ``"csv"``,
        ``"python"``, ``"text"``, ``"excel"``, ``"pdf"``, ``"audio"``,
        ``"image"``, ``"binary"`` or ``"none"``. ``("", "none")`` means there
        is no attachment or the download failed. Content is capped at 8000
        characters (6000 for Python source). Errors are printed, never raised.
    """
    max_chars = 8000

    def _table_summary(df, label: str) -> str:
        """Render a DataFrame as a short header plus its full text dump."""
        summary = f"{label} file with {len(df)} rows and columns: {list(df.columns)}\n"
        summary += f"Data:\n{df.to_string()}"
        return summary[:max_chars]

    try:
        url = f"{DEFAULT_API_URL}/files/{task_id}"
        resp = requests.get(url, timeout=30)

        # 404 simply means the task has no attachment; stay quiet.
        if resp.status_code == 404:
            return "", "none"
        if resp.status_code != 200:
            print(f" ⚠️ File fetch failed: {resp.status_code}")
            return "", "none"

        ct = resp.headers.get("Content-Type", "").lower()
        cd = resp.headers.get("Content-Disposition", "")

        # Extract filename; stop at ';' so trailing header parameters
        # (e.g. `; size=123`) do not end up in the extension.
        filename = ""
        if "filename=" in cd:
            filename = cd.split("filename=")[-1].split(";")[0].strip('" ')
        ext = filename.rsplit(".", 1)[-1].lower() if "." in filename else ""

        print(f" 📄 File: {filename or 'unknown'}, type: {ct[:50]}")

        # Text/Code files
        if any(t in ct for t in ["text/", "json", "javascript", "python"]) or ext in ["txt", "csv", "json", "py", "md", "js", "html"]:
            text = resp.text

            # CSV parsing; on failure fall through and return the raw text.
            if ext == "csv" or "csv" in ct:
                try:
                    df = pd.read_csv(io.StringIO(text))
                    return _table_summary(df, "CSV"), "csv"
                except Exception as e:
                    print(f" ⚠️ CSV parse error: {e}")

            # Python code
            if ext == "py":
                return f"Python code:\n```python\n{text[:6000]}\n```", "python"

            return text[:max_chars], "text"

        # Excel files
        if "spreadsheet" in ct or "excel" in ct or ext in ["xlsx", "xls"]:
            try:
                df = pd.read_excel(io.BytesIO(resp.content), engine="openpyxl")
                return _table_summary(df, "Excel"), "excel"
            except Exception as e:
                print(f" ⚠️ Excel parse error: {e}")
                # Retry with pandas' default engine selection.
                try:
                    df = pd.read_excel(io.BytesIO(resp.content))
                    return _table_summary(df, "Excel"), "excel"
                except Exception:
                    return "Excel file (could not parse)", "excel"

        # PDF files
        if "pdf" in ct or ext == "pdf":
            try:
                import PyPDF2
                reader = PyPDF2.PdfReader(io.BytesIO(resp.content))
                text_parts = []
                for i, page in enumerate(reader.pages):
                    page_text = page.extract_text() or ""
                    if page_text:
                        text_parts.append(f"--- Page {i+1} ---\n{page_text}")
                text = "\n".join(text_parts)
                return text[:max_chars] if text else "PDF (no extractable text)", "pdf"
            except ImportError:
                print(" ⚠️ PyPDF2 not installed")
                return "PDF file (PyPDF2 not available)", "pdf"
            except Exception as e:
                print(f" ⚠️ PDF parse error: {e}")
                return "PDF file (parse error)", "pdf"

        # Audio files
        if "audio" in ct or ext in ["mp3", "wav", "m4a", "ogg", "flac"]:
            size_kb = len(resp.content) / 1024
            return f"Audio file ({ext or 'unknown'}, {size_kb:.1f} KB). Cannot transcribe audio directly.", "audio"

        # Image files
        if "image" in ct or ext in ["png", "jpg", "jpeg", "gif", "webp", "bmp"]:
            size_kb = len(resp.content) / 1024
            return f"Image file ({ext or 'unknown'}, {size_kb:.1f} KB). Cannot analyze images directly.", "image"

        # Unknown type: try to decode as text.
        try:
            return resp.content.decode("utf-8")[:max_chars], "text"
        except UnicodeDecodeError:
            pass
        # latin-1 decodes any byte sequence, so it cannot tell text from
        # binary on its own; use NUL bytes as the binary marker instead.
        if b"\x00" not in resp.content:
            return resp.content.decode("latin-1")[:max_chars], "text"
        return f"Binary file ({ct or 'unknown type'}, {len(resp.content)} bytes)", "binary"

    except requests.exceptions.Timeout:
        print(" ⚠️ File fetch timeout")
        return "", "none"
    except Exception as e:
        print(f" ⚠️ File fetch error: {e}")
        return "", "none"
|
| 257 |
|
| 258 |
|
| 259 |
+
def web_search(query: str, max_results: int = 5) -> List[Dict[str, str]]:
    """Search the web through DuckDuckGo and return normalised hits.

    The ``ddgs`` package is tried first, then the older
    ``duckduckgo_search`` package as a fallback.

    Args:
        query: Search string.
        max_results: Maximum number of hits requested from the backend.

    Returns:
        A list of dicts with the keys ``"title"``, ``"body"`` and ``"href"``
        (missing fields become ``""``). Empty when both backends are
        unavailable, fail, or find nothing. Errors are printed, never raised.
    """

    def _normalise(hits) -> List[Dict[str, str]]:
        """Keep only the three fields the rest of the agent reads."""
        return [
            {
                "title": hit.get("title", ""),
                "body": hit.get("body", ""),
                "href": hit.get("href", ""),
            }
            for hit in hits
        ]

    # Each backend builds its own list, so a failure halfway through the
    # first one cannot leak partial rows into the fallback's output.

    # Try ddgs package (new name)
    try:
        from ddgs import DDGS
        ddgs = DDGS()
        found = _normalise(ddgs.text(query, max_results=max_results))
        if found:
            print(f" 🔍 ddgs found {len(found)} results")
            return found
    except ImportError:
        pass
    except Exception as e:
        print(f" ⚠️ ddgs error: {e}")

    # Fallback: try duckduckgo-search package
    try:
        from duckduckgo_search import DDGS
        with DDGS() as ddgs:
            found = _normalise(ddgs.text(query, max_results=max_results))
        if found:
            print(f" 🔍 DDG found {len(found)} results")
            return found
    except ImportError:
        print(" ⚠️ duckduckgo-search not installed")
    except Exception as e:
        print(f" ⚠️ DDG error: {e}")

    return []
|
| 300 |
+
|
| 301 |
+
|
| 302 |
+
def search_wikipedia(query: str) -> str:
    """Search English Wikipedia and return the summary of the best hit.

    Up to three search hits are requested. Their summaries are fetched in
    ranking order and the first non-empty extract is returned.

    Args:
        query: Free-text search string.

    Returns:
        ``"Wikipedia - <title>:\\n<extract>"`` for the first hit that has a
        summary, or ``""`` when the search fails, finds nothing, or no hit has
        an extract. Errors are printed, never raised.
    """
    try:
        headers = {
            "User-Agent": "GAIAAgent/1.0 (https://huggingface.co/spaces; contact@example.com)"
        }

        # Search for article
        search_url = "https://en.wikipedia.org/w/api.php"
        params = {
            "action": "query",
            "list": "search",
            "srsearch": query,
            "format": "json",
            "srlimit": 3,
        }
        resp = requests.get(search_url, params=params, headers=headers, timeout=10)

        if resp.status_code != 200:
            print(f" ⚠️ Wikipedia search HTTP {resp.status_code}")
            return ""

        results = resp.json().get("query", {}).get("search", [])

        for hit in results:
            title = hit.get("title", "")
            if not title:
                continue

            # safe="" also escapes "/", which would otherwise be read as a
            # path separator for titles such as "AC/DC".
            encoded_title = requests.utils.quote(title.replace(" ", "_"), safe="")
            content_url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{encoded_title}"
            page = requests.get(content_url, headers=headers, timeout=10)
            if page.status_code != 200:
                continue

            extract = page.json().get("extract", "")
            if extract:
                print(f" 📚 Wikipedia: {title}")
                return f"Wikipedia - {title}:\n{extract}"

        return ""
    except requests.exceptions.Timeout:
        print(f" ⚠️ Wikipedia timeout")
        return ""
    except Exception as e:
        print(f" ⚠️ Wikipedia error: {e}")
        return ""
|
| 352 |
|
| 353 |
|
| 354 |
# ==========================================
|
| 355 |
+
# GROQ LLM
|
| 356 |
# ==========================================
|
| 357 |
|
| 358 |
+
def ask_groq(messages: List[Dict], groq_key: str, max_tokens: int = 400, temperature: float = 0.1) -> str:
    """Send a chat-completion request to the Groq API, retrying on failure.

    Up to three attempts are made. Rate limits (429), request timeouts (408),
    server errors (5xx) and network exceptions are retried after a pause.
    Any other HTTP error is returned immediately because repeating the same
    request cannot succeed.

    Args:
        messages: Chat messages in the OpenAI-style list-of-dicts format.
        groq_key: API key sent as a Bearer token.
        max_tokens: Completion length limit forwarded to the API.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The stripped text of the first choice, or ``""`` when every attempt
        failed. Errors are printed, never raised.
    """
    max_attempts = 3

    for attempt in range(max_attempts):
        delay = 2  # seconds to pause before the next attempt
        try:
            resp = requests.post(
                GROQ_API,
                headers={
                    "Authorization": f"Bearer {groq_key}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": "llama-3.3-70b-versatile",
                    "messages": messages,
                    "temperature": temperature,
                    "max_tokens": max_tokens,
                },
                timeout=30,
            )

            if resp.status_code == 200:
                content = resp.json()["choices"][0]["message"]["content"]
                # `content` can be null in the response; treat that as empty.
                return (content or "").strip()

            if resp.status_code == 429:
                delay = 5 * (attempt + 1)
                print(f" ⏳ Rate limited, waiting {delay}s...")
            else:
                print(f" ⚠️ Groq API error: {resp.status_code} - {resp.text[:200]}")
                if resp.status_code < 500 and resp.status_code != 408:
                    # Bad key or bad request: retrying only burns time.
                    return ""
        except requests.exceptions.Timeout:
            print(f" ⚠️ Groq timeout (attempt {attempt + 1})")
            delay = 3
        except Exception as e:
            print(f" ⚠️ Groq error: {e}")

        # No point sleeping when there is no attempt left to wait for.
        if attempt < max_attempts - 1:
            time.sleep(delay)

    return ""
|
| 394 |
|
| 395 |
|
| 396 |
# ==========================================
|
| 397 |
+
# TEXT PROCESSING
|
| 398 |
# ==========================================
|
| 399 |
|
| 400 |
def preprocess_question(question: str) -> str:
    """Return the question stripped, un-reversing it if it was written backwards.

    The text and its character-wise reversal are each scored by how many
    distinct common English keywords they contain as whole words. The
    reversal wins only when it is longer than 30 characters and scores more
    than two points higher.

    Args:
        question: Raw question text.

    Returns:
        The stripped question, or its reversal when the reversal looks like
        the real English text.
    """
    stripped = question.strip()
    reversed_text = stripped[::-1]

    # Keywords that indicate proper English text
    keywords = frozenset({
        "answer", "what", "who", "how", "find", "list", "which", "where",
        "when", "the", "is", "are", "was", "were", "has", "have", "this",
        "that", "from", "with", "about", "question", "video", "image",
    })

    def _keyword_score(text: str) -> int:
        """Count distinct keywords present as whole words.

        Whole-word matching keeps short keywords such as "is" from scoring
        inside unrelated strings like "etisoppo".
        """
        return len(keywords.intersection(re.findall(r"[a-z]+", text.lower())))

    orig_score = _keyword_score(stripped)
    rev_score = _keyword_score(reversed_text)

    # If reversed text has significantly more keywords, use it
    if rev_score > orig_score + 2 and len(stripped) > 30:
        print(f" 🔄 Detected reversed text, using reversed version")
        return reversed_text

    return stripped
|
| 421 |
|
| 422 |
|
| 423 |
def clean_answer(raw: str) -> str:
    """Reduce a raw LLM reply to the bare answer string.

    Steps, in order: keep the first non-empty line that is not a Markdown
    heading; drop bold markers and surrounding quotes/backticks; strip known
    lead-in phrases such as "The answer is:" (repeatedly, case-insensitive);
    strip trailing ``. , : ; !``. Wrappers are removed both before and after
    the other steps so that ``"Paris."`` and ``**Answer:** 42`` are cleaned
    fully.

    Args:
        raw: The model's reply; may be empty or ``None``.

    Returns:
        The cleaned answer, or ``""`` for empty input. Interior periods are
        never touched, so decimals such as ``3.14`` survive.
    """
    if not raw:
        return ""

    answer = raw.strip()

    # Take first non-empty line that is not a Markdown heading.
    for line in answer.split("\n"):
        line = line.strip()
        if line and not line.startswith("#"):
            answer = line
            break

    # Lead-ins to remove; where one phrase extends another, the longer one
    # is listed first so it wins.
    prefixes = (
        "the answer is:", "the answer is", "answer:", "answer is:",
        "final answer:", "final answer is:", "the final answer is:",
        "the correct answer is:", "the correct answer is",
        "result:", "the result is:",
        "based on my analysis,", "based on my analysis",
        "based on the", "according to",
        "sure,", "here is", "here's", "i found that",
    )

    def _unwrap(text: str) -> str:
        """Remove bold markers and surrounding quotes/backticks."""
        return text.replace("**", "").strip().strip("\"'`").strip()

    answer = _unwrap(answer)

    # Apply prefix removal iteratively (bounded, as replies can stack them).
    for _ in range(10):
        lowered = answer.lower()
        matched = next((p for p in prefixes if lowered.startswith(p)), None)
        if matched is None:
            break
        answer = _unwrap(answer[len(matched):])

    # Alternate punctuation and wrapper stripping until nothing changes, so
    # nested forms like '"Paris."' or '**42.**' are reduced completely.
    previous = None
    while answer != previous:
        previous = answer
        answer = _unwrap(answer.rstrip(".,:;! \t\r\n"))

    return answer
|
| 493 |
|
| 494 |
|
| 495 |
+
def is_valid_answer(answer: str) -> bool:
    """Tell whether an answer is usable, i.e. not empty and not a refusal.

    Refusal markers are matched case-insensitively as whole words or phrases,
    so "error" rejects "Error" but not "Terror". Curly apostrophes are
    treated like straight ones.

    Args:
        answer: Candidate answer text; may be empty or ``None``.

    Returns:
        ``False`` for empty/blank input or when a refusal marker is present,
        ``True`` otherwise.
    """
    if not answer or len(answer.strip()) < 1:
        return False

    invalid_phrases = (
        "i don't know", "i dont know", "i do not know",
        "unknown", "n/a", "none", "error",
        "i cannot", "i can't", "i cant",
        "not available", "no answer", "unable to",
        "i'm not sure", "im not sure", "i am not sure",
        "no image", "cannot determine", "insufficient information",
        "not provided", "cannot access", "i'm unable", "i am unable",
    )

    # Typographic apostrophes would otherwise slip past "i don't know" etc.
    normalised = answer.lower().replace("\u2019", "'")

    # Lookarounds instead of \b because phrases may contain "/" or "'".
    refusal = re.compile(
        r"(?<!\w)(?:" + "|".join(re.escape(p) for p in invalid_phrases) + r")(?!\w)"
    )
    return refusal.search(normalised) is None
|
| 512 |
|
| 513 |
|
| 514 |
# ==========================================
|
| 515 |
+
# MAIN SOLVER
|
| 516 |
# ==========================================
|
| 517 |
|
| 518 |
+
# System message for the LLM: demands a bare final answer with no lead-in,
# explanation or trailing period.
SYSTEM_PROMPT = """You are an expert AI assistant solving GAIA benchmark questions.

CRITICAL RULES - Follow these EXACTLY:
1. Output ONLY the final answer - no explanations, no reasoning, no "The answer is"
2. Numbers: output just the number (e.g., "42" or "3.14")
3. Names: output just the name (e.g., "Marie Curie" or "Paris")
4. Lists: use comma-separated format (e.g., "apple, banana, cherry")
5. Dates: use the format requested or standard format
6. Do NOT add a period at the end
7. If data is provided (CSV, Excel, etc.), analyze it carefully and compute any needed calculations
8. For math/counting questions, show your work internally but output only the final number

SPECIAL CASES:
- For reversed/scrambled questions: the question has been corrected for you
- For video questions without transcript: answer based on any description provided
- For image questions: answer based on any text description of the image
- When asked about specific facts, be precise and concise"""
|
| 535 |
|
|
|
|
|
|
|
| 536 |
|
| 537 |
+
def solve_question(question: str, task_id: str, groq_key: str) -> str:
    """Answer one GAIA question, gathering supporting context first.

    The question is preprocessed, then context is collected in this order:
    the task's attached file, transcripts of YouTube links found in the
    question, pages linked from the question, and (unless a sizeable
    Excel/CSV file or a YouTube link is present) web-search snippets, the
    top result pages and a Wikipedia lookup. The joined context is capped at
    14000 characters and sent to the LLM together with the question. If the
    reply fails ``is_valid_answer`` the question is asked once more without
    any context.

    Args:
        question: Raw question text.
        task_id: Task identifier, passed to ``fetch_task_file``.
        groq_key: API key forwarded to ``ask_groq``.

    Returns:
        The cleaned answer, or ``"I don't know"`` when both attempts fail
        validation.
    """
    print(f"\n[Q]: {question[:150]}{'...' if len(question) > 150 else ''}")

    # Preprocess the question
    processed_q = preprocess_question(question)
    context_parts = []
    # URLs whose content is already in the context, so the search stage does
    # not spend context budget on a page that was fetched before.
    fetched_urls = set()

    # 1. Check for attached files
    file_content, file_type = fetch_task_file(task_id)
    if file_content and file_type != "none":
        context_parts.append(f"[ATTACHED FILE - {file_type.upper()}]:\n{file_content}")
        print(f" π Got {file_type} file ({len(file_content)} chars)")

    # 2. Process YouTube URLs
    yt_urls = re.findall(
        r'https?://(?:www\.)?(?:youtube\.com/watch\?v=|youtu\.be/|youtube\.com/shorts/)[^\s\)\]]+',
        processed_q,
    )
    for yt_url in yt_urls[:2]:  # Limit to 2 videos
        clean_url = yt_url.rstrip('.,;:')
        print(f" π¬ Fetching transcript: {clean_url}")
        transcript = fetch_youtube_transcript(clean_url)
        if transcript:
            context_parts.append(f"[YOUTUBE VIDEO TRANSCRIPT]:\n{transcript}")
        else:
            context_parts.append(
                f"[YOUTUBE VIDEO]: {clean_url} - Could not fetch transcript. "
                "Answer based on your knowledge if possible."
            )

    # 3. Process other URLs (order-preserving de-duplication before the cap)
    other_urls = list(dict.fromkeys(
        u.rstrip('.,;:')
        for u in re.findall(r'https?://[^\s\)\]]+', processed_q)
        if "youtube.com" not in u and "youtu.be" not in u
    ))
    for url in other_urls[:2]:  # Limit to 2 URLs
        print(f" π Fetching page: {url[:60]}...")
        page_content = fetch_webpage(url)
        if page_content:
            context_parts.append(f"[WEBPAGE: {url}]:\n{page_content}")
            fetched_urls.add(url)

    # 4. Web search for additional context.
    # Skipped when a sizeable Excel/CSV file is attached (the data itself is
    # the source of truth) or when the question links to a YouTube video.
    has_file_data = (
        file_type in ("excel", "csv")
        and bool(file_content)
        and len(file_content) > 500
    )
    if has_file_data:
        print(" βοΈ Skipping search - using file data")
    elif not yt_urls:
        # Fall back to the truncated question if the LLM gives no usable query
        search_query = processed_q[:200]

        # Try to extract key terms for search
        query_prompt = ask_groq([
            {"role": "system", "content": "Extract the key search terms from this question. Output ONLY the search query (3-8 words), nothing else."},
            {"role": "user", "content": processed_q[:400]}
        ], groq_key, max_tokens=30, temperature=0.0)

        if query_prompt and 3 < len(query_prompt) < 100:
            search_query = query_prompt

        print(f" π Searching: '{search_query[:50]}'")

        # Try web search
        results = web_search(search_query, max_results=5)

        if results:
            # Add search snippets
            snippets = "\n".join(
                f"β’ {r.get('title', '')}: {r.get('body', '')}" for r in results
            )
            context_parts.append(f"[WEB SEARCH RESULTS]:\n{snippets}")

            # Fetch top result pages (at most 2 with substantial content)
            fetched_count = 0
            for r in results:
                if fetched_count >= 2:
                    break
                href = r.get("href", "")
                if (
                    not href
                    or href in fetched_urls
                    or "youtube.com" in href
                    or "youtu.be" in href
                ):
                    continue
                page = fetch_webpage(href)
                if page and len(page) > 300:
                    context_parts.append(f"[PAGE - {r.get('title', 'Unknown')}]:\n{page[:4000]}")
                    fetched_urls.add(href)
                    fetched_count += 1

        # Also try Wikipedia for factual questions
        wiki_content = search_wikipedia(search_query)
        if wiki_content:
            context_parts.append(f"[WIKIPEDIA]:\n{wiki_content}")

    # 5. Build context and query LLM
    context = "\n\n".join(context_parts)

    # Truncate context if too long
    if len(context) > 14000:
        context = context[:14000] + "\n[...truncated for length]"

    # First attempt, with context when any was gathered
    if context:
        user_content = f"CONTEXT:\n{context}\n\n---\nQUESTION: {processed_q}\n\nProvide ONLY the final answer:"
    else:
        user_content = f"QUESTION: {processed_q}\n\nProvide ONLY the final answer:"
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_content},
    ]

    answer_raw = ask_groq(messages, groq_key, max_tokens=400, temperature=0.1)
    answer = clean_answer(answer_raw)

    # If answer isn't valid, try again with different approach
    if not is_valid_answer(answer):
        print(f" β οΈ First attempt invalid: '{answer}', retrying...")

        # Try with just the question and knowledge
        retry_messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": f"Using your knowledge, answer this question with ONLY the final answer:\n\n{processed_q}"}
        ]
        answer_raw = ask_groq(retry_messages, groq_key, max_tokens=400, temperature=0.2)
        answer = clean_answer(answer_raw)

    # Final validation
    if not is_valid_answer(answer):
        answer = "I don't know"

    print(f" ✅ Answer: {answer}")
    return answer
|
| 660 |
|
| 661 |
|
| 662 |
# ==========================================
|
| 663 |
+
# GRADIO INTERFACE
|
| 664 |
# ==========================================
|
| 665 |
|
| 666 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Run the agent on all questions and submit the answers for scoring.

    Questions are downloaded from ``{DEFAULT_API_URL}/questions``, each one
    is answered with ``solve_question`` (any exception becomes the answer
    ``"I don't know"``), and the collected answers are posted to
    ``{DEFAULT_API_URL}/submit``.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status, table)`` tuple. ``status`` is the message shown to the
        user. ``table`` is a ``pandas.DataFrame`` with one row per processed
        question, or ``None`` when the run stops before any question is
        processed (no login, missing API key, question download failure).
    """
    space_id = os.getenv("SPACE_ID", "")

    if not profile:
        return "Effettua il login con Hugging Face per continuare.", None

    username = profile.username
    groq_key = os.getenv("GROQ_API_KEY", "")

    if not groq_key:
        return "β GROQ_API_KEY non configurata! Aggiungi la chiave nelle impostazioni dello Space.", None

    print(f"\n{'='*60}")
    print(f"π€ User: {username}")
    print("π€ Agent: GAIA Agent v4")
    print(f"{'='*60}")

    # Fetch questions
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=20)
        resp.raise_for_status()
        questions = resp.json()
        # Anything other than a list (e.g. an error object) would break the
        # per-item handling below, so report it as a download failure.
        if not isinstance(questions, list):
            raise ValueError(f"unexpected payload type {type(questions).__name__}")
    except Exception as e:
        return f"β Errore nel recupero delle domande: {e}", None

    print(f"\nπ {len(questions)} domande da processare\n")

    results = []
    answers = []
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""

    for i, item in enumerate(questions):
        task_id = item.get("task_id", "") if isinstance(item, dict) else ""
        q = item.get("question") if isinstance(item, dict) else None

        if not task_id or q is None:
            print(f"[{i+1}] Skipping invalid item")
            continue

        # String forms used only for logging and the results table; the
        # submitted task_id keeps its original value.
        task_label = str(task_id)[:20]
        q_text = str(q)

        print(f"\n{'β'*60}")
        print(f"[{i+1}/{len(questions)}] Task: {task_label}...")

        try:
            answer = solve_question(q, task_id, groq_key)
        except Exception as e:
            # One failing question must not abort the whole run
            print(f" π₯ Exception: {e}")
            traceback.print_exc()
            answer = "I don't know"

        answers.append({
            "task_id": task_id,
            "submitted_answer": answer
        })
        results.append({
            "Task ID": task_label + "...",
            "Question": q_text[:80] + ("..." if len(q_text) > 80 else ""),
            "Answer": answer
        })

        # Rate limit protection
        time.sleep(1.5)

    if not answers:
        return "β Nessuna risposta generata.", pd.DataFrame(results)

    # Submit answers
    print(f"\n{'='*60}")
    print(f"π€ Submitting {len(answers)} answers...")

    try:
        submit_resp = requests.post(
            f"{DEFAULT_API_URL}/submit",
            json={
                "username": username,
                "agent_code": agent_code,
                "answers": answers
            },
            timeout=60,
        )
        submit_resp.raise_for_status()
        result = submit_resp.json()

        score = result.get('score', 'N/A')
        correct = result.get('correct_count', '?')
        total = result.get('total_attempted', '?')
        message = result.get('message', '')

        status = f"""β
Completato!
π€ {result.get('username')}
π {score}% ({correct}/{total})
π {message}"""

        print(f"\n{status}")
        return status, pd.DataFrame(results)

    except Exception as e:
        error_msg = f"β Errore nell'invio: {e}"
        print(error_msg)
        return error_msg, pd.DataFrame(results)
|
| 766 |
+
|
| 767 |
+
|
| 768 |
+
def create_demo():
    """Assemble the Gradio Blocks UI and return it.

    The page contains a Markdown header, a login button, a button that
    triggers ``run_and_submit_all``, and two output components (a status
    textbox and a results table) that receive its two return values.
    """
    header_md = """# π GAIA Agent v4

**Enhanced agent with better tools and reasoning**
- Groq Llama 3.3 70B
- Smart web search + Wikipedia
- YouTube transcript extraction
- File parsing (CSV, Excel, PDF, Python)
"""

    with gr.Blocks(title="GAIA Agent v4") as app:
        gr.Markdown(header_md)

        gr.LoginButton()

        start_btn = gr.Button("π₯ Avvia Valutazione", variant="primary", size="lg")

        # Output widgets, in the order run_and_submit_all returns its values
        status_box = gr.Textbox(label="Risultato", lines=6, interactive=False)
        answers_grid = gr.DataFrame(label="Risposte", wrap=True)

        start_btn.click(
            fn=run_and_submit_all,
            outputs=[status_box, answers_grid],
        )

    return app
|
| 801 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 802 |
|
| 803 |
if __name__ == "__main__":
    # Build the UI, allow one queued event to run at a time, then start the
    # server locally (no public share link) with debug output enabled.
    demo = create_demo()
    demo.queue(default_concurrency_limit=1)
    demo.launch(debug=True, share=False)
|