Update app.py
Browse files
app.py
CHANGED
|
@@ -2,24 +2,26 @@ import os
|
|
| 2 |
import re
|
| 3 |
import io
|
| 4 |
import json
|
|
|
|
| 5 |
import traceback
|
| 6 |
import gradio as gr
|
| 7 |
import requests
|
| 8 |
import pandas as pd
|
| 9 |
from bs4 import BeautifulSoup
|
| 10 |
-
from smolagents import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
# --- Constants ---
|
| 13 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 14 |
|
| 15 |
-
#
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
"meta-llama/Meta-Llama-3.1-8B-Instruct",
|
| 20 |
-
"mistralai/Mixtral-8x7B-Instruct-v0.1",
|
| 21 |
-
"HuggingFaceH4/zephyr-7b-beta",
|
| 22 |
-
]
|
| 23 |
|
| 24 |
|
| 25 |
# ==========================================
|
|
@@ -41,9 +43,7 @@ def visit_webpage(url: str) -> str:
|
|
| 41 |
soup = BeautifulSoup(response.text, "html.parser")
|
| 42 |
for el in soup(["script", "style", "nav", "footer", "header", "aside", "noscript"]):
|
| 43 |
el.extract()
|
| 44 |
-
|
| 45 |
-
# Pulizia extra
|
| 46 |
-
lines = [l.strip() for l in text.splitlines() if l.strip()]
|
| 47 |
return "\n".join(lines)[:15000]
|
| 48 |
except Exception as e:
|
| 49 |
return f"Error fetching {url}: {str(e)}"
|
|
@@ -87,8 +87,8 @@ def get_youtube_transcript(video_url: str) -> str:
|
|
| 87 |
@tool
|
| 88 |
def download_task_file(task_id: str) -> str:
|
| 89 |
"""Downloads and reads the file attached to a GAIA task.
|
| 90 |
-
Handles text, CSV, JSON, PDF, Excel (.xlsx/.xls),
|
| 91 |
-
Always try this if the question might reference an attached
|
| 92 |
Args:
|
| 93 |
task_id: The task_id string from the GAIA question.
|
| 94 |
"""
|
|
@@ -101,7 +101,6 @@ def download_task_file(task_id: str) -> str:
|
|
| 101 |
ct = response.headers.get("Content-Type", "")
|
| 102 |
cd = response.headers.get("Content-Disposition", "")
|
| 103 |
|
| 104 |
-
# Detect filename from Content-Disposition
|
| 105 |
filename = ""
|
| 106 |
if "filename=" in cd:
|
| 107 |
filename = cd.split("filename=")[-1].strip('" ')
|
|
@@ -109,13 +108,13 @@ def download_task_file(task_id: str) -> str:
|
|
| 109 |
|
| 110 |
print(f" [FILE] type={ct}, name={filename}, ext={ext}, size={len(response.content)}")
|
| 111 |
|
| 112 |
-
# --- TEXT / CSV / JSON ---
|
| 113 |
if any(t in ct for t in ["text", "json", "csv"]) or ext in ["txt", "csv", "json", "py", "md"]:
|
| 114 |
text = response.text
|
| 115 |
if ext == "csv" or "csv" in ct:
|
| 116 |
try:
|
| 117 |
df = pd.read_csv(io.StringIO(text))
|
| 118 |
-
return f"CSV
|
| 119 |
except Exception:
|
| 120 |
pass
|
| 121 |
return text[:12000]
|
|
@@ -124,10 +123,7 @@ def download_task_file(task_id: str) -> str:
|
|
| 124 |
if "spreadsheet" in ct or "excel" in ct or ext in ["xlsx", "xls"]:
|
| 125 |
try:
|
| 126 |
df = pd.read_excel(io.BytesIO(response.content), engine="openpyxl")
|
| 127 |
-
|
| 128 |
-
summary += f"Data types: {dict(df.dtypes)}\n\n"
|
| 129 |
-
summary += df.to_string()
|
| 130 |
-
return summary[:12000]
|
| 131 |
except Exception as e:
|
| 132 |
return f"Excel file but read error: {e}"
|
| 133 |
|
|
@@ -136,60 +132,32 @@ def download_task_file(task_id: str) -> str:
|
|
| 136 |
try:
|
| 137 |
import PyPDF2
|
| 138 |
reader = PyPDF2.PdfReader(io.BytesIO(response.content))
|
| 139 |
-
|
| 140 |
for i, page in enumerate(reader.pages):
|
| 141 |
t = page.extract_text() or ""
|
| 142 |
-
|
| 143 |
-
return "\n".join(
|
| 144 |
except Exception as e:
|
| 145 |
return f"PDF attached but read error: {e}"
|
| 146 |
|
| 147 |
-
# --- AUDIO
|
| 148 |
if "audio" in ct or ext in ["mp3", "wav", "m4a", "ogg"]:
|
| 149 |
-
return f"Audio file attached ({ct}, {len(response.content)} bytes). Cannot transcribe
|
| 150 |
|
| 151 |
# --- IMAGE ---
|
| 152 |
if "image" in ct or ext in ["png", "jpg", "jpeg", "gif", "webp"]:
|
| 153 |
return f"Image file attached ({ct}, {len(response.content)} bytes)."
|
| 154 |
|
| 155 |
-
# --- Fallback
|
| 156 |
try:
|
| 157 |
-
|
| 158 |
-
return decoded[:12000]
|
| 159 |
except Exception:
|
| 160 |
-
return f"Binary file ({ct}, {len(response.content)} bytes).
|
| 161 |
|
| 162 |
except Exception as e:
|
| 163 |
return f"File download error: {str(e)}"
|
| 164 |
|
| 165 |
|
| 166 |
-
# ==========================================
|
| 167 |
-
# 🧮 TOOL 4: PYTHON EVAL SICURO
|
| 168 |
-
# ==========================================
|
| 169 |
-
@tool
|
| 170 |
-
def python_compute(code: str) -> str:
|
| 171 |
-
"""Executes a Python expression or short script and returns the result.
|
| 172 |
-
Use for math calculations, string manipulation, date computations, etc.
|
| 173 |
-
Args:
|
| 174 |
-
code: A Python expression or short script. Use print() for output.
|
| 175 |
-
"""
|
| 176 |
-
try:
|
| 177 |
-
# Prova prima come espressione
|
| 178 |
-
result = eval(code)
|
| 179 |
-
return str(result)
|
| 180 |
-
except SyntaxError:
|
| 181 |
-
# Se è uno statement, eseguilo e cattura stdout
|
| 182 |
-
import contextlib
|
| 183 |
-
import sys
|
| 184 |
-
f = io.StringIO()
|
| 185 |
-
with contextlib.redirect_stdout(f):
|
| 186 |
-
exec(code)
|
| 187 |
-
output = f.getvalue().strip()
|
| 188 |
-
return output if output else "Code executed (no output)"
|
| 189 |
-
except Exception as e:
|
| 190 |
-
return f"Error: {str(e)}"
|
| 191 |
-
|
| 192 |
-
|
| 193 |
# ==========================================
|
| 194 |
# 🔍 PRE-PROCESSING
|
| 195 |
# ==========================================
|
|
@@ -197,79 +165,15 @@ def preprocess_question(question: str) -> str:
|
|
| 197 |
"""Detect reversed text and fix it."""
|
| 198 |
stripped = question.strip()
|
| 199 |
reversed_q = stripped[::-1]
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
if keywords_present_reversed > keywords_present_original and len(stripped) > 20:
|
| 206 |
-
print(f" [PRE-PROCESS] Reversed text detected! Using reversed version.")
|
| 207 |
return reversed_q
|
| 208 |
-
|
| 209 |
return question
|
| 210 |
|
| 211 |
|
| 212 |
-
# ==========================================
|
| 213 |
-
# 🔄 CHIAMATA DIRETTA HF INFERENCE API
|
| 214 |
-
# ==========================================
|
| 215 |
-
def call_hf_direct(question: str, task_context: str = "") -> str:
|
| 216 |
-
"""Fallback: chiama HF Inference API direttamente senza smolagents."""
|
| 217 |
-
|
| 218 |
-
prompt = f"""You are answering a question from the GAIA benchmark.
|
| 219 |
-
Give ONLY the final answer — no explanation, no preamble, no "The answer is".
|
| 220 |
-
|
| 221 |
-
Rules:
|
| 222 |
-
- For numbers: just digits (e.g., 42)
|
| 223 |
-
- For names: just the name (e.g., Einstein)
|
| 224 |
-
- For lists: comma-separated (e.g., apple, banana, cherry)
|
| 225 |
-
- No period at the end unless part of the answer
|
| 226 |
-
- If text seems reversed, reverse it first
|
| 227 |
-
|
| 228 |
-
{task_context}
|
| 229 |
-
|
| 230 |
-
Question: {question}
|
| 231 |
-
|
| 232 |
-
Answer:"""
|
| 233 |
-
|
| 234 |
-
hf_token = os.getenv("HF_TOKEN", "")
|
| 235 |
-
headers = {"Content-Type": "application/json"}
|
| 236 |
-
if hf_token:
|
| 237 |
-
headers["Authorization"] = f"Bearer {hf_token}"
|
| 238 |
-
|
| 239 |
-
for model in MODEL_CANDIDATES:
|
| 240 |
-
try:
|
| 241 |
-
api_url = f"https://api-inference.huggingface.co/models/{model}"
|
| 242 |
-
payload = {
|
| 243 |
-
"inputs": prompt,
|
| 244 |
-
"parameters": {
|
| 245 |
-
"max_new_tokens": 150,
|
| 246 |
-
"temperature": 0.1,
|
| 247 |
-
"return_full_text": False,
|
| 248 |
-
},
|
| 249 |
-
}
|
| 250 |
-
resp = requests.post(api_url, headers=headers, json=payload, timeout=45)
|
| 251 |
-
|
| 252 |
-
if resp.status_code == 200:
|
| 253 |
-
data = resp.json()
|
| 254 |
-
if isinstance(data, list) and len(data) > 0:
|
| 255 |
-
raw = data[0].get("generated_text", "").strip()
|
| 256 |
-
if raw:
|
| 257 |
-
answer = clean_answer(raw)
|
| 258 |
-
if answer and answer.lower() not in [
|
| 259 |
-
"i don't know", "unknown", "n/a", "none", "error", "",
|
| 260 |
-
]:
|
| 261 |
-
print(f" [FALLBACK OK via {model}]: {answer[:100]}")
|
| 262 |
-
return answer
|
| 263 |
-
else:
|
| 264 |
-
print(f" [FALLBACK {model}] HTTP {resp.status_code}")
|
| 265 |
-
|
| 266 |
-
except Exception as e:
|
| 267 |
-
print(f" [FALLBACK {model} ERROR]: {e}")
|
| 268 |
-
continue
|
| 269 |
-
|
| 270 |
-
return "I don't know"
|
| 271 |
-
|
| 272 |
-
|
| 273 |
# ==========================================
|
| 274 |
# 🧹 PULIZIA RISPOSTA
|
| 275 |
# ==========================================
|
|
@@ -277,12 +181,12 @@ def clean_answer(raw: str) -> str:
|
|
| 277 |
"""Pulisci la risposta grezza dall'agente."""
|
| 278 |
answer = str(raw).strip()
|
| 279 |
|
| 280 |
-
#
|
| 281 |
lines = [l.strip() for l in answer.split("\n") if l.strip()]
|
| 282 |
if lines:
|
| 283 |
answer = lines[0]
|
| 284 |
|
| 285 |
-
# Rimuovi prefissi
|
| 286 |
prefixes = [
|
| 287 |
"the answer is:", "the answer is", "final answer:", "final answer is:",
|
| 288 |
"final answer is", "answer:", "answer is:", "answer is",
|
|
@@ -298,19 +202,17 @@ def clean_answer(raw: str) -> str:
|
|
| 298 |
if lower.startswith(prefix):
|
| 299 |
answer = answer[len(prefix):].strip()
|
| 300 |
lower = answer.lower()
|
| 301 |
-
|
| 302 |
-
if answer.startswith('"') or answer.startswith("'"):
|
| 303 |
answer = answer[1:]
|
| 304 |
break
|
| 305 |
|
| 306 |
-
# Rimuovi punto finale (
|
| 307 |
if answer.endswith(".") and not re.search(r"\d\.$", answer):
|
| 308 |
answer = answer[:-1].strip()
|
| 309 |
|
| 310 |
-
#
|
| 311 |
answer = answer.replace("**", "").strip('"').strip("'").strip("`").strip()
|
| 312 |
|
| 313 |
-
# Se la risposta inizia con "is " (residuo), rimuovilo
|
| 314 |
if answer.lower().startswith("is "):
|
| 315 |
answer = answer[3:].strip()
|
| 316 |
|
|
@@ -318,220 +220,234 @@ def clean_answer(raw: str) -> str:
|
|
| 318 |
|
| 319 |
|
| 320 |
# ==========================================
|
| 321 |
-
#
|
| 322 |
# ==========================================
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 328 |
|
| 329 |
-
|
| 330 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 331 |
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
)
|
| 341 |
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
python_compute,
|
| 349 |
-
],
|
| 350 |
-
model=model,
|
| 351 |
-
max_steps=6,
|
| 352 |
-
additional_authorized_imports=[
|
| 353 |
-
"requests", "bs4", "json", "time", "math", "datetime",
|
| 354 |
-
"pandas", "numpy", "re", "csv", "urllib", "collections",
|
| 355 |
-
"itertools", "string", "unicodedata", "statistics",
|
| 356 |
-
],
|
| 357 |
-
)
|
| 358 |
-
print(f" ✅ Agent inizializzato con {model_id}")
|
| 359 |
-
break
|
| 360 |
-
except Exception as e:
|
| 361 |
-
print(f" ❌ {model_id} fallito: {e}")
|
| 362 |
-
continue
|
| 363 |
|
| 364 |
-
if
|
| 365 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 366 |
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
if task_id:
|
| 371 |
-
file_hint = f'\nThis question has task_id="{task_id}". Call download_task_file("{task_id}") to check for attached files.'
|
| 372 |
|
| 373 |
-
extra_context = ""
|
| 374 |
-
if file_context:
|
| 375 |
-
extra_context = f"\n\nFILE CONTENT:\n{file_context}\n"
|
| 376 |
|
| 377 |
-
|
| 378 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 379 |
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 387 |
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 396 |
|
| 397 |
def __call__(self, question: str, task_id: str = "") -> str:
|
| 398 |
print(f"\n{'─'*60}")
|
| 399 |
-
print(f"[Q]: {question[:150]}
|
| 400 |
print(f"[TASK]: {task_id}")
|
| 401 |
|
| 402 |
-
# 1. Pre-process (reversed text detection)
|
| 403 |
processed = preprocess_question(question)
|
|
|
|
|
|
|
|
|
|
| 404 |
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
fc = download_task_file.__wrapped__(task_id) if hasattr(download_task_file, '__wrapped__') else ""
|
| 410 |
-
if fc and "No file" not in fc and "error" not in fc.lower():
|
| 411 |
-
file_context = fc
|
| 412 |
-
print(f" [FILE PRE-FETCH]: {len(file_context)} chars")
|
| 413 |
-
except Exception:
|
| 414 |
-
# Smolagents tool wrapper, proviamo direttamente
|
| 415 |
-
try:
|
| 416 |
-
file_url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
|
| 417 |
-
resp = requests.get(file_url, timeout=15)
|
| 418 |
-
if resp.status_code == 200:
|
| 419 |
-
ct = resp.headers.get("Content-Type", "")
|
| 420 |
-
cd = resp.headers.get("Content-Disposition", "")
|
| 421 |
-
filename = ""
|
| 422 |
-
if "filename=" in cd:
|
| 423 |
-
filename = cd.split("filename=")[-1].strip('" ')
|
| 424 |
-
ext = filename.rsplit(".", 1)[-1].lower() if "." in filename else ""
|
| 425 |
-
|
| 426 |
-
if any(t in ct for t in ["text", "json", "csv"]) or ext in ["txt", "csv", "json", "py"]:
|
| 427 |
-
file_context = resp.text[:8000]
|
| 428 |
-
elif "spreadsheet" in ct or "excel" in ct or ext in ["xlsx", "xls"]:
|
| 429 |
-
try:
|
| 430 |
-
df = pd.read_excel(io.BytesIO(resp.content), engine="openpyxl")
|
| 431 |
-
file_context = f"Excel: {len(df)} rows, cols={list(df.columns)}\n{df.to_string()}"[:8000]
|
| 432 |
-
except Exception:
|
| 433 |
-
pass
|
| 434 |
-
elif "pdf" in ct or ext == "pdf":
|
| 435 |
-
try:
|
| 436 |
-
import PyPDF2
|
| 437 |
-
reader = PyPDF2.PdfReader(io.BytesIO(resp.content))
|
| 438 |
-
file_context = "\n".join(
|
| 439 |
-
[p.extract_text() or "" for p in reader.pages]
|
| 440 |
-
)[:8000]
|
| 441 |
-
except Exception:
|
| 442 |
-
pass
|
| 443 |
-
print(f" [FILE PRE-FETCH direct]: {len(file_context)} chars")
|
| 444 |
-
except Exception as e:
|
| 445 |
-
print(f" [FILE PRE-FETCH failed]: {e}")
|
| 446 |
-
|
| 447 |
-
# 3. Detect special question types and handle directly
|
| 448 |
-
answer = self._handle_special_cases(processed, task_id, file_context)
|
| 449 |
-
if answer:
|
| 450 |
-
print(f" [SPECIAL CASE]: {answer}")
|
| 451 |
-
return answer
|
| 452 |
-
|
| 453 |
-
# 4. Tentativo con agente smolagents
|
| 454 |
-
if self.agent:
|
| 455 |
try:
|
| 456 |
-
|
| 457 |
raw = self.agent.run(prompt)
|
| 458 |
answer = clean_answer(str(raw))
|
| 459 |
-
if self.
|
| 460 |
-
print(f" [✅ AGENT]: {answer}")
|
| 461 |
return answer
|
| 462 |
-
print(f" [⚠️
|
| 463 |
except Exception as e:
|
| 464 |
-
|
| 465 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 466 |
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
|
|
|
|
|
|
|
|
|
| 470 |
if file_context:
|
| 471 |
-
|
| 472 |
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 476 |
|
| 477 |
-
def
|
| 478 |
-
|
| 479 |
-
if not answer:
|
| 480 |
return False
|
| 481 |
-
invalid =
|
| 482 |
"i don't know", "unknown", "n/a", "none", "error",
|
| 483 |
"i cannot", "i can't", "not available", "no answer",
|
| 484 |
-
"could not", "unable to", "i'm not sure",
|
| 485 |
-
|
| 486 |
return answer.lower().strip() not in invalid
|
| 487 |
|
| 488 |
-
def
|
| 489 |
-
"""
|
| 490 |
-
|
|
|
|
|
|
|
| 491 |
|
| 492 |
-
|
| 493 |
-
if file_context
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 494 |
try:
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
|
| 502 |
-
|
| 503 |
-
|
| 504 |
-
|
| 505 |
-
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
|
| 515 |
-
|
| 516 |
-
|
| 517 |
-
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
|
| 523 |
-
|
| 524 |
-
return str(int(val))
|
| 525 |
-
return f"${val:,.2f}" if val > 100 else str(val)
|
| 526 |
-
# Altrimenti somma la prima colonna numerica
|
| 527 |
-
val = list(totals.values())[0]
|
| 528 |
-
if val == int(val):
|
| 529 |
-
return str(int(val))
|
| 530 |
-
return str(val)
|
| 531 |
except Exception as e:
|
| 532 |
-
print(f"
|
| 533 |
|
| 534 |
-
return ""
|
| 535 |
|
| 536 |
|
| 537 |
# ==========================================
|
|
@@ -549,7 +465,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
| 549 |
print(f"{'='*60}")
|
| 550 |
|
| 551 |
try:
|
| 552 |
-
agent =
|
| 553 |
except Exception as e:
|
| 554 |
traceback.print_exc()
|
| 555 |
return f"Errore inizializzazione agente: {e}", None
|
|
@@ -575,7 +491,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
| 575 |
if not task_id or question_text is None:
|
| 576 |
continue
|
| 577 |
|
| 578 |
-
print(f"\n[{i+1}/{len(questions_data)}]
|
| 579 |
try:
|
| 580 |
answer = agent(question_text, task_id=task_id)
|
| 581 |
except Exception as e:
|
|
@@ -622,13 +538,13 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
| 622 |
|
| 623 |
|
| 624 |
# ==========================================
|
| 625 |
-
# 🖥️ INTERFACCIA
|
| 626 |
# ==========================================
|
| 627 |
with gr.Blocks() as demo:
|
| 628 |
-
gr.Markdown("# 🚀
|
| 629 |
gr.Markdown(
|
| 630 |
-
"
|
| 631 |
-
"
|
| 632 |
)
|
| 633 |
gr.LoginButton()
|
| 634 |
run_button = gr.Button("🔥 Avvia Valutazione & Invia Risposte", variant="primary")
|
|
|
|
| 2 |
import re
|
| 3 |
import io
|
| 4 |
import json
|
| 5 |
+
import time
|
| 6 |
import traceback
|
| 7 |
import gradio as gr
|
| 8 |
import requests
|
| 9 |
import pandas as pd
|
| 10 |
from bs4 import BeautifulSoup
|
| 11 |
+
from smolagents import (
|
| 12 |
+
CodeAgent,
|
| 13 |
+
DuckDuckGoSearchTool,
|
| 14 |
+
LiteLLMModel,
|
| 15 |
+
tool,
|
| 16 |
+
)
|
| 17 |
|
| 18 |
# --- Constants ---
|
| 19 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 20 |
|
| 21 |
+
# Groq è GRATIS, velocissimo, e ha modelli 70B
|
| 22 |
+
# Registrati su console.groq.com e metti GROQ_API_KEY nei secrets
|
| 23 |
+
GROQ_MODEL = "groq/llama-3.3-70b-versatile"
|
| 24 |
+
GROQ_SMALL = "groq/llama-3.1-8b-instant"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
|
| 27 |
# ==========================================
|
|
|
|
| 43 |
soup = BeautifulSoup(response.text, "html.parser")
|
| 44 |
for el in soup(["script", "style", "nav", "footer", "header", "aside", "noscript"]):
|
| 45 |
el.extract()
|
| 46 |
+
lines = [l.strip() for l in soup.get_text(separator="\n", strip=True).splitlines() if l.strip()]
|
|
|
|
|
|
|
| 47 |
return "\n".join(lines)[:15000]
|
| 48 |
except Exception as e:
|
| 49 |
return f"Error fetching {url}: {str(e)}"
|
|
|
|
| 87 |
@tool
|
| 88 |
def download_task_file(task_id: str) -> str:
|
| 89 |
"""Downloads and reads the file attached to a GAIA task.
|
| 90 |
+
Handles text, CSV, JSON, PDF, Excel (.xlsx/.xls), and Python files.
|
| 91 |
+
Always try this tool first if the question might reference an attached file.
|
| 92 |
Args:
|
| 93 |
task_id: The task_id string from the GAIA question.
|
| 94 |
"""
|
|
|
|
| 101 |
ct = response.headers.get("Content-Type", "")
|
| 102 |
cd = response.headers.get("Content-Disposition", "")
|
| 103 |
|
|
|
|
| 104 |
filename = ""
|
| 105 |
if "filename=" in cd:
|
| 106 |
filename = cd.split("filename=")[-1].strip('" ')
|
|
|
|
| 108 |
|
| 109 |
print(f" [FILE] type={ct}, name={filename}, ext={ext}, size={len(response.content)}")
|
| 110 |
|
| 111 |
+
# --- TEXT / CSV / JSON / PY ---
|
| 112 |
if any(t in ct for t in ["text", "json", "csv"]) or ext in ["txt", "csv", "json", "py", "md"]:
|
| 113 |
text = response.text
|
| 114 |
if ext == "csv" or "csv" in ct:
|
| 115 |
try:
|
| 116 |
df = pd.read_csv(io.StringIO(text))
|
| 117 |
+
return f"CSV with {len(df)} rows, columns: {list(df.columns)}\n\n{df.to_string()}"[:12000]
|
| 118 |
except Exception:
|
| 119 |
pass
|
| 120 |
return text[:12000]
|
|
|
|
| 123 |
if "spreadsheet" in ct or "excel" in ct or ext in ["xlsx", "xls"]:
|
| 124 |
try:
|
| 125 |
df = pd.read_excel(io.BytesIO(response.content), engine="openpyxl")
|
| 126 |
+
return f"Excel with {len(df)} rows, columns: {list(df.columns)}\n\n{df.to_string()}"[:12000]
|
|
|
|
|
|
|
|
|
|
| 127 |
except Exception as e:
|
| 128 |
return f"Excel file but read error: {e}"
|
| 129 |
|
|
|
|
| 132 |
try:
|
| 133 |
import PyPDF2
|
| 134 |
reader = PyPDF2.PdfReader(io.BytesIO(response.content))
|
| 135 |
+
pages = []
|
| 136 |
for i, page in enumerate(reader.pages):
|
| 137 |
t = page.extract_text() or ""
|
| 138 |
+
pages.append(f"[Page {i+1}] {t}")
|
| 139 |
+
return "\n".join(pages)[:12000]
|
| 140 |
except Exception as e:
|
| 141 |
return f"PDF attached but read error: {e}"
|
| 142 |
|
| 143 |
+
# --- AUDIO ---
|
| 144 |
if "audio" in ct or ext in ["mp3", "wav", "m4a", "ogg"]:
|
| 145 |
+
return f"Audio file attached ({ct}, {len(response.content)} bytes). Cannot transcribe in this environment."
|
| 146 |
|
| 147 |
# --- IMAGE ---
|
| 148 |
if "image" in ct or ext in ["png", "jpg", "jpeg", "gif", "webp"]:
|
| 149 |
return f"Image file attached ({ct}, {len(response.content)} bytes)."
|
| 150 |
|
| 151 |
+
# --- Fallback ---
|
| 152 |
try:
|
| 153 |
+
return response.content.decode("utf-8")[:12000]
|
|
|
|
| 154 |
except Exception:
|
| 155 |
+
return f"Binary file ({ct}, {len(response.content)} bytes)."
|
| 156 |
|
| 157 |
except Exception as e:
|
| 158 |
return f"File download error: {str(e)}"
|
| 159 |
|
| 160 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
# ==========================================
|
| 162 |
# 🔍 PRE-PROCESSING
|
| 163 |
# ==========================================
|
|
|
|
| 165 |
"""Detect reversed text and fix it."""
|
| 166 |
stripped = question.strip()
|
| 167 |
reversed_q = stripped[::-1]
|
| 168 |
+
keywords = ["answer", "what", "who", "how", "find", "list", "which", "where", "when", "the"]
|
| 169 |
+
score_orig = sum(1 for w in keywords if w in stripped.lower())
|
| 170 |
+
score_rev = sum(1 for w in keywords if w in reversed_q.lower())
|
| 171 |
+
if score_rev > score_orig and len(stripped) > 20:
|
| 172 |
+
print(f" [PRE-PROCESS] Reversed text detected!")
|
|
|
|
|
|
|
| 173 |
return reversed_q
|
|
|
|
| 174 |
return question
|
| 175 |
|
| 176 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
# ==========================================
|
| 178 |
# 🧹 PULIZIA RISPOSTA
|
| 179 |
# ==========================================
|
|
|
|
| 181 |
"""Pulisci la risposta grezza dall'agente."""
|
| 182 |
answer = str(raw).strip()
|
| 183 |
|
| 184 |
+
# Prima riga non vuota
|
| 185 |
lines = [l.strip() for l in answer.split("\n") if l.strip()]
|
| 186 |
if lines:
|
| 187 |
answer = lines[0]
|
| 188 |
|
| 189 |
+
# Rimuovi prefissi
|
| 190 |
prefixes = [
|
| 191 |
"the answer is:", "the answer is", "final answer:", "final answer is:",
|
| 192 |
"final answer is", "answer:", "answer is:", "answer is",
|
|
|
|
| 202 |
if lower.startswith(prefix):
|
| 203 |
answer = answer[len(prefix):].strip()
|
| 204 |
lower = answer.lower()
|
| 205 |
+
if answer and answer[0] in '"\'':
|
|
|
|
| 206 |
answer = answer[1:]
|
| 207 |
break
|
| 208 |
|
| 209 |
+
# Rimuovi punto finale (non se decimale)
|
| 210 |
if answer.endswith(".") and not re.search(r"\d\.$", answer):
|
| 211 |
answer = answer[:-1].strip()
|
| 212 |
|
| 213 |
+
# Pulizia markdown
|
| 214 |
answer = answer.replace("**", "").strip('"').strip("'").strip("`").strip()
|
| 215 |
|
|
|
|
| 216 |
if answer.lower().startswith("is "):
|
| 217 |
answer = answer[3:].strip()
|
| 218 |
|
|
|
|
| 220 |
|
| 221 |
|
| 222 |
# ==========================================
|
| 223 |
+
# 📁 PRE-FETCH FILE
|
| 224 |
# ==========================================
|
| 225 |
+
def prefetch_file(task_id: str) -> str:
    """Download the file attached to a task and return a text preview of it.

    The preview is injected into the agent prompt so the model has the file
    content before it takes its first step. This is a best-effort helper: any
    failure (no attachment, network error, unreadable file, unsupported type)
    yields an empty string instead of raising.

    Args:
        task_id: Identifier of the GAIA task whose attachment should be fetched.

    Returns:
        Up to 8000 characters of extracted text (CSV/Excel are rendered as a
        table with a short header line, PDFs as concatenated page text), or
        "" when nothing usable could be obtained.
    """
    if not task_id:
        return ""

    limit = 8000
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=15)
        if resp.status_code != 200:
            return ""

        # Media types are case-insensitive, so normalise before substring checks.
        ct = resp.headers.get("Content-Type", "").lower()
        filename = _attachment_filename(resp.headers.get("Content-Disposition", ""))
        ext = filename.rsplit(".", 1)[-1].lower() if "." in filename else ""

        is_texty = any(t in ct for t in ("text", "json", "csv"))
        if is_texty or ext in ("txt", "csv", "json", "py", "md"):
            if ext == "csv" or "csv" in ct:
                try:
                    df = pd.read_csv(io.StringIO(resp.text))
                    return f"CSV with {len(df)} rows, columns: {list(df.columns)}\n{df.to_string()}"[:limit]
                except Exception as e:
                    # Malformed CSV: fall through and hand back the raw text.
                    print(f" [FILE PRE-FETCH failed]: {e}")
            return resp.text[:limit]

        if "spreadsheet" in ct or "excel" in ct or ext in ("xlsx", "xls"):
            try:
                # openpyxl cannot open legacy .xls workbooks; for those let
                # pandas pick the engine itself.
                engine = None if ext == "xls" else "openpyxl"
                df = pd.read_excel(io.BytesIO(resp.content), engine=engine)
                return f"Excel with {len(df)} rows, columns: {list(df.columns)}\n{df.to_string()}"[:limit]
            except Exception as e:
                print(f" [FILE PRE-FETCH failed]: {e}")

        if "pdf" in ct or ext == "pdf":
            try:
                import PyPDF2

                reader = PyPDF2.PdfReader(io.BytesIO(resp.content))
                return "\n".join(p.extract_text() or "" for p in reader.pages)[:limit]
            except Exception as e:
                print(f" [FILE PRE-FETCH failed]: {e}")

        return ""
    except Exception as e:
        # Top-level boundary: pre-fetching must never abort the question run.
        print(f" [FILE PRE-FETCH failed]: {e}")
        return ""


def _attachment_filename(content_disposition: str) -> str:
    """Return the filename carried by a Content-Disposition header, or ""."""
    if not content_disposition:
        return ""
    # Local import keeps this block self-contained within the file.
    from email.message import Message

    # Message handles quoting, trailing parameters (e.g. `; size=3`) and the
    # RFC 2231 `filename*=` form, which a plain split on "filename=" does not.
    header = Message()
    header["Content-Disposition"] = content_disposition
    return (header.get_filename() or "").strip()
|
|
|
|
|
|
|
| 269 |
|
|
|
|
|
|
|
|
|
|
| 270 |
|
| 271 |
+
# ==========================================
|
| 272 |
+
# 🤖 AGENTE PRINCIPALE
|
| 273 |
+
# ==========================================
|
| 274 |
+
class GaiaAgent:
    """GAIA question-answering agent that runs a smolagents CodeAgent on Groq.

    Calling an instance with a question (and optional task id) returns a short
    final answer string. The tool-using agent is tried first; if it fails or
    produces a non-answer, a plain chat-completion request is used instead.
    """

    # Pause before every model call; the original code cites the Groq free
    # tier limit of 30 requests per minute as the reason.
    REQUEST_PAUSE_S = 3
    # Longer pause applied after a rate-limit response.
    RATE_LIMIT_PAUSE_S = 15
    # Number of times the tool-using agent is run before falling back.
    AGENT_ATTEMPTS = 2

    # Whole-string matches that mean "no answer was produced".
    _EXACT_NON_ANSWERS = frozenset(
        {"unknown", "n/a", "none", "error", "not available", "no answer"}
    )
    # Openings of refusal sentences; matched as prefixes so that
    # "I cannot determine ..." is rejected as well as a bare "I cannot".
    _REFUSAL_PREFIXES = (
        "i don't know", "i do not know", "i cannot", "i can't",
        "could not", "unable to", "i'm not sure", "i am not sure",
    )

    def __init__(self):
        """Build the Groq-backed model and the tool-using agent.

        Raises:
            ValueError: If the GROQ_API_KEY environment variable is not set.
        """
        print("=" * 60)
        print("🚀 Inizializzazione GaiaAgent con Groq...")
        print("=" * 60)

        groq_key = os.getenv("GROQ_API_KEY", "")
        if not groq_key:
            raise ValueError(
                "❌ GROQ_API_KEY non trovata nei secrets!\n"
                "1. Vai su console.groq.com\n"
                "2. Crea account gratis (no carta)\n"
                "3. Genera API key\n"
                "4. Mettila in Settings → Secrets del tuo HF Space"
            )

        print(" GROQ_API_KEY presente: ✅")
        print(f" Modello: {GROQ_MODEL}")

        self.model = LiteLLMModel(
            model_id=GROQ_MODEL,
            api_key=groq_key,
            temperature=0.1,
            max_tokens=1024,
        )

        search_tool = DuckDuckGoSearchTool()
        # Remember the name the agent's generated code must use to call the
        # search tool, so the prompt can reference it correctly.
        self._search_tool_name = getattr(search_tool, "name", "web_search")

        self.agent = CodeAgent(
            tools=[
                search_tool,
                visit_webpage,
                get_youtube_transcript,
                download_task_file,
            ],
            model=self.model,
            max_steps=8,
            additional_authorized_imports=[
                "requests", "bs4", "json", "time", "math", "datetime",
                "pandas", "numpy", "re", "csv", "urllib", "collections",
                "itertools", "string", "unicodedata", "statistics",
            ],
        )
        print(" ✅ Agent pronto!")

    def __call__(self, question: str, task_id: str = "") -> str:
        """Answer one question.

        Args:
            question: Raw question text (may be reversed; it is pre-processed).
            task_id: Task identifier used to look up an attached file.

        Returns:
            The cleaned final answer, or the fallback's result when the agent
            never produced a valid one.
        """
        print(f"\n{'─'*60}")
        print(f"[Q]: {question[:150]}")
        print(f"[TASK]: {task_id}")

        processed = preprocess_question(question)
        file_context = prefetch_file(task_id)
        if file_context:
            print(f" [FILE PRE-FETCH]: {len(file_context)} chars")

        prompt = self._build_prompt(processed, task_id, file_context)

        for attempt in range(self.AGENT_ATTEMPTS):
            try:
                time.sleep(self.REQUEST_PAUSE_S)
                raw = self.agent.run(prompt)
                answer = clean_answer(str(raw))
                if self._is_valid(answer):
                    print(f" [✅ AGENT attempt {attempt+1}]: {answer}")
                    return answer
                print(f" [⚠️ Invalid: '{answer}'] attempt {attempt+1}")
            except Exception as e:
                # Boundary catch: any agent failure should lead to a retry or
                # the fallback, never abort the whole evaluation run.
                err_str = str(e)
                print(f" [⚠️ ERROR attempt {attempt+1}]: {err_str[:200]}")
                if "429" in err_str or "rate" in err_str.lower():
                    print(f" Waiting {self.RATE_LIMIT_PAUSE_S}s for rate limit...")
                    time.sleep(self.RATE_LIMIT_PAUSE_S)

        print(" [→ FALLBACK DIRETTO]")
        answer = self._direct_fallback(processed, file_context)
        print(f" [FINAL]: {answer}")
        return answer

    def _build_prompt(self, question: str, task_id: str, file_context: str) -> str:
        """Compose the instruction prompt given to the tool-using agent.

        Args:
            question: Pre-processed question text.
            task_id: Task identifier; when non-empty a hint to download the
                attachment is included.
            file_context: Pre-fetched file text; at most 4000 characters of it
                are embedded.

        Returns:
            The full prompt string, ending with the question.
        """
        file_hint = ""
        if task_id:
            file_hint = f'\nThis question has task_id="{task_id}". Call download_task_file("{task_id}") to check for attached files.'

        extra = ""
        if file_context:
            extra = f"\n\n--- ATTACHED FILE CONTENT ---\n{file_context[:4000]}\n--- END FILE ---\n"

        # The agent writes code that calls tools by their registered name, not
        # by their Python class name.
        search_tool = getattr(self, "_search_tool_name", "web_search")

        return f"""You are an expert AI assistant solving GAIA benchmark questions.
Your ONLY goal: find the EXACT correct answer.

STRATEGY (follow in order):
1. If question has a YouTube URL → call get_youtube_transcript(url)
2. If question has any URL → call visit_webpage(url)
3. If there might be an attached file → call download_task_file(task_id)
4. For factual/historical questions → call {search_tool}(query), then visit_webpage for details
5. For math/calculations → write and execute Python code directly
6. If text looks reversed → reverse it: text[::-1]
7. For Excel/CSV data → use pandas to compute the answer from the data

ANSWER FORMAT (CRITICAL):
- Output ONLY the bare final answer
- Numbers: just the number (e.g., 3 or 12.5)
- Names: just the name (e.g., Einstein)
- Lists: comma-separated (e.g., cat, dog, bird)
- NEVER say "The answer is..." or "Based on..." — just the raw answer
- No periods at the end
{file_hint}{extra}
Question: {question}"""

    def _is_valid(self, answer: str) -> bool:
        """Return True when ``answer`` looks like a real answer.

        Empty or whitespace-only strings, known placeholder words, and
        sentences that open with a refusal phrase are rejected.
        """
        if not answer:
            return False
        # Normalise case, surrounding whitespace and typographic apostrophes
        # so "I don’t know" is treated like "I don't know".
        normalized = answer.strip().lower().replace("\u2019", "'")
        if not normalized:
            return False
        if normalized in self._EXACT_NON_ANSWERS:
            return False
        return not normalized.startswith(self._REFUSAL_PREFIXES)

    def _direct_fallback(self, question: str, file_context: str = "") -> str:
        """Ask Groq's chat-completions endpoint directly, without the agent.

        Each configured model is tried once, in order, and the first valid
        answer wins.

        Args:
            question: Pre-processed question text.
            file_context: Pre-fetched file text; at most 3000 characters of it
                are embedded.

        Returns:
            The cleaned answer, or "I don't know" when the API key is missing
            or no model produced a valid answer.
        """
        groq_key = os.getenv("GROQ_API_KEY", "")
        if not groq_key:
            return "I don't know"

        extra = ""
        if file_context:
            extra = f"\n\nAttached file content:\n{file_context[:3000]}\n"

        prompt = f"""Answer this question with ONLY the final answer.
No explanation. No preamble. No "The answer is".
Just the bare answer.

- Numbers: just digits
- Names: just the name
- Lists: comma-separated
- No period at the end
{extra}
Question: {question}

Answer:"""

        # The module constants carry a "groq/" provider prefix for LiteLLM;
        # this endpoint is addressed with the bare model id.
        models = [m.split("/", 1)[-1] for m in (GROQ_MODEL, GROQ_SMALL)]

        for model in models:
            try:
                time.sleep(self.REQUEST_PAUSE_S)
                resp = requests.post(
                    "https://api.groq.com/openai/v1/chat/completions",
                    headers={
                        "Authorization": f"Bearer {groq_key}",
                        "Content-Type": "application/json",
                    },
                    json={
                        "model": model,
                        "messages": [{"role": "user", "content": prompt}],
                        "temperature": 0.1,
                        "max_tokens": 200,
                    },
                    timeout=30,
                )

                if resp.status_code == 200:
                    data = resp.json()
                    # Content may be null in the response; treat it as empty.
                    raw = (data["choices"][0]["message"]["content"] or "").strip()
                    answer = clean_answer(raw)
                    if self._is_valid(answer):
                        print(f" [FALLBACK OK via {model}]: {answer}")
                        return answer
                elif resp.status_code == 429:
                    print(f" [RATE LIMITED {model}] — waiting {self.RATE_LIMIT_PAUSE_S}s...")
                    time.sleep(self.RATE_LIMIT_PAUSE_S)
                    continue
                else:
                    print(f" [FALLBACK {model}] HTTP {resp.status_code}: {resp.text[:200]}")
            except Exception as e:
                # Best effort: log and move on to the next model.
                print(f" [FALLBACK {model} ERROR]: {e}")

        return "I don't know"
|
| 451 |
|
| 452 |
|
| 453 |
# ==========================================
|
|
|
|
| 465 |
print(f"{'='*60}")
|
| 466 |
|
| 467 |
try:
|
| 468 |
+
agent = GaiaAgent()
|
| 469 |
except Exception as e:
|
| 470 |
traceback.print_exc()
|
| 471 |
return f"Errore inizializzazione agente: {e}", None
|
|
|
|
| 491 |
if not task_id or question_text is None:
|
| 492 |
continue
|
| 493 |
|
| 494 |
+
print(f"\n[{i+1}/{len(questions_data)}] ════════════════════════")
|
| 495 |
try:
|
| 496 |
answer = agent(question_text, task_id=task_id)
|
| 497 |
except Exception as e:
|
|
|
|
| 538 |
|
| 539 |
|
| 540 |
# ==========================================
|
| 541 |
+
# 🖥️ INTERFACCIA
|
| 542 |
# ==========================================
|
| 543 |
with gr.Blocks() as demo:
|
| 544 |
+
gr.Markdown("# 🚀 GAIA Agent — Final Assignment")
|
| 545 |
gr.Markdown(
|
| 546 |
+
"Powered by **Groq** (Llama 3.3 70B) — free & fast.\n\n"
|
| 547 |
+
"Login con HF, poi clicca il bottone."
|
| 548 |
)
|
| 549 |
gr.LoginButton()
|
| 550 |
run_button = gr.Button("🔥 Avvia Valutazione & Invia Risposte", variant="primary")
|