Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,6 +3,13 @@ import gradio as gr
|
|
| 3 |
import requests
|
| 4 |
import inspect
|
| 5 |
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
# (Keep Constants as is)
|
| 8 |
# --- Constants ---
|
|
@@ -11,189 +18,277 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
|
| 11 |
# --- Basic Agent Definition ---
|
| 12 |
# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
|
| 13 |
|
| 14 |
-
class
|
| 15 |
def __init__(self):
|
| 16 |
-
self.
|
| 17 |
self.setup_agent()
|
| 18 |
|
| 19 |
-
def
|
| 20 |
-
"""Configura i
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
def setup_agent(self):
|
| 33 |
-
"""Configura l'agente con
|
|
|
|
|
|
|
|
|
|
| 34 |
self.agent = CodeAgent(
|
| 35 |
tools=[
|
| 36 |
-
self.
|
| 37 |
-
self.
|
| 38 |
-
self.
|
| 39 |
-
self.
|
| 40 |
-
self.
|
| 41 |
],
|
| 42 |
-
model=
|
| 43 |
-
max_iterations=
|
| 44 |
-
verbosity=
|
| 45 |
)
|
| 46 |
|
| 47 |
@tool
|
| 48 |
-
def
|
| 49 |
-
"""Analizza immagini usando
|
| 50 |
try:
|
| 51 |
-
|
| 52 |
-
|
|
|
|
| 53 |
|
|
|
|
| 54 |
prompt = f"""
|
| 55 |
Analizza questa immagine per rispondere alla domanda: {question}
|
| 56 |
|
| 57 |
-
|
| 58 |
-
-
|
| 59 |
-
-
|
| 60 |
-
-
|
| 61 |
-
-
|
| 62 |
-
|
| 63 |
-
Risposta:
|
| 64 |
"""
|
| 65 |
|
| 66 |
-
response = self.
|
| 67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
except Exception as e:
|
| 69 |
-
return f"Errore
|
| 70 |
|
| 71 |
@tool
|
| 72 |
-
def
|
| 73 |
-
"""Trascrizione audio
|
| 74 |
try:
|
| 75 |
-
|
| 76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
except Exception as e:
|
| 78 |
-
return f"Errore
|
| 79 |
|
| 80 |
@tool
|
| 81 |
-
def
|
| 82 |
-
"""Estrae testo da
|
| 83 |
try:
|
|
|
|
|
|
|
| 84 |
if file_path.endswith('.txt'):
|
| 85 |
with open(file_path, 'r', encoding='utf-8') as f:
|
| 86 |
-
|
| 87 |
elif file_path.endswith('.csv'):
|
| 88 |
import pandas as pd
|
| 89 |
df = pd.read_csv(file_path)
|
| 90 |
-
|
| 91 |
elif file_path.endswith(('.xlsx', '.xls')):
|
| 92 |
import pandas as pd
|
| 93 |
df = pd.read_excel(file_path)
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
except Exception as e:
|
| 98 |
-
return f"Errore
|
| 99 |
|
| 100 |
@tool
|
| 101 |
-
def
|
| 102 |
-
"""
|
| 103 |
try:
|
| 104 |
-
#
|
| 105 |
-
|
| 106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
except Exception as e:
|
| 108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
|
| 110 |
@tool
|
| 111 |
-
def
|
| 112 |
-
"""
|
| 113 |
try:
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
except Exception as e:
|
| 119 |
-
return f"Errore
|
| 120 |
|
| 121 |
def solve_question(self, question: str, file_path: Optional[str] = None) -> str:
|
| 122 |
-
"""Risolve
|
| 123 |
|
| 124 |
-
# Prompt engineering
|
| 125 |
system_prompt = f"""
|
| 126 |
-
Sei un agente AI specializzato nel
|
| 127 |
|
| 128 |
-
OBIETTIVO: Fornire risposte
|
| 129 |
|
| 130 |
-
|
| 131 |
-
1.
|
| 132 |
-
2. Se c'è un file
|
| 133 |
-
3.
|
| 134 |
-
4.
|
| 135 |
-
5.
|
| 136 |
-
6. NON aggiungere prefissi come "Risposta:", "Il risultato è:", etc.
|
| 137 |
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
|
| 145 |
DOMANDA: {question}
|
| 146 |
-
{f"FILE
|
| 147 |
|
| 148 |
-
Risolvi step-by-step e fornisci SOLO la risposta finale
|
| 149 |
"""
|
| 150 |
|
| 151 |
try:
|
| 152 |
-
# Esegui l'agente con il prompt ottimizzato
|
| 153 |
response = self.agent.run(system_prompt)
|
| 154 |
-
|
| 155 |
-
# Post-processing per garantire formato corretto
|
| 156 |
-
answer = self.clean_answer(response, question)
|
| 157 |
-
|
| 158 |
-
return answer
|
| 159 |
|
| 160 |
except Exception as e:
|
| 161 |
-
return f"Errore
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
|
| 163 |
-
def
|
| 164 |
-
"""
|
| 165 |
|
| 166 |
# Rimuovi prefissi comuni
|
| 167 |
-
|
| 168 |
"Final Answer:", "Risposta:", "Answer:", "Il risultato è:",
|
| 169 |
-
"La risposta è:", "Risposta finale:", "ANSWER:", "RISPOSTA:"
|
|
|
|
| 170 |
]
|
| 171 |
|
| 172 |
cleaned = raw_answer.strip()
|
| 173 |
-
for prefix in
|
| 174 |
if cleaned.startswith(prefix):
|
| 175 |
cleaned = cleaned[len(prefix):].strip()
|
| 176 |
|
| 177 |
-
#
|
| 178 |
-
|
| 179 |
-
# Assicura formato lista separata da virgole
|
| 180 |
-
cleaned = re.sub(r'\s*,\s*', ', ', cleaned)
|
| 181 |
|
| 182 |
-
if "
|
| 183 |
-
# Estrai solo
|
| 184 |
-
numbers = re.findall(r'\d+
|
| 185 |
if numbers:
|
| 186 |
-
|
| 187 |
|
| 188 |
-
if "yes" in
|
| 189 |
-
#
|
| 190 |
if "yes" in cleaned.lower():
|
| 191 |
-
|
| 192 |
elif "no" in cleaned.lower():
|
| 193 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
|
| 195 |
return cleaned.strip()
|
| 196 |
|
|
|
|
| 197 |
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
| 198 |
"""
|
| 199 |
Fetches all questions, runs the BasicAgent on them, submits all answers,
|
|
@@ -316,35 +411,131 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 316 |
|
| 317 |
|
| 318 |
# --- Build Gradio Interface using Blocks ---
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 328 |
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 333 |
"""
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
gr.
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 343 |
|
| 344 |
-
run_button.click(
|
| 345 |
-
fn=run_and_submit_all,
|
| 346 |
-
outputs=[status_output, results_table]
|
| 347 |
-
)
|
| 348 |
|
| 349 |
if __name__ == "__main__":
|
| 350 |
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
|
@@ -368,4 +559,5 @@ if __name__ == "__main__":
|
|
| 368 |
print("-"*(60 + len(" App Starting ")) + "\n")
|
| 369 |
|
| 370 |
print("Launching Gradio Interface for Basic Agent Evaluation...")
|
| 371 |
-
|
|
|
|
|
|
import base64
import inspect
import io
import json
import os
import re
from typing import Any, Dict, List, Optional

import pandas as pd
import requests
from huggingface_hub import InferenceClient
from smolagents import CodeAgent, tool, HfApiModel
|
| 13 |
|
| 14 |
# (Keep Constants as is)
|
| 15 |
# --- Constants ---
|
|
|
|
| 18 |
# --- Basic Agent Definition ---
|
| 19 |
# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
|
| 20 |
|
| 21 |
+
class GAIAAgentHF:
    """GAIA Level 1 agent backed exclusively by Hugging Face hosted models.

    A smolagents ``CodeAgent`` drives the reasoning loop; specialised
    sub-tasks (vision, audio transcription, file analysis, math,
    summarisation) are delegated to hosted models through
    ``huggingface_hub.InferenceClient``.
    """

    def __init__(self):
        self.setup_hf_clients()
        self.setup_agent()

    def setup_hf_clients(self):
        """Configure the Hugging Face API client and the per-task model map."""
        # NOTE(review): anonymous access is rate-limited; set
        # HUGGINGFACE_HUB_TOKEN in the environment for reliable inference.
        self.hf_token = os.getenv("HUGGINGFACE_HUB_TOKEN")

        # Main client for all Inference API calls.
        # BUG FIX: the original subscripted the client object ("[4][8]"
        # citation artifacts pasted into the code), raising TypeError here.
        self.inference_client = InferenceClient(token=self.hf_token)

        # Hosted models available via the Inference API, keyed by task.
        self.models = {
            "vision": "microsoft/kosmos-2-patch14-224",  # multimodal image analysis
            "audio": "openai/whisper-large-v3",          # audio transcription
            "reasoning": "microsoft/DialoGPT-medium",    # reasoning / chat
            "math": "microsoft/DialoGPT-medium",         # math fallback
            "summarization": "facebook/bart-large-cnn",  # summarisation
        }

    def setup_agent(self):
        """Configure the CodeAgent with the HF API model and the tool set."""
        # HfApiModel performs the main reasoning via the HF Inference API.
        model = HfApiModel(model_id="microsoft/DialoGPT-medium", token=self.hf_token)

        # NOTE(review): smolagents @tool is designed for standalone
        # functions; passing bound methods here should be verified against
        # the installed smolagents version, as should the keyword names
        # max_iterations/verbosity.
        self.agent = CodeAgent(
            tools=[
                self.analyze_image_hf,
                self.transcribe_audio_hf,
                self.extract_and_analyze_text,
                self.perform_calculation,
                self.summarize_content,
            ],
            model=model,
            max_iterations=8,
            verbosity=1,
        )

    @tool
    def analyze_image_hf(self, image_path: str, question: str) -> str:
        """Analyze an image with the hosted vision model.

        Args:
            image_path: Path to the image file on disk.
            question: The question the image should answer.

        Returns:
            The model's cleaned answer, or an error message string.
        """
        try:
            # base64-encode the image for the API payload.
            # NOTE(review): InferenceClient's VQA endpoint also accepts raw
            # bytes or a path — confirm base64 is what this model expects.
            with open(image_path, "rb") as img_file:
                img_data = base64.b64encode(img_file.read()).decode()

            # GAIA-oriented prompt (model-facing text, kept in Italian).
            prompt = f"""
            Analizza questa immagine per rispondere alla domanda: {question}

            Istruzioni specifiche:
            - Se devi contare oggetti: fornisci il numero esatto
            - Se devi leggere testo: trascrivi letteralmente
            - Se devi identificare posizioni: usa riferimenti precisi
            - Rispondi solo con l'informazione richiesta, senza prefissi
            """

            response = self.inference_client.visual_question_answering(
                image=img_data,
                question=prompt,
                model=self.models["vision"],
            )

            return self._clean_response(response)

        except Exception as e:
            return f"Errore analisi immagine: {str(e)}"

    @tool
    def transcribe_audio_hf(self, audio_path: str) -> str:
        """Transcribe an audio file with Whisper via the HF Inference API.

        Args:
            audio_path: Path to the audio file on disk.

        Returns:
            The transcription text, or an error message string.
        """
        try:
            with open(audio_path, "rb") as audio_file:
                audio_data = audio_file.read()

            response = self.inference_client.automatic_speech_recognition(
                audio_data,
                model=self.models["audio"],
            )

            # NOTE(review): newer huggingface_hub versions return a typed
            # object with a .text attribute rather than a dict — verify.
            return response.get("text", "Trascrizione non disponibile")

        except Exception as e:
            return f"Errore trascrizione: {str(e)}"

    @tool
    def extract_and_analyze_text(self, file_path: str, question: str) -> str:
        """Extract text from a file; delegate long content to an HF model.

        Supports .txt, .csv and .xlsx/.xls files. Unknown extensions yield
        an empty string.

        Args:
            file_path: Path to the input file.
            question: The question the content should answer.

        Returns:
            Raw content for short files, a model answer for long ones,
            or an error message string.
        """
        try:
            # Basic per-extension text extraction.
            content = ""
            if file_path.endswith('.txt'):
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
            elif file_path.endswith('.csv'):
                import pandas as pd
                df = pd.read_csv(file_path)
                content = df.to_string()
            elif file_path.endswith(('.xlsx', '.xls')):
                import pandas as pd
                df = pd.read_excel(file_path)
                content = df.to_string()

            # Long content: let an HF model do the analysis on a prefix.
            if len(content) > 1000:
                summary_prompt = f"""
                Analizza questo contenuto per rispondere alla domanda: {question}

                Contenuto: {content[:2000]}...

                Fornisci una risposta precisa e diretta.
                """

                response = self.inference_client.text_generation(
                    summary_prompt,
                    model=self.models["summarization"],
                    max_new_tokens=200,
                )
                return response

            return content

        except Exception as e:
            return f"Errore elaborazione file: {str(e)}"

    @tool
    def perform_calculation(self, expression: str, context: str = "") -> str:
        """Evaluate an arithmetic expression, with an HF-model fallback.

        Args:
            expression: The arithmetic expression to evaluate.
            context: Optional hint ("decimal"/"integer") for output format.

        Returns:
            The formatted result, or an error message string.
        """
        try:
            # Whitelist digits, operators and parentheses before eval.
            # SECURITY NOTE: eval on user-derived text is risky even after
            # filtering; an AST-based arithmetic evaluator would be safer.
            import re
            safe_expr = re.sub(r'[^0-9+\-*/().\s]', '', expression)

            result = eval(safe_expr)

            # Format according to the requested context.
            if "decimal" in context.lower():
                return f"{result:.6f}".rstrip('0').rstrip('.')
            elif "integer" in context.lower():
                return str(int(result))
            else:
                return str(result)

        except Exception as e:
            # Fallback: ask an HF model for complex calculations.
            try:
                calc_prompt = f"Calcola: {expression}. Fornisci solo il risultato numerico."
                response = self.inference_client.text_generation(
                    calc_prompt,
                    model=self.models["math"],
                    max_new_tokens=50,
                )
                return self._extract_number(response)
            except Exception:
                # BUG FIX: was a bare ``except:`` which also swallowed
                # SystemExit/KeyboardInterrupt.
                return f"Errore calcolo: {str(e)}"

    @tool
    def summarize_content(self, text: str, focus: str = "") -> str:
        """Summarise text with BART via the HF API, optionally focused.

        Args:
            text: The text to summarise.
            focus: Optional topic to steer the summary toward.

        Returns:
            The summary text, or an error message string.
        """
        try:
            if focus:
                prompt = f"Riassumi questo testo focalizzandoti su: {focus}\n\nTesto: {text}"
            else:
                prompt = text

            # NOTE(review): check whether this huggingface_hub version
            # accepts max_length/min_length directly or via a parameters dict.
            response = self.inference_client.summarization(
                prompt,
                model=self.models["summarization"],
                max_length=150,
                min_length=30,
            )

            return response[0]["summary_text"] if isinstance(response, list) else response

        except Exception as e:
            return f"Errore summarization: {str(e)}"

    def solve_question(self, question: str, file_path: Optional[str] = None) -> str:
        """Solve a GAIA question with an exact-match-oriented prompt.

        Args:
            question: The GAIA question text.
            file_path: Optional path to an attached file.

        Returns:
            The formatted final answer, or an error message string.
        """
        # Prompt engineering targeting GAIA Level 1 exact-match scoring
        # (model-facing text, kept in Italian).
        system_prompt = f"""
        Sei un agente AI specializzato nel benchmark GAIA Level 1.

        OBIETTIVO CRITICO: Fornire risposte in formato EXACT MATCH.

        STRATEGIA:
        1. Analizza la domanda per identificare il tipo di risposta richiesta
        2. Se c'è un file, analizzalo completamente prima di procedere
        3. Usa gli strumenti appropriati per ogni tipo di task
        4. Verifica che la risposta sia nel formato esatto richiesto
        5. NON aggiungere prefissi, suffissi o spiegazioni extra

        FORMATI COMUNI GAIA:
        - Numeri: solo il valore (es. "42")
        - Liste: formato specificato nella domanda
        - Date: formato richiesto (es. "2023-01-15")
        - Yes/No: "Yes" o "No" esatti
        - Testo: risposta diretta senza elaborazioni

        DOMANDA: {question}
        {f"FILE DISPONIBILE: {file_path}" if file_path else ""}

        Risolvi step-by-step e fornisci SOLO la risposta finale.
        """

        try:
            response = self.agent.run(system_prompt)
            return self._format_final_answer(response, question)

        except Exception as e:
            return f"Errore risoluzione: {str(e)}"

    def _clean_response(self, response: Any) -> str:
        """Normalise heterogeneous HF API responses to a plain string."""
        if isinstance(response, dict):
            if "generated_text" in response:
                return response["generated_text"].strip()
            elif "answer" in response:
                return response["answer"].strip()
        elif isinstance(response, list) and response:
            return str(response[0]).strip()
        return str(response).strip()

    def _extract_number(self, text: str) -> str:
        """Return the first number found in *text*, else the stripped text."""
        import re
        numbers = re.findall(r'-?\d+(?:\.\d+)?', text)
        return numbers[0] if numbers else text.strip()

    def _format_final_answer(self, raw_answer: str, question: str) -> str:
        """Normalise the agent's output for GAIA exact-match scoring."""
        # BUG FIX: ``re`` was used below before its branch-local import.
        import re

        # Strip common answer prefixes.
        prefixes = [
            "Final Answer:", "Risposta:", "Answer:", "Il risultato è:",
            "La risposta è:", "Risposta finale:", "ANSWER:", "RISPOSTA:",
            "The answer is:", "Result:", "Output:"
        ]

        cleaned = raw_answer.strip()
        for prefix in prefixes:
            if cleaned.startswith(prefix):
                cleaned = cleaned[len(prefix):].strip()

        # Question-type-specific formatting.
        question_lower = question.lower()

        if "how many" in question_lower or "count" in question_lower:
            # Counting questions: keep only the first number.
            numbers = re.findall(r'\d+', cleaned)
            if numbers:
                return numbers[0]

        if "yes or no" in question_lower or ("yes" in question_lower and "no" in question_lower):
            # Standardise yes/no answers.
            if "yes" in cleaned.lower():
                return "Yes"
            elif "no" in cleaned.lower():
                return "No"

        if "list" in question_lower and "comma" in question_lower:
            # Normalise comma-separated lists.
            cleaned = re.sub(r'\s*,\s*', ', ', cleaned)

        return cleaned.strip()
|
| 290 |
|
| 291 |
+
|
| 292 |
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
| 293 |
"""
|
| 294 |
Fetches all questions, runs the BasicAgent on them, submits all answers,
|
|
|
|
| 411 |
|
| 412 |
|
| 413 |
# --- Build Gradio Interface using Blocks ---
|
| 414 |
+
class GAIAEvaluatorHF:
    """Thin client for the GAIA scoring API: fetch a question, solve, submit."""

    def __init__(self):
        self.base_url = "https://huggingface.co/spaces/huggingface-projects/gaia-benchmark-scoring/api"
        self.agent = GAIAAgentHF()

    def run_single_question(self, username: str) -> Dict:
        """Fetch one random question, solve it, submit, and report the outcome.

        Args:
            username: Hugging Face username (currently unused by the API calls).

        Returns:
            A dict with task_id/question/answer/result/file_used on success,
            or {"error": ...} on failure.
        """
        try:
            # Fetch a random question.
            # BUG FIX (robustness): requests without a timeout can hang forever.
            response = requests.get(f"{self.base_url}/random-question", timeout=30)
            question_data = response.json()

            task_id = question_data.get("task_id")
            # NOTE(review): the capitalised "Question" key should be verified
            # against the scoring API's actual payload schema.
            question_text = question_data.get("Question")

            # Download the attached file, if any.
            file_path = self._download_file(task_id)

            # Solve with the agent.
            answer = self.agent.solve_question(question_text, file_path)

            # Submit the answer for scoring.
            result = self._submit_answer(task_id, answer)

            return {
                "task_id": task_id,
                "question": question_text,
                "answer": answer,
                "result": result,
                "file_used": file_path is not None,
            }

        except Exception as e:
            return {"error": str(e)}

    def _download_file(self, task_id: str) -> Optional[str]:
        """Download the file attached to *task_id*; return its path or None."""
        try:
            response = requests.get(f"{self.base_url}/files/{task_id}", timeout=30)
            if response.status_code == 200:
                filename = f"task_{task_id}_file"
                with open(filename, 'wb') as f:
                    f.write(response.content)
                return filename
        except Exception:
            # BUG FIX: was a bare ``except:`` which also caught
            # SystemExit/KeyboardInterrupt; download is deliberately
            # best-effort, so we still swallow ordinary failures.
            pass
        return None

    def _submit_answer(self, task_id: str, answer: str) -> Dict:
        """Submit *answer* for *task_id*; return the API's JSON verdict."""
        payload = {"task_id": task_id, "submitted_answer": answer.strip()}
        try:
            response = requests.post(f"{self.base_url}/submit", json=payload, timeout=30)
            return response.json()
        except Exception as e:
            return {"error": str(e)}
|
| 470 |
|
| 471 |
+
def create_interface():
    """Build and return the Gradio Blocks UI for single-question GAIA testing.

    Wires a username textbox and a "test random question" button to
    GAIAEvaluatorHF.run_single_question, rendering the outcome as Markdown.
    """
    evaluator = GAIAEvaluatorHF()

    def test_single_question(username):
        # Require a username before hitting the scoring API.
        if not username:
            return "⚠️ Inserisci il tuo username Hugging Face"

        result = evaluator.run_single_question(username)

        # run_single_question returns {"error": ...} on any failure.
        if "error" in result:
            return f"❌ Errore: {result['error']}"

        # NOTE(review): the "correct" key is assumed to exist in the scoring
        # API's verdict — confirm against the actual response schema.
        status = "✅ CORRETTO" if result["result"].get("correct", False) else "❌ SBAGLIATO"
        file_info = "📁 Con file allegato" if result["file_used"] else "📄 Solo testo"

        # Markdown report shown in the output panel (user-facing, Italian).
        return f"""
        ## 🧪 Test Singola Domanda GAIA

        **Status:** {status}
        **Task ID:** {result['task_id']}
        **Tipo:** {file_info}

        ### 📝 Domanda:
        {result['question']}

        ### 🤖 Risposta dell'Agente:
        `{result['answer']}`

        ### 📊 Risultato Valutazione:
        {json.dumps(result['result'], indent=2)}
        """

    # Gradio interface layout.
    with gr.Blocks(title="🏆 GAIA Agent - HF API Version") as iface:
        gr.Markdown("# 🏆 GAIA Benchmark Agent - Hugging Face API")
        gr.Markdown("Agente ottimizzato per GAIA Level 1 usando esclusivamente modelli Hugging Face via API")

        with gr.Row():
            username_input = gr.Textbox(
                label="Username Hugging Face",
                placeholder="il-tuo-username",
                value=""
            )
            test_btn = gr.Button("🧪 Testa Domanda Random", variant="primary")

        output_display = gr.Markdown()

        test_btn.click(
            fn=test_single_question,
            inputs=[username_input],
            outputs=[output_display]
        )

        # Static usage notes shown under the controls (user-facing, Italian).
        gr.Markdown("""
        ### 🔧 Configurazione Richiesta:
        1. Imposta `HUGGINGFACE_HUB_TOKEN` nelle variabili d'ambiente
        2. Il token deve avere permessi per Inference API
        3. Assicurati di avere accesso ai modelli utilizzati

        ### 🎯 Modelli Utilizzati:
        - **Vision**: microsoft/kosmos-2-patch14-224
        - **Audio**: openai/whisper-large-v3
        - **Reasoning**: microsoft/DialoGPT-medium
        - **Summarization**: facebook/bart-large-cnn
        """)

    return iface
|
| 538 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 539 |
|
| 540 |
if __name__ == "__main__":
|
| 541 |
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
|
|
|
| 559 |
print("-"*(60 + len(" App Starting ")) + "\n")
|
| 560 |
|
| 561 |
print("Launching Gradio Interface for Basic Agent Evaluation...")
|
| 562 |
+
iface = create_interface()
|
| 563 |
+
iface.launch()
|