Update app.py
Browse files
app.py
CHANGED
|
@@ -5,43 +5,42 @@ import pandas as pd
|
|
| 5 |
import re
|
| 6 |
from datetime import datetime
|
| 7 |
import time
|
| 8 |
-
import tempfile
|
| 9 |
-
import atexit
|
|
|
|
| 10 |
|
| 11 |
-
# --- Smol Agents und HF Imports (
|
| 12 |
try:
|
| 13 |
-
|
| 14 |
-
from smolagents
|
| 15 |
-
|
| 16 |
-
print("Successfully imported from 'smolagents'")
|
| 17 |
except ImportError as e:
|
| 18 |
print(f"Error importing from smolagents: {e}")
|
| 19 |
print("Please ensure 'smolagents[huggingface]' is listed correctly in requirements.txt")
|
| 20 |
-
|
| 21 |
-
import sys
|
| 22 |
-
sys.exit(f"Fatal Error: Could not import smolagents. Check requirements.txt and rebuild/restart the Space. Original error: {e}")
|
| 23 |
|
| 24 |
-
|
|
|
|
| 25 |
|
| 26 |
-
# --- Suchtool Imports (
|
| 27 |
-
USE_TAVILY = False
|
| 28 |
-
USE_DUCKDUCKGO = True
|
| 29 |
|
| 30 |
if USE_TAVILY:
|
| 31 |
try:
|
| 32 |
from tavily import TavilyClient
|
| 33 |
except ImportError:
|
| 34 |
-
print("WARNUNG: TavilyClient nicht installiert.
|
| 35 |
USE_TAVILY = False
|
| 36 |
USE_DUCKDUCKGO = True # Fallback
|
| 37 |
if USE_DUCKDUCKGO:
|
| 38 |
try:
|
| 39 |
from duckduckgo_search import DDGS
|
| 40 |
except ImportError:
|
| 41 |
-
print("WARNUNG: duckduckgo-search nicht installiert.
|
| 42 |
USE_DUCKDUCKGO = False
|
| 43 |
|
| 44 |
-
# --- PDF Reader Import ---
|
| 45 |
try:
|
| 46 |
import PyPDF2
|
| 47 |
PDF_READER_AVAILABLE = True
|
|
@@ -51,36 +50,36 @@ except ImportError:
|
|
| 51 |
|
| 52 |
# --- Konstanten ---
|
| 53 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 54 |
-
#
|
| 55 |
-
|
|
|
|
| 56 |
|
| 57 |
-
# --- Globale Variablen
|
| 58 |
-
|
| 59 |
search_client = None
|
| 60 |
-
|
|
|
|
| 61 |
|
| 62 |
-
# --- Temporäre Datei Verwaltung ---
|
| 63 |
temp_files_to_clean = set()


def cleanup_temp_files():
    """Delete every tracked temporary file and stop tracking it.

    Walks the module-level ``temp_files_to_clean`` set and removes each
    path from disk. A path whose file is already gone is simply dropped
    from the set. A path that cannot be deleted (any other ``OSError``)
    is reported and left in the set so that a later call can retry.

    The function is registered with ``atexit`` below, and is safe to call
    repeatedly.
    """
    print("Cleaning up temporary files...")
    # Iterate over a snapshot: the set is mutated inside the loop.
    for file_path in list(temp_files_to_clean):
        try:
            # EAFP: attempt the removal directly instead of checking
            # os.path.exists() first, which is race-prone.
            os.remove(file_path)
        except FileNotFoundError:
            # Nothing left on disk; fall through and stop tracking it.
            pass
        except OSError as e:
            print(f"Error removing temporary file {file_path}: {e}")
            continue  # keep the path tracked so a later call can retry
        else:
            print(f"Removed temporary file: {file_path}")
        # discard() never raises, so no KeyError handling is required.
        temp_files_to_clean.discard(file_path)


# Run the cleanup automatically when the interpreter exits.
atexit.register(cleanup_temp_files)
|
| 81 |
|
| 82 |
-
|
| 83 |
-
# -
|
| 84 |
|
| 85 |
@tool
|
| 86 |
def search_web(query: str, max_results: int = 3) -> str:
|
|
@@ -93,6 +92,7 @@ def search_web(query: str, max_results: int = 3) -> str:
|
|
| 93 |
Returns:
|
| 94 |
str: A string containing the search results, or an error message.
|
| 95 |
"""
|
|
|
|
| 96 |
print(f"Tool: search_web(query='{query}', max_results={max_results})")
|
| 97 |
if not search_client:
|
| 98 |
return "Search tool is not available/configured."
|
|
@@ -107,7 +107,6 @@ def search_web(query: str, max_results: int = 3) -> str:
|
|
| 107 |
if not results: return "No search results found."
|
| 108 |
return "\n".join([f"Title: {r['title']}\nURL: {r['href']}\nSnippet: {r['body'][:500]}..." for r in results])
|
| 109 |
else:
|
| 110 |
-
# Dies sollte nicht passieren, wenn search_client gesetzt ist, aber als Absicherung
|
| 111 |
return "No compatible search client configured or available."
|
| 112 |
except Exception as e:
|
| 113 |
print(f"Search API Error ({type(e).__name__}): {e}")
|
|
@@ -124,13 +123,12 @@ def download_task_file(task_id: str) -> str:
|
|
| 124 |
str: The local path to the downloaded file (e.g., '/tmp/gaia_task_abc-123.pdf') if successful,
|
| 125 |
otherwise an error message starting with 'Error:'.
|
| 126 |
"""
|
|
|
|
| 127 |
print(f"Tool: download_task_file(task_id='{task_id}')")
|
| 128 |
file_url = f"{DEFAULT_API_URL}/files/{task_id}"
|
| 129 |
-
|
| 130 |
try:
|
| 131 |
response = requests.get(file_url, stream=True, timeout=30)
|
| 132 |
response.raise_for_status()
|
| 133 |
-
|
| 134 |
content_type = response.headers.get('content-type', '').lower()
|
| 135 |
suffix = ".tmp"
|
| 136 |
if 'pdf' in content_type: suffix = ".pdf"
|
|
@@ -138,36 +136,29 @@ def download_task_file(task_id: str) -> str:
|
|
| 138 |
elif 'jpeg' in content_type or 'jpg' in content_type: suffix = ".jpg"
|
| 139 |
elif 'csv' in content_type: suffix = ".csv"
|
| 140 |
elif 'plain' in content_type or 'text' in content_type: suffix = ".txt"
|
| 141 |
-
# Füge ggf. weitere Mappings hinzu
|
| 142 |
-
|
| 143 |
temp_dir = tempfile.gettempdir()
|
| 144 |
safe_task_id = re.sub(r'[^\w\-]+', '_', task_id)
|
| 145 |
-
# Erzeuge eindeutigeren Dateinamen, um Konflikte bei schnellen Läufen zu minimieren
|
| 146 |
timestamp = datetime.now().strftime("%Y%m%d%H%M%S%f")
|
| 147 |
temp_file_path = os.path.join(temp_dir, f"gaia_task_{safe_task_id}_{timestamp}{suffix}")
|
| 148 |
-
|
| 149 |
-
|
| 150 |
with open(temp_file_path, 'wb') as f:
|
| 151 |
for chunk in response.iter_content(chunk_size=8192):
|
| 152 |
f.write(chunk)
|
| 153 |
-
|
| 154 |
print(f"File downloaded successfully to {temp_file_path}")
|
| 155 |
temp_files_to_clean.add(temp_file_path)
|
| 156 |
-
return temp_file_path
|
| 157 |
-
|
| 158 |
except requests.exceptions.HTTPError as e:
|
| 159 |
if e.response.status_code == 404:
|
| 160 |
print(f"No file found on server for task_id {task_id}.")
|
| 161 |
-
return "Error: No file found for this task ID."
|
| 162 |
else:
|
| 163 |
print(f"HTTP Error downloading file for task {task_id}: {e}")
|
| 164 |
-
return f"Error: Failed to download file (HTTP {e.response.status_code})."
|
| 165 |
except requests.exceptions.RequestException as e:
|
| 166 |
print(f"Network Error downloading file for task {task_id}: {e}")
|
| 167 |
-
return f"Error: Failed to download file due to network issue: {e}"
|
| 168 |
except Exception as e:
|
| 169 |
print(f"Unexpected error downloading file for task {task_id} ({type(e).__name__}): {e}")
|
| 170 |
-
return f"Error: Unexpected error during file download: {e}"
|
| 171 |
|
| 172 |
@tool
|
| 173 |
def read_file_content(file_path: str) -> str:
|
|
@@ -179,43 +170,35 @@ def read_file_content(file_path: str) -> str:
|
|
| 179 |
Returns:
|
| 180 |
str: The extracted text content (truncated if very long), or an error message starting with 'Error:'.
|
| 181 |
"""
|
|
|
|
| 182 |
print(f"Tool: read_file_content(file_path='{file_path}')")
|
| 183 |
-
|
| 184 |
-
# Überprüfung des Inputs: Ist es überhaupt ein Pfad?
|
| 185 |
if not isinstance(file_path, str) or not os.path.isabs(file_path):
|
| 186 |
print(f"Invalid input for read_file_content: '{file_path}'. Expected an absolute file path.")
|
| 187 |
return "Error: Invalid input. Provide the absolute file path returned by download_task_file."
|
| 188 |
-
|
| 189 |
-
# Sicherheitscheck: Erlaube nur Lesen aus dem Temp-Verzeichnis (bleibt wichtig)
|
| 190 |
if not file_path.startswith(tempfile.gettempdir()):
|
| 191 |
print(f"Security Alert: Attempted to read file outside temp directory: {file_path}")
|
| 192 |
return "Error: Invalid file path provided. Only downloaded files can be read."
|
| 193 |
-
|
| 194 |
if not os.path.exists(file_path):
|
| 195 |
print(f"Error: File not found at path: {file_path}")
|
| 196 |
-
return f"Error: File not found at the specified path '{os.path.basename(file_path)}'."
|
| 197 |
-
|
| 198 |
try:
|
| 199 |
file_size = os.path.getsize(file_path)
|
| 200 |
print(f"Reading file: {os.path.basename(file_path)}, Size: {file_size} bytes")
|
| 201 |
if file_size == 0:
|
| 202 |
print(f"Warning: File {os.path.basename(file_path)} is empty.")
|
| 203 |
return f"Observation: The file '{os.path.basename(file_path)}' is empty."
|
| 204 |
-
|
| 205 |
if file_path.lower().endswith(".pdf"):
|
| 206 |
-
if not PDF_READER_AVAILABLE:
|
| 207 |
-
return "Error: Cannot read PDF file because PyPDF2 library is not installed."
|
| 208 |
text = ""
|
| 209 |
with open(file_path, 'rb') as f:
|
| 210 |
reader = PyPDF2.PdfReader(f)
|
| 211 |
num_pages = len(reader.pages)
|
| 212 |
print(f"Reading {num_pages} pages from PDF...")
|
| 213 |
for page_num in range(num_pages):
|
| 214 |
-
# Prüfe ob Seite Text enthält bevor Extraktion versucht wird
|
| 215 |
if reader.pages[page_num].extract_text():
|
| 216 |
page_text = reader.pages[page_num].extract_text()
|
| 217 |
-
text += page_text + "\n"
|
| 218 |
-
if len(text) > 7000:
|
| 219 |
text = text[:7000] + "\n... (content truncated)"
|
| 220 |
print(f"Text truncated at {len(text)} chars.")
|
| 221 |
break
|
|
@@ -224,155 +207,120 @@ def read_file_content(file_path: str) -> str:
|
|
| 224 |
return f"Observation: Could not extract any text content from the PDF file '{os.path.basename(file_path)}'."
|
| 225 |
print(f"Successfully read ~{len(text)} chars from PDF: {os.path.basename(file_path)}")
|
| 226 |
return f"Content of '{os.path.basename(file_path)}':\n{text}"
|
| 227 |
-
|
| 228 |
elif file_path.lower().endswith((".png", ".jpg", ".jpeg", ".gif", ".bmp")):
|
| 229 |
print(f"File is an image: {os.path.basename(file_path)}. Reading image content not implemented.")
|
| 230 |
return f"Observation: File '{os.path.basename(file_path)}' is an image. Cannot read text content with this tool."
|
| 231 |
-
|
| 232 |
-
else: # Versuche als Text zu lesen (TXT, CSV, etc.)
|
| 233 |
-
# Lese in Chunks um Speicher zu schonen bei großen Textdateien
|
| 234 |
content = ""
|
| 235 |
-
chunk_size = 4096
|
| 236 |
-
max_len = 7000
|
| 237 |
-
truncated = False
|
| 238 |
try:
|
| 239 |
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
|
| 240 |
while len(content) < max_len:
|
| 241 |
-
chunk = f.read(chunk_size)
|
| 242 |
-
if not chunk:
|
| 243 |
-
break
|
| 244 |
content += chunk
|
| 245 |
-
if len(content) > max_len:
|
| 246 |
-
content = content[:max_len]
|
| 247 |
-
truncated = True
|
| 248 |
print(f"Successfully read ~{len(content)} chars from text file: {os.path.basename(file_path)}")
|
| 249 |
result = f"Content of '{os.path.basename(file_path)}':\n{content}"
|
| 250 |
-
if truncated:
|
| 251 |
-
result += "\n... (content truncated)"
|
| 252 |
return result
|
| 253 |
-
except Exception as read_err:
|
| 254 |
print(f"Error reading file {file_path} as text: {read_err}")
|
| 255 |
return f"Error: Failed to read file '{os.path.basename(file_path)}' as text: {read_err}"
|
| 256 |
-
|
| 257 |
-
|
| 258 |
except Exception as e:
|
| 259 |
print(f"Error reading file {file_path} ({type(e).__name__}): {e}")
|
| 260 |
return f"Error: Failed to read file content: {e}"
|
| 261 |
|
| 262 |
|
| 263 |
-
# --- Agent Initialisierung ---
|
| 264 |
def initialize_agent():
|
| 265 |
-
"""Initialisiert den smolagents
|
| 266 |
-
global
|
| 267 |
print("Initializing agent and clients...")
|
| 268 |
|
| 269 |
-
#
|
| 270 |
-
if
|
| 271 |
-
|
| 272 |
-
if not hf_token:
|
| 273 |
-
raise ValueError("HUGGINGFACE_TOKEN Secret nicht im Hugging Face Space gefunden!")
|
| 274 |
-
|
| 275 |
-
if not search_client:
|
| 276 |
if USE_TAVILY:
|
| 277 |
tavily_key = os.getenv("TAVILY_API_KEY")
|
| 278 |
if tavily_key:
|
| 279 |
-
try:
|
| 280 |
-
|
| 281 |
-
print("Using Tavily for search.")
|
| 282 |
-
except NameError:
|
| 283 |
-
print("WARNUNG: TavilyClient Klasse nicht gefunden, obwohl USE_TAVILY=True.")
|
| 284 |
-
search_client = None # Verhindere Nutzung
|
| 285 |
else:
|
| 286 |
-
print("WARNUNG: TAVILY_API_KEY nicht gefunden
|
| 287 |
-
# Fallback nur wenn Tavily nicht
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
print("Falling back to DuckDuckGo for search.")
|
| 292 |
-
except NameError:
|
| 293 |
-
search_client = None
|
| 294 |
-
print("WARNUNG: DuckDuckGo auch nicht verfügbar. Suche deaktiviert.")
|
| 295 |
-
else:
|
| 296 |
-
search_client = None
|
| 297 |
-
print("WARNUNG: Suche deaktiviert (Tavily Key fehlt).")
|
| 298 |
elif USE_DUCKDUCKGO:
|
| 299 |
-
try:
|
| 300 |
-
|
| 301 |
-
print("Using DuckDuckGo for search.")
|
| 302 |
-
except NameError:
|
| 303 |
-
search_client = None
|
| 304 |
-
print("WARNUNG: duckduckgo-search nicht installiert/verfügbar. Suche deaktiviert.")
|
| 305 |
else:
|
| 306 |
-
search_client = None
|
| 307 |
print("Web search is disabled by configuration.")
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
#
|
| 311 |
-
#
|
| 312 |
-
#
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 324 |
|
| 325 |
# --- Agent Instanz ---
|
| 326 |
available_tools = [search_web, download_task_file, read_file_content]
|
| 327 |
-
# Filter out None tools
|
| 328 |
-
active_tools = [t for t in available_tools if t is not None]
|
| 329 |
-
|
| 330 |
-
# Stelle sicher, dass 'tool' importiert wurde
|
| 331 |
-
if 'tool' not in globals():
|
| 332 |
-
raise NameError("Die 'tool' Funktion von smolagents konnte nicht importiert werden.")
|
| 333 |
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
tools=active_tools,
|
| 337 |
-
|
| 338 |
)
|
| 339 |
-
print(f"Smol
|
| 340 |
if len(active_tools) < len(available_tools):
|
| 341 |
-
print(f"Warning: Some tools might be inactive
|
| 342 |
|
| 343 |
|
| 344 |
-
# ---
|
|
|
|
|
|
|
| 345 |
def run_and_submit_all( profile: gr.OAuthProfile | None, progress=gr.Progress(track_tqdm=True)):
|
| 346 |
"""
|
| 347 |
-
Fetches all questions, runs the smolagents
|
| 348 |
and displays the results. Includes Gradio progress tracking.
|
| 349 |
"""
|
| 350 |
space_id = os.getenv("SPACE_ID")
|
| 351 |
-
|
| 352 |
-
if profile:
|
| 353 |
-
username= f"{profile.username}"
|
| 354 |
-
print(f"User logged in: {username}")
|
| 355 |
-
else:
|
| 356 |
print("User not logged in.")
|
| 357 |
-
# Gib None für DataFrame zurück, um Fehler in Gradio zu vermeiden
|
| 358 |
return "Please Login to Hugging Face with the button.", None
|
|
|
|
|
|
|
| 359 |
|
| 360 |
api_url = DEFAULT_API_URL
|
| 361 |
questions_url = f"{api_url}/questions"
|
| 362 |
submit_url = f"{api_url}/submit"
|
| 363 |
|
| 364 |
-
# 1. Initialisiere Agent
|
| 365 |
progress(0, desc="Initializing Agent...")
|
| 366 |
try:
|
| 367 |
initialize_agent()
|
| 368 |
-
if not agent_instance:
|
| 369 |
-
|
| 370 |
-
except
|
| 371 |
-
print(f"Error during initialization: {e}")
|
| 372 |
-
return f"Configuration Error: {e}", None
|
| 373 |
-
except Exception as e:
|
| 374 |
-
print(f"Error initializing agent/clients ({type(e).__name__}): {e}")
|
| 375 |
-
return f"Error initializing agent: {e}", None
|
| 376 |
|
| 377 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Local Run - Code Link N/A"
|
| 378 |
print(f"Agent Code Link: {agent_code}")
|
|
@@ -389,28 +337,22 @@ def run_and_submit_all( profile: gr.OAuthProfile | None, progress=gr.Progress(tr
|
|
| 389 |
return "Fetched questions list is empty or invalid format.", None
|
| 390 |
num_questions = len(questions_data)
|
| 391 |
print(f"Fetched {num_questions} questions.")
|
| 392 |
-
except Exception as e:
|
| 393 |
-
print(f"Error fetching questions ({type(e).__name__}): {e}")
|
| 394 |
-
return f"Error fetching questions: {e}", None
|
| 395 |
|
| 396 |
-
|
| 397 |
-
# 3. Run your Smol Agent with progress tracking
|
| 398 |
start_time = datetime.now()
|
| 399 |
results_log = []
|
| 400 |
answers_payload = []
|
| 401 |
-
print(f"Running
|
| 402 |
|
| 403 |
-
|
| 404 |
-
# for i, item in enumerate(tqdm(questions_data, desc="Processing Questions")): # Standard tqdm
|
| 405 |
-
for i, item in enumerate(progress.tqdm(questions_data, desc="Processing Questions")): # Gradio tqdm
|
| 406 |
task_id = item.get("task_id")
|
| 407 |
question_text = item.get("question")
|
| 408 |
-
|
| 409 |
if not task_id or question_text is None:
|
| 410 |
print(f"Skipping item {i+1} with missing task_id or question: {item}")
|
| 411 |
continue
|
| 412 |
|
| 413 |
-
# --- Prompt für smolagents ---
|
| 414 |
agent_prompt = f"""
|
| 415 |
You are an expert AI assistant solving a challenge question based on the GAIA benchmark.
|
| 416 |
Your task is to answer the following question accurately and concisely.
|
|
@@ -435,177 +377,110 @@ Think step-by-step before deciding on an action or the final answer.
|
|
| 435 |
|
| 436 |
Let's begin the thinking process for Task {task_id}.
|
| 437 |
"""
|
| 438 |
-
|
| 439 |
-
submitted_answer = f"Error: Agent failed for task {task_id}" # Default error
|
| 440 |
try:
|
| 441 |
-
# Führe den Agenten aus
|
| 442 |
-
agent_response = agent_instance.run(prompt=agent_prompt)
|
| 443 |
|
| 444 |
if agent_response:
|
| 445 |
-
# Einfache Bereinigung: Entferne häufige Präfixe und überflüssige Leerzeichen
|
| 446 |
-
# Manchmal geben Modelle trotz Anweisung Präfixe aus.
|
| 447 |
cleaned_response = re.sub(r"^(Final Answer:|Answer:|The answer is:|Here is the answer:)\s*", "", agent_response.strip(), flags=re.IGNORECASE)
|
| 448 |
-
# Entferne auch Anführungszeichen am Anfang/Ende, falls das Modell sie hinzufügt
|
| 449 |
cleaned_response = cleaned_response.strip('"').strip("'")
|
| 450 |
submitted_answer = cleaned_response if cleaned_response else "Error: Agent returned empty response after cleaning."
|
| 451 |
else:
|
| 452 |
submitted_answer = "Error: Agent returned an empty or None response."
|
| 453 |
-
|
| 454 |
-
|
| 455 |
print(f"Task {task_id} completed. Submitted Answer: '{submitted_answer}'")
|
| 456 |
|
| 457 |
-
# Handle specific exceptions if needed, e.g., RateLimitError from HF
|
| 458 |
except Exception as e:
|
| 459 |
error_msg = f"AGENT_RUN_ERROR on task {task_id} ({type(e).__name__}): {e}"
|
| 460 |
print(error_msg)
|
| 461 |
-
|
|
|
|
| 462 |
|
| 463 |
finally:
|
| 464 |
-
# Füge das Ergebnis (oder den Fehler) hinzu
|
| 465 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
| 466 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
|
| 467 |
-
# Optional: Sofortige Bereinigung (kann Laufzeit verlängern)
|
| 468 |
-
# cleanup_temp_files()
|
| 469 |
|
| 470 |
-
end_time = datetime.now()
|
| 471 |
-
duration = end_time - start_time
|
| 472 |
print(f"Agent processing finished in {duration}.")
|
| 473 |
progress(0.9, desc="Submitting answers...")
|
| 474 |
|
| 475 |
-
# 4. Prepare Submission
|
| 476 |
if not answers_payload:
|
| 477 |
-
print("Agent did not produce any answers to submit.")
|
| 478 |
-
cleanup_temp_files() # Aufräumen
|
| 479 |
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
| 480 |
-
|
| 481 |
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
| 482 |
-
|
| 483 |
-
print(status_update)
|
| 484 |
-
|
| 485 |
-
# 5. Submit
|
| 486 |
-
print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
|
| 487 |
-
final_status = "Submission attempt finished." # Default status
|
| 488 |
-
results_df = pd.DataFrame(results_log) # Erstelle DataFrame vor dem Try-Block
|
| 489 |
|
|
|
|
|
|
|
|
|
|
| 490 |
try:
|
| 491 |
response = requests.post(submit_url, json=submission_data, timeout=120)
|
| 492 |
response.raise_for_status()
|
| 493 |
result_data = response.json()
|
| 494 |
-
final_status = (
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
f"Score: {result_data.get('score', 'N/A'):.2f}% "
|
| 498 |
-
f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
|
| 499 |
-
f"Message: {result_data.get('message', 'No message received.')}"
|
| 500 |
-
)
|
| 501 |
print("Submission successful.")
|
| 502 |
except requests.exceptions.HTTPError as e:
|
| 503 |
-
error_detail = f"
|
|
|
|
| 504 |
try:
|
| 505 |
-
error_json = e.response.json()
|
| 506 |
-
api_error =
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
print(final_status)
|
| 515 |
-
except requests.exceptions.Timeout:
|
| 516 |
-
final_status = "Submission Failed: The request timed out."
|
| 517 |
-
print(final_status)
|
| 518 |
-
except requests.exceptions.RequestException as e:
|
| 519 |
-
final_status = f"Submission Failed: Network error - {e}"
|
| 520 |
-
print(final_status)
|
| 521 |
-
except Exception as e:
|
| 522 |
-
final_status = f"An unexpected error occurred during submission ({type(e).__name__}): {e}"
|
| 523 |
-
print(final_status)
|
| 524 |
-
finally:
|
| 525 |
-
cleanup_temp_files() # Stelle sicher, dass aufgeräumt wird
|
| 526 |
|
| 527 |
progress(1, desc="Done.")
|
| 528 |
return final_status, results_df
|
| 529 |
|
| 530 |
|
| 531 |
-
# --- Gradio Interface (
|
| 532 |
with gr.Blocks() as demo:
|
| 533 |
-
gr.Markdown("# Smol
|
| 534 |
-
gr.Markdown(
|
| 535 |
-
f"""
|
| 536 |
**Instructions:**
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
5. Click 'Run Evaluation & Submit'. **This will take time.** Monitor progress below and console logs.
|
| 543 |
-
|
| 544 |
---
|
| 545 |
-
**Agent Details:**
|
| 546 |
-
|
| 547 |
-
|
| 548 |
-
"""
|
| 549 |
-
)
|
| 550 |
-
|
| 551 |
-
# Platzhalter für Login-Status (vereinfacht)
|
| 552 |
-
# Gradio's LoginButton handhabt das meiste intern
|
| 553 |
-
# profile_info = gr.State(None) # Nicht unbedingt nötig, wenn LoginButton direkt genutzt wird
|
| 554 |
-
|
| 555 |
-
with gr.Row():
|
| 556 |
-
login_button = gr.LoginButton()
|
| 557 |
-
# Logout nicht direkt implementiert, User kann sich auf HF ausloggen
|
| 558 |
-
|
| 559 |
run_button = gr.Button("Run Evaluation & Submit All Answers")
|
| 560 |
-
|
| 561 |
status_output = gr.Textbox(label="Run Status / Submission Result", lines=10, interactive=False)
|
| 562 |
results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True, height=400)
|
| 563 |
|
| 564 |
-
# --- Event Handler für den Button ---
|
| 565 |
-
# Diese Funktion wird aufgerufen, wenn der Button geklickt wird.
|
| 566 |
-
# Sie erhält das OAuth-Profil, wenn der User eingeloggt ist.
|
| 567 |
def handle_run(
    request: gr.Request,
    profile: gr.OAuthProfile | None = None,
    progress=gr.Progress(track_tqdm=True),
):
    """Click handler: run the evaluation for the logged-in user.

    Gradio fills in all three parameters itself (the click event declares
    no explicit inputs):

    * ``request`` is injected because of its ``gr.Request`` annotation.
    * ``profile`` is injected because of its ``gr.OAuthProfile | None``
      annotation; it is ``None`` when the user is not logged in.
    * ``progress`` is injected because its default value is a
      ``gr.Progress`` instance, which is what connects it to the UI.

    Returns:
        Whatever ``run_and_submit_all`` returns (status text and results
        table), passed straight through to the output components.
    """
    if profile is None:
        # Legacy fallback kept for compatibility. gr.Request does not
        # expose a ``profile`` attribute, so this normally stays None.
        profile = getattr(request, 'profile', None)
    # Forward the injected tracker; a gr.Progress created inside the body
    # would not be attached to the running event.
    return run_and_submit_all(profile, progress=progress)
|
| 573 |
|
| 574 |
-
run_button.click(
|
| 575 |
-
fn=handle_run, # Verwende die Wrapper-Funktion
|
| 576 |
-
inputs=[], # Keine expliziten Inputs nötig, Profil kommt vom Request
|
| 577 |
-
outputs=[status_output, results_table],
|
| 578 |
-
api_name="run_evaluation_smol"
|
| 579 |
-
)
|
| 580 |
-
|
| 581 |
|
| 582 |
# --- App Start (unverändert) ---
|
| 583 |
if __name__ == "__main__":
|
| 584 |
-
print("\n" + "-"*30 + " App Starting (Smol
|
|
|
|
| 585 |
space_host_startup = os.getenv("SPACE_HOST")
|
| 586 |
space_id_startup = os.getenv("SPACE_ID")
|
| 587 |
-
|
| 588 |
-
|
| 589 |
-
|
| 590 |
-
|
| 591 |
-
|
| 592 |
-
|
| 593 |
-
|
| 594 |
-
if space_id_startup:
|
| 595 |
-
print(f"✅ SPACE_ID found: {space_id_startup}")
|
| 596 |
-
print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
|
| 597 |
-
print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
|
| 598 |
-
else:
|
| 599 |
-
print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
|
| 600 |
-
|
| 601 |
-
print(f" Using HF Model via Inference API: {HF_MODEL_ID}")
|
| 602 |
-
search_tool_status = 'Disabled'
|
| 603 |
if USE_TAVILY: search_tool_status = 'Tavily'
|
| 604 |
elif USE_DUCKDUCKGO: search_tool_status = 'DuckDuckGo'
|
| 605 |
print(f" Search Tool: {search_tool_status}")
|
| 606 |
print(f" PDF Reading: {'Enabled' if PDF_READER_AVAILABLE else 'Disabled (PyPDF2 missing)'}")
|
| 607 |
-
print("-"*(60 + len(" App Starting (Smol
|
| 608 |
-
|
| 609 |
-
print("Launching Gradio Interface for Smol Agents Evaluation...")
|
| 610 |
-
# Setze queue=True für bessere Handhabung langer Läufe
|
| 611 |
demo.queue().launch(debug=False, share=False)
|
|
|
|
| 5 |
import re
|
| 6 |
from datetime import datetime
|
| 7 |
import time
|
| 8 |
+
import tempfile
|
| 9 |
+
import atexit
|
| 10 |
+
import sys # Für sys.exit bei Importfehlern
|
| 11 |
|
| 12 |
+
# --- Smol Agents und HF Imports (angepasst an Beispiel) ---
|
| 13 |
try:
|
| 14 |
+
# Verwende CodeAgent und HfApiModel wie im Beispiel
|
| 15 |
+
from smolagents import CodeAgent, tool, HfApiModel
|
| 16 |
+
print("Successfully imported CodeAgent, tool, HfApiModel from 'smolagents'")
|
|
|
|
| 17 |
except ImportError as e:
|
| 18 |
print(f"Error importing from smolagents: {e}")
|
| 19 |
print("Please ensure 'smolagents[huggingface]' is listed correctly in requirements.txt")
|
| 20 |
+
sys.exit(f"Fatal Error: Could not import smolagents components. Check requirements.txt and rebuild/restart the Space. Original error: {e}")
|
|
|
|
|
|
|
| 21 |
|
| 22 |
+
# huggingface_hub wird möglicherweise von HfApiModel intern genutzt
|
| 23 |
+
from huggingface_hub import HfApi
|
| 24 |
|
| 25 |
+
# --- Suchtool Imports (wie zuvor) ---
|
| 26 |
+
USE_TAVILY = False
|
| 27 |
+
USE_DUCKDUCKGO = True
|
| 28 |
|
| 29 |
if USE_TAVILY:
|
| 30 |
try:
|
| 31 |
from tavily import TavilyClient
|
| 32 |
except ImportError:
|
| 33 |
+
print("WARNUNG: TavilyClient nicht installiert.")
|
| 34 |
USE_TAVILY = False
|
| 35 |
USE_DUCKDUCKGO = True # Fallback
|
| 36 |
if USE_DUCKDUCKGO:
|
| 37 |
try:
|
| 38 |
from duckduckgo_search import DDGS
|
| 39 |
except ImportError:
|
| 40 |
+
print("WARNUNG: duckduckgo-search nicht installiert.")
|
| 41 |
USE_DUCKDUCKGO = False
|
| 42 |
|
| 43 |
+
# --- PDF Reader Import (wie zuvor) ---
|
| 44 |
try:
|
| 45 |
import PyPDF2
|
| 46 |
PDF_READER_AVAILABLE = True
|
|
|
|
| 50 |
|
| 51 |
# --- Konstanten ---
|
| 52 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 53 |
+
# HfApiModel liest dies wahrscheinlich aus der Umgebungsvariable HF_MODEL_ID
|
| 54 |
+
# oder hat einen internen Default. Wir setzen sie weiterhin als Fallback/Info.
|
| 55 |
+
HF_MODEL_ID = os.getenv("HF_MODEL_ID", "meta-llama/Meta-Llama-3-8B-Instruct")
|
| 56 |
|
| 57 |
+
# --- Globale Variablen ---
|
| 58 |
+
# search_client wird weiterhin benötigt, da die Tools darauf zugreifen
|
| 59 |
search_client = None
|
| 60 |
+
# agent_instance wird pro Lauf initialisiert
|
| 61 |
+
agent_instance = None
|
| 62 |
|
| 63 |
+
# --- Temporäre Datei Verwaltung (wie zuvor) ---
|
| 64 |
temp_files_to_clean = set()


def cleanup_temp_files():
    """Delete every tracked temporary file and stop tracking it.

    Walks the module-level ``temp_files_to_clean`` set and removes each
    path from disk. A path whose file is already gone is simply dropped
    from the set. A path that cannot be deleted (any other ``OSError``)
    is reported and left in the set so that a later call can retry.

    The function is registered with ``atexit`` below, and is safe to call
    repeatedly.
    """
    print("Cleaning up temporary files...")
    # Iterate over a snapshot: the set is mutated inside the loop.
    for file_path in list(temp_files_to_clean):
        try:
            # EAFP: attempt the removal directly instead of checking
            # os.path.exists() first, which is race-prone.
            os.remove(file_path)
        except FileNotFoundError:
            # Nothing left on disk; fall through and stop tracking it.
            pass
        except OSError as e:
            print(f"Error removing temporary file {file_path}: {e}")
            continue  # keep the path tracked so a later call can retry
        else:
            print(f"Removed temporary file: {file_path}")
        # discard() never raises, so no KeyError handling is required.
        temp_files_to_clean.discard(file_path)


# Run the cleanup automatically when the interpreter exits.
atexit.register(cleanup_temp_files)
|
| 80 |
|
| 81 |
+
# --- Tool Definitionen (unverändert) ---
|
| 82 |
+
# Die @tool-Deklarationen und Funktionslogik bleiben gleich.
|
| 83 |
|
| 84 |
@tool
|
| 85 |
def search_web(query: str, max_results: int = 3) -> str:
|
|
|
|
| 92 |
Returns:
|
| 93 |
str: A string containing the search results, or an error message.
|
| 94 |
"""
|
| 95 |
+
# (Code unverändert)
|
| 96 |
print(f"Tool: search_web(query='{query}', max_results={max_results})")
|
| 97 |
if not search_client:
|
| 98 |
return "Search tool is not available/configured."
|
|
|
|
| 107 |
if not results: return "No search results found."
|
| 108 |
return "\n".join([f"Title: {r['title']}\nURL: {r['href']}\nSnippet: {r['body'][:500]}..." for r in results])
|
| 109 |
else:
|
|
|
|
| 110 |
return "No compatible search client configured or available."
|
| 111 |
except Exception as e:
|
| 112 |
print(f"Search API Error ({type(e).__name__}): {e}")
|
|
|
|
| 123 |
str: The local path to the downloaded file (e.g., '/tmp/gaia_task_abc-123.pdf') if successful,
|
| 124 |
otherwise an error message starting with 'Error:'.
|
| 125 |
"""
|
| 126 |
+
# (Code unverändert)
|
| 127 |
print(f"Tool: download_task_file(task_id='{task_id}')")
|
| 128 |
file_url = f"{DEFAULT_API_URL}/files/{task_id}"
|
|
|
|
| 129 |
try:
|
| 130 |
response = requests.get(file_url, stream=True, timeout=30)
|
| 131 |
response.raise_for_status()
|
|
|
|
| 132 |
content_type = response.headers.get('content-type', '').lower()
|
| 133 |
suffix = ".tmp"
|
| 134 |
if 'pdf' in content_type: suffix = ".pdf"
|
|
|
|
| 136 |
elif 'jpeg' in content_type or 'jpg' in content_type: suffix = ".jpg"
|
| 137 |
elif 'csv' in content_type: suffix = ".csv"
|
| 138 |
elif 'plain' in content_type or 'text' in content_type: suffix = ".txt"
|
|
|
|
|
|
|
| 139 |
temp_dir = tempfile.gettempdir()
|
| 140 |
safe_task_id = re.sub(r'[^\w\-]+', '_', task_id)
|
|
|
|
| 141 |
timestamp = datetime.now().strftime("%Y%m%d%H%M%S%f")
|
| 142 |
temp_file_path = os.path.join(temp_dir, f"gaia_task_{safe_task_id}_{timestamp}{suffix}")
|
|
|
|
|
|
|
| 143 |
with open(temp_file_path, 'wb') as f:
|
| 144 |
for chunk in response.iter_content(chunk_size=8192):
|
| 145 |
f.write(chunk)
|
|
|
|
| 146 |
print(f"File downloaded successfully to {temp_file_path}")
|
| 147 |
temp_files_to_clean.add(temp_file_path)
|
| 148 |
+
return temp_file_path
|
|
|
|
| 149 |
except requests.exceptions.HTTPError as e:
|
| 150 |
if e.response.status_code == 404:
|
| 151 |
print(f"No file found on server for task_id {task_id}.")
|
| 152 |
+
return "Error: No file found for this task ID."
|
| 153 |
else:
|
| 154 |
print(f"HTTP Error downloading file for task {task_id}: {e}")
|
| 155 |
+
return f"Error: Failed to download file (HTTP {e.response.status_code})."
|
| 156 |
except requests.exceptions.RequestException as e:
|
| 157 |
print(f"Network Error downloading file for task {task_id}: {e}")
|
| 158 |
+
return f"Error: Failed to download file due to network issue: {e}"
|
| 159 |
except Exception as e:
|
| 160 |
print(f"Unexpected error downloading file for task {task_id} ({type(e).__name__}): {e}")
|
| 161 |
+
return f"Error: Unexpected error during file download: {e}"
|
| 162 |
|
| 163 |
@tool
def read_file_content(file_path: str) -> str:
    """
    Reads the text content of a file that was previously downloaded into the system temp directory.

    Args:
        file_path: The absolute path of the file, as returned by download_task_file.

    Returns:
        str: The extracted text content (truncated if very long), or an error message starting with 'Error:'.
    """
    max_len = 7000  # upper bound on returned characters, keeps the agent context small
    print(f"Tool: read_file_content(file_path='{file_path}')")

    if not isinstance(file_path, str) or not os.path.isabs(file_path):
        print(f"Invalid input for read_file_content: '{file_path}'. Expected an absolute file path.")
        return "Error: Invalid input. Provide the absolute file path returned by download_task_file."

    # Containment check on *resolved* paths: a plain string prefix test would accept
    # '/tmp/../etc/passwd' or a sibling directory such as '/tmp_other/file'.
    temp_root = os.path.realpath(tempfile.gettempdir())
    resolved_path = os.path.realpath(file_path)
    try:
        inside_temp = os.path.commonpath([temp_root, resolved_path]) == temp_root
    except ValueError:
        # commonpath raises when the paths cannot be compared (e.g. different drives).
        inside_temp = False
    if not inside_temp:
        print(f"Security Alert: Attempted to read file outside temp directory: {file_path}")
        return "Error: Invalid file path provided. Only downloaded files can be read."

    file_name = os.path.basename(file_path)
    if not os.path.exists(resolved_path):
        print(f"Error: File not found at path: {file_path}")
        return f"Error: File not found at the specified path '{file_name}'."

    try:
        file_size = os.path.getsize(resolved_path)
        print(f"Reading file: {file_name}, Size: {file_size} bytes")
        if file_size == 0:
            print(f"Warning: File {file_name} is empty.")
            return f"Observation: The file '{file_name}' is empty."

        lowered = file_path.lower()
        if lowered.endswith(".pdf"):
            if not PDF_READER_AVAILABLE:
                return "Error: Cannot read PDF file because PyPDF2 library is not installed."
            text = ""
            with open(resolved_path, 'rb') as f:
                reader = PyPDF2.PdfReader(f)
                num_pages = len(reader.pages)
                print(f"Reading {num_pages} pages from PDF...")
                for page in reader.pages:
                    # Extract once per page; extraction is the expensive step.
                    page_text = page.extract_text()
                    if not page_text:
                        continue
                    text += page_text + "\n"
                    if len(text) > max_len:
                        text = text[:max_len] + "\n... (content truncated)"
                        print(f"Text truncated at {max_len} chars.")
                        break
            # NOTE(review): the guard preceding this return was not visible in the reviewed
            # excerpt; it is reconstructed here as an emptiness check — confirm against the file.
            if not text.strip():
                return f"Observation: Could not extract any text content from the PDF file '{file_name}'."
            print(f"Successfully read ~{len(text)} chars from PDF: {file_name}")
            return f"Content of '{file_name}':\n{text}"

        if lowered.endswith((".png", ".jpg", ".jpeg", ".gif", ".bmp")):
            print(f"File is an image: {file_name}. Reading image content not implemented.")
            return f"Observation: File '{file_name}' is an image. Cannot read text content with this tool."

        # Everything else is treated as text; undecodable bytes are dropped.
        try:
            with open(resolved_path, 'r', encoding='utf-8', errors='ignore') as f:
                # One extra character is enough to know whether the file exceeds the limit.
                content = f.read(max_len + 1)
            truncated = len(content) > max_len
            if truncated:
                content = content[:max_len]
            print(f"Successfully read ~{len(content)} chars from text file: {file_name}")
            result = f"Content of '{file_name}':\n{content}"
            if truncated:
                result += "\n... (content truncated)"
            return result
        except Exception as read_err:
            print(f"Error reading file {file_path} as text: {read_err}")
            return f"Error: Failed to read file '{file_name}' as text: {read_err}"

    except Exception as e:
        # Tool boundary: the agent expects an error string rather than an exception.
        print(f"Error reading file {file_path} ({type(e).__name__}): {e}")
        return f"Error: Failed to read file content: {e}"
|
| 233 |
|
| 234 |
|
| 235 |
+
# --- Agent initialisation (simplified) ---
def initialize_agent():
    """Initialise the search client (once) and build the smolagents CodeAgent.

    Sets the module-level globals ``search_client`` and ``agent_instance``.
    ``search_client`` ends up as a client object, ``None`` if no client could be
    created, or ``False`` if web search is disabled by configuration (the
    ``False`` sentinel prevents another initialisation attempt on later calls).

    Raises:
        ValueError: If the HUGGINGFACE_TOKEN environment variable is not set.
    """
    global search_client, agent_instance
    print("Initializing agent and clients...")

    # Initialise the search client only if that has not happened yet.
    if search_client is None:
        print("Initializing search client...")
        if USE_TAVILY:
            tavily_key = os.getenv("TAVILY_API_KEY")
            if tavily_key:
                try:
                    search_client = TavilyClient(api_key=tavily_key)
                    print("Using Tavily for search.")
                except NameError:
                    # The class name is undefined when the optional import failed.
                    print("WARNUNG: TavilyClient Klasse nicht gefunden.")
                    search_client = None
            else:
                print("WARNUNG: TAVILY_API_KEY nicht gefunden.")
                if USE_DUCKDUCKGO:
                    # Fall back only when Tavily could not be set up.
                    try:
                        search_client = DDGS()
                        print("Falling back to DuckDuckGo for search.")
                    except NameError:
                        print("WARNUNG: DuckDuckGo nicht verfügbar.")
                        search_client = None
                else:
                    search_client = None
        elif USE_DUCKDUCKGO:
            try:
                search_client = DDGS()
                print("Using DuckDuckGo for search.")
            except NameError:
                print("WARNUNG: duckduckgo-search nicht installiert/verfügbar.")
                search_client = None
        else:
            print("Web search is disabled by configuration.")
            search_client = False  # sentinel: do not try to initialise again

    # --- LLM model via HfApiModel ---
    hf_token = os.getenv("HUGGINGFACE_TOKEN")
    if not hf_token:
        raise ValueError("HUGGINGFACE_TOKEN Secret nicht im Hugging Face Space gefunden! HfApiModel benötigt dies.")

    # Pass the token explicitly: the secret is stored under HUGGINGFACE_TOKEN, so
    # relying on the library's own environment lookup would leave the model
    # unauthenticated even though the check above passed.
    model_config = {"token": hf_token}
    if HF_MODEL_ID:
        # Apply the configured model instead of silently using the library default,
        # so the model shown in the UI and logs is the one actually queried.
        model_config["model_id"] = HF_MODEL_ID
        print(f"HfApiModel will attempt to use model specified by HF_MODEL_ID env var (or its default): {HF_MODEL_ID}")

    hf_model = HfApiModel(**model_config)

    # --- Agent instance ---
    available_tools = [search_web, download_task_file, read_file_content]
    active_tools = [t for t in available_tools if t is not None]  # drop tools that are unset

    agent_instance = CodeAgent(
        tools=active_tools,
        model=hf_model,
    )
    print(f"Smol CodeAgent initialized with {len(active_tools)} tools and HfApiModel.")
    if len(active_tools) < len(available_tools):
        print(f"Warning: Some tools might be inactive.")
|
| 296 |
|
| 297 |
|
| 298 |
+
# --- Hauptfunktion run_and_submit_all (weitgehend unverändert) ---
|
| 299 |
+
# Die Logik zum Holen der Fragen, Iterieren, Prompt erstellen, Agent aufrufen,
|
| 300 |
+
# Antworten sammeln und Submitten bleibt gleich. Nur die Initialisierung oben ist anders.
|
| 301 |
def run_and_submit_all( profile: gr.OAuthProfile | None, progress=gr.Progress(track_tqdm=True)):
|
| 302 |
"""
|
| 303 |
+
Fetches all questions, runs the smolagents CodeAgent on them, submits all answers,
|
| 304 |
and displays the results. Includes Gradio progress tracking.
|
| 305 |
"""
|
| 306 |
space_id = os.getenv("SPACE_ID")
|
| 307 |
+
if not profile:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 308 |
print("User not logged in.")
|
|
|
|
| 309 |
return "Please Login to Hugging Face with the button.", None
|
| 310 |
+
username = f"{profile.username}"
|
| 311 |
+
print(f"User logged in: {username}")
|
| 312 |
|
| 313 |
api_url = DEFAULT_API_URL
|
| 314 |
questions_url = f"{api_url}/questions"
|
| 315 |
submit_url = f"{api_url}/submit"
|
| 316 |
|
| 317 |
+
# 1. Initialisiere Agent (vereinfacht)
|
| 318 |
progress(0, desc="Initializing Agent...")
|
| 319 |
try:
|
| 320 |
initialize_agent()
|
| 321 |
+
if not agent_instance: raise RuntimeError("Agent instance could not be initialized.")
|
| 322 |
+
except ValueError as e: return f"Configuration Error: {e}", None
|
| 323 |
+
except Exception as e: return f"Error initializing agent ({type(e).__name__}): {e}", None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 324 |
|
| 325 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Local Run - Code Link N/A"
|
| 326 |
print(f"Agent Code Link: {agent_code}")
|
|
|
|
| 337 |
return "Fetched questions list is empty or invalid format.", None
|
| 338 |
num_questions = len(questions_data)
|
| 339 |
print(f"Fetched {num_questions} questions.")
|
| 340 |
+
except Exception as e: return f"Error fetching questions ({type(e).__name__}): {e}", None
|
|
|
|
|
|
|
| 341 |
|
| 342 |
+
# 3. Run your Smol CodeAgent
|
|
|
|
| 343 |
start_time = datetime.now()
|
| 344 |
results_log = []
|
| 345 |
answers_payload = []
|
| 346 |
+
print(f"Running smol CodeAgent on {num_questions} questions using HfApiModel...")
|
| 347 |
|
| 348 |
+
for i, item in enumerate(progress.tqdm(questions_data, desc="Processing Questions")):
|
|
|
|
|
|
|
| 349 |
task_id = item.get("task_id")
|
| 350 |
question_text = item.get("question")
|
|
|
|
| 351 |
if not task_id or question_text is None:
|
| 352 |
print(f"Skipping item {i+1} with missing task_id or question: {item}")
|
| 353 |
continue
|
| 354 |
|
| 355 |
+
# --- Prompt für smolagents (unverändert) ---
|
| 356 |
agent_prompt = f"""
|
| 357 |
You are an expert AI assistant solving a challenge question based on the GAIA benchmark.
|
| 358 |
Your task is to answer the following question accurately and concisely.
|
|
|
|
| 377 |
|
| 378 |
Let's begin the thinking process for Task {task_id}.
|
| 379 |
"""
|
| 380 |
+
submitted_answer = f"Error: Agent failed for task {task_id}"
|
|
|
|
| 381 |
try:
|
| 382 |
+
# Führe den Agenten aus
|
| 383 |
+
agent_response = agent_instance.run(prompt=agent_prompt) # Der Aufruf bleibt gleich
|
| 384 |
|
| 385 |
if agent_response:
|
|
|
|
|
|
|
| 386 |
cleaned_response = re.sub(r"^(Final Answer:|Answer:|The answer is:|Here is the answer:)\s*", "", agent_response.strip(), flags=re.IGNORECASE)
|
|
|
|
| 387 |
cleaned_response = cleaned_response.strip('"').strip("'")
|
| 388 |
submitted_answer = cleaned_response if cleaned_response else "Error: Agent returned empty response after cleaning."
|
| 389 |
else:
|
| 390 |
submitted_answer = "Error: Agent returned an empty or None response."
|
|
|
|
|
|
|
| 391 |
print(f"Task {task_id} completed. Submitted Answer: '{submitted_answer}'")
|
| 392 |
|
|
|
|
| 393 |
except Exception as e:
|
| 394 |
error_msg = f"AGENT_RUN_ERROR on task {task_id} ({type(e).__name__}): {e}"
|
| 395 |
print(error_msg)
|
| 396 |
+
# Hier könnte man spezifischere Fehler von HfApiModel abfangen, falls bekannt
|
| 397 |
+
submitted_answer = f"ERROR: Agent failed ({type(e).__name__})"
|
| 398 |
|
| 399 |
finally:
|
|
|
|
| 400 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
| 401 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
|
|
|
|
|
|
|
| 402 |
|
| 403 |
+
end_time = datetime.now(); duration = end_time - start_time
|
|
|
|
| 404 |
print(f"Agent processing finished in {duration}.")
|
| 405 |
progress(0.9, desc="Submitting answers...")
|
| 406 |
|
| 407 |
+
# 4. Prepare Submission (unverändert)
|
| 408 |
if not answers_payload:
|
| 409 |
+
print("Agent did not produce any answers to submit."); cleanup_temp_files()
|
|
|
|
| 410 |
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
|
|
|
| 411 |
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
| 412 |
+
print(f"Submitting {len(answers_payload)} answers for user '{username}'...")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 413 |
|
| 414 |
+
# 5. Submit (unverändert)
|
| 415 |
+
final_status = "Submission attempt finished."
|
| 416 |
+
results_df = pd.DataFrame(results_log)
|
| 417 |
try:
|
| 418 |
response = requests.post(submit_url, json=submission_data, timeout=120)
|
| 419 |
response.raise_for_status()
|
| 420 |
result_data = response.json()
|
| 421 |
+
final_status = (f"Submission Successful!\nUser: {result_data.get('username')}\n"
|
| 422 |
+
f"Score: {result_data.get('score', 'N/A'):.2f}% ({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
|
| 423 |
+
f"Message: {result_data.get('message', 'No message received.')}")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 424 |
print("Submission successful.")
|
| 425 |
except requests.exceptions.HTTPError as e:
|
| 426 |
+
error_detail = f"... {e.response.status_code}." # Gekürzte Fehlermeldung für Code-Lesbarkeit
|
| 427 |
+
# (Vollständige Fehlerbehandlung wie zuvor)
|
| 428 |
try:
|
| 429 |
+
error_json = e.response.json(); api_error = error_json.get('detail', e.response.text)
|
| 430 |
+
if isinstance(api_error, list) and len(api_error) > 0: error_detail += f" Detail: {api_error[0].get('msg', str(api_error))}"
|
| 431 |
+
else: error_detail += f" Detail: {str(api_error)}"
|
| 432 |
+
except: error_detail += f" Response: {e.response.text[:200]}"
|
| 433 |
+
final_status = f"Submission Failed: {error_detail}"; print(final_status)
|
| 434 |
+
except requests.exceptions.Timeout: final_status = "Submission Failed: Timeout."; print(final_status)
|
| 435 |
+
except requests.exceptions.RequestException as e: final_status = f"Submission Failed: Network error - {e}"; print(final_status)
|
| 436 |
+
except Exception as e: final_status = f"Submission Failed: Unexpected error ({type(e).__name__}): {e}"; print(final_status)
|
| 437 |
+
finally: cleanup_temp_files()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 438 |
|
| 439 |
progress(1, desc="Done.")
|
| 440 |
return final_status, results_df
|
| 441 |
|
| 442 |
|
| 443 |
+
# --- Gradio interface ---
with gr.Blocks() as demo:
    gr.Markdown("# Smol CodeAgent Evaluation Runner (Hugging Face)")
    gr.Markdown(f"""
    **Instructions:**
    1. Ensure `HUGGINGFACE_TOKEN` is a Secret. Add `TAVILY_API_KEY` if using Tavily.
    2. Verify `requirements.txt` includes `smolagents[huggingface]`, etc.
    3. Agent uses `CodeAgent` with `HfApiModel`. Target Model (via env var or default): **{HF_MODEL_ID}**.
    4. Log in below.
    5. Click 'Run Evaluation & Submit'. Expect a potentially long runtime.
    ---
    **Agent Details:** Uses `smolagents.CodeAgent`. Search: {'Tavily' if USE_TAVILY else 'DuckDuckGo' if USE_DUCKDUCKGO else 'Disabled'}.
    """)
    with gr.Row():
        login_button = gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=10, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True, height=400)

    def handle_run(profile: gr.OAuthProfile | None, progress=gr.Progress(track_tqdm=True)):
        """Click handler: run the full evaluation for the logged-in user.

        Gradio fills ``profile`` from the OAuth session because of its type
        annotation (``None`` when nobody is logged in) and supplies ``progress``
        because of its ``gr.Progress`` default. Reading a ``profile`` attribute
        off ``gr.Request`` always produced ``None``, which made every run report
        that the user was not logged in.
        """
        return run_and_submit_all(profile, progress=progress)

    run_button.click(
        fn=handle_run,
        inputs=[],
        outputs=[status_output, results_table],
        api_name="run_evaluation_smol_codeagent",
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 466 |
|
| 467 |
# --- App start ---
if __name__ == "__main__":
    # Print a startup banner with the detected environment, then launch the UI.
    banner_title = " App Starting (Smol CodeAgent Version) "
    print("\n" + "-"*30 + banner_title + "-"*30)

    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if space_host_startup:
        # Append the domain suffix only when it is missing, so a SPACE_HOST value
        # that already ends in '.hf.space' does not yield a doubled suffix.
        if space_host_startup.endswith(".hf.space"):
            runtime_host = space_host_startup
        else:
            runtime_host = f"{space_host_startup}.hf.space"
        print(f"✅ SPACE_HOST found: {space_host_startup} -> Runtime URL: https://{runtime_host}")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:
        print(f"✅ SPACE_ID found: {space_id_startup} -> Repo: https://huggingface.co/spaces/{space_id_startup}")
    else:
        print("ℹ️ SPACE_ID environment variable not found (running locally?).")

    print(f" Using Smol CodeAgent with HfApiModel.")
    print(f" Target HF Model (via env var or default): {HF_MODEL_ID}")

    search_tool_status = 'Disabled'
    if USE_TAVILY:
        search_tool_status = 'Tavily'
    elif USE_DUCKDUCKGO:
        search_tool_status = 'DuckDuckGo'
    print(f" Search Tool: {search_tool_status}")
    print(f" PDF Reading: {'Enabled' if PDF_READER_AVAILABLE else 'Disabled (PyPDF2 missing)'}")

    # Closing rule has the same width as the opening banner (30 + title + 30).
    print("-"*(60 + len(banner_title)) + "\n")
    print("Launching Gradio Interface for Smol CodeAgent Evaluation...")
    demo.queue().launch(debug=False, share=False)
|