Update app.py
Browse files
app.py
CHANGED
|
@@ -7,11 +7,10 @@ from datetime import datetime
|
|
| 7 |
import time
|
| 8 |
import tempfile
|
| 9 |
import atexit
|
| 10 |
-
import sys
|
| 11 |
|
| 12 |
# --- Smol Agents und HF Imports (angepasst an Beispiel) ---
|
| 13 |
try:
|
| 14 |
-
# Verwende CodeAgent und HfApiModel wie im Beispiel
|
| 15 |
from smolagents import CodeAgent, tool, HfApiModel
|
| 16 |
print("Successfully imported CodeAgent, tool, HfApiModel from 'smolagents'")
|
| 17 |
except ImportError as e:
|
|
@@ -19,28 +18,26 @@ except ImportError as e:
|
|
| 19 |
print("Please ensure 'smolagents[huggingface]' is listed correctly in requirements.txt")
|
| 20 |
sys.exit(f"Fatal Error: Could not import smolagents components. Check requirements.txt and rebuild/restart the Space. Original error: {e}")
|
| 21 |
|
| 22 |
-
# huggingface_hub wird möglicherweise von HfApiModel intern genutzt
|
| 23 |
from huggingface_hub import HfApi
|
| 24 |
|
| 25 |
-
# --- Suchtool Imports
|
| 26 |
-
USE_TAVILY = False
|
| 27 |
-
USE_DUCKDUCKGO = True
|
| 28 |
-
|
| 29 |
if USE_TAVILY:
|
| 30 |
try:
|
| 31 |
from tavily import TavilyClient
|
| 32 |
except ImportError:
|
| 33 |
print("WARNUNG: TavilyClient nicht installiert.")
|
| 34 |
USE_TAVILY = False
|
| 35 |
-
USE_DUCKDUCKGO = True
|
| 36 |
if USE_DUCKDUCKGO:
|
| 37 |
try:
|
| 38 |
from duckduckgo_search import DDGS
|
| 39 |
except ImportError:
|
| 40 |
-
|
| 41 |
-
|
| 42 |
|
| 43 |
-
# --- PDF Reader Import
|
| 44 |
try:
|
| 45 |
import PyPDF2
|
| 46 |
PDF_READER_AVAILABLE = True
|
|
@@ -50,68 +47,49 @@ except ImportError:
|
|
| 50 |
|
| 51 |
# --- Konstanten ---
|
| 52 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 53 |
-
# HfApiModel liest dies wahrscheinlich aus der Umgebungsvariable HF_MODEL_ID
|
| 54 |
-
# oder hat einen internen Default. Wir setzen sie weiterhin als Fallback/Info.
|
| 55 |
HF_MODEL_ID = os.getenv("HF_MODEL_ID", "meta-llama/Meta-Llama-3-8B-Instruct")
|
| 56 |
|
| 57 |
# --- Globale Variablen ---
|
| 58 |
-
# search_client wird weiterhin benötigt, da die Tools darauf zugreifen
|
| 59 |
search_client = None
|
| 60 |
-
# agent_instance wird pro Lauf initialisiert
|
| 61 |
agent_instance = None
|
| 62 |
|
| 63 |
-
# --- Temporäre Datei Verwaltung (wie zuvor) ---
|
| 64 |
temp_files_to_clean = set()
|
|
|
|
| 65 |
def cleanup_temp_files():
|
| 66 |
-
# (Code unverändert)
|
| 67 |
print("Cleaning up temporary files...")
|
| 68 |
for file_path in list(temp_files_to_clean):
|
| 69 |
try:
|
| 70 |
if os.path.exists(file_path):
|
| 71 |
os.remove(file_path)
|
| 72 |
print(f"Removed temporary file: {file_path}")
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
except OSError as e:
|
| 76 |
print(f"Error removing temporary file {file_path}: {e}")
|
| 77 |
-
except KeyError:
|
| 78 |
-
print(f"Warning: File path {file_path} already removed from cleanup set.")
|
| 79 |
-
atexit.register(cleanup_temp_files)
|
| 80 |
|
| 81 |
-
|
| 82 |
-
# Die @tool-Deklarationen und Funktionslogik bleiben gleich.
|
| 83 |
|
|
|
|
| 84 |
@tool
|
| 85 |
def search_web(query: str, max_results: int = 3) -> str:
|
| 86 |
-
"""
|
| 87 |
-
Searches the web for the given query and returns a summary of the top results.
|
| 88 |
-
Use this to find recent information or facts not readily available.
|
| 89 |
-
Args:
|
| 90 |
-
query (str): The search query.
|
| 91 |
-
max_results (int): The maximum number of results to return (default 3).
|
| 92 |
-
Returns:
|
| 93 |
-
str: A string containing the search results, or an error message.
|
| 94 |
-
"""
|
| 95 |
-
# (Code unverändert)
|
| 96 |
print(f"Tool: search_web(query='{query}', max_results={max_results})")
|
| 97 |
if not search_client:
|
| 98 |
-
|
| 99 |
-
if search_client is False:
|
| 100 |
-
return "Search tool is disabled by configuration or missing libraries."
|
| 101 |
-
else:
|
| 102 |
-
# Sollte nicht passieren, wenn initialize_agent korrekt läuft, aber zur Sicherheit
|
| 103 |
-
print("Warning: Search client not initialized before tool use.")
|
| 104 |
-
return "Search tool is not available/configured."
|
| 105 |
try:
|
| 106 |
if USE_TAVILY and isinstance(search_client, TavilyClient):
|
| 107 |
response = search_client.search(query=query, search_depth="basic", max_results=max_results)
|
| 108 |
-
context =
|
| 109 |
-
if not context:
|
| 110 |
-
|
|
|
|
|
|
|
|
|
|
| 111 |
elif USE_DUCKDUCKGO and isinstance(search_client, DDGS):
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
| 115 |
else:
|
| 116 |
return "No compatible search client configured or available."
|
| 117 |
except Exception as e:
|
|
@@ -120,16 +98,6 @@ def search_web(query: str, max_results: int = 3) -> str:
|
|
| 120 |
|
| 121 |
@tool
|
| 122 |
def download_task_file(task_id: str) -> str:
|
| 123 |
-
"""
|
| 124 |
-
Downloads a file associated with a specific task ID from the evaluation server.
|
| 125 |
-
Use this ONLY if the question requires information from a specific file linked to the task.
|
| 126 |
-
Args:
|
| 127 |
-
task_id (str): The unique identifier for the task whose file needs to be downloaded.
|
| 128 |
-
Returns:
|
| 129 |
-
str: The local path to the downloaded file (e.g., '/tmp/gaia_task_abc-123.pdf') if successful,
|
| 130 |
-
otherwise an error message starting with 'Error:'.
|
| 131 |
-
"""
|
| 132 |
-
# (Code unverändert)
|
| 133 |
print(f"Tool: download_task_file(task_id='{task_id}')")
|
| 134 |
file_url = f"{DEFAULT_API_URL}/files/{task_id}"
|
| 135 |
try:
|
|
@@ -141,381 +109,128 @@ def download_task_file(task_id: str) -> str:
|
|
| 141 |
elif 'png' in content_type: suffix = ".png"
|
| 142 |
elif 'jpeg' in content_type or 'jpg' in content_type: suffix = ".jpg"
|
| 143 |
elif 'csv' in content_type: suffix = ".csv"
|
| 144 |
-
elif 'plain' in content_type or 'text' in content_type: suffix = ".txt"
|
| 145 |
temp_dir = tempfile.gettempdir()
|
| 146 |
-
|
| 147 |
timestamp = datetime.now().strftime("%Y%m%d%H%M%S%f")
|
| 148 |
-
|
| 149 |
-
with open(
|
| 150 |
-
for chunk in response.iter_content(
|
| 151 |
f.write(chunk)
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
return temp_file_path
|
| 155 |
except requests.exceptions.HTTPError as e:
|
| 156 |
if e.response.status_code == 404:
|
| 157 |
-
print(f"No file found on server for task_id {task_id}.")
|
| 158 |
return "Error: No file found for this task ID."
|
| 159 |
-
|
| 160 |
-
print(f"HTTP Error downloading file for task {task_id}: {e}")
|
| 161 |
-
return f"Error: Failed to download file (HTTP {e.response.status_code})."
|
| 162 |
-
except requests.exceptions.RequestException as e:
|
| 163 |
-
print(f"Network Error downloading file for task {task_id}: {e}")
|
| 164 |
-
return f"Error: Failed to download file due to network issue: {e}"
|
| 165 |
except Exception as e:
|
| 166 |
-
print(f"Unexpected error downloading file for task {task_id} ({type(e).__name__}): {e}")
|
| 167 |
return f"Error: Unexpected error during file download: {e}"
|
| 168 |
|
| 169 |
@tool
|
| 170 |
def read_file_content(file_path: str) -> str:
|
| 171 |
-
"""
|
| 172 |
-
Reads the text content of a previously downloaded file (PDF or plain text).
|
| 173 |
-
Use this tool AFTER 'download_task_file' has successfully returned a file path (not an error message).
|
| 174 |
-
Args:
|
| 175 |
-
file_path (str): The local path to the file (must be a path returned by 'download_task_file').
|
| 176 |
-
Returns:
|
| 177 |
-
str: The extracted text content (truncated if very long), or an error message starting with 'Error:'.
|
| 178 |
-
"""
|
| 179 |
-
# (Code weitgehend unverändert, ggf. kleine Optimierungen wie zuvor)
|
| 180 |
print(f"Tool: read_file_content(file_path='{file_path}')")
|
| 181 |
-
if not
|
| 182 |
-
|
| 183 |
-
return "Error: Invalid input. Provide the absolute file path returned by download_task_file."
|
| 184 |
-
if not file_path.startswith(tempfile.gettempdir()):
|
| 185 |
-
print(f"Security Alert: Attempted to read file outside temp directory: {file_path}")
|
| 186 |
-
return "Error: Invalid file path provided. Only downloaded files can be read."
|
| 187 |
if not os.path.exists(file_path):
|
| 188 |
-
|
| 189 |
-
return f"Error: File not found at the specified path '{os.path.basename(file_path)}'."
|
| 190 |
try:
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
print(f"Warning: File {os.path.basename(file_path)} is empty.")
|
| 195 |
-
return f"Observation: The file '{os.path.basename(file_path)}' is empty."
|
| 196 |
-
if file_path.lower().endswith(".pdf"):
|
| 197 |
-
if not PDF_READER_AVAILABLE: return "Error: Cannot read PDF file because PyPDF2 library is not installed."
|
| 198 |
text = ""
|
| 199 |
with open(file_path, 'rb') as f:
|
| 200 |
reader = PyPDF2.PdfReader(f)
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
for page_num in range(num_pages):
|
| 204 |
-
if reader.pages[page_num].extract_text():
|
| 205 |
-
page_text = reader.pages[page_num].extract_text()
|
| 206 |
-
text += page_text + "\n"
|
| 207 |
if len(text) > 7000:
|
| 208 |
-
text = text[:7000] + "\n... (
|
| 209 |
-
print(f"Text truncated at {len(text)} chars.")
|
| 210 |
break
|
| 211 |
-
if not text:
|
| 212 |
-
print(f"Warning: Could not extract text from PDF: {os.path.basename(file_path)}")
|
| 213 |
-
return f"Observation: Could not extract any text content from the PDF file '{os.path.basename(file_path)}'."
|
| 214 |
-
print(f"Successfully read ~{len(text)} chars from PDF: {os.path.basename(file_path)}")
|
| 215 |
return f"Content of '{os.path.basename(file_path)}':\n{text}"
|
| 216 |
-
elif file_path.lower().endswith((".png", ".jpg", ".jpeg", ".gif", ".bmp")):
|
| 217 |
-
print(f"File is an image: {os.path.basename(file_path)}. Reading image content not implemented.")
|
| 218 |
-
return f"Observation: File '{os.path.basename(file_path)}' is an image. Cannot read text content with this tool."
|
| 219 |
else:
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
|
| 224 |
-
while len(content) < max_len:
|
| 225 |
-
chunk = f.read(chunk_size);
|
| 226 |
-
if not chunk: break
|
| 227 |
-
content += chunk
|
| 228 |
-
if len(content) > max_len: content = content[:max_len]; truncated = True
|
| 229 |
-
print(f"Successfully read ~{len(content)} chars from text file: {os.path.basename(file_path)}")
|
| 230 |
-
result = f"Content of '{os.path.basename(file_path)}':\n{content}"
|
| 231 |
-
if truncated: result += "\n... (content truncated)"
|
| 232 |
-
return result
|
| 233 |
-
except Exception as read_err:
|
| 234 |
-
print(f"Error reading file {file_path} as text: {read_err}")
|
| 235 |
-
return f"Error: Failed to read file '{os.path.basename(file_path)}' as text: {read_err}"
|
| 236 |
except Exception as e:
|
| 237 |
-
|
| 238 |
-
return f"Error: Failed to read file content: {e}"
|
| 239 |
|
| 240 |
-
|
| 241 |
-
# --- Agent Initialisierung (VEREINFACHT) ---
|
| 242 |
def initialize_agent():
|
| 243 |
-
"""Initialisiert den smolagents CodeAgent und die Clients."""
|
| 244 |
global search_client, agent_instance
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
# Initialisiere Search Client (wenn nicht bereits geschehen oder fehlgeschlagen)
|
| 248 |
-
if search_client is None: # Nur initialisieren, wenn noch nicht versucht
|
| 249 |
-
print("Initializing search client...")
|
| 250 |
if USE_TAVILY:
|
| 251 |
-
|
| 252 |
-
if
|
| 253 |
-
try: search_client = TavilyClient(api_key=
|
| 254 |
-
except
|
| 255 |
else:
|
| 256 |
-
|
| 257 |
-
search_client = False # Fehler markieren
|
| 258 |
-
if USE_DUCKDUCKGO: # Fallback nur wenn Tavily nicht initialisiert werden konnte
|
| 259 |
-
try: search_client = DDGS(); print("Falling back to DuckDuckGo for search.")
|
| 260 |
-
except NameError: print("WARNUNG: DuckDuckGo nicht verfügbar."); search_client = False # Fehler markieren
|
| 261 |
elif USE_DUCKDUCKGO:
|
| 262 |
-
try: search_client = DDGS()
|
| 263 |
-
except
|
| 264 |
else:
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
raise ValueError("HUGGINGFACE_TOKEN Secret nicht im Hugging Face Space gefunden! HfApiModel benötigt dies.")
|
| 272 |
-
|
| 273 |
-
print(f"HfApiModel will attempt to use model specified by HF_MODEL_ID env var (or its default): {HF_MODEL_ID}")
|
| 274 |
-
model_config = {}
|
| 275 |
-
# Optional: Parameter für HfApiModel setzen, falls nötig und unterstützt
|
| 276 |
-
# model_config['max_new_tokens'] = 1500
|
| 277 |
-
# model_config['temperature'] = 0.1
|
| 278 |
-
|
| 279 |
-
hf_model = HfApiModel(**model_config)
|
| 280 |
-
|
| 281 |
-
# --- Agent Instanz ---
|
| 282 |
-
available_tools = [search_web, download_task_file, read_file_content]
|
| 283 |
-
# Nur aktive Tools übergeben (wenn search_client nicht False ist)
|
| 284 |
-
active_tools = [t for t in available_tools if t is not None]
|
| 285 |
if search_client is False:
|
| 286 |
-
|
|
|
|
| 287 |
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
model=hf_model
|
| 291 |
-
)
|
| 292 |
-
print(f"Smol CodeAgent initialized with {len(active_tools)} tools and HfApiModel.")
|
| 293 |
-
if len(active_tools) < len(available_tools):
|
| 294 |
-
print(f"Warning: Some tools might be inactive due to configuration or missing libraries.")
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
# --- Hauptfunktion run_and_submit_all (Nimmt gr.Request) ---
|
| 298 |
-
def run_and_submit_all( request: gr.Request, progress=gr.Progress(track_tqdm=True)): # Geänderter Parameter
|
| 299 |
-
"""
|
| 300 |
-
Fetches all questions, runs the smolagents CodeAgent on them, submits all answers,
|
| 301 |
-
and displays the results. Includes Gradio progress tracking.
|
| 302 |
-
"""
|
| 303 |
-
# +++ Profil aus Request extrahieren +++
|
| 304 |
-
profile = getattr(request, 'profile', None)
|
| 305 |
-
# +++ DEBUGGING PRINT (wie zuvor) +++
|
| 306 |
-
print(f"--- Entering run_and_submit_all ---")
|
| 307 |
-
print(f"Received profile object via request: {profile}")
|
| 308 |
-
if profile:
|
| 309 |
-
print(f"Profile username: {getattr(profile, 'username', 'N/A')}")
|
| 310 |
-
# print(f"Profile details: {vars(profile) if profile else 'N/A'}") # Details können viel loggen
|
| 311 |
-
else:
|
| 312 |
-
print("Profile object via request is None.")
|
| 313 |
-
# +++ END DEBUGGING PRINT +++
|
| 314 |
-
|
| 315 |
-
space_id = os.getenv("SPACE_ID")
|
| 316 |
-
|
| 317 |
-
# *** HIER die eigentliche Prüfung ***
|
| 318 |
if not profile:
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
# Ab hier sollte der Code nur laufen, wenn profile NICHT None ist
|
| 323 |
-
username = f"{profile.username}" # Jetzt sicher, da profile nicht None ist
|
| 324 |
-
print(f"Proceeding with run for user: {username}")
|
| 325 |
-
|
| 326 |
api_url = DEFAULT_API_URL
|
| 327 |
-
questions_url = f"{api_url}/questions"
|
| 328 |
-
submit_url = f"{api_url}/submit"
|
| 329 |
-
|
| 330 |
-
# 1. Initialisiere Agent (vereinfacht)
|
| 331 |
-
progress(0, desc="Initializing Agent...")
|
| 332 |
try:
|
| 333 |
initialize_agent()
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
# 2. Fetch Questions
|
| 342 |
-
progress(0.1, desc="Fetching questions...")
|
| 343 |
-
print(f"Fetching questions from: {questions_url}")
|
| 344 |
-
try:
|
| 345 |
-
response = requests.get(questions_url, timeout=30)
|
| 346 |
-
response.raise_for_status()
|
| 347 |
-
questions_data = response.json()
|
| 348 |
-
if not questions_data or not isinstance(questions_data, list):
|
| 349 |
-
print(f"Fetched questions list is empty or invalid format: {questions_data}")
|
| 350 |
-
return "Fetched questions list is empty or invalid format.", None
|
| 351 |
-
num_questions = len(questions_data)
|
| 352 |
-
print(f"Fetched {num_questions} questions.")
|
| 353 |
-
except Exception as e: return f"Error fetching questions ({type(e).__name__}): {e}", None
|
| 354 |
-
|
| 355 |
-
# 3. Run your Smol CodeAgent
|
| 356 |
-
start_time = datetime.now()
|
| 357 |
-
results_log = []
|
| 358 |
-
answers_payload = []
|
| 359 |
-
print(f"Running smol CodeAgent on {num_questions} questions using HfApiModel...")
|
| 360 |
-
|
| 361 |
-
for i, item in enumerate(progress.tqdm(questions_data, desc="Processing Questions")):
|
| 362 |
task_id = item.get("task_id")
|
| 363 |
-
|
| 364 |
-
if not task_id or
|
| 365 |
-
print(f"Skipping item {i+1} with missing task_id or question: {item}")
|
| 366 |
continue
|
| 367 |
-
|
| 368 |
-
# --- Prompt für smolagents (unverändert) ---
|
| 369 |
-
agent_prompt = f"""
|
| 370 |
-
You are an expert AI assistant solving a challenge question based on the GAIA benchmark.
|
| 371 |
-
Your task is to answer the following question accurately and concisely.
|
| 372 |
-
Use the available tools ONLY when necessary to find information or access required files.
|
| 373 |
-
Think step-by-step before deciding on an action or the final answer.
|
| 374 |
-
|
| 375 |
-
**Available Tools:** (These are the functions you can call)
|
| 376 |
-
* `search_web(query: str, max_results: int = 3)`: Searches the web for information.
|
| 377 |
-
* `download_task_file(task_id: str)`: Downloads the specific file FOR THIS TASK ONLY. Use the task_id '{task_id}'. Returns the local file path needed for 'read_file_content'.
|
| 378 |
-
* `read_file_content(file_path: str)`: Reads text from a file previously downloaded with 'download_task_file'. Requires the exact file path returned by that tool.
|
| 379 |
-
|
| 380 |
-
**Current Task:**
|
| 381 |
-
* Task ID: {task_id}
|
| 382 |
-
* Question: {question_text}
|
| 383 |
-
|
| 384 |
-
**Instructions & Output Format:**
|
| 385 |
-
1. Carefully analyze the question.
|
| 386 |
-
2. Think step-by-step. Outline your plan if needed.
|
| 387 |
-
3. Execute tools sequentially if information depends on previous steps (e.g., download then read).
|
| 388 |
-
4. Review the gathered information and your reasoning.
|
| 389 |
-
5. **Crucially**: Provide ONLY the final answer. Do not include your reasoning, steps, tool calls, introductions (like "The answer is..."), or any other conversational text in the final output. The answer must be exact and stand-alone. Format it as requested by the question (e.g., just a number, a comma-separated list 'apple,banana,orange', etc.).
|
| 390 |
-
|
| 391 |
-
Let's begin the thinking process for Task {task_id}.
|
| 392 |
-
"""
|
| 393 |
-
submitted_answer = f"Error: Agent failed for task {task_id}"
|
| 394 |
try:
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
if agent_response:
|
| 399 |
-
cleaned_response = re.sub(r"^(Final Answer:|Answer:|The answer is:|Here is the answer:)\s*", "", agent_response.strip(), flags=re.IGNORECASE)
|
| 400 |
-
cleaned_response = cleaned_response.strip('"').strip("'")
|
| 401 |
-
submitted_answer = cleaned_response if cleaned_response else "Error: Agent returned empty response after cleaning."
|
| 402 |
-
else:
|
| 403 |
-
submitted_answer = "Error: Agent returned an empty or None response."
|
| 404 |
-
# Kurze Pause nach jedem Agentenlauf, um Rate Limits etc. zu vermeiden (optional)
|
| 405 |
-
# time.sleep(0.5)
|
| 406 |
-
|
| 407 |
except Exception as e:
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
finally:
|
| 415 |
-
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
| 416 |
-
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
|
| 417 |
-
# Logge die konkrete Antwort, die hinzugefügt wird
|
| 418 |
-
print(f"Task {task_id} logged. Answer added: '{submitted_answer[:100]}...'")
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
end_time = datetime.now(); duration = end_time - start_time
|
| 422 |
-
print(f"Agent processing finished in {duration}.")
|
| 423 |
-
progress(0.9, desc="Submitting answers...")
|
| 424 |
-
|
| 425 |
-
# 4. Prepare Submission (unverändert)
|
| 426 |
-
if not answers_payload:
|
| 427 |
-
print("Agent did not produce any answers to submit."); cleanup_temp_files()
|
| 428 |
-
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
| 429 |
-
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
| 430 |
-
print(f"Submitting {len(answers_payload)} answers for user '{username}'...")
|
| 431 |
-
# Debug: Zeige die ersten paar Antworten vor dem Senden
|
| 432 |
-
print(f"Sample answers payload: {answers_payload[:2]}")
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
# 5. Submit (unverändert)
|
| 436 |
-
final_status = "Submission attempt finished."
|
| 437 |
-
results_df = pd.DataFrame(results_log)
|
| 438 |
try:
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
error_detail = f"Server responded with status {e.response.status_code}."
|
| 448 |
-
try:
|
| 449 |
-
error_json = e.response.json(); api_error = error_json.get('detail', e.response.text)
|
| 450 |
-
# Verbesserte Fehleranzeige für Validierungsfehler
|
| 451 |
-
if isinstance(api_error, list) and api_error and isinstance(api_error[0], dict):
|
| 452 |
-
error_msgs = [f"{err.get('loc', ['unknown'])[-1]}: {err.get('msg', '')}" for err in api_error]
|
| 453 |
-
error_detail += f" Details: {'; '.join(error_msgs)}"
|
| 454 |
-
elif isinstance(api_error, str):
|
| 455 |
-
error_detail += f" Detail: {api_error[:500]}" # Begrenze Länge
|
| 456 |
-
else:
|
| 457 |
-
error_detail += f" Detail: {str(api_error)[:500]}"
|
| 458 |
-
except requests.exceptions.JSONDecodeError:
|
| 459 |
-
error_detail += f" Raw Response: {e.response.text[:500]}" # Begrenze Länge
|
| 460 |
-
final_status = f"Submission Failed: {error_detail}"; print(final_status)
|
| 461 |
-
except requests.exceptions.Timeout: final_status = "Submission Failed: The request timed out after 180 seconds."; print(final_status)
|
| 462 |
-
except requests.exceptions.RequestException as e: final_status = f"Submission Failed: Network error - {e}"; print(final_status)
|
| 463 |
-
except Exception as e: final_status = f"Submission Failed: Unexpected error during submission ({type(e).__name__}): {e}"; print(final_status)
|
| 464 |
-
finally: cleanup_temp_files()
|
| 465 |
-
|
| 466 |
-
progress(1, desc="Done.")
|
| 467 |
-
return final_status, results_df
|
| 468 |
-
|
| 469 |
|
| 470 |
-
# --- Gradio Interface
|
| 471 |
with gr.Blocks() as demo:
|
| 472 |
-
gr.Markdown("# Smol CodeAgent Evaluation Runner
|
| 473 |
-
gr.Markdown(
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
2. Verify `requirements.txt` includes `smolagents[huggingface]`, etc.
|
| 477 |
-
3. Agent uses `CodeAgent` with `HfApiModel`. Target Model (via env var or default): **{HF_MODEL_ID}**.
|
| 478 |
-
4. Log in below.
|
| 479 |
-
5. Click 'Run Evaluation & Submit'. Expect a potentially long runtime.
|
| 480 |
-
---
|
| 481 |
-
**Agent Details:** Uses `smolagents.CodeAgent`. Search: {'Tavily' if USE_TAVILY else 'DuckDuckGo' if USE_DUCKDUCKGO else 'Disabled'}.
|
| 482 |
-
""")
|
| 483 |
-
with gr.Row(): login_button = gr.LoginButton()
|
| 484 |
run_button = gr.Button("Run Evaluation & Submit All Answers")
|
|
|
|
|
|
|
| 485 |
|
| 486 |
-
status_output = gr.Textbox(label="Run Status / Submission Result", lines=10, interactive=False)
|
| 487 |
-
results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True) # Ohne 'height'
|
| 488 |
-
|
| 489 |
-
# KORREKTUR: run_and_submit_all direkt aufrufen
|
| 490 |
-
# inputs=[] damit Gradio den request Parameter injiziert.
|
| 491 |
run_button.click(
|
| 492 |
fn=run_and_submit_all,
|
| 493 |
-
inputs=[],
|
| 494 |
outputs=[status_output, results_table],
|
| 495 |
api_name="run_evaluation_smol_codeagent"
|
| 496 |
)
|
| 497 |
|
| 498 |
-
# --- App Start (unverändert) ---
|
| 499 |
if __name__ == "__main__":
|
| 500 |
-
|
| 501 |
-
# (Rest des Startblocks unverändert)
|
| 502 |
-
space_host_startup = os.getenv("SPACE_HOST")
|
| 503 |
-
space_id_startup = os.getenv("SPACE_ID")
|
| 504 |
-
if space_host_startup: print(f"✅ SPACE_HOST found: {space_host_startup} -> Runtime URL: https://{space_host_startup}.hf.space")
|
| 505 |
-
else: print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
|
| 506 |
-
if space_id_startup: print(f"✅ SPACE_ID found: {space_id_startup} -> Repo: https://huggingface.co/spaces/{space_id_startup}")
|
| 507 |
-
else: print("ℹ️ SPACE_ID environment variable not found (running locally?).")
|
| 508 |
-
print(f" Using Smol CodeAgent with HfApiModel.")
|
| 509 |
-
print(f" Target HF Model (via env var or default): {HF_MODEL_ID}")
|
| 510 |
-
search_tool_status = 'Disabled';
|
| 511 |
-
if USE_TAVILY: search_tool_status = 'Tavily'
|
| 512 |
-
elif USE_DUCKDUCKGO: search_tool_status = 'DuckDuckGo'
|
| 513 |
-
# Check search client status based on initialization logic
|
| 514 |
-
if search_client is None and (USE_TAVILY or USE_DUCKDUCKGO): search_tool_status += " (Initialization Pending)"
|
| 515 |
-
elif search_client is False: search_tool_status += " (Failed to Initialize / Disabled)"
|
| 516 |
-
|
| 517 |
-
print(f" Search Tool: {search_tool_status}")
|
| 518 |
-
print(f" PDF Reading: {'Enabled' if PDF_READER_AVAILABLE else 'Disabled (PyPDF2 missing)'}")
|
| 519 |
-
print("-"*(60 + len(" App Starting (Smol CodeAgent Version - Request Fix) ")) + "\n")
|
| 520 |
-
print("Launching Gradio Interface for Smol CodeAgent Evaluation...")
|
| 521 |
-
demo.queue().launch(debug=False, share=False) # queue() ist wichtig
|
|
|
|
| 7 |
import time
|
| 8 |
import tempfile
|
| 9 |
import atexit
|
| 10 |
+
import sys # Für sys.exit bei Importfehlern
|
| 11 |
|
| 12 |
# --- Smol Agents und HF Imports (angepasst an Beispiel) ---
|
| 13 |
try:
|
|
|
|
| 14 |
from smolagents import CodeAgent, tool, HfApiModel
|
| 15 |
print("Successfully imported CodeAgent, tool, HfApiModel from 'smolagents'")
|
| 16 |
except ImportError as e:
|
|
|
|
| 18 |
print("Please ensure 'smolagents[huggingface]' is listed correctly in requirements.txt")
|
| 19 |
sys.exit(f"Fatal Error: Could not import smolagents components. Check requirements.txt and rebuild/restart the Space. Original error: {e}")
|
| 20 |
|
|
|
|
| 21 |
from huggingface_hub import HfApi
|
| 22 |
|
| 23 |
+
# --- Suchtool Imports ---
|
| 24 |
+
USE_TAVILY = False
|
| 25 |
+
USE_DUCKDUCKGO = True
|
|
|
|
| 26 |
if USE_TAVILY:
|
| 27 |
try:
|
| 28 |
from tavily import TavilyClient
|
| 29 |
except ImportError:
|
| 30 |
print("WARNUNG: TavilyClient nicht installiert.")
|
| 31 |
USE_TAVILY = False
|
| 32 |
+
USE_DUCKDUCKGO = True
|
| 33 |
if USE_DUCKDUCKGO:
|
| 34 |
try:
|
| 35 |
from duckduckgo_search import DDGS
|
| 36 |
except ImportError:
|
| 37 |
+
print("WARNUNG: duckduckgo-search nicht installiert.")
|
| 38 |
+
USE_DUCKDUCKGO = False
|
| 39 |
|
| 40 |
+
# --- PDF Reader Import ---
|
| 41 |
try:
|
| 42 |
import PyPDF2
|
| 43 |
PDF_READER_AVAILABLE = True
|
|
|
|
| 47 |
|
| 48 |
# --- Konstanten ---
|
| 49 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
|
|
|
|
|
|
| 50 |
HF_MODEL_ID = os.getenv("HF_MODEL_ID", "meta-llama/Meta-Llama-3-8B-Instruct")
|
| 51 |
|
| 52 |
# --- Globale Variablen ---
|
|
|
|
| 53 |
search_client = None
|
|
|
|
| 54 |
agent_instance = None
|
| 55 |
|
|
|
|
| 56 |
temp_files_to_clean = set()
|
| 57 |
+
|
| 58 |
def cleanup_temp_files():
    """Delete every temporary file tracked in ``temp_files_to_clean``.

    Each tracked path is removed from disk and then dropped from the
    tracking set. A path whose file is already gone is dropped as well, so
    the set does not accumulate stale entries. A path that cannot be removed
    (for example because of a permission problem) is reported and stays
    tracked, which lets a later call try again.
    """
    print("Cleaning up temporary files...")
    # Iterate over a snapshot because the set is mutated inside the loop.
    for file_path in list(temp_files_to_clean):
        try:
            # Attempt the removal directly instead of checking for existence
            # first; that avoids a race between the check and the delete.
            os.remove(file_path)
        except FileNotFoundError:
            # Already gone: nothing to delete, only the bookkeeping remains.
            pass
        except OSError as e:
            print(f"Error removing temporary file {file_path}: {e}")
            continue
        else:
            print(f"Removed temporary file: {file_path}")
        temp_files_to_clean.discard(file_path)
|
|
|
|
|
|
|
|
|
|
| 68 |
|
| 69 |
+
atexit.register(cleanup_temp_files)
|
|
|
|
| 70 |
|
| 71 |
+
# --- Tool Definitionen ---
|
| 72 |
@tool
|
| 73 |
def search_web(query: str, max_results: int = 3) -> str:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
print(f"Tool: search_web(query='{query}', max_results={max_results})")
|
| 75 |
if not search_client:
|
| 76 |
+
return "Search tool is not available/configured."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
try:
|
| 78 |
if USE_TAVILY and isinstance(search_client, TavilyClient):
|
| 79 |
response = search_client.search(query=query, search_depth="basic", max_results=max_results)
|
| 80 |
+
context = response.get('results', [])
|
| 81 |
+
if not context:
|
| 82 |
+
return "No search results found."
|
| 83 |
+
return "\n".join(
|
| 84 |
+
[f"URL: {c['url']}\nContent: {c['content'][:500]}..." for c in context]
|
| 85 |
+
)
|
| 86 |
elif USE_DUCKDUCKGO and isinstance(search_client, DDGS):
|
| 87 |
+
results = search_client.text(query, max_results=max_results)
|
| 88 |
+
if not results:
|
| 89 |
+
return "No search results found."
|
| 90 |
+
return "\n".join(
|
| 91 |
+
[f"Title: {r['title']}\nURL: {r['href']}\nSnippet: {r['body'][:500]}..." for r in results]
|
| 92 |
+
)
|
| 93 |
else:
|
| 94 |
return "No compatible search client configured or available."
|
| 95 |
except Exception as e:
|
|
|
|
| 98 |
|
| 99 |
@tool
|
| 100 |
def download_task_file(task_id: str) -> str:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
print(f"Tool: download_task_file(task_id='{task_id}')")
|
| 102 |
file_url = f"{DEFAULT_API_URL}/files/{task_id}"
|
| 103 |
try:
|
|
|
|
| 109 |
elif 'png' in content_type: suffix = ".png"
|
| 110 |
elif 'jpeg' in content_type or 'jpg' in content_type: suffix = ".jpg"
|
| 111 |
elif 'csv' in content_type: suffix = ".csv"
|
|
|
|
| 112 |
temp_dir = tempfile.gettempdir()
|
| 113 |
+
safe_id = re.sub(r'[^\w\-]+', '_', task_id)
|
| 114 |
timestamp = datetime.now().strftime("%Y%m%d%H%M%S%f")
|
| 115 |
+
path = os.path.join(temp_dir, f"gaia_task_{safe_id}_{timestamp}{suffix}")
|
| 116 |
+
with open(path, 'wb') as f:
|
| 117 |
+
for chunk in response.iter_content(8192):
|
| 118 |
f.write(chunk)
|
| 119 |
+
temp_files_to_clean.add(path)
|
| 120 |
+
return path
|
|
|
|
| 121 |
except requests.exceptions.HTTPError as e:
|
| 122 |
if e.response.status_code == 404:
|
|
|
|
| 123 |
return "Error: No file found for this task ID."
|
| 124 |
+
return f"Error: Failed to download file (HTTP {e.response.status_code})."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
except Exception as e:
|
|
|
|
| 126 |
return f"Error: Unexpected error during file download: {e}"
|
| 127 |
|
| 128 |
@tool
|
| 129 |
def read_file_content(file_path: str) -> str:
    """
    Reads the text content of a previously downloaded file (PDF or plain text).
    Use this tool AFTER 'download_task_file' has successfully returned a file path (not an error message).

    Args:
        file_path (str): The absolute local path to the file, as returned by 'download_task_file'. It must lie inside the system temporary directory.

    Returns:
        str: The extracted text content (truncated if very long), or an error message starting with 'Error:'.
    """
    # NOTE: this function is registered with @tool; the decorator is expected
    # to derive the tool description from the docstring above, so keep the
    # Args entry in sync with the signature.
    print(f"Tool: read_file_content(file_path='{file_path}')")

    max_chars = 7000  # upper bound on the text handed back to the agent

    if not isinstance(file_path, str) or not os.path.isabs(file_path):
        return "Error: Invalid file path provided."

    # Resolve symlinks and '..' segments before comparing, so that paths such
    # as '<tmp>/../etc/passwd' or '<tmp>_other/x' cannot pass a prefix check.
    temp_root = os.path.realpath(tempfile.gettempdir())
    resolved_path = os.path.realpath(file_path)
    try:
        inside_temp = os.path.commonpath([temp_root, resolved_path]) == temp_root
    except ValueError:
        # Raised e.g. when the two paths live on different drives.
        inside_temp = False
    if not inside_temp:
        return "Error: Invalid file path provided."

    if not os.path.exists(resolved_path):
        return f"Error: File not found '{file_path}'."

    file_name = os.path.basename(file_path)
    try:
        if file_path.lower().endswith('.pdf'):
            if not PDF_READER_AVAILABLE:
                return "Error: PyPDF2 not installed."
            text = ""
            with open(resolved_path, 'rb') as f:
                reader = PyPDF2.PdfReader(f)
                for page in reader.pages:
                    # extract_text() may yield nothing for image-only pages.
                    text += page.extract_text() or ''
                    if len(text) > max_chars:
                        text = text[:max_chars] + "\n... (truncated)"
                        break
            return f"Content of '{file_name}':\n{text}"

        with open(resolved_path, 'r', encoding='utf-8', errors='ignore') as f:
            # Read one character more than the limit to detect truncation.
            content = f.read(max_chars + 1)
        if len(content) > max_chars:
            content = content[:max_chars] + "\n... (truncated)"
        return f"Content of '{file_name}':\n{content}"
    except Exception as e:
        # Tool boundary: report the failure to the agent instead of raising.
        return f"Error: Failed to read file: {e}"
|
|
|
|
| 154 |
|
| 155 |
+
# --- Agent Initialisierung ---
|
|
|
|
| 156 |
def initialize_agent():
    """Create the search client (once) and build a fresh CodeAgent.

    The search backend is selected by the module flags USE_TAVILY and
    USE_DUCKDUCKGO. When no backend can be created, ``search_client`` is set
    to ``False`` and the ``search_web`` tool is left out of the agent's tools.
    The result is stored in the module globals ``search_client`` and
    ``agent_instance``.

    Raises:
        ValueError: If the HUGGINGFACE_TOKEN environment variable is not set.
    """
    global search_client, agent_instance

    # None means "not tried yet"; False means "tried and unavailable".
    if search_client is None:
        if USE_TAVILY:
            key = os.getenv("TAVILY_API_KEY")
            if key:
                try:
                    search_client = TavilyClient(api_key=key)
                except Exception as e:
                    # Best effort: the agent still works without web search.
                    print(f"Warning: could not create Tavily client: {e}")
                    search_client = False
            else:
                search_client = False
        elif USE_DUCKDUCKGO:
            try:
                search_client = DDGS()
            except Exception as e:
                print(f"Warning: could not create DuckDuckGo client: {e}")
                search_client = False
        else:
            search_client = False

    token = os.getenv("HUGGINGFACE_TOKEN")
    if not token:
        raise ValueError("HUGGINGFACE_TOKEN Secret nicht gefunden!")

    # Pass the validated token explicitly; otherwise the secret checked above
    # is never actually handed to the model client.
    hf_model = HfApiModel(token=token)

    tools = [search_web, download_task_file, read_file_content]
    if search_client is False:
        tools = [t for t in tools if t != search_web]
    agent_instance = CodeAgent(tools=tools, model=hf_model)
|
| 179 |
|
| 180 |
+
# --- Hauptfunktion run_and_submit_all ---
|
| 181 |
+
def run_and_submit_all(profile: gr.OAuthProfile | None, progress=gr.Progress(track_tqdm=True)):
    """Run the agent on every question from the scoring API and submit the answers.

    Args:
        profile: Logged-in Hugging Face user. Gradio injects this value based
            on the ``gr.OAuthProfile`` annotation; it is ``None`` when nobody
            is logged in. It must not be listed in the event's ``inputs``.
        progress: Gradio progress tracker used to show per-question progress.

    Returns:
        A ``(status, results)`` tuple: a status message and a DataFrame with
        one row per answered question, or ``None`` when the run stopped before
        any question was processed.
    """
    if not profile:
        return "Bitte zuerst mit Hugging Face einloggen.", None
    username = profile.username if hasattr(profile, 'username') else profile['username']
    api_url = DEFAULT_API_URL

    try:
        initialize_agent()
    except Exception as e:
        return f"Fehler bei der Agent-Initialisierung: {e}", None

    try:
        response = requests.get(f"{api_url}/questions", timeout=30)
        response.raise_for_status()
        questions = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a response body that is not valid JSON.
        return f"Fehler beim Abrufen der Fragen: {e}", None
    if not questions:
        return "Keine Fragen vom Server erhalten.", None

    answers_log = []
    payload = []
    try:
        for item in progress.tqdm(questions, desc="Bearbeite Fragen"):
            task_id = item.get("task_id")
            question = item.get("question")
            if not task_id or question is None:
                continue
            prompt = f"... Task {task_id}: {question}"
            try:
                # Pass the task positionally: the agent's run() parameter is
                # named "task", so a "prompt=" keyword raises TypeError.
                resp = agent_instance.run(prompt)
                # The agent may return non-string values (e.g. numbers).
                raw_answer = "" if resp is None else str(resp)
                ans = re.sub(r"^(Answer:|Final Answer:)", "", raw_answer).strip()
            except Exception as e:
                # Keep going: one failing question must not abort the run.
                ans = f"ERROR: {e}"
            answers_log.append({"Task ID": task_id, "Question": question, "Submitted Answer": ans})
            payload.append({"task_id": task_id, "submitted_answer": ans})

        df = pd.DataFrame(answers_log)
        submission = {"username": username, "agent_code": "...", "answers": payload}
        try:
            r = requests.post(f"{api_url}/submit", json=submission, timeout=180)
            r.raise_for_status()
            res = r.json()
            status = f"Erfolg! Score: {res.get('score', 0):.2f}%"
        except Exception as e:
            status = f"Fehler bei der Submission: {e}"
    finally:
        # Remove downloaded task files even if the run fails unexpectedly.
        cleanup_temp_files()
    return status, df


# --- Gradio Interface ---
with gr.Blocks() as demo:
    gr.Markdown("# Smol CodeAgent Evaluation Runner")
    gr.Markdown("Bitte einloggen und dann auf Ausführen klicken.")
    with gr.Row():
        login_button = gr.LoginButton()
        run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Status", lines=5)
    results_table = gr.DataFrame(label="Ergebnisse")

    # No `inputs`: Gradio injects the OAuth profile through the type
    # annotation on run_and_submit_all. Passing the login button as an input
    # would hand over the button's label instead of the user profile.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table],
        api_name="run_evaluation_smol_codeagent"
    )

if __name__ == "__main__":
    demo.queue().launch(debug=False, share=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|