Update app.py
Browse files
app.py
CHANGED
|
@@ -8,10 +8,19 @@ import time
|
|
| 8 |
import tempfile # Für temporäre Dateien
|
| 9 |
import atexit # Zum Aufräumen beim Beenden
|
| 10 |
|
| 11 |
-
# --- Smol
|
| 12 |
-
|
| 13 |
-
from
|
| 14 |
-
from
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
from huggingface_hub import HfApi, InferenceClient
|
| 16 |
|
| 17 |
# --- Suchtool Imports (wähle eins) ---
|
|
@@ -43,9 +52,7 @@ except ImportError:
|
|
| 43 |
# --- Konstanten ---
|
| 44 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 45 |
# Wähle ein Instruction-Following Modell von Hugging Face Hub
|
| 46 |
-
|
| 47 |
-
# Stelle sicher, dass das Modell über die kostenlose Inference API verfügbar ist oder du Inference Endpoints verwendest.
|
| 48 |
-
HF_MODEL_ID = os.getenv("HF_MODEL_ID", "meta-llama/Meta-Llama-3-8B-Instruct") # Standardmodell, kann über Env Var überschrieben werden
|
| 49 |
|
| 50 |
# --- Globale Variablen für Clients (werden in initialize_agent gesetzt) ---
|
| 51 |
hf_token = None
|
|
@@ -57,20 +64,23 @@ temp_files_to_clean = set()
|
|
| 57 |
|
| 58 |
def cleanup_temp_files():
|
| 59 |
print("Cleaning up temporary files...")
|
| 60 |
-
for file_path in list(temp_files_to_clean):
|
| 61 |
try:
|
| 62 |
if os.path.exists(file_path):
|
| 63 |
os.remove(file_path)
|
| 64 |
print(f"Removed temporary file: {file_path}")
|
| 65 |
-
|
|
|
|
| 66 |
except OSError as e:
|
| 67 |
print(f"Error removing temporary file {file_path}: {e}")
|
|
|
|
|
|
|
| 68 |
|
| 69 |
# Registriere die Cleanup-Funktion für das Beenden des Skripts
|
| 70 |
atexit.register(cleanup_temp_files)
|
| 71 |
|
| 72 |
|
| 73 |
-
# --- Tool Definitionen für
|
| 74 |
|
| 75 |
@tool
|
| 76 |
def search_web(query: str, max_results: int = 3) -> str:
|
|
@@ -97,9 +107,10 @@ def search_web(query: str, max_results: int = 3) -> str:
|
|
| 97 |
if not results: return "No search results found."
|
| 98 |
return "\n".join([f"Title: {r['title']}\nURL: {r['href']}\nSnippet: {r['body'][:500]}..." for r in results])
|
| 99 |
else:
|
| 100 |
-
|
|
|
|
| 101 |
except Exception as e:
|
| 102 |
-
print(f"Search API Error: {e}")
|
| 103 |
return f"Error during search: {e}"
|
| 104 |
|
| 105 |
@tool
|
|
@@ -110,179 +121,231 @@ def download_task_file(task_id: str) -> str:
|
|
| 110 |
Args:
|
| 111 |
task_id (str): The unique identifier for the task whose file needs to be downloaded.
|
| 112 |
Returns:
|
| 113 |
-
str: The local path to the downloaded file (e.g., '/tmp/
|
| 114 |
-
otherwise an error message
|
| 115 |
"""
|
| 116 |
print(f"Tool: download_task_file(task_id='{task_id}')")
|
| 117 |
file_url = f"{DEFAULT_API_URL}/files/{task_id}"
|
| 118 |
|
| 119 |
try:
|
| 120 |
-
response = requests.get(file_url, stream=True, timeout=30)
|
| 121 |
-
response.raise_for_status()
|
| 122 |
|
| 123 |
-
# Bestimme Dateiendung aus Content-Type
|
| 124 |
content_type = response.headers.get('content-type', '').lower()
|
| 125 |
-
suffix = ".tmp"
|
| 126 |
-
if 'pdf' in content_type:
|
| 127 |
-
|
| 128 |
-
elif '
|
| 129 |
-
|
| 130 |
-
elif '
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
suffix = ".csv"
|
| 134 |
-
elif 'plain' in content_type or 'text' in content_type:
|
| 135 |
-
suffix = ".txt"
|
| 136 |
-
|
| 137 |
-
# Erstelle eine sichere temporäre Datei
|
| 138 |
temp_dir = tempfile.gettempdir()
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
|
|
|
|
|
|
| 142 |
|
| 143 |
with open(temp_file_path, 'wb') as f:
|
| 144 |
for chunk in response.iter_content(chunk_size=8192):
|
| 145 |
f.write(chunk)
|
| 146 |
|
| 147 |
print(f"File downloaded successfully to {temp_file_path}")
|
| 148 |
-
temp_files_to_clean.add(temp_file_path)
|
| 149 |
-
return temp_file_path # Gib
|
| 150 |
|
| 151 |
except requests.exceptions.HTTPError as e:
|
| 152 |
if e.response.status_code == 404:
|
| 153 |
print(f"No file found on server for task_id {task_id}.")
|
| 154 |
-
return "Error: No file found for this task ID."
|
| 155 |
else:
|
| 156 |
print(f"HTTP Error downloading file for task {task_id}: {e}")
|
| 157 |
-
return f"Error: Failed to download file (HTTP {e.response.status_code})."
|
| 158 |
except requests.exceptions.RequestException as e:
|
| 159 |
print(f"Network Error downloading file for task {task_id}: {e}")
|
| 160 |
-
return f"Error: Failed to download file due to network issue: {e}"
|
| 161 |
except Exception as e:
|
| 162 |
-
print(f"Unexpected error downloading file for task {task_id}: {e}")
|
| 163 |
-
return f"Error: Unexpected error during file download: {e}"
|
| 164 |
|
| 165 |
@tool
|
| 166 |
def read_file_content(file_path: str) -> str:
|
| 167 |
"""
|
| 168 |
Reads the text content of a previously downloaded file (PDF or plain text).
|
| 169 |
-
Use this tool AFTER 'download_task_file' has successfully returned a file path.
|
| 170 |
Args:
|
| 171 |
file_path (str): The local path to the file (must be a path returned by 'download_task_file').
|
| 172 |
Returns:
|
| 173 |
-
str: The extracted text content (truncated if very long), or an error message.
|
| 174 |
"""
|
| 175 |
print(f"Tool: read_file_content(file_path='{file_path}')")
|
| 176 |
|
| 177 |
-
#
|
| 178 |
-
if not file_path or not
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
print(f"Security Alert: Attempted to read file outside temp directory: {file_path}")
|
| 180 |
return "Error: Invalid file path provided. Only downloaded files can be read."
|
| 181 |
|
| 182 |
if not os.path.exists(file_path):
|
| 183 |
print(f"Error: File not found at path: {file_path}")
|
| 184 |
-
return "Error: File not found at the specified path."
|
| 185 |
|
| 186 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
if file_path.lower().endswith(".pdf"):
|
| 188 |
if not PDF_READER_AVAILABLE:
|
| 189 |
return "Error: Cannot read PDF file because PyPDF2 library is not installed."
|
| 190 |
text = ""
|
| 191 |
with open(file_path, 'rb') as f:
|
| 192 |
reader = PyPDF2.PdfReader(f)
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
text = text[:7000] + "\n... (content truncated)"
|
|
|
|
| 198 |
break
|
|
|
|
|
|
|
|
|
|
| 199 |
print(f"Successfully read ~{len(text)} chars from PDF: {os.path.basename(file_path)}")
|
| 200 |
return f"Content of '{os.path.basename(file_path)}':\n{text}"
|
| 201 |
|
| 202 |
-
elif file_path.lower().endswith((".png", ".jpg", ".jpeg")):
|
| 203 |
print(f"File is an image: {os.path.basename(file_path)}. Reading image content not implemented.")
|
| 204 |
-
|
| 205 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
|
| 207 |
-
else: # Versuche als Text zu lesen
|
| 208 |
-
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
|
| 209 |
-
content = f.read(7000) # Begrenze auf 7000 Zeichen
|
| 210 |
-
if len(content) == 7000:
|
| 211 |
-
content += "\n... (content truncated)"
|
| 212 |
-
print(f"Successfully read ~{len(content)} chars from text file: {os.path.basename(file_path)}")
|
| 213 |
-
return f"Content of '{os.path.basename(file_path)}':\n{content}"
|
| 214 |
|
| 215 |
except Exception as e:
|
| 216 |
-
print(f"Error reading file {file_path}: {e}")
|
| 217 |
return f"Error: Failed to read file content: {e}"
|
| 218 |
|
| 219 |
|
| 220 |
# --- Agent Initialisierung ---
|
| 221 |
def initialize_agent():
|
| 222 |
-
"""Initialisiert den
|
| 223 |
global hf_token, search_client, agent_instance
|
| 224 |
print("Initializing agent and clients...")
|
| 225 |
|
| 226 |
-
|
| 227 |
if not hf_token:
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
if
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
else:
|
| 237 |
-
print("WARNUNG: TAVILY_API_KEY nicht gefunden, obwohl USE_TAVILY=True.")
|
| 238 |
-
# Fallback auf DuckDuckGo wenn möglich
|
| 239 |
-
if USE_DUCKDUCKGO:
|
| 240 |
try:
|
| 241 |
-
search_client =
|
| 242 |
-
print("
|
| 243 |
except NameError:
|
| 244 |
-
|
| 245 |
-
|
| 246 |
else:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 247 |
search_client = None
|
| 248 |
-
print("WARNUNG: Suche deaktiviert.")
|
| 249 |
-
|
| 250 |
-
try:
|
| 251 |
-
search_client = DDGS()
|
| 252 |
-
print("Using DuckDuckGo for search.")
|
| 253 |
-
except NameError:
|
| 254 |
search_client = None
|
| 255 |
-
print("
|
| 256 |
-
else:
|
| 257 |
-
search_client = None
|
| 258 |
-
print("Web search is disabled.")
|
| 259 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 260 |
|
| 261 |
# --- LLM Client (Hugging Face Inference API) ---
|
| 262 |
llm = InferenceAPI(
|
| 263 |
model_id=HF_MODEL_ID,
|
| 264 |
token=hf_token,
|
| 265 |
-
max_new_tokens=1500, #
|
| 266 |
-
temperature=0.1,
|
| 267 |
-
#
|
|
|
|
| 268 |
)
|
| 269 |
print(f"LLM configured with model: {HF_MODEL_ID}")
|
| 270 |
|
| 271 |
# --- Agent Instanz ---
|
| 272 |
available_tools = [search_web, download_task_file, read_file_content]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 273 |
agent_instance = Agent(
|
| 274 |
llm=llm,
|
| 275 |
-
|
| 276 |
-
# system_prompt=...
|
| 277 |
)
|
| 278 |
-
print(f"Smol Agent initialized with {len(
|
|
|
|
|
|
|
| 279 |
|
| 280 |
|
| 281 |
# --- Angepasste Hauptfunktion ---
|
| 282 |
-
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
| 283 |
"""
|
| 284 |
-
Fetches all questions, runs the
|
| 285 |
-
and displays the results.
|
| 286 |
"""
|
| 287 |
space_id = os.getenv("SPACE_ID")
|
| 288 |
|
|
@@ -291,28 +354,31 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 291 |
print(f"User logged in: {username}")
|
| 292 |
else:
|
| 293 |
print("User not logged in.")
|
|
|
|
| 294 |
return "Please Login to Hugging Face with the button.", None
|
| 295 |
|
| 296 |
api_url = DEFAULT_API_URL
|
| 297 |
questions_url = f"{api_url}/questions"
|
| 298 |
submit_url = f"{api_url}/submit"
|
| 299 |
|
| 300 |
-
# 1. Initialisiere Agent und Clients
|
|
|
|
| 301 |
try:
|
| 302 |
initialize_agent()
|
| 303 |
-
if not agent_instance:
|
| 304 |
raise RuntimeError("Agent instance could not be initialized.")
|
| 305 |
except ValueError as e:
|
| 306 |
print(f"Error during initialization: {e}")
|
| 307 |
return f"Configuration Error: {e}", None
|
| 308 |
except Exception as e:
|
| 309 |
-
print(f"Error initializing agent/clients: {e}")
|
| 310 |
return f"Error initializing agent: {e}", None
|
| 311 |
|
| 312 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Local Run - Code Link N/A"
|
| 313 |
print(f"Agent Code Link: {agent_code}")
|
| 314 |
|
| 315 |
-
# 2. Fetch Questions
|
|
|
|
| 316 |
print(f"Fetching questions from: {questions_url}")
|
| 317 |
try:
|
| 318 |
response = requests.get(questions_url, timeout=30)
|
|
@@ -321,109 +387,106 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 321 |
if not questions_data or not isinstance(questions_data, list):
|
| 322 |
print(f"Fetched questions list is empty or invalid format: {questions_data}")
|
| 323 |
return "Fetched questions list is empty or invalid format.", None
|
| 324 |
-
|
|
|
|
| 325 |
except Exception as e:
|
| 326 |
-
# Detailiertere Fehlermeldung
|
| 327 |
print(f"Error fetching questions ({type(e).__name__}): {e}")
|
| 328 |
return f"Error fetching questions: {e}", None
|
| 329 |
|
| 330 |
|
| 331 |
-
# 3. Run your Smol Agent
|
| 332 |
start_time = datetime.now()
|
| 333 |
results_log = []
|
| 334 |
answers_payload = []
|
| 335 |
-
print(f"Running
|
| 336 |
-
status_updates = []
|
| 337 |
|
| 338 |
-
|
|
|
|
|
|
|
| 339 |
task_id = item.get("task_id")
|
| 340 |
question_text = item.get("question")
|
| 341 |
|
| 342 |
if not task_id or question_text is None:
|
| 343 |
-
print(f"Skipping item with missing task_id or question: {item}")
|
| 344 |
continue
|
| 345 |
|
| 346 |
-
|
| 347 |
-
print(current_status)
|
| 348 |
-
status_updates.append(current_status)
|
| 349 |
-
|
| 350 |
-
# --- Prompt für smol-agent ---
|
| 351 |
-
# Wichtig: Klare Anweisung für das Endformat geben!
|
| 352 |
-
# Gib dem Agenten den Task-ID Kontext mit!
|
| 353 |
agent_prompt = f"""
|
| 354 |
-
You are an expert AI assistant solving a challenge question.
|
| 355 |
Your task is to answer the following question accurately and concisely.
|
| 356 |
Use the available tools ONLY when necessary to find information or access required files.
|
|
|
|
| 357 |
|
| 358 |
-
**Available Tools:**
|
| 359 |
-
* `search_web(query: str, max_results: int = 3)`: Searches the web.
|
| 360 |
-
* `download_task_file(task_id: str)`: Downloads the specific file
|
| 361 |
-
* `read_file_content(file_path: str)`: Reads text from a downloaded
|
| 362 |
|
| 363 |
**Current Task:**
|
| 364 |
* Task ID: {task_id}
|
| 365 |
* Question: {question_text}
|
| 366 |
|
| 367 |
-
**Instructions:**
|
| 368 |
-
1.
|
| 369 |
-
2.
|
| 370 |
-
3.
|
| 371 |
-
4.
|
| 372 |
-
5.
|
| 373 |
|
| 374 |
-
|
| 375 |
"""
|
| 376 |
|
| 377 |
-
submitted_answer = f"Error: Agent failed
|
| 378 |
try:
|
| 379 |
-
# Führe den Agenten aus
|
| 380 |
-
agent_response = agent_instance.run(
|
| 381 |
-
prompt=agent_prompt,
|
| 382 |
-
tools=[search_web, download_task_file, read_file_content] # Übergebe Tools hier
|
| 383 |
-
)
|
| 384 |
|
| 385 |
if agent_response:
|
| 386 |
-
#
|
| 387 |
-
#
|
| 388 |
-
cleaned_response = re.sub(r"^(Final Answer:|The answer is:|Here is the answer:)\s*", "", agent_response.strip(), flags=re.IGNORECASE)
|
| 389 |
-
|
|
|
|
|
|
|
| 390 |
else:
|
| 391 |
-
submitted_answer = "Error: Agent returned an empty response."
|
| 392 |
|
| 393 |
|
| 394 |
-
print(f"Task {task_id} completed
|
| 395 |
|
|
|
|
| 396 |
except Exception as e:
|
| 397 |
-
error_msg = f"
|
| 398 |
print(error_msg)
|
| 399 |
-
|
| 400 |
-
submitted_answer = f"ERROR: {type(e).__name__}" # Kürzere Fehlermeldung für die Payload
|
| 401 |
|
| 402 |
finally:
|
| 403 |
# Füge das Ergebnis (oder den Fehler) hinzu
|
| 404 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
| 405 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
|
| 406 |
-
#
|
| 407 |
-
# (
|
| 408 |
-
# cleanup_temp_files() # Kann hier aufgerufen werden, wenn Ressourcen knapp sind
|
| 409 |
|
| 410 |
end_time = datetime.now()
|
| 411 |
duration = end_time - start_time
|
| 412 |
print(f"Agent processing finished in {duration}.")
|
|
|
|
| 413 |
|
| 414 |
-
# 4. Prepare Submission
|
| 415 |
if not answers_payload:
|
| 416 |
print("Agent did not produce any answers to submit.")
|
| 417 |
-
#
|
| 418 |
-
cleanup_temp_files()
|
| 419 |
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
| 420 |
|
| 421 |
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
| 422 |
status_update = f"Agent finished in {duration}. Submitting {len(answers_payload)} answers for user '{username}'..."
|
| 423 |
print(status_update)
|
| 424 |
|
| 425 |
-
# 5. Submit
|
| 426 |
print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
|
|
|
|
|
|
|
|
|
|
| 427 |
try:
|
| 428 |
response = requests.post(submit_url, json=submission_data, timeout=120)
|
| 429 |
response.raise_for_status()
|
|
@@ -436,10 +499,7 @@ Begin!
|
|
| 436 |
f"Message: {result_data.get('message', 'No message received.')}"
|
| 437 |
)
|
| 438 |
print("Submission successful.")
|
| 439 |
-
results_df = pd.DataFrame(results_log)
|
| 440 |
-
return final_status + f"\n\nAgent ({HF_MODEL_ID}) Processing Log:\n" + "\n".join(status_updates[-5:]), results_df
|
| 441 |
except requests.exceptions.HTTPError as e:
|
| 442 |
-
# (Fehlerbehandlung wie zuvor)
|
| 443 |
error_detail = f"Server responded with status {e.response.status_code}."
|
| 444 |
try:
|
| 445 |
error_json = e.response.json()
|
|
@@ -450,88 +510,81 @@ Begin!
|
|
| 450 |
error_detail += f" Detail: {str(api_error)}"
|
| 451 |
except requests.exceptions.JSONDecodeError:
|
| 452 |
error_detail += f" Response: {e.response.text[:500]}"
|
| 453 |
-
|
| 454 |
-
print(
|
| 455 |
-
results_df = pd.DataFrame(results_log)
|
| 456 |
-
return status_message, results_df
|
| 457 |
except requests.exceptions.Timeout:
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
print(status_message)
|
| 461 |
-
results_df = pd.DataFrame(results_log)
|
| 462 |
-
return status_message, results_df
|
| 463 |
except requests.exceptions.RequestException as e:
|
| 464 |
-
|
| 465 |
-
print(
|
| 466 |
-
results_df = pd.DataFrame(results_log)
|
| 467 |
-
return status_message, results_df
|
| 468 |
except Exception as e:
|
| 469 |
-
|
| 470 |
-
print(
|
| 471 |
-
results_df = pd.DataFrame(results_log)
|
| 472 |
-
return status_message, results_df
|
| 473 |
finally:
|
| 474 |
-
# Stelle sicher, dass
|
| 475 |
-
|
|
|
|
|
|
|
| 476 |
|
| 477 |
|
| 478 |
-
# --- Gradio Interface (
|
| 479 |
with gr.Blocks() as demo:
|
| 480 |
-
gr.Markdown("# Smol
|
| 481 |
gr.Markdown(
|
| 482 |
f"""
|
| 483 |
**Instructions:**
|
| 484 |
|
| 485 |
-
1. Ensure
|
| 486 |
-
2.
|
| 487 |
-
3.
|
| 488 |
-
4. Log in
|
| 489 |
-
5. Click 'Run Evaluation & Submit
|
| 490 |
|
| 491 |
---
|
| 492 |
**Agent Details:**
|
| 493 |
-
* Uses
|
| 494 |
-
* Leverages Hugging Face Inference API for LLM calls.
|
| 495 |
* Tools: Web Search ({'Tavily' if USE_TAVILY else 'DuckDuckGo' if USE_DUCKDUCKGO else 'Disabled'}), File Download, File Reading (PDF/Text).
|
| 496 |
-
* Check the Space console logs for detailed agent behavior.
|
| 497 |
"""
|
| 498 |
)
|
| 499 |
|
| 500 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 501 |
|
| 502 |
run_button = gr.Button("Run Evaluation & Submit All Answers")
|
| 503 |
|
| 504 |
status_output = gr.Textbox(label="Run Status / Submission Result", lines=10, interactive=False)
|
| 505 |
results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True, height=400)
|
| 506 |
|
| 507 |
-
#
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
|
| 515 |
-
|
| 516 |
-
# we might need a different setup using gr.State or gr.Variable.
|
| 517 |
-
# Let's assume Gradio handles passing the profile for now.
|
| 518 |
-
pass # Placeholder
|
| 519 |
|
| 520 |
run_button.click(
|
| 521 |
-
fn=
|
| 522 |
-
inputs=[],
|
| 523 |
-
# If this fails, might need inputs=gr.State(profile_info) setup
|
| 524 |
outputs=[status_output, results_table],
|
| 525 |
api_name="run_evaluation_smol"
|
| 526 |
)
|
| 527 |
|
|
|
|
| 528 |
# --- App Start (unverändert) ---
|
| 529 |
if __name__ == "__main__":
|
| 530 |
-
print("\n" + "-"*30 + " App Starting (Smol
|
| 531 |
space_host_startup = os.getenv("SPACE_HOST")
|
| 532 |
space_id_startup = os.getenv("SPACE_ID")
|
| 533 |
|
| 534 |
-
# (Rest des Startblocks unverändert)
|
| 535 |
if space_host_startup:
|
| 536 |
print(f"✅ SPACE_HOST found: {space_host_startup}")
|
| 537 |
print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
|
|
@@ -546,8 +599,13 @@ if __name__ == "__main__":
|
|
| 546 |
print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
|
| 547 |
|
| 548 |
print(f" Using HF Model via Inference API: {HF_MODEL_ID}")
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
|
| 552 |
-
print("
|
| 553 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
import tempfile # Für temporäre Dateien
|
| 9 |
import atexit # Zum Aufräumen beim Beenden
|
| 10 |
|
| 11 |
+
# --- Smol Agents und HF Imports (KORRIGIERT) ---
|
| 12 |
+
try:
|
| 13 |
+
from smolagents import Agent
|
| 14 |
+
from smolagents.llm.huggingface import InferenceAPI
|
| 15 |
+
from smolagents.tools import tool
|
| 16 |
+
print("Successfully imported from 'smolagents'")
|
| 17 |
+
except ImportError as e:
|
| 18 |
+
print(f"Error importing from smolagents: {e}")
|
| 19 |
+
print("Please ensure 'smolagents[huggingface]' is listed correctly in requirements.txt")
|
| 20 |
+
# Exit if core library is missing
|
| 21 |
+
import sys
|
| 22 |
+
sys.exit(f"Fatal Error: Could not import smolagents. Check requirements.txt and rebuild/restart the Space. Original error: {e}")
|
| 23 |
+
|
| 24 |
from huggingface_hub import HfApi, InferenceClient
|
| 25 |
|
| 26 |
# --- Suchtool Imports (wähle eins) ---
|
|
|
|
| 52 |
# --- Konstanten ---
|
| 53 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 54 |
# Wähle ein Instruction-Following Modell von Hugging Face Hub
|
| 55 |
+
HF_MODEL_ID = os.getenv("HF_MODEL_ID", "meta-llama/Meta-Llama-3-8B-Instruct") # Standardmodell
|
|
|
|
|
|
|
| 56 |
|
| 57 |
# --- Globale Variablen für Clients (werden in initialize_agent gesetzt) ---
|
| 58 |
hf_token = None
|
|
|
|
| 64 |
|
| 65 |
def cleanup_temp_files():
    """Delete every temporary file tracked in ``temp_files_to_clean``.

    Each tracked path is removed from disk and then dropped from the set.
    A path whose file no longer exists is simply dropped. A path that cannot
    be removed (any other ``OSError``) is reported and stays in the set, so a
    later call can retry it.
    """
    print("Cleaning up temporary files...")
    # Iterate over a snapshot because the set is mutated inside the loop.
    for file_path in list(temp_files_to_clean):
        try:
            # Try the removal directly instead of checking os.path.exists()
            # first; the file may vanish between the check and the removal.
            os.remove(file_path)
        except FileNotFoundError:
            # Already gone: nothing to delete, just stop tracking it below.
            pass
        except OSError as e:
            print(f"Error removing temporary file {file_path}: {e}")
            continue  # keep the path tracked for a later attempt
        else:
            print(f"Removed temporary file: {file_path}")
        # discard() never raises, so no KeyError handling is needed.
        temp_files_to_clean.discard(file_path)
|
| 78 |
|
| 79 |
# Registriere die Cleanup-Funktion für das Beenden des Skripts
|
| 80 |
atexit.register(cleanup_temp_files)
|
| 81 |
|
| 82 |
|
| 83 |
+
# --- Tool Definitionen für smolagents ---
|
| 84 |
|
| 85 |
@tool
|
| 86 |
def search_web(query: str, max_results: int = 3) -> str:
|
|
|
|
| 107 |
if not results: return "No search results found."
|
| 108 |
return "\n".join([f"Title: {r['title']}\nURL: {r['href']}\nSnippet: {r['body'][:500]}..." for r in results])
|
| 109 |
else:
|
| 110 |
+
# Dies sollte nicht passieren, wenn search_client gesetzt ist, aber als Absicherung
|
| 111 |
+
return "No compatible search client configured or available."
|
| 112 |
except Exception as e:
|
| 113 |
+
print(f"Search API Error ({type(e).__name__}): {e}")
|
| 114 |
return f"Error during search: {e}"
|
| 115 |
|
| 116 |
@tool
|
|
|
|
| 121 |
Args:
|
| 122 |
task_id (str): The unique identifier for the task whose file needs to be downloaded.
|
| 123 |
Returns:
|
| 124 |
+
str: The local path to the downloaded file (e.g., '/tmp/gaia_task_abc-123.pdf') if successful,
|
| 125 |
+
otherwise an error message starting with 'Error:'.
|
| 126 |
"""
|
| 127 |
print(f"Tool: download_task_file(task_id='{task_id}')")
|
| 128 |
file_url = f"{DEFAULT_API_URL}/files/{task_id}"
|
| 129 |
|
| 130 |
try:
|
| 131 |
+
response = requests.get(file_url, stream=True, timeout=30)
|
| 132 |
+
response.raise_for_status()
|
| 133 |
|
|
|
|
| 134 |
content_type = response.headers.get('content-type', '').lower()
|
| 135 |
+
suffix = ".tmp"
|
| 136 |
+
if 'pdf' in content_type: suffix = ".pdf"
|
| 137 |
+
elif 'png' in content_type: suffix = ".png"
|
| 138 |
+
elif 'jpeg' in content_type or 'jpg' in content_type: suffix = ".jpg"
|
| 139 |
+
elif 'csv' in content_type: suffix = ".csv"
|
| 140 |
+
elif 'plain' in content_type or 'text' in content_type: suffix = ".txt"
|
| 141 |
+
# Füge ggf. weitere Mappings hinzu
|
| 142 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
temp_dir = tempfile.gettempdir()
|
| 144 |
+
safe_task_id = re.sub(r'[^\w\-]+', '_', task_id)
|
| 145 |
+
# Erzeuge eindeutigeren Dateinamen, um Konflikte bei schnellen Läufen zu minimieren
|
| 146 |
+
timestamp = datetime.now().strftime("%Y%m%d%H%M%S%f")
|
| 147 |
+
temp_file_path = os.path.join(temp_dir, f"gaia_task_{safe_task_id}_{timestamp}{suffix}")
|
| 148 |
+
|
| 149 |
|
| 150 |
with open(temp_file_path, 'wb') as f:
|
| 151 |
for chunk in response.iter_content(chunk_size=8192):
|
| 152 |
f.write(chunk)
|
| 153 |
|
| 154 |
print(f"File downloaded successfully to {temp_file_path}")
|
| 155 |
+
temp_files_to_clean.add(temp_file_path)
|
| 156 |
+
return temp_file_path # Erfolg: Gib Pfad zurück
|
| 157 |
|
| 158 |
except requests.exceptions.HTTPError as e:
|
| 159 |
if e.response.status_code == 404:
|
| 160 |
print(f"No file found on server for task_id {task_id}.")
|
| 161 |
+
return "Error: No file found for this task ID." # Fehler: Gib Fehlermeldung zurück
|
| 162 |
else:
|
| 163 |
print(f"HTTP Error downloading file for task {task_id}: {e}")
|
| 164 |
+
return f"Error: Failed to download file (HTTP {e.response.status_code})." # Fehler
|
| 165 |
except requests.exceptions.RequestException as e:
|
| 166 |
print(f"Network Error downloading file for task {task_id}: {e}")
|
| 167 |
+
return f"Error: Failed to download file due to network issue: {e}" # Fehler
|
| 168 |
except Exception as e:
|
| 169 |
+
print(f"Unexpected error downloading file for task {task_id} ({type(e).__name__}): {e}")
|
| 170 |
+
return f"Error: Unexpected error during file download: {e}" # Fehler
|
| 171 |
|
| 172 |
@tool
|
| 173 |
def read_file_content(file_path: str) -> str:
|
| 174 |
"""
|
| 175 |
Reads the text content of a previously downloaded file (PDF or plain text).
|
| 176 |
+
Use this tool AFTER 'download_task_file' has successfully returned a file path (not an error message).
|
| 177 |
Args:
|
| 178 |
file_path (str): The local path to the file (must be a path returned by 'download_task_file').
|
| 179 |
Returns:
|
| 180 |
+
str: The extracted text content (truncated if very long), or an error message starting with 'Error:'.
|
| 181 |
"""
|
| 182 |
print(f"Tool: read_file_content(file_path='{file_path}')")
|
| 183 |
|
| 184 |
+
# Überprüfung des Inputs: Ist es überhaupt ein Pfad?
|
| 185 |
+
if not isinstance(file_path, str) or not os.path.isabs(file_path):
|
| 186 |
+
print(f"Invalid input for read_file_content: '{file_path}'. Expected an absolute file path.")
|
| 187 |
+
return "Error: Invalid input. Provide the absolute file path returned by download_task_file."
|
| 188 |
+
|
| 189 |
+
# Sicherheitscheck: Erlaube nur Lesen aus dem Temp-Verzeichnis (bleibt wichtig)
|
| 190 |
+
if not file_path.startswith(tempfile.gettempdir()):
|
| 191 |
print(f"Security Alert: Attempted to read file outside temp directory: {file_path}")
|
| 192 |
return "Error: Invalid file path provided. Only downloaded files can be read."
|
| 193 |
|
| 194 |
if not os.path.exists(file_path):
|
| 195 |
print(f"Error: File not found at path: {file_path}")
|
| 196 |
+
return f"Error: File not found at the specified path '{os.path.basename(file_path)}'." # Gib Dateinamen im Fehler an
|
| 197 |
|
| 198 |
try:
|
| 199 |
+
file_size = os.path.getsize(file_path)
|
| 200 |
+
print(f"Reading file: {os.path.basename(file_path)}, Size: {file_size} bytes")
|
| 201 |
+
if file_size == 0:
|
| 202 |
+
print(f"Warning: File {os.path.basename(file_path)} is empty.")
|
| 203 |
+
return f"Observation: The file '{os.path.basename(file_path)}' is empty."
|
| 204 |
+
|
| 205 |
if file_path.lower().endswith(".pdf"):
|
| 206 |
if not PDF_READER_AVAILABLE:
|
| 207 |
return "Error: Cannot read PDF file because PyPDF2 library is not installed."
|
| 208 |
text = ""
|
| 209 |
with open(file_path, 'rb') as f:
|
| 210 |
reader = PyPDF2.PdfReader(f)
|
| 211 |
+
num_pages = len(reader.pages)
|
| 212 |
+
print(f"Reading {num_pages} pages from PDF...")
|
| 213 |
+
for page_num in range(num_pages):
|
| 214 |
+
# Prüfe ob Seite Text enthält bevor Extraktion versucht wird
|
| 215 |
+
if reader.pages[page_num].extract_text():
|
| 216 |
+
page_text = reader.pages[page_num].extract_text()
|
| 217 |
+
text += page_text + "\n" # Füge Zeilenumbruch zwischen Seiten hinzu
|
| 218 |
+
if len(text) > 7000: # Begrenze die Länge
|
| 219 |
text = text[:7000] + "\n... (content truncated)"
|
| 220 |
+
print(f"Text truncated at {len(text)} chars.")
|
| 221 |
break
|
| 222 |
+
if not text:
|
| 223 |
+
print(f"Warning: Could not extract text from PDF: {os.path.basename(file_path)}")
|
| 224 |
+
return f"Observation: Could not extract any text content from the PDF file '{os.path.basename(file_path)}'."
|
| 225 |
print(f"Successfully read ~{len(text)} chars from PDF: {os.path.basename(file_path)}")
|
| 226 |
return f"Content of '{os.path.basename(file_path)}':\n{text}"
|
| 227 |
|
| 228 |
+
elif file_path.lower().endswith((".png", ".jpg", ".jpeg", ".gif", ".bmp")):
|
| 229 |
print(f"File is an image: {os.path.basename(file_path)}. Reading image content not implemented.")
|
| 230 |
+
return f"Observation: File '{os.path.basename(file_path)}' is an image. Cannot read text content with this tool."
|
| 231 |
+
|
| 232 |
+
else: # Versuche als Text zu lesen (TXT, CSV, etc.)
|
| 233 |
+
# Lese in Chunks um Speicher zu schonen bei großen Textdateien
|
| 234 |
+
content = ""
|
| 235 |
+
chunk_size = 4096
|
| 236 |
+
max_len = 7000
|
| 237 |
+
truncated = False
|
| 238 |
+
try:
|
| 239 |
+
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
|
| 240 |
+
while len(content) < max_len:
|
| 241 |
+
chunk = f.read(chunk_size)
|
| 242 |
+
if not chunk:
|
| 243 |
+
break
|
| 244 |
+
content += chunk
|
| 245 |
+
if len(content) > max_len:
|
| 246 |
+
content = content[:max_len]
|
| 247 |
+
truncated = True
|
| 248 |
+
print(f"Successfully read ~{len(content)} chars from text file: {os.path.basename(file_path)}")
|
| 249 |
+
result = f"Content of '{os.path.basename(file_path)}':\n{content}"
|
| 250 |
+
if truncated:
|
| 251 |
+
result += "\n... (content truncated)"
|
| 252 |
+
return result
|
| 253 |
+
except Exception as read_err: # Fange Lesefehler ab
|
| 254 |
+
print(f"Error reading file {file_path} as text: {read_err}")
|
| 255 |
+
return f"Error: Failed to read file '{os.path.basename(file_path)}' as text: {read_err}"
|
| 256 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
|
| 258 |
except Exception as e:
|
| 259 |
+
print(f"Error reading file {file_path} ({type(e).__name__}): {e}")
|
| 260 |
return f"Error: Failed to read file content: {e}"
|
| 261 |
|
| 262 |
|
| 263 |
# --- Agent Initialisierung ---
|
| 264 |
def initialize_agent():
    """Set up the Hugging Face token, the web-search client and the agent.

    Populates the module-level globals ``hf_token``, ``search_client`` and
    ``agent_instance``. The token and the search client are only created when
    they are still unset; the LLM wrapper and the agent are rebuilt on every
    call.

    Raises:
        ValueError: If the ``HUGGINGFACE_TOKEN`` environment variable is not set.
    """
    global hf_token, search_client, agent_instance
    print("Initializing agent and clients...")

    # Token and search client are initialised once and then reused.
    if not hf_token:
        hf_token = os.getenv("HUGGINGFACE_TOKEN")
        if not hf_token:
            raise ValueError("HUGGINGFACE_TOKEN Secret nicht im Hugging Face Space gefunden!")

    if not search_client:
        search_client = _create_search_client()

    # --- LLM client (Hugging Face Inference API) ---
    llm = InferenceAPI(
        model_id=HF_MODEL_ID,
        token=hf_token,
        max_new_tokens=1500,  # maximum number of tokens the model may generate
        temperature=0.1,
        # stop_sequences=["Observation:", "\nObservation:", "\nTool:", "\nThought:"],  # optional
        # top_p=0.9,  # optional
    )
    print(f"LLM configured with model: {HF_MODEL_ID}")

    # --- Agent instance ---
    # The tool objects are module-level functions and therefore always
    # present; search_web itself reports when no search client is configured.
    active_tools = [search_web, download_task_file, read_file_content]
    agent_instance = Agent(
        llm=llm,
        tools=active_tools,  # tools are handed over at construction time
        # system_prompt=...  # can be defined here or in the run() prompt
    )
    print(f"Smol Agent initialized with {len(active_tools)} tools.")


def _create_search_client():
    """Return a search client chosen via USE_TAVILY / USE_DUCKDUCKGO, or None if search is unavailable."""
    tavily_problem = None  # set when Tavily was requested but cannot be used
    if USE_TAVILY:
        tavily_key = os.getenv("TAVILY_API_KEY")
        if tavily_key:
            try:
                client = TavilyClient(api_key=tavily_key)
            except NameError:
                # The optional import of TavilyClient did not succeed.
                print("WARNUNG: TavilyClient Klasse nicht gefunden, obwohl USE_TAVILY=True.")
                tavily_problem = "TavilyClient nicht verfügbar"
            else:
                print("Using Tavily for search.")
                return client
        else:
            print("WARNUNG: TAVILY_API_KEY nicht gefunden, obwohl USE_TAVILY=True.")
            tavily_problem = "Tavily Key fehlt"

    if not USE_DUCKDUCKGO:
        if tavily_problem is None:
            print("Web search is disabled by configuration.")
        else:
            print(f"WARNUNG: Suche deaktiviert ({tavily_problem}).")
        return None

    # DuckDuckGo is either the configured backend or the fallback whenever
    # Tavily could not be used (missing key or missing client class).
    try:
        client = DDGS()
    except NameError:
        if tavily_problem is None:
            print("WARNUNG: duckduckgo-search nicht installiert/verfügbar. Suche deaktiviert.")
        else:
            print("WARNUNG: DuckDuckGo auch nicht verfügbar. Suche deaktiviert.")
        return None
    if tavily_problem is None:
        print("Using DuckDuckGo for search.")
    else:
        print("Falling back to DuckDuckGo for search.")
    return client
|
| 342 |
|
| 343 |
|
| 344 |
# --- Angepasste Hauptfunktion ---
|
| 345 |
+
def run_and_submit_all( profile: gr.OAuthProfile | None, progress=gr.Progress(track_tqdm=True)):
|
| 346 |
"""
|
| 347 |
+
Fetches all questions, runs the smolagents agent on them, submits all answers,
|
| 348 |
+
and displays the results. Includes Gradio progress tracking.
|
| 349 |
"""
|
| 350 |
space_id = os.getenv("SPACE_ID")
|
| 351 |
|
|
|
|
| 354 |
print(f"User logged in: {username}")
|
| 355 |
else:
|
| 356 |
print("User not logged in.")
|
| 357 |
+
# Gib None für DataFrame zurück, um Fehler in Gradio zu vermeiden
|
| 358 |
return "Please Login to Hugging Face with the button.", None
|
| 359 |
|
| 360 |
api_url = DEFAULT_API_URL
|
| 361 |
questions_url = f"{api_url}/questions"
|
| 362 |
submit_url = f"{api_url}/submit"
|
| 363 |
|
| 364 |
+
# 1. Initialisiere Agent und Clients
|
| 365 |
+
progress(0, desc="Initializing Agent...")
|
| 366 |
try:
|
| 367 |
initialize_agent()
|
| 368 |
+
if not agent_instance:
|
| 369 |
raise RuntimeError("Agent instance could not be initialized.")
|
| 370 |
except ValueError as e:
|
| 371 |
print(f"Error during initialization: {e}")
|
| 372 |
return f"Configuration Error: {e}", None
|
| 373 |
except Exception as e:
|
| 374 |
+
print(f"Error initializing agent/clients ({type(e).__name__}): {e}")
|
| 375 |
return f"Error initializing agent: {e}", None
|
| 376 |
|
| 377 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Local Run - Code Link N/A"
|
| 378 |
print(f"Agent Code Link: {agent_code}")
|
| 379 |
|
| 380 |
+
# 2. Fetch Questions
|
| 381 |
+
progress(0.1, desc="Fetching questions...")
|
| 382 |
print(f"Fetching questions from: {questions_url}")
|
| 383 |
try:
|
| 384 |
response = requests.get(questions_url, timeout=30)
|
|
|
|
| 387 |
if not questions_data or not isinstance(questions_data, list):
|
| 388 |
print(f"Fetched questions list is empty or invalid format: {questions_data}")
|
| 389 |
return "Fetched questions list is empty or invalid format.", None
|
| 390 |
+
num_questions = len(questions_data)
|
| 391 |
+
print(f"Fetched {num_questions} questions.")
|
| 392 |
except Exception as e:
|
|
|
|
| 393 |
print(f"Error fetching questions ({type(e).__name__}): {e}")
|
| 394 |
return f"Error fetching questions: {e}", None
|
| 395 |
|
| 396 |
|
| 397 |
+
# 3. Run your Smol Agent with progress tracking
|
| 398 |
start_time = datetime.now()
|
| 399 |
results_log = []
|
| 400 |
answers_payload = []
|
| 401 |
+
print(f"Running smolagents on {num_questions} questions using {HF_MODEL_ID}...")
|
|
|
|
| 402 |
|
| 403 |
+
# Verwende tqdm für die Iteration mit Gradio-Fortschritt
|
| 404 |
+
# for i, item in enumerate(tqdm(questions_data, desc="Processing Questions")): # Standard tqdm
|
| 405 |
+
for i, item in enumerate(progress.tqdm(questions_data, desc="Processing Questions")): # Gradio tqdm
|
| 406 |
task_id = item.get("task_id")
|
| 407 |
question_text = item.get("question")
|
| 408 |
|
| 409 |
if not task_id or question_text is None:
|
| 410 |
+
print(f"Skipping item {i+1} with missing task_id or question: {item}")
|
| 411 |
continue
|
| 412 |
|
| 413 |
+
# --- Prompt für smolagents ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 414 |
agent_prompt = f"""
|
| 415 |
+
You are an expert AI assistant solving a challenge question based on the GAIA benchmark.
|
| 416 |
Your task is to answer the following question accurately and concisely.
|
| 417 |
Use the available tools ONLY when necessary to find information or access required files.
|
| 418 |
+
Think step-by-step before deciding on an action or the final answer.
|
| 419 |
|
| 420 |
+
**Available Tools:** (These are the functions you can call)
|
| 421 |
+
* `search_web(query: str, max_results: int = 3)`: Searches the web for information.
|
| 422 |
+
* `download_task_file(task_id: str)`: Downloads the specific file FOR THIS TASK ONLY. Use the task_id '{task_id}'. Returns the local file path needed for 'read_file_content'.
|
| 423 |
+
* `read_file_content(file_path: str)`: Reads text from a file previously downloaded with 'download_task_file'. Requires the exact file path returned by that tool.
|
| 424 |
|
| 425 |
**Current Task:**
|
| 426 |
* Task ID: {task_id}
|
| 427 |
* Question: {question_text}
|
| 428 |
|
| 429 |
+
**Instructions & Output Format:**
|
| 430 |
+
1. Carefully analyze the question.
|
| 431 |
+
2. Think step-by-step. Outline your plan if needed.
|
| 432 |
+
3. Execute tools sequentially if information depends on previous steps (e.g., download then read).
|
| 433 |
+
4. Review the gathered information and your reasoning.
|
| 434 |
+
5. **Crucially**: Provide ONLY the final answer. Do not include your reasoning, steps, tool calls, introductions (like "The answer is..."), or any other conversational text in the final output. The answer must be exact and stand-alone. Format it as requested by the question (e.g., just a number, a comma-separated list 'apple,banana,orange', etc.).
|
| 435 |
|
| 436 |
+
Let's begin the thinking process for Task {task_id}.
|
| 437 |
"""
|
| 438 |
|
| 439 |
+
submitted_answer = f"Error: Agent failed for task {task_id}" # Default error
|
| 440 |
try:
|
| 441 |
+
# Führe den Agenten aus (übergebe Tools nicht erneut, wenn sie im Konstruktor sind)
|
| 442 |
+
agent_response = agent_instance.run(prompt=agent_prompt)
|
|
|
|
|
|
|
|
|
|
| 443 |
|
| 444 |
if agent_response:
|
| 445 |
+
# Einfache Bereinigung: Entferne häufige Präfixe und überflüssige Leerzeichen
|
| 446 |
+
# Manchmal geben Modelle trotz Anweisung Präfixe aus.
|
| 447 |
+
cleaned_response = re.sub(r"^(Final Answer:|Answer:|The answer is:|Here is the answer:)\s*", "", agent_response.strip(), flags=re.IGNORECASE)
|
| 448 |
+
# Entferne auch Anführungszeichen am Anfang/Ende, falls das Modell sie hinzufügt
|
| 449 |
+
cleaned_response = cleaned_response.strip('"').strip("'")
|
| 450 |
+
submitted_answer = cleaned_response if cleaned_response else "Error: Agent returned empty response after cleaning."
|
| 451 |
else:
|
| 452 |
+
submitted_answer = "Error: Agent returned an empty or None response."
|
| 453 |
|
| 454 |
|
| 455 |
+
print(f"Task {task_id} completed. Submitted Answer: '{submitted_answer}'")
|
| 456 |
|
| 457 |
+
# Handle specific exceptions if needed, e.g., RateLimitError from HF
|
| 458 |
except Exception as e:
|
| 459 |
+
error_msg = f"AGENT_RUN_ERROR on task {task_id} ({type(e).__name__}): {e}"
|
| 460 |
print(error_msg)
|
| 461 |
+
submitted_answer = f"ERROR: Agent failed ({type(e).__name__})" # Kürzere Fehlermeldung
|
|
|
|
| 462 |
|
| 463 |
finally:
|
| 464 |
# Füge das Ergebnis (oder den Fehler) hinzu
|
| 465 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
| 466 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
|
| 467 |
+
# Optional: Sofortige Bereinigung (kann Laufzeit verlängern)
|
| 468 |
+
# cleanup_temp_files()
|
|
|
|
| 469 |
|
| 470 |
end_time = datetime.now()
|
| 471 |
duration = end_time - start_time
|
| 472 |
print(f"Agent processing finished in {duration}.")
|
| 473 |
+
progress(0.9, desc="Submitting answers...")
|
| 474 |
|
| 475 |
+
# 4. Prepare Submission
|
| 476 |
if not answers_payload:
|
| 477 |
print("Agent did not produce any answers to submit.")
|
| 478 |
+
cleanup_temp_files() # Aufräumen
|
|
|
|
| 479 |
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
| 480 |
|
| 481 |
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
| 482 |
status_update = f"Agent finished in {duration}. Submitting {len(answers_payload)} answers for user '{username}'..."
|
| 483 |
print(status_update)
|
| 484 |
|
| 485 |
+
# 5. Submit
|
| 486 |
print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
|
| 487 |
+
final_status = "Submission attempt finished." # Default status
|
| 488 |
+
results_df = pd.DataFrame(results_log) # Erstelle DataFrame vor dem Try-Block
|
| 489 |
+
|
| 490 |
try:
|
| 491 |
response = requests.post(submit_url, json=submission_data, timeout=120)
|
| 492 |
response.raise_for_status()
|
|
|
|
| 499 |
f"Message: {result_data.get('message', 'No message received.')}"
|
| 500 |
)
|
| 501 |
print("Submission successful.")
|
|
|
|
|
|
|
| 502 |
except requests.exceptions.HTTPError as e:
|
|
|
|
| 503 |
error_detail = f"Server responded with status {e.response.status_code}."
|
| 504 |
try:
|
| 505 |
error_json = e.response.json()
|
|
|
|
| 510 |
error_detail += f" Detail: {str(api_error)}"
|
| 511 |
except requests.exceptions.JSONDecodeError:
|
| 512 |
error_detail += f" Response: {e.response.text[:500]}"
|
| 513 |
+
final_status = f"Submission Failed: {error_detail}"
|
| 514 |
+
print(final_status)
|
|
|
|
|
|
|
| 515 |
except requests.exceptions.Timeout:
|
| 516 |
+
final_status = "Submission Failed: The request timed out."
|
| 517 |
+
print(final_status)
|
|
|
|
|
|
|
|
|
|
| 518 |
except requests.exceptions.RequestException as e:
|
| 519 |
+
final_status = f"Submission Failed: Network error - {e}"
|
| 520 |
+
print(final_status)
|
|
|
|
|
|
|
| 521 |
except Exception as e:
|
| 522 |
+
final_status = f"An unexpected error occurred during submission ({type(e).__name__}): {e}"
|
| 523 |
+
print(final_status)
|
|
|
|
|
|
|
| 524 |
finally:
|
| 525 |
+
cleanup_temp_files() # Stelle sicher, dass aufgeräumt wird
|
| 526 |
+
|
| 527 |
+
progress(1, desc="Done.")
|
| 528 |
+
return final_status, results_df
|
| 529 |
|
| 530 |
|
| 531 |
+
# --- Gradio Interface (mit Progress Bar) ---
|
| 532 |
with gr.Blocks() as demo:
|
| 533 |
+
gr.Markdown("# Smol Agents Evaluation Runner (Hugging Face)")
|
| 534 |
gr.Markdown(
|
| 535 |
f"""
|
| 536 |
**Instructions:**
|
| 537 |
|
| 538 |
+
1. Ensure `HUGGINGFACE_TOKEN` (write access) is a Secret in Space settings. Add `TAVILY_API_KEY` if using Tavily.
|
| 539 |
+
2. Verify `requirements.txt` includes `smolagents[huggingface]`, search libs, `pypdf2`.
|
| 540 |
+
3. Agent uses HF Inference API model: **{HF_MODEL_ID}** (change via `HF_MODEL_ID` env var).
|
| 541 |
+
4. Log in below.
|
| 542 |
+
5. Click 'Run Evaluation & Submit'. **This will take time.** Monitor progress below and console logs.
|
| 543 |
|
| 544 |
---
|
| 545 |
**Agent Details:**
|
| 546 |
+
* Uses `smolagents` library.
|
|
|
|
| 547 |
* Tools: Web Search ({'Tavily' if USE_TAVILY else 'DuckDuckGo' if USE_DUCKDUCKGO else 'Disabled'}), File Download, File Reading (PDF/Text).
|
|
|
|
| 548 |
"""
|
| 549 |
)
|
| 550 |
|
| 551 |
+
# Platzhalter für Login-Status (vereinfacht)
|
| 552 |
+
# Gradio's LoginButton handhabt das meiste intern
|
| 553 |
+
# profile_info = gr.State(None) # Nicht unbedingt nötig, wenn LoginButton direkt genutzt wird
|
| 554 |
+
|
| 555 |
+
with gr.Row():
|
| 556 |
+
login_button = gr.LoginButton()
|
| 557 |
+
# Logout nicht direkt implementiert, User kann sich auf HF ausloggen
|
| 558 |
|
| 559 |
run_button = gr.Button("Run Evaluation & Submit All Answers")
|
| 560 |
|
| 561 |
status_output = gr.Textbox(label="Run Status / Submission Result", lines=10, interactive=False)
|
| 562 |
results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True, height=400)
|
| 563 |
|
| 564 |
+
# --- Event Handler für den Button ---
|
| 565 |
+
# Diese Funktion wird aufgerufen, wenn der Button geklickt wird.
|
| 566 |
+
# Sie erhält das OAuth-Profil, wenn der User eingeloggt ist.
|
| 567 |
+
def handle_run(
    request: gr.Request,
    profile: gr.OAuthProfile | None = None,
    progress=gr.Progress(track_tqdm=True),
):
    """Click handler for the run button; delegates to run_and_submit_all.

    Gradio fills these parameters itself (the click wiring passes no
    explicit inputs):

    Args:
        request: The incoming Gradio request. Unused; kept only so the
            handler's signature stays backward-compatible.
        profile: OAuth profile of the logged-in user, injected by Gradio
            because of the ``gr.OAuthProfile`` annotation; ``None`` when
            nobody is logged in.
        progress: Progress tracker. It has to be declared as a default
            argument of the event handler for Gradio to connect it to the
            UI; an instance created inside the body is not tracked.

    Returns:
        Whatever run_and_submit_all returns (status text and results table).
    """
    # The profile is not an attribute of gr.Request, so looking it up there
    # always produced None and forced the "please log in" path. Use the
    # injected values instead.
    return run_and_submit_all(profile, progress=progress)
|
|
|
|
|
|
|
|
|
|
| 573 |
|
| 574 |
run_button.click(
|
| 575 |
+
fn=handle_run, # Verwende die Wrapper-Funktion
|
| 576 |
+
inputs=[], # Keine expliziten Inputs nötig, Profil kommt vom Request
|
|
|
|
| 577 |
outputs=[status_output, results_table],
|
| 578 |
api_name="run_evaluation_smol"
|
| 579 |
)
|
| 580 |
|
| 581 |
+
|
| 582 |
# --- App start ---
|
| 583 |
if __name__ == "__main__":
|
| 584 |
+
print("\n" + "-"*30 + " App Starting (Smol Agents Version - Corrected Imports) " + "-"*30)
|
| 585 |
space_host_startup = os.getenv("SPACE_HOST")
|
| 586 |
space_id_startup = os.getenv("SPACE_ID")
|
| 587 |
|
|
|
|
| 588 |
if space_host_startup:
|
| 589 |
print(f"✅ SPACE_HOST found: {space_host_startup}")
|
| 590 |
print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
|
|
|
|
| 599 |
print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
|
| 600 |
|
| 601 |
print(f" Using HF Model via Inference API: {HF_MODEL_ID}")
|
| 602 |
+
search_tool_status = 'Disabled'
|
| 603 |
+
if USE_TAVILY: search_tool_status = 'Tavily'
|
| 604 |
+
elif USE_DUCKDUCKGO: search_tool_status = 'DuckDuckGo'
|
| 605 |
+
print(f" Search Tool: {search_tool_status}")
|
| 606 |
+
print(f" PDF Reading: {'Enabled' if PDF_READER_AVAILABLE else 'Disabled (PyPDF2 missing)'}")
|
| 607 |
+
print("-"*(60 + len(" App Starting (Smol Agents Version - Corrected Imports) ")) + "\n")
|
| 608 |
+
|
| 609 |
+
print("Launching Gradio Interface for Smol Agents Evaluation...")
|
| 610 |
+
# Enable Gradio's request queue (demo.queue()) for better handling of long runs
|
| 611 |
+
demo.queue().launch(debug=False, share=False)
|