pmeyhoefer commited on
Commit
1a535c5
·
verified ·
1 Parent(s): 85e2c6c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +158 -283
app.py CHANGED
@@ -5,43 +5,42 @@ import pandas as pd
5
  import re
6
  from datetime import datetime
7
  import time
8
- import tempfile # Für temporäre Dateien
9
- import atexit # Zum Aufräumen beim Beenden
 
10
 
11
- # --- Smol Agents und HF Imports (KORRIGIERT) ---
12
  try:
13
- from smolagents import Agent
14
- from smolagents.llm.huggingface import InferenceAPI
15
- from smolagents.tools import tool
16
- print("Successfully imported from 'smolagents'")
17
  except ImportError as e:
18
  print(f"Error importing from smolagents: {e}")
19
  print("Please ensure 'smolagents[huggingface]' is listed correctly in requirements.txt")
20
- # Exit if core library is missing
21
- import sys
22
- sys.exit(f"Fatal Error: Could not import smolagents. Check requirements.txt and rebuild/restart the Space. Original error: {e}")
23
 
24
- from huggingface_hub import HfApi, InferenceClient
 
25
 
26
- # --- Suchtool Imports (wähle eins) ---
27
- USE_TAVILY = False # Setze auf True, wenn du Tavily bevorzugst (benötigt TAVILY_API_KEY)
28
- USE_DUCKDUCKGO = True # Setze auf True für DuckDuckGo (kein Key nötig)
29
 
30
  if USE_TAVILY:
31
  try:
32
  from tavily import TavilyClient
33
  except ImportError:
34
- print("WARNUNG: TavilyClient nicht installiert. Führe 'pip install tavily-python' aus.")
35
  USE_TAVILY = False
36
  USE_DUCKDUCKGO = True # Fallback
37
  if USE_DUCKDUCKGO:
38
  try:
39
  from duckduckgo_search import DDGS
40
  except ImportError:
41
- print("WARNUNG: duckduckgo-search nicht installiert. Führe 'pip install duckduckgo-search' aus.")
42
  USE_DUCKDUCKGO = False
43
 
44
- # --- PDF Reader Import ---
45
  try:
46
  import PyPDF2
47
  PDF_READER_AVAILABLE = True
@@ -51,36 +50,36 @@ except ImportError:
51
 
52
  # --- Konstanten ---
53
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
54
- # Wähle ein Instruction-Following Modell von Hugging Face Hub
55
- HF_MODEL_ID = os.getenv("HF_MODEL_ID", "meta-llama/Meta-Llama-3-8B-Instruct") # Standardmodell
 
56
 
57
- # --- Globale Variablen für Clients (werden in initialize_agent gesetzt) ---
58
- hf_token = None
59
  search_client = None
60
- agent_instance = None # Wird pro Lauf initialisiert
 
61
 
62
- # --- Temporäre Datei Verwaltung ---
63
  temp_files_to_clean = set()
64
-
65
  def cleanup_temp_files():
 
66
  print("Cleaning up temporary files...")
67
- for file_path in list(temp_files_to_clean): # Iteriere über Kopie, da Set verändert wird
68
  try:
69
  if os.path.exists(file_path):
70
  os.remove(file_path)
71
  print(f"Removed temporary file: {file_path}")
72
- if file_path in temp_files_to_clean: # Prüfe erneut, falls Fehler auftrat
73
  temp_files_to_clean.remove(file_path)
74
  except OSError as e:
75
  print(f"Error removing temporary file {file_path}: {e}")
76
  except KeyError:
77
  print(f"Warning: File path {file_path} already removed from cleanup set.")
78
-
79
- # Registriere die Cleanup-Funktion für das Beenden des Skripts
80
  atexit.register(cleanup_temp_files)
81
 
82
-
83
- # --- Tool Definitionen für smolagents ---
84
 
85
  @tool
86
  def search_web(query: str, max_results: int = 3) -> str:
@@ -93,6 +92,7 @@ def search_web(query: str, max_results: int = 3) -> str:
93
  Returns:
94
  str: A string containing the search results, or an error message.
95
  """
 
96
  print(f"Tool: search_web(query='{query}', max_results={max_results})")
97
  if not search_client:
98
  return "Search tool is not available/configured."
@@ -107,7 +107,6 @@ def search_web(query: str, max_results: int = 3) -> str:
107
  if not results: return "No search results found."
108
  return "\n".join([f"Title: {r['title']}\nURL: {r['href']}\nSnippet: {r['body'][:500]}..." for r in results])
109
  else:
110
- # Dies sollte nicht passieren, wenn search_client gesetzt ist, aber als Absicherung
111
  return "No compatible search client configured or available."
112
  except Exception as e:
113
  print(f"Search API Error ({type(e).__name__}): {e}")
@@ -124,13 +123,12 @@ def download_task_file(task_id: str) -> str:
124
  str: The local path to the downloaded file (e.g., '/tmp/gaia_task_abc-123.pdf') if successful,
125
  otherwise an error message starting with 'Error:'.
126
  """
 
127
  print(f"Tool: download_task_file(task_id='{task_id}')")
128
  file_url = f"{DEFAULT_API_URL}/files/{task_id}"
129
-
130
  try:
131
  response = requests.get(file_url, stream=True, timeout=30)
132
  response.raise_for_status()
133
-
134
  content_type = response.headers.get('content-type', '').lower()
135
  suffix = ".tmp"
136
  if 'pdf' in content_type: suffix = ".pdf"
@@ -138,36 +136,29 @@ def download_task_file(task_id: str) -> str:
138
  elif 'jpeg' in content_type or 'jpg' in content_type: suffix = ".jpg"
139
  elif 'csv' in content_type: suffix = ".csv"
140
  elif 'plain' in content_type or 'text' in content_type: suffix = ".txt"
141
- # Füge ggf. weitere Mappings hinzu
142
-
143
  temp_dir = tempfile.gettempdir()
144
  safe_task_id = re.sub(r'[^\w\-]+', '_', task_id)
145
- # Erzeuge eindeutigeren Dateinamen, um Konflikte bei schnellen Läufen zu minimieren
146
  timestamp = datetime.now().strftime("%Y%m%d%H%M%S%f")
147
  temp_file_path = os.path.join(temp_dir, f"gaia_task_{safe_task_id}_{timestamp}{suffix}")
148
-
149
-
150
  with open(temp_file_path, 'wb') as f:
151
  for chunk in response.iter_content(chunk_size=8192):
152
  f.write(chunk)
153
-
154
  print(f"File downloaded successfully to {temp_file_path}")
155
  temp_files_to_clean.add(temp_file_path)
156
- return temp_file_path # Erfolg: Gib Pfad zurück
157
-
158
  except requests.exceptions.HTTPError as e:
159
  if e.response.status_code == 404:
160
  print(f"No file found on server for task_id {task_id}.")
161
- return "Error: No file found for this task ID." # Fehler: Gib Fehlermeldung zurück
162
  else:
163
  print(f"HTTP Error downloading file for task {task_id}: {e}")
164
- return f"Error: Failed to download file (HTTP {e.response.status_code})." # Fehler
165
  except requests.exceptions.RequestException as e:
166
  print(f"Network Error downloading file for task {task_id}: {e}")
167
- return f"Error: Failed to download file due to network issue: {e}" # Fehler
168
  except Exception as e:
169
  print(f"Unexpected error downloading file for task {task_id} ({type(e).__name__}): {e}")
170
- return f"Error: Unexpected error during file download: {e}" # Fehler
171
 
172
  @tool
173
  def read_file_content(file_path: str) -> str:
@@ -179,43 +170,35 @@ def read_file_content(file_path: str) -> str:
179
  Returns:
180
  str: The extracted text content (truncated if very long), or an error message starting with 'Error:'.
181
  """
 
182
  print(f"Tool: read_file_content(file_path='{file_path}')")
183
-
184
- # Überprüfung des Inputs: Ist es überhaupt ein Pfad?
185
  if not isinstance(file_path, str) or not os.path.isabs(file_path):
186
  print(f"Invalid input for read_file_content: '{file_path}'. Expected an absolute file path.")
187
  return "Error: Invalid input. Provide the absolute file path returned by download_task_file."
188
-
189
- # Sicherheitscheck: Erlaube nur Lesen aus dem Temp-Verzeichnis (bleibt wichtig)
190
  if not file_path.startswith(tempfile.gettempdir()):
191
  print(f"Security Alert: Attempted to read file outside temp directory: {file_path}")
192
  return "Error: Invalid file path provided. Only downloaded files can be read."
193
-
194
  if not os.path.exists(file_path):
195
  print(f"Error: File not found at path: {file_path}")
196
- return f"Error: File not found at the specified path '{os.path.basename(file_path)}'." # Gib Dateinamen im Fehler an
197
-
198
  try:
199
  file_size = os.path.getsize(file_path)
200
  print(f"Reading file: {os.path.basename(file_path)}, Size: {file_size} bytes")
201
  if file_size == 0:
202
  print(f"Warning: File {os.path.basename(file_path)} is empty.")
203
  return f"Observation: The file '{os.path.basename(file_path)}' is empty."
204
-
205
  if file_path.lower().endswith(".pdf"):
206
- if not PDF_READER_AVAILABLE:
207
- return "Error: Cannot read PDF file because PyPDF2 library is not installed."
208
  text = ""
209
  with open(file_path, 'rb') as f:
210
  reader = PyPDF2.PdfReader(f)
211
  num_pages = len(reader.pages)
212
  print(f"Reading {num_pages} pages from PDF...")
213
  for page_num in range(num_pages):
214
- # Prüfe ob Seite Text enthält bevor Extraktion versucht wird
215
  if reader.pages[page_num].extract_text():
216
  page_text = reader.pages[page_num].extract_text()
217
- text += page_text + "\n" # Füge Zeilenumbruch zwischen Seiten hinzu
218
- if len(text) > 7000: # Begrenze die Länge
219
  text = text[:7000] + "\n... (content truncated)"
220
  print(f"Text truncated at {len(text)} chars.")
221
  break
@@ -224,155 +207,120 @@ def read_file_content(file_path: str) -> str:
224
  return f"Observation: Could not extract any text content from the PDF file '{os.path.basename(file_path)}'."
225
  print(f"Successfully read ~{len(text)} chars from PDF: {os.path.basename(file_path)}")
226
  return f"Content of '{os.path.basename(file_path)}':\n{text}"
227
-
228
  elif file_path.lower().endswith((".png", ".jpg", ".jpeg", ".gif", ".bmp")):
229
  print(f"File is an image: {os.path.basename(file_path)}. Reading image content not implemented.")
230
  return f"Observation: File '{os.path.basename(file_path)}' is an image. Cannot read text content with this tool."
231
-
232
- else: # Versuche als Text zu lesen (TXT, CSV, etc.)
233
- # Lese in Chunks um Speicher zu schonen bei großen Textdateien
234
  content = ""
235
- chunk_size = 4096
236
- max_len = 7000
237
- truncated = False
238
  try:
239
  with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
240
  while len(content) < max_len:
241
- chunk = f.read(chunk_size)
242
- if not chunk:
243
- break
244
  content += chunk
245
- if len(content) > max_len:
246
- content = content[:max_len]
247
- truncated = True
248
  print(f"Successfully read ~{len(content)} chars from text file: {os.path.basename(file_path)}")
249
  result = f"Content of '{os.path.basename(file_path)}':\n{content}"
250
- if truncated:
251
- result += "\n... (content truncated)"
252
  return result
253
- except Exception as read_err: # Fange Lesefehler ab
254
  print(f"Error reading file {file_path} as text: {read_err}")
255
  return f"Error: Failed to read file '{os.path.basename(file_path)}' as text: {read_err}"
256
-
257
-
258
  except Exception as e:
259
  print(f"Error reading file {file_path} ({type(e).__name__}): {e}")
260
  return f"Error: Failed to read file content: {e}"
261
 
262
 
263
- # --- Agent Initialisierung ---
264
  def initialize_agent():
265
- """Initialisiert den smolagents Agent und die benötigten Clients."""
266
- global hf_token, search_client, agent_instance
267
  print("Initializing agent and clients...")
268
 
269
- # Token und Clients nur einmal initialisieren, wenn nicht vorhanden
270
- if not hf_token:
271
- hf_token = os.getenv("HUGGINGFACE_TOKEN")
272
- if not hf_token:
273
- raise ValueError("HUGGINGFACE_TOKEN Secret nicht im Hugging Face Space gefunden!")
274
-
275
- if not search_client:
276
  if USE_TAVILY:
277
  tavily_key = os.getenv("TAVILY_API_KEY")
278
  if tavily_key:
279
- try:
280
- search_client = TavilyClient(api_key=tavily_key)
281
- print("Using Tavily for search.")
282
- except NameError:
283
- print("WARNUNG: TavilyClient Klasse nicht gefunden, obwohl USE_TAVILY=True.")
284
- search_client = None # Verhindere Nutzung
285
  else:
286
- print("WARNUNG: TAVILY_API_KEY nicht gefunden, obwohl USE_TAVILY=True.")
287
- # Fallback nur wenn Tavily nicht genutzt werden konnte
288
- if USE_DUCKDUCKGO:
289
- try:
290
- search_client = DDGS()
291
- print("Falling back to DuckDuckGo for search.")
292
- except NameError:
293
- search_client = None
294
- print("WARNUNG: DuckDuckGo auch nicht verfügbar. Suche deaktiviert.")
295
- else:
296
- search_client = None
297
- print("WARNUNG: Suche deaktiviert (Tavily Key fehlt).")
298
  elif USE_DUCKDUCKGO:
299
- try:
300
- search_client = DDGS()
301
- print("Using DuckDuckGo for search.")
302
- except NameError:
303
- search_client = None
304
- print("WARNUNG: duckduckgo-search nicht installiert/verfügbar. Suche deaktiviert.")
305
  else:
306
- search_client = None
307
  print("Web search is disabled by configuration.")
308
-
309
- # Agent Instanz immer neu erstellen oder nur wenn nicht vorhanden?
310
- # Für diesen Use Case: Erstelle sie immer neu, um sicherzustellen,
311
- # dass sie den neuesten Stand der Tools hat (obwohl sie hier global sind).
312
- # Besser wäre es, die tools direkt in der run-Methode zu übergeben.
313
-
314
- # --- LLM Client (Hugging Face Inference API) ---
315
- llm = InferenceAPI(
316
- model_id=HF_MODEL_ID,
317
- token=hf_token,
318
- max_new_tokens=1500, # Max Tokens, die das Modell generieren darf
319
- temperature=0.1,
320
- # stop_sequences=["Observation:", "\nObservation:", "\nTool:", "\nThought:"], # Optional: Hilft manchmal, das Abschneiden zu verbessern
321
- # top_p=0.9, # Optional
322
- )
323
- print(f"LLM configured with model: {HF_MODEL_ID}")
 
 
 
 
 
 
 
324
 
325
  # --- Agent Instanz ---
326
  available_tools = [search_web, download_task_file, read_file_content]
327
- # Filter out None tools if search failed to initialize
328
- active_tools = [t for t in available_tools if t is not None]
329
-
330
- # Stelle sicher, dass 'tool' importiert wurde
331
- if 'tool' not in globals():
332
- raise NameError("Die 'tool' Funktion von smolagents konnte nicht importiert werden.")
333
 
334
- agent_instance = Agent(
335
- llm=llm,
336
- tools=active_tools, # Übergebe die aktiven Tools bei der Initialisierung
337
- # system_prompt=... # Kann hier oder im run() prompt definiert werden
338
  )
339
- print(f"Smol Agent initialized with {len(active_tools)} tools.")
340
  if len(active_tools) < len(available_tools):
341
- print(f"Warning: Some tools might be inactive due to configuration or missing libraries.")
342
 
343
 
344
- # --- Angepasste Hauptfunktion ---
 
 
345
  def run_and_submit_all( profile: gr.OAuthProfile | None, progress=gr.Progress(track_tqdm=True)):
346
  """
347
- Fetches all questions, runs the smolagents agent on them, submits all answers,
348
  and displays the results. Includes Gradio progress tracking.
349
  """
350
  space_id = os.getenv("SPACE_ID")
351
-
352
- if profile:
353
- username= f"{profile.username}"
354
- print(f"User logged in: {username}")
355
- else:
356
  print("User not logged in.")
357
- # Gib None für DataFrame zurück, um Fehler in Gradio zu vermeiden
358
  return "Please Login to Hugging Face with the button.", None
 
 
359
 
360
  api_url = DEFAULT_API_URL
361
  questions_url = f"{api_url}/questions"
362
  submit_url = f"{api_url}/submit"
363
 
364
- # 1. Initialisiere Agent und Clients
365
  progress(0, desc="Initializing Agent...")
366
  try:
367
  initialize_agent()
368
- if not agent_instance:
369
- raise RuntimeError("Agent instance could not be initialized.")
370
- except ValueError as e:
371
- print(f"Error during initialization: {e}")
372
- return f"Configuration Error: {e}", None
373
- except Exception as e:
374
- print(f"Error initializing agent/clients ({type(e).__name__}): {e}")
375
- return f"Error initializing agent: {e}", None
376
 
377
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Local Run - Code Link N/A"
378
  print(f"Agent Code Link: {agent_code}")
@@ -389,28 +337,22 @@ def run_and_submit_all( profile: gr.OAuthProfile | None, progress=gr.Progress(tr
389
  return "Fetched questions list is empty or invalid format.", None
390
  num_questions = len(questions_data)
391
  print(f"Fetched {num_questions} questions.")
392
- except Exception as e:
393
- print(f"Error fetching questions ({type(e).__name__}): {e}")
394
- return f"Error fetching questions: {e}", None
395
 
396
-
397
- # 3. Run your Smol Agent with progress tracking
398
  start_time = datetime.now()
399
  results_log = []
400
  answers_payload = []
401
- print(f"Running smolagents on {num_questions} questions using {HF_MODEL_ID}...")
402
 
403
- # Verwende tqdm für die Iteration mit Gradio-Fortschritt
404
- # for i, item in enumerate(tqdm(questions_data, desc="Processing Questions")): # Standard tqdm
405
- for i, item in enumerate(progress.tqdm(questions_data, desc="Processing Questions")): # Gradio tqdm
406
  task_id = item.get("task_id")
407
  question_text = item.get("question")
408
-
409
  if not task_id or question_text is None:
410
  print(f"Skipping item {i+1} with missing task_id or question: {item}")
411
  continue
412
 
413
- # --- Prompt für smolagents ---
414
  agent_prompt = f"""
415
  You are an expert AI assistant solving a challenge question based on the GAIA benchmark.
416
  Your task is to answer the following question accurately and concisely.
@@ -435,177 +377,110 @@ Think step-by-step before deciding on an action or the final answer.
435
 
436
  Let's begin the thinking process for Task {task_id}.
437
  """
438
-
439
- submitted_answer = f"Error: Agent failed for task {task_id}" # Default error
440
  try:
441
- # Führe den Agenten aus (übergebe Tools nicht erneut, wenn sie im Konstruktor sind)
442
- agent_response = agent_instance.run(prompt=agent_prompt)
443
 
444
  if agent_response:
445
- # Einfache Bereinigung: Entferne häufige Präfixe und überflüssige Leerzeichen
446
- # Manchmal geben Modelle trotz Anweisung Präfixe aus.
447
  cleaned_response = re.sub(r"^(Final Answer:|Answer:|The answer is:|Here is the answer:)\s*", "", agent_response.strip(), flags=re.IGNORECASE)
448
- # Entferne auch Anführungszeichen am Anfang/Ende, falls das Modell sie hinzufügt
449
  cleaned_response = cleaned_response.strip('"').strip("'")
450
  submitted_answer = cleaned_response if cleaned_response else "Error: Agent returned empty response after cleaning."
451
  else:
452
  submitted_answer = "Error: Agent returned an empty or None response."
453
-
454
-
455
  print(f"Task {task_id} completed. Submitted Answer: '{submitted_answer}'")
456
 
457
- # Handle specific exceptions if needed, e.g., RateLimitError from HF
458
  except Exception as e:
459
  error_msg = f"AGENT_RUN_ERROR on task {task_id} ({type(e).__name__}): {e}"
460
  print(error_msg)
461
- submitted_answer = f"ERROR: Agent failed ({type(e).__name__})" # Kürzere Fehlermeldung
 
462
 
463
  finally:
464
- # Füge das Ergebnis (oder den Fehler) hinzu
465
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
466
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
467
- # Optional: Sofortige Bereinigung (kann Laufzeit verlängern)
468
- # cleanup_temp_files()
469
 
470
- end_time = datetime.now()
471
- duration = end_time - start_time
472
  print(f"Agent processing finished in {duration}.")
473
  progress(0.9, desc="Submitting answers...")
474
 
475
- # 4. Prepare Submission
476
  if not answers_payload:
477
- print("Agent did not produce any answers to submit.")
478
- cleanup_temp_files() # Aufräumen
479
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
480
-
481
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
482
- status_update = f"Agent finished in {duration}. Submitting {len(answers_payload)} answers for user '{username}'..."
483
- print(status_update)
484
-
485
- # 5. Submit
486
- print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
487
- final_status = "Submission attempt finished." # Default status
488
- results_df = pd.DataFrame(results_log) # Erstelle DataFrame vor dem Try-Block
489
 
 
 
 
490
  try:
491
  response = requests.post(submit_url, json=submission_data, timeout=120)
492
  response.raise_for_status()
493
  result_data = response.json()
494
- final_status = (
495
- f"Submission Successful!\n"
496
- f"User: {result_data.get('username')}\n"
497
- f"Score: {result_data.get('score', 'N/A'):.2f}% "
498
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
499
- f"Message: {result_data.get('message', 'No message received.')}"
500
- )
501
  print("Submission successful.")
502
  except requests.exceptions.HTTPError as e:
503
- error_detail = f"Server responded with status {e.response.status_code}."
 
504
  try:
505
- error_json = e.response.json()
506
- api_error = error_json.get('detail', e.response.text)
507
- if isinstance(api_error, list) and len(api_error) > 0 and isinstance(api_error[0], dict):
508
- error_detail += f" Detail: {api_error[0].get('msg', str(api_error))}"
509
- else:
510
- error_detail += f" Detail: {str(api_error)}"
511
- except requests.exceptions.JSONDecodeError:
512
- error_detail += f" Response: {e.response.text[:500]}"
513
- final_status = f"Submission Failed: {error_detail}"
514
- print(final_status)
515
- except requests.exceptions.Timeout:
516
- final_status = "Submission Failed: The request timed out."
517
- print(final_status)
518
- except requests.exceptions.RequestException as e:
519
- final_status = f"Submission Failed: Network error - {e}"
520
- print(final_status)
521
- except Exception as e:
522
- final_status = f"An unexpected error occurred during submission ({type(e).__name__}): {e}"
523
- print(final_status)
524
- finally:
525
- cleanup_temp_files() # Stelle sicher, dass aufgeräumt wird
526
 
527
  progress(1, desc="Done.")
528
  return final_status, results_df
529
 
530
 
531
- # --- Gradio Interface (mit Progress Bar) ---
532
  with gr.Blocks() as demo:
533
- gr.Markdown("# Smol Agents Evaluation Runner (Hugging Face)")
534
- gr.Markdown(
535
- f"""
536
  **Instructions:**
537
-
538
- 1. Ensure `HUGGINGFACE_TOKEN` (write access) is a Secret in Space settings. Add `TAVILY_API_KEY` if using Tavily.
539
- 2. Verify `requirements.txt` includes `smolagents[huggingface]`, search libs, `pypdf2`.
540
- 3. Agent uses HF Inference API model: **{HF_MODEL_ID}** (change via `HF_MODEL_ID` env var).
541
- 4. Log in below.
542
- 5. Click 'Run Evaluation & Submit'. **This will take time.** Monitor progress below and console logs.
543
-
544
  ---
545
- **Agent Details:**
546
- * Uses `smolagents` library.
547
- * Tools: Web Search ({'Tavily' if USE_TAVILY else 'DuckDuckGo' if USE_DUCKDUCKGO else 'Disabled'}), File Download, File Reading (PDF/Text).
548
- """
549
- )
550
-
551
- # Platzhalter für Login-Status (vereinfacht)
552
- # Gradio's LoginButton handhabt das meiste intern
553
- # profile_info = gr.State(None) # Nicht unbedingt nötig, wenn LoginButton direkt genutzt wird
554
-
555
- with gr.Row():
556
- login_button = gr.LoginButton()
557
- # Logout nicht direkt implementiert, User kann sich auf HF ausloggen
558
-
559
  run_button = gr.Button("Run Evaluation & Submit All Answers")
560
-
561
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=10, interactive=False)
562
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True, height=400)
563
 
564
- # --- Event Handler für den Button ---
565
- # Diese Funktion wird aufgerufen, wenn der Button geklickt wird.
566
- # Sie erhält das OAuth-Profil, wenn der User eingeloggt ist.
567
  def handle_run(request: gr.Request):
568
- # Das Profil wird aus dem Request-Objekt extrahiert, wenn eingeloggt
569
  profile = getattr(request, 'profile', None)
570
- # Rufe die Hauptfunktion auf und gib ihre Ausgaben zurück
571
- # Füge das gr.Progress() Objekt hinzu, das von Gradio verwaltet wird
572
  return run_and_submit_all(profile, progress=gr.Progress(track_tqdm=True))
573
 
574
- run_button.click(
575
- fn=handle_run, # Verwende die Wrapper-Funktion
576
- inputs=[], # Keine expliziten Inputs nötig, Profil kommt vom Request
577
- outputs=[status_output, results_table],
578
- api_name="run_evaluation_smol"
579
- )
580
-
581
 
582
  # --- App Start (unverändert) ---
583
  if __name__ == "__main__":
584
- print("\n" + "-"*30 + " App Starting (Smol Agents Version - Corrected Imports) " + "-"*30)
 
585
  space_host_startup = os.getenv("SPACE_HOST")
586
  space_id_startup = os.getenv("SPACE_ID")
587
-
588
- if space_host_startup:
589
- print(f"✅ SPACE_HOST found: {space_host_startup}")
590
- print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
591
- else:
592
- print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
593
-
594
- if space_id_startup:
595
- print(f"✅ SPACE_ID found: {space_id_startup}")
596
- print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
597
- print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
598
- else:
599
- print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
600
-
601
- print(f" Using HF Model via Inference API: {HF_MODEL_ID}")
602
- search_tool_status = 'Disabled'
603
  if USE_TAVILY: search_tool_status = 'Tavily'
604
  elif USE_DUCKDUCKGO: search_tool_status = 'DuckDuckGo'
605
  print(f" Search Tool: {search_tool_status}")
606
  print(f" PDF Reading: {'Enabled' if PDF_READER_AVAILABLE else 'Disabled (PyPDF2 missing)'}")
607
- print("-"*(60 + len(" App Starting (Smol Agents Version - Corrected Imports) ")) + "\n")
608
-
609
- print("Launching Gradio Interface for Smol Agents Evaluation...")
610
- # Setze queue=True für bessere Handhabung langer Läufe
611
  demo.queue().launch(debug=False, share=False)
 
5
  import re
6
  from datetime import datetime
7
  import time
8
+ import tempfile
9
+ import atexit
10
+ import sys # Für sys.exit bei Importfehlern
11
 
12
+ # --- Smol Agents und HF Imports (angepasst an Beispiel) ---
13
  try:
14
+ # Verwende CodeAgent und HfApiModel wie im Beispiel
15
+ from smolagents import CodeAgent, tool, HfApiModel
16
+ print("Successfully imported CodeAgent, tool, HfApiModel from 'smolagents'")
 
17
  except ImportError as e:
18
  print(f"Error importing from smolagents: {e}")
19
  print("Please ensure 'smolagents[huggingface]' is listed correctly in requirements.txt")
20
+ sys.exit(f"Fatal Error: Could not import smolagents components. Check requirements.txt and rebuild/restart the Space. Original error: {e}")
 
 
21
 
22
+ # huggingface_hub wird möglicherweise von HfApiModel intern genutzt
23
+ from huggingface_hub import HfApi
24
 
25
+ # --- Suchtool Imports (wie zuvor) ---
26
+ USE_TAVILY = False
27
+ USE_DUCKDUCKGO = True
28
 
29
  if USE_TAVILY:
30
  try:
31
  from tavily import TavilyClient
32
  except ImportError:
33
+ print("WARNUNG: TavilyClient nicht installiert.")
34
  USE_TAVILY = False
35
  USE_DUCKDUCKGO = True # Fallback
36
  if USE_DUCKDUCKGO:
37
  try:
38
  from duckduckgo_search import DDGS
39
  except ImportError:
40
+ print("WARNUNG: duckduckgo-search nicht installiert.")
41
  USE_DUCKDUCKGO = False
42
 
43
+ # --- PDF Reader Import (wie zuvor) ---
44
  try:
45
  import PyPDF2
46
  PDF_READER_AVAILABLE = True
 
50
 
51
  # --- Konstanten ---
52
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
53
+ # HfApiModel liest dies wahrscheinlich aus der Umgebungsvariable HF_MODEL_ID
54
+ # oder hat einen internen Default. Wir setzen sie weiterhin als Fallback/Info.
55
+ HF_MODEL_ID = os.getenv("HF_MODEL_ID", "meta-llama/Meta-Llama-3-8B-Instruct")
56
 
57
+ # --- Globale Variablen ---
58
+ # search_client wird weiterhin benötigt, da die Tools darauf zugreifen
59
  search_client = None
60
+ # agent_instance wird pro Lauf initialisiert
61
+ agent_instance = None
62
 
63
+ # --- Temporäre Datei Verwaltung (wie zuvor) ---
64
  temp_files_to_clean = set()
 
65
  def cleanup_temp_files():
66
+ # (Code unverändert)
67
  print("Cleaning up temporary files...")
68
+ for file_path in list(temp_files_to_clean):
69
  try:
70
  if os.path.exists(file_path):
71
  os.remove(file_path)
72
  print(f"Removed temporary file: {file_path}")
73
+ if file_path in temp_files_to_clean:
74
  temp_files_to_clean.remove(file_path)
75
  except OSError as e:
76
  print(f"Error removing temporary file {file_path}: {e}")
77
  except KeyError:
78
  print(f"Warning: File path {file_path} already removed from cleanup set.")
 
 
79
  atexit.register(cleanup_temp_files)
80
 
81
+ # --- Tool Definitionen (unverändert) ---
82
+ # Die @tool-Deklarationen und Funktionslogik bleiben gleich.
83
 
84
  @tool
85
  def search_web(query: str, max_results: int = 3) -> str:
 
92
  Returns:
93
  str: A string containing the search results, or an error message.
94
  """
95
+ # (Code unverändert)
96
  print(f"Tool: search_web(query='{query}', max_results={max_results})")
97
  if not search_client:
98
  return "Search tool is not available/configured."
 
107
  if not results: return "No search results found."
108
  return "\n".join([f"Title: {r['title']}\nURL: {r['href']}\nSnippet: {r['body'][:500]}..." for r in results])
109
  else:
 
110
  return "No compatible search client configured or available."
111
  except Exception as e:
112
  print(f"Search API Error ({type(e).__name__}): {e}")
 
123
  str: The local path to the downloaded file (e.g., '/tmp/gaia_task_abc-123.pdf') if successful,
124
  otherwise an error message starting with 'Error:'.
125
  """
126
+ # (Code unverändert)
127
  print(f"Tool: download_task_file(task_id='{task_id}')")
128
  file_url = f"{DEFAULT_API_URL}/files/{task_id}"
 
129
  try:
130
  response = requests.get(file_url, stream=True, timeout=30)
131
  response.raise_for_status()
 
132
  content_type = response.headers.get('content-type', '').lower()
133
  suffix = ".tmp"
134
  if 'pdf' in content_type: suffix = ".pdf"
 
136
  elif 'jpeg' in content_type or 'jpg' in content_type: suffix = ".jpg"
137
  elif 'csv' in content_type: suffix = ".csv"
138
  elif 'plain' in content_type or 'text' in content_type: suffix = ".txt"
 
 
139
  temp_dir = tempfile.gettempdir()
140
  safe_task_id = re.sub(r'[^\w\-]+', '_', task_id)
 
141
  timestamp = datetime.now().strftime("%Y%m%d%H%M%S%f")
142
  temp_file_path = os.path.join(temp_dir, f"gaia_task_{safe_task_id}_{timestamp}{suffix}")
 
 
143
  with open(temp_file_path, 'wb') as f:
144
  for chunk in response.iter_content(chunk_size=8192):
145
  f.write(chunk)
 
146
  print(f"File downloaded successfully to {temp_file_path}")
147
  temp_files_to_clean.add(temp_file_path)
148
+ return temp_file_path
 
149
  except requests.exceptions.HTTPError as e:
150
  if e.response.status_code == 404:
151
  print(f"No file found on server for task_id {task_id}.")
152
+ return "Error: No file found for this task ID."
153
  else:
154
  print(f"HTTP Error downloading file for task {task_id}: {e}")
155
+ return f"Error: Failed to download file (HTTP {e.response.status_code})."
156
  except requests.exceptions.RequestException as e:
157
  print(f"Network Error downloading file for task {task_id}: {e}")
158
+ return f"Error: Failed to download file due to network issue: {e}"
159
  except Exception as e:
160
  print(f"Unexpected error downloading file for task {task_id} ({type(e).__name__}): {e}")
161
+ return f"Error: Unexpected error during file download: {e}"
162
 
163
@tool
def read_file_content(file_path: str) -> str:
    """Read the textual content of a previously downloaded task file.

    Supports plain-text files and, when PyPDF2 is installed, PDF files.
    Image files are recognised but not read.

    Args:
        file_path (str): Absolute path as returned by download_task_file.

    Returns:
        str: The extracted text content (truncated if very long), or an error
            message starting with 'Error:'.
    """
    print(f"Tool: read_file_content(file_path='{file_path}')")

    # Only absolute paths produced by download_task_file are accepted.
    if not isinstance(file_path, str) or not os.path.isabs(file_path):
        print(f"Invalid input for read_file_content: '{file_path}'. Expected an absolute file path.")
        return "Error: Invalid input. Provide the absolute file path returned by download_task_file."

    # Security guard: never read outside the temp directory used for downloads.
    if not file_path.startswith(tempfile.gettempdir()):
        print(f"Security Alert: Attempted to read file outside temp directory: {file_path}")
        return "Error: Invalid file path provided. Only downloaded files can be read."

    if not os.path.exists(file_path):
        print(f"Error: File not found at path: {file_path}")
        return f"Error: File not found at the specified path '{os.path.basename(file_path)}'."

    try:
        file_size = os.path.getsize(file_path)
        print(f"Reading file: {os.path.basename(file_path)}, Size: {file_size} bytes")
        if file_size == 0:
            print(f"Warning: File {os.path.basename(file_path)} is empty.")
            return f"Observation: The file '{os.path.basename(file_path)}' is empty."

        if file_path.lower().endswith(".pdf"):
            if not PDF_READER_AVAILABLE:
                return "Error: Cannot read PDF file because PyPDF2 library is not installed."
            text = ""
            with open(file_path, 'rb') as f:
                reader = PyPDF2.PdfReader(f)
                num_pages = len(reader.pages)
                print(f"Reading {num_pages} pages from PDF...")
                for page_num in range(num_pages):
                    # Fix: extract each page's text exactly once (the previous
                    # version called extract_text() twice per page).
                    page_text = reader.pages[page_num].extract_text()
                    if page_text:
                        text += page_text + "\n"
                        if len(text) > 7000:
                            text = text[:7000] + "\n... (content truncated)"
                            print(f"Text truncated at {len(text)} chars.")
                            break
            if not text.strip():
                return f"Observation: Could not extract any text content from the PDF file '{os.path.basename(file_path)}'."
            print(f"Successfully read ~{len(text)} chars from PDF: {os.path.basename(file_path)}")
            return f"Content of '{os.path.basename(file_path)}':\n{text}"

        elif file_path.lower().endswith((".png", ".jpg", ".jpeg", ".gif", ".bmp")):
            print(f"File is an image: {os.path.basename(file_path)}. Reading image content not implemented.")
            return f"Observation: File '{os.path.basename(file_path)}' is an image. Cannot read text content with this tool."
        else:
            # Treat everything else as text and read at most max_len characters.
            content = ""
            chunk_size = 4096
            max_len = 7000
            truncated = False
            try:
                with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                    while len(content) < max_len:
                        chunk = f.read(chunk_size)
                        if not chunk:
                            break
                        content += chunk
                        if len(content) > max_len:
                            content = content[:max_len]
                            truncated = True
                print(f"Successfully read ~{len(content)} chars from text file: {os.path.basename(file_path)}")
                result = f"Content of '{os.path.basename(file_path)}':\n{content}"
                if truncated:
                    result += "\n... (content truncated)"
                return result
            except Exception as read_err:
                print(f"Error reading file {file_path} as text: {read_err}")
                return f"Error: Failed to read file '{os.path.basename(file_path)}' as text: {read_err}"

    except Exception as e:
        print(f"Error reading file {file_path} ({type(e).__name__}): {e}")
        return f"Error: Failed to read file content: {e}"
233
 
234
 
235
# --- Agent initialization (simplified) ---
def initialize_agent():
    """Initialize the smolagents CodeAgent plus its LLM and search clients.

    Sets the module-level globals ``search_client`` and ``agent_instance``.

    Raises:
        ValueError: If the required HUGGINGFACE_TOKEN secret is missing.
    """
    global search_client, agent_instance
    print("Initializing agent and clients...")

    # Initialize the search client exactly once; ``False`` marks
    # "search disabled" so later calls do not retry.
    if search_client is None:
        print("Initializing search client...")
        if USE_TAVILY:
            tavily_key = os.getenv("TAVILY_API_KEY")
            if tavily_key:
                try:
                    search_client = TavilyClient(api_key=tavily_key)
                    print("Using Tavily for search.")
                except NameError:
                    print("WARNUNG: TavilyClient Klasse nicht gefunden.")
                    search_client = None
            else:
                print("WARNUNG: TAVILY_API_KEY nicht gefunden.")
                # Fall back to DuckDuckGo only when Tavily could not be set up.
                if USE_DUCKDUCKGO:
                    try:
                        search_client = DDGS()
                        print("Falling back to DuckDuckGo for search.")
                    except NameError:
                        print("WARNUNG: DuckDuckGo nicht verfügbar.")
                        search_client = None
                else:
                    search_client = None
        elif USE_DUCKDUCKGO:
            try:
                search_client = DDGS()
                print("Using DuckDuckGo for search.")
            except NameError:
                print("WARNUNG: duckduckgo-search nicht installiert/verfügbar.")
                search_client = None
        else:
            print("Web search is disabled by configuration.")
            search_client = False  # prevent re-initialization attempts

    # --- LLM model (via HfApiModel) ---
    # HfApiModel reads HUGGINGFACE_TOKEN (and HF_MODEL_ID) from environment
    # variables; fail early with a clear message when the token is missing.
    hf_token_check = os.getenv("HUGGINGFACE_TOKEN")
    if not hf_token_check:
        raise ValueError("HUGGINGFACE_TOKEN Secret nicht im Hugging Face Space gefunden! HfApiModel benötigt dies.")

    model_config = {}
    if HF_MODEL_ID:
        # The model id itself is picked up by HfApiModel from the environment;
        # generation parameters could be added to model_config if the installed
        # smolagents version supports them.
        print(f"HfApiModel will attempt to use model specified by HF_MODEL_ID env var (or its default): {HF_MODEL_ID}")

    hf_model = HfApiModel(**model_config)

    # --- Agent instance ---
    available_tools = [search_web, download_task_file, read_file_content]
    # Defensive filter; the tool functions are normally always defined.
    active_tools = [t for t in available_tools if t is not None]

    agent_instance = CodeAgent(
        tools=active_tools,
        model=hf_model
    )
    print(f"Smol CodeAgent initialized with {len(active_tools)} tools and HfApiModel.")
    if len(active_tools) < len(available_tools):
        print("Warning: Some tools might be inactive.")
296
 
297
 
298
# --- Main function run_and_submit_all (largely unchanged) ---
# The logic for fetching questions, iterating, building the prompt, running the
# agent, collecting answers and submitting is unchanged; only the agent
# initialization above was simplified.
def run_and_submit_all( profile: gr.OAuthProfile | None, progress=gr.Progress(track_tqdm=True)):
    """
    Fetches all questions, runs the smolagents CodeAgent on them, submits all answers,
    and displays the results. Includes Gradio progress tracking.

    Args:
        profile: OAuth profile of the logged-in user, or None when not logged in.
        progress: Gradio progress tracker (injected by the UI).

    Returns:
        tuple: (status message, pandas DataFrame of per-question results or None).
    """
    space_id = os.getenv("SPACE_ID")
    if not profile:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None
    username = f"{profile.username}"
    print(f"User logged in: {username}")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Initialize agent (simplified)
    progress(0, desc="Initializing Agent...")
    try:
        initialize_agent()
        if not agent_instance: raise RuntimeError("Agent instance could not be initialized.")
    except ValueError as e: return f"Configuration Error: {e}", None
    except Exception as e: return f"Error initializing agent ({type(e).__name__}): {e}", None

    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Local Run - Code Link N/A"
    print(f"Agent Code Link: {agent_code}")

    # 2. Fetch questions
    # NOTE(review): the lines that perform the questions request (roughly ten
    # lines) are collapsed in this review chunk; only the tail of that block
    # is visible below.
        return "Fetched questions list is empty or invalid format.", None
        num_questions = len(questions_data)
        print(f"Fetched {num_questions} questions.")
    except Exception as e: return f"Error fetching questions ({type(e).__name__}): {e}", None

    # 3. Run the Smol CodeAgent on every question
    start_time = datetime.now()
    results_log = []
    answers_payload = []
    print(f"Running smol CodeAgent on {num_questions} questions using HfApiModel...")

    for i, item in enumerate(progress.tqdm(questions_data, desc="Processing Questions")):
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item {i+1} with missing task_id or question: {item}")
            continue

        # --- Prompt for smolagents (unchanged) ---
        # NOTE(review): the middle portion of this prompt template is collapsed
        # in this review chunk.
        agent_prompt = f"""
        You are an expert AI assistant solving a challenge question based on the GAIA benchmark.
        Your task is to answer the following question accurately and concisely.

        Let's begin the thinking process for Task {task_id}.
        """
        submitted_answer = f"Error: Agent failed for task {task_id}"
        try:
            # Run the agent.
            # NOTE(review): confirm the installed smolagents version accepts a
            # 'prompt' keyword on run(); some versions take a positional task.
            agent_response = agent_instance.run(prompt=agent_prompt)

            if agent_response:
                # Strip common answer prefixes the model may emit.
                cleaned_response = re.sub(r"^(Final Answer:|Answer:|The answer is:|Here is the answer:)\s*", "", agent_response.strip(), flags=re.IGNORECASE)
                cleaned_response = cleaned_response.strip('"').strip("'")
                submitted_answer = cleaned_response if cleaned_response else "Error: Agent returned empty response after cleaning."
            else:
                submitted_answer = "Error: Agent returned an empty or None response."

            print(f"Task {task_id} completed. Submitted Answer: '{submitted_answer}'")

        except Exception as e:
            error_msg = f"AGENT_RUN_ERROR on task {task_id} ({type(e).__name__}): {e}"
            print(error_msg)
            # More specific HfApiModel errors could be caught here if known.
            submitted_answer = f"ERROR: Agent failed ({type(e).__name__})"

        finally:
            # Always record an answer so the payload and the log stay aligned.
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})

    end_time = datetime.now(); duration = end_time - start_time
    print(f"Agent processing finished in {duration}.")
    progress(0.9, desc="Submitting answers...")

    # 4. Prepare submission (unchanged)
    if not answers_payload:
        print("Agent did not produce any answers to submit."); cleanup_temp_files()
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    print(f"Submitting {len(answers_payload)} answers for user '{username}'...")

    # 5. Submit (unchanged)
    final_status = "Submission attempt finished."
    results_df = pd.DataFrame(results_log)
    try:
        response = requests.post(submit_url, json=submission_data, timeout=120)
        response.raise_for_status()
        result_data = response.json()
        # NOTE(review): the ':.2f' format raises ValueError if 'score' is
        # missing (it would format the string 'N/A') — confirm the scoring API
        # always returns a numeric score.
        final_status = (f"Submission Successful!\nUser: {result_data.get('username')}\n"
                        f"Score: {result_data.get('score', 'N/A'):.2f}% ({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
                        f"Message: {result_data.get('message', 'No message received.')}")
        print("Submission successful.")
    except requests.exceptions.HTTPError as e:
        error_detail = f"... {e.response.status_code}."  # shortened message for code readability
        # (full error handling as before)
        try:
            error_json = e.response.json(); api_error = error_json.get('detail', e.response.text)
            if isinstance(api_error, list) and len(api_error) > 0: error_detail += f" Detail: {api_error[0].get('msg', str(api_error))}"
            else: error_detail += f" Detail: {str(api_error)}"
        except: error_detail += f" Response: {e.response.text[:200]}"
        final_status = f"Submission Failed: {error_detail}"; print(final_status)
    except requests.exceptions.Timeout: final_status = "Submission Failed: Timeout."; print(final_status)
    except requests.exceptions.RequestException as e: final_status = f"Submission Failed: Network error - {e}"; print(final_status)
    except Exception as e: final_status = f"Submission Failed: Unexpected error ({type(e).__name__}): {e}"; print(final_status)
    finally: cleanup_temp_files()  # always remove downloaded task files

    progress(1, desc="Done.")
    return final_status, results_df
441
 
442
 
443
+ # --- Gradio Interface (weitgehend unverändert) ---
444
  with gr.Blocks() as demo:
445
+ gr.Markdown("# Smol CodeAgent Evaluation Runner (Hugging Face)") # Titel angepasst
446
+ gr.Markdown(f"""
 
447
  **Instructions:**
448
+ 1. Ensure `HUGGINGFACE_TOKEN` is a Secret. Add `TAVILY_API_KEY` if using Tavily.
449
+ 2. Verify `requirements.txt` includes `smolagents[huggingface]`, etc.
450
+ 3. Agent uses `CodeAgent` with `HfApiModel`. Target Model (via env var or default): **{HF_MODEL_ID}**.
451
+ 4. Log in below.
452
+ 5. Click 'Run Evaluation & Submit'. Expect a potentially long runtime.
 
 
453
  ---
454
+ **Agent Details:** Uses `smolagents.CodeAgent`. Search: {'Tavily' if USE_TAVILY else 'DuckDuckGo' if USE_DUCKDUCKGO else 'Disabled'}.
455
+ """)
456
+ with gr.Row(): login_button = gr.LoginButton()
 
 
 
 
 
 
 
 
 
 
 
457
  run_button = gr.Button("Run Evaluation & Submit All Answers")
 
458
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=10, interactive=False)
459
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True, height=400)
460
 
 
 
 
461
  def handle_run(request: gr.Request):
 
462
  profile = getattr(request, 'profile', None)
 
 
463
  return run_and_submit_all(profile, progress=gr.Progress(track_tqdm=True))
464
 
465
+ run_button.click(fn=handle_run, inputs=[], outputs=[status_output, results_table], api_name="run_evaluation_smol_codeagent")
 
 
 
 
 
 
466
 
467
# --- App Start (unchanged) ---
if __name__ == "__main__":
    # Startup banner; the same label is reused to size the closing rule.
    banner = " App Starting (Smol CodeAgent Version) "
    print("\n" + "-" * 30 + banner + "-" * 30)

    # Report deployment context (Space host / repo) to ease debugging.
    host_env = os.getenv("SPACE_HOST")
    repo_env = os.getenv("SPACE_ID")
    if host_env:
        print(f"✅ SPACE_HOST found: {host_env} -> Runtime URL: https://{host_env}.hf.space")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
    if repo_env:
        print(f"✅ SPACE_ID found: {repo_env} -> Repo: https://huggingface.co/spaces/{repo_env}")
    else:
        print("ℹ️ SPACE_ID environment variable not found (running locally?).")

    print(f"   Using Smol CodeAgent with HfApiModel.")
    print(f"   Target HF Model (via env var or default): {HF_MODEL_ID}")

    # Summarise which optional capabilities are active for this run.
    if USE_TAVILY:
        search_tool_status = 'Tavily'
    elif USE_DUCKDUCKGO:
        search_tool_status = 'DuckDuckGo'
    else:
        search_tool_status = 'Disabled'
    print(f"   Search Tool: {search_tool_status}")
    print(f"   PDF Reading: {'Enabled' if PDF_READER_AVAILABLE else 'Disabled (PyPDF2 missing)'}")

    print("-" * (60 + len(banner)) + "\n")
    print("Launching Gradio Interface for Smol CodeAgent Evaluation...")
    demo.queue().launch(debug=False, share=False)