pmeyhoefer committed on
Commit
e777122
·
verified ·
1 Parent(s): c58c21b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +100 -385
app.py CHANGED
@@ -7,11 +7,10 @@ from datetime import datetime
7
  import time
8
  import tempfile
9
  import atexit
10
- import sys # Für sys.exit bei Importfehlern
11
 
12
  # --- Smol Agents und HF Imports (angepasst an Beispiel) ---
13
  try:
14
- # Verwende CodeAgent und HfApiModel wie im Beispiel
15
  from smolagents import CodeAgent, tool, HfApiModel
16
  print("Successfully imported CodeAgent, tool, HfApiModel from 'smolagents'")
17
  except ImportError as e:
@@ -19,28 +18,26 @@ except ImportError as e:
19
  print("Please ensure 'smolagents[huggingface]' is listed correctly in requirements.txt")
20
  sys.exit(f"Fatal Error: Could not import smolagents components. Check requirements.txt and rebuild/restart the Space. Original error: {e}")
21
 
22
- # huggingface_hub wird möglicherweise von HfApiModel intern genutzt
23
  from huggingface_hub import HfApi
24
 
25
- # --- Suchtool Imports (wie zuvor) ---
26
- USE_TAVILY = False # Setze auf True, wenn du Tavily bevorzugst (benötigt TAVILY_API_KEY)
27
- USE_DUCKDUCKGO = True # Setze auf True für DuckDuckGo (kein Key nötig)
28
-
29
  if USE_TAVILY:
30
  try:
31
  from tavily import TavilyClient
32
  except ImportError:
33
  print("WARNUNG: TavilyClient nicht installiert.")
34
  USE_TAVILY = False
35
- USE_DUCKDUCKGO = True # Fallback
36
  if USE_DUCKDUCKGO:
37
  try:
38
  from duckduckgo_search import DDGS
39
  except ImportError:
40
- print("WARNUNG: duckduckgo-search nicht installiert.")
41
- USE_DUCKDUCKGO = False
42
 
43
- # --- PDF Reader Import (wie zuvor) ---
44
  try:
45
  import PyPDF2
46
  PDF_READER_AVAILABLE = True
@@ -50,68 +47,49 @@ except ImportError:
50
 
51
  # --- Konstanten ---
52
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
53
- # HfApiModel liest dies wahrscheinlich aus der Umgebungsvariable HF_MODEL_ID
54
- # oder hat einen internen Default. Wir setzen sie weiterhin als Fallback/Info.
55
  HF_MODEL_ID = os.getenv("HF_MODEL_ID", "meta-llama/Meta-Llama-3-8B-Instruct")
56
 
57
  # --- Globale Variablen ---
58
- # search_client wird weiterhin benötigt, da die Tools darauf zugreifen
59
  search_client = None
60
- # agent_instance wird pro Lauf initialisiert
61
  agent_instance = None
62
 
63
- # --- Temporäre Datei Verwaltung (wie zuvor) ---
64
  temp_files_to_clean = set()
 
65
  def cleanup_temp_files():
66
- # (Code unverändert)
67
  print("Cleaning up temporary files...")
68
  for file_path in list(temp_files_to_clean):
69
  try:
70
  if os.path.exists(file_path):
71
  os.remove(file_path)
72
  print(f"Removed temporary file: {file_path}")
73
- if file_path in temp_files_to_clean:
74
- temp_files_to_clean.remove(file_path)
75
- except OSError as e:
76
  print(f"Error removing temporary file {file_path}: {e}")
77
- except KeyError:
78
- print(f"Warning: File path {file_path} already removed from cleanup set.")
79
- atexit.register(cleanup_temp_files)
80
 
81
- # --- Tool Definitionen (unverändert) ---
82
- # Die @tool-Deklarationen und Funktionslogik bleiben gleich.
83
 
 
84
  @tool
85
  def search_web(query: str, max_results: int = 3) -> str:
86
- """
87
- Searches the web for the given query and returns a summary of the top results.
88
- Use this to find recent information or facts not readily available.
89
- Args:
90
- query (str): The search query.
91
- max_results (int): The maximum number of results to return (default 3).
92
- Returns:
93
- str: A string containing the search results, or an error message.
94
- """
95
- # (Code unverändert)
96
  print(f"Tool: search_web(query='{query}', max_results={max_results})")
97
  if not search_client:
98
- # Extra Check, ob search_client explizit auf False gesetzt wurde (Initialisierung fehlgeschlagen)
99
- if search_client is False:
100
- return "Search tool is disabled by configuration or missing libraries."
101
- else:
102
- # Sollte nicht passieren, wenn initialize_agent korrekt läuft, aber zur Sicherheit
103
- print("Warning: Search client not initialized before tool use.")
104
- return "Search tool is not available/configured."
105
  try:
106
  if USE_TAVILY and isinstance(search_client, TavilyClient):
107
  response = search_client.search(query=query, search_depth="basic", max_results=max_results)
108
- context = [{"url": obj["url"], "content": obj["content"]} for obj in response.get('results', [])]
109
- if not context: return "No search results found."
110
- return "\n".join([f"URL: {c['url']}\nContent: {c['content'][:500]}..." for c in context])
 
 
 
111
  elif USE_DUCKDUCKGO and isinstance(search_client, DDGS):
112
- results = search_client.text(query, max_results=max_results)
113
- if not results: return "No search results found."
114
- return "\n".join([f"Title: {r['title']}\nURL: {r['href']}\nSnippet: {r['body'][:500]}..." for r in results])
 
 
 
115
  else:
116
  return "No compatible search client configured or available."
117
  except Exception as e:
@@ -120,16 +98,6 @@ def search_web(query: str, max_results: int = 3) -> str:
120
 
121
  @tool
122
  def download_task_file(task_id: str) -> str:
123
- """
124
- Downloads a file associated with a specific task ID from the evaluation server.
125
- Use this ONLY if the question requires information from a specific file linked to the task.
126
- Args:
127
- task_id (str): The unique identifier for the task whose file needs to be downloaded.
128
- Returns:
129
- str: The local path to the downloaded file (e.g., '/tmp/gaia_task_abc-123.pdf') if successful,
130
- otherwise an error message starting with 'Error:'.
131
- """
132
- # (Code unverändert)
133
  print(f"Tool: download_task_file(task_id='{task_id}')")
134
  file_url = f"{DEFAULT_API_URL}/files/{task_id}"
135
  try:
@@ -141,381 +109,128 @@ def download_task_file(task_id: str) -> str:
141
  elif 'png' in content_type: suffix = ".png"
142
  elif 'jpeg' in content_type or 'jpg' in content_type: suffix = ".jpg"
143
  elif 'csv' in content_type: suffix = ".csv"
144
- elif 'plain' in content_type or 'text' in content_type: suffix = ".txt"
145
  temp_dir = tempfile.gettempdir()
146
- safe_task_id = re.sub(r'[^\w\-]+', '_', task_id)
147
  timestamp = datetime.now().strftime("%Y%m%d%H%M%S%f")
148
- temp_file_path = os.path.join(temp_dir, f"gaia_task_{safe_task_id}_{timestamp}{suffix}")
149
- with open(temp_file_path, 'wb') as f:
150
- for chunk in response.iter_content(chunk_size=8192):
151
  f.write(chunk)
152
- print(f"File downloaded successfully to {temp_file_path}")
153
- temp_files_to_clean.add(temp_file_path)
154
- return temp_file_path
155
  except requests.exceptions.HTTPError as e:
156
  if e.response.status_code == 404:
157
- print(f"No file found on server for task_id {task_id}.")
158
  return "Error: No file found for this task ID."
159
- else:
160
- print(f"HTTP Error downloading file for task {task_id}: {e}")
161
- return f"Error: Failed to download file (HTTP {e.response.status_code})."
162
- except requests.exceptions.RequestException as e:
163
- print(f"Network Error downloading file for task {task_id}: {e}")
164
- return f"Error: Failed to download file due to network issue: {e}"
165
  except Exception as e:
166
- print(f"Unexpected error downloading file for task {task_id} ({type(e).__name__}): {e}")
167
  return f"Error: Unexpected error during file download: {e}"
168
 
169
  @tool
170
  def read_file_content(file_path: str) -> str:
171
- """
172
- Reads the text content of a previously downloaded file (PDF or plain text).
173
- Use this tool AFTER 'download_task_file' has successfully returned a file path (not an error message).
174
- Args:
175
- file_path (str): The local path to the file (must be a path returned by 'download_task_file').
176
- Returns:
177
- str: The extracted text content (truncated if very long), or an error message starting with 'Error:'.
178
- """
179
- # (Code weitgehend unverändert, ggf. kleine Optimierungen wie zuvor)
180
  print(f"Tool: read_file_content(file_path='{file_path}')")
181
- if not isinstance(file_path, str) or not os.path.isabs(file_path):
182
- print(f"Invalid input for read_file_content: '{file_path}'. Expected an absolute file path.")
183
- return "Error: Invalid input. Provide the absolute file path returned by download_task_file."
184
- if not file_path.startswith(tempfile.gettempdir()):
185
- print(f"Security Alert: Attempted to read file outside temp directory: {file_path}")
186
- return "Error: Invalid file path provided. Only downloaded files can be read."
187
  if not os.path.exists(file_path):
188
- print(f"Error: File not found at path: {file_path}")
189
- return f"Error: File not found at the specified path '{os.path.basename(file_path)}'."
190
  try:
191
- file_size = os.path.getsize(file_path)
192
- print(f"Reading file: {os.path.basename(file_path)}, Size: {file_size} bytes")
193
- if file_size == 0:
194
- print(f"Warning: File {os.path.basename(file_path)} is empty.")
195
- return f"Observation: The file '{os.path.basename(file_path)}' is empty."
196
- if file_path.lower().endswith(".pdf"):
197
- if not PDF_READER_AVAILABLE: return "Error: Cannot read PDF file because PyPDF2 library is not installed."
198
  text = ""
199
  with open(file_path, 'rb') as f:
200
  reader = PyPDF2.PdfReader(f)
201
- num_pages = len(reader.pages)
202
- print(f"Reading {num_pages} pages from PDF...")
203
- for page_num in range(num_pages):
204
- if reader.pages[page_num].extract_text():
205
- page_text = reader.pages[page_num].extract_text()
206
- text += page_text + "\n"
207
  if len(text) > 7000:
208
- text = text[:7000] + "\n... (content truncated)"
209
- print(f"Text truncated at {len(text)} chars.")
210
  break
211
- if not text:
212
- print(f"Warning: Could not extract text from PDF: {os.path.basename(file_path)}")
213
- return f"Observation: Could not extract any text content from the PDF file '{os.path.basename(file_path)}'."
214
- print(f"Successfully read ~{len(text)} chars from PDF: {os.path.basename(file_path)}")
215
  return f"Content of '{os.path.basename(file_path)}':\n{text}"
216
- elif file_path.lower().endswith((".png", ".jpg", ".jpeg", ".gif", ".bmp")):
217
- print(f"File is an image: {os.path.basename(file_path)}. Reading image content not implemented.")
218
- return f"Observation: File '{os.path.basename(file_path)}' is an image. Cannot read text content with this tool."
219
  else:
220
- content = ""
221
- chunk_size = 4096; max_len = 7000; truncated = False
222
- try:
223
- with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
224
- while len(content) < max_len:
225
- chunk = f.read(chunk_size);
226
- if not chunk: break
227
- content += chunk
228
- if len(content) > max_len: content = content[:max_len]; truncated = True
229
- print(f"Successfully read ~{len(content)} chars from text file: {os.path.basename(file_path)}")
230
- result = f"Content of '{os.path.basename(file_path)}':\n{content}"
231
- if truncated: result += "\n... (content truncated)"
232
- return result
233
- except Exception as read_err:
234
- print(f"Error reading file {file_path} as text: {read_err}")
235
- return f"Error: Failed to read file '{os.path.basename(file_path)}' as text: {read_err}"
236
  except Exception as e:
237
- print(f"Error reading file {file_path} ({type(e).__name__}): {e}")
238
- return f"Error: Failed to read file content: {e}"
239
 
240
-
241
- # --- Agent Initialisierung (VEREINFACHT) ---
242
  def initialize_agent():
243
- """Initialisiert den smolagents CodeAgent und die Clients."""
244
  global search_client, agent_instance
245
- print("Initializing agent and clients...")
246
-
247
- # Initialisiere Search Client (wenn nicht bereits geschehen oder fehlgeschlagen)
248
- if search_client is None: # Nur initialisieren, wenn noch nicht versucht
249
- print("Initializing search client...")
250
  if USE_TAVILY:
251
- tavily_key = os.getenv("TAVILY_API_KEY")
252
- if tavily_key:
253
- try: search_client = TavilyClient(api_key=tavily_key); print("Using Tavily for search.")
254
- except NameError: print("WARNUNG: TavilyClient Klasse nicht gefunden."); search_client = False # Fehler markieren
255
  else:
256
- print("WARNUNG: TAVILY_API_KEY nicht gefunden.")
257
- search_client = False # Fehler markieren
258
- if USE_DUCKDUCKGO: # Fallback nur wenn Tavily nicht initialisiert werden konnte
259
- try: search_client = DDGS(); print("Falling back to DuckDuckGo for search.")
260
- except NameError: print("WARNUNG: DuckDuckGo nicht verfügbar."); search_client = False # Fehler markieren
261
  elif USE_DUCKDUCKGO:
262
- try: search_client = DDGS(); print("Using DuckDuckGo for search.")
263
- except NameError: print("WARNUNG: duckduckgo-search nicht installiert/verfügbar."); search_client = False # Fehler markieren
264
  else:
265
- print("Web search is disabled by configuration.")
266
- search_client = False # Explizit deaktiviert
267
-
268
- # --- LLM Model (vereinfacht via HfApiModel) ---
269
- hf_token_check = os.getenv("HUGGINGFACE_TOKEN")
270
- if not hf_token_check:
271
- raise ValueError("HUGGINGFACE_TOKEN Secret nicht im Hugging Face Space gefunden! HfApiModel benötigt dies.")
272
-
273
- print(f"HfApiModel will attempt to use model specified by HF_MODEL_ID env var (or its default): {HF_MODEL_ID}")
274
- model_config = {}
275
- # Optional: Parameter für HfApiModel setzen, falls nötig und unterstützt
276
- # model_config['max_new_tokens'] = 1500
277
- # model_config['temperature'] = 0.1
278
-
279
- hf_model = HfApiModel(**model_config)
280
-
281
- # --- Agent Instanz ---
282
- available_tools = [search_web, download_task_file, read_file_content]
283
- # Nur aktive Tools übergeben (wenn search_client nicht False ist)
284
- active_tools = [t for t in available_tools if t is not None]
285
  if search_client is False:
286
- active_tools = [t for t in active_tools if t != search_web] # Entferne search_web, wenn Client fehlgeschlagen
 
287
 
288
- agent_instance = CodeAgent(
289
- tools=active_tools,
290
- model=hf_model
291
- )
292
- print(f"Smol CodeAgent initialized with {len(active_tools)} tools and HfApiModel.")
293
- if len(active_tools) < len(available_tools):
294
- print(f"Warning: Some tools might be inactive due to configuration or missing libraries.")
295
-
296
-
297
- # --- Hauptfunktion run_and_submit_all (Nimmt gr.Request) ---
298
- def run_and_submit_all( request: gr.Request, progress=gr.Progress(track_tqdm=True)): # Geänderter Parameter
299
- """
300
- Fetches all questions, runs the smolagents CodeAgent on them, submits all answers,
301
- and displays the results. Includes Gradio progress tracking.
302
- """
303
- # +++ Profil aus Request extrahieren +++
304
- profile = getattr(request, 'profile', None)
305
- # +++ DEBUGGING PRINT (wie zuvor) +++
306
- print(f"--- Entering run_and_submit_all ---")
307
- print(f"Received profile object via request: {profile}")
308
- if profile:
309
- print(f"Profile username: {getattr(profile, 'username', 'N/A')}")
310
- # print(f"Profile details: {vars(profile) if profile else 'N/A'}") # Details können viel loggen
311
- else:
312
- print("Profile object via request is None.")
313
- # +++ END DEBUGGING PRINT +++
314
-
315
- space_id = os.getenv("SPACE_ID")
316
-
317
- # *** HIER die eigentliche Prüfung ***
318
  if not profile:
319
- print("Condition 'if not profile:' is TRUE. Returning login message.")
320
- return "Please Login to Hugging Face with the button.", None
321
-
322
- # Ab hier sollte der Code nur laufen, wenn profile NICHT None ist
323
- username = f"{profile.username}" # Jetzt sicher, da profile nicht None ist
324
- print(f"Proceeding with run for user: {username}")
325
-
326
  api_url = DEFAULT_API_URL
327
- questions_url = f"{api_url}/questions"
328
- submit_url = f"{api_url}/submit"
329
-
330
- # 1. Initialisiere Agent (vereinfacht)
331
- progress(0, desc="Initializing Agent...")
332
  try:
333
  initialize_agent()
334
- if not agent_instance: raise RuntimeError("Agent instance could not be initialized.")
335
- except ValueError as e: return f"Configuration Error: {e}", None
336
- except Exception as e: return f"Error initializing agent ({type(e).__name__}): {e}", None
337
-
338
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Local Run - Code Link N/A"
339
- print(f"Agent Code Link: {agent_code}")
340
-
341
- # 2. Fetch Questions
342
- progress(0.1, desc="Fetching questions...")
343
- print(f"Fetching questions from: {questions_url}")
344
- try:
345
- response = requests.get(questions_url, timeout=30)
346
- response.raise_for_status()
347
- questions_data = response.json()
348
- if not questions_data or not isinstance(questions_data, list):
349
- print(f"Fetched questions list is empty or invalid format: {questions_data}")
350
- return "Fetched questions list is empty or invalid format.", None
351
- num_questions = len(questions_data)
352
- print(f"Fetched {num_questions} questions.")
353
- except Exception as e: return f"Error fetching questions ({type(e).__name__}): {e}", None
354
-
355
- # 3. Run your Smol CodeAgent
356
- start_time = datetime.now()
357
- results_log = []
358
- answers_payload = []
359
- print(f"Running smol CodeAgent on {num_questions} questions using HfApiModel...")
360
-
361
- for i, item in enumerate(progress.tqdm(questions_data, desc="Processing Questions")):
362
  task_id = item.get("task_id")
363
- question_text = item.get("question")
364
- if not task_id or question_text is None:
365
- print(f"Skipping item {i+1} with missing task_id or question: {item}")
366
  continue
367
-
368
- # --- Prompt für smolagents (unverändert) ---
369
- agent_prompt = f"""
370
- You are an expert AI assistant solving a challenge question based on the GAIA benchmark.
371
- Your task is to answer the following question accurately and concisely.
372
- Use the available tools ONLY when necessary to find information or access required files.
373
- Think step-by-step before deciding on an action or the final answer.
374
-
375
- **Available Tools:** (These are the functions you can call)
376
- * `search_web(query: str, max_results: int = 3)`: Searches the web for information.
377
- * `download_task_file(task_id: str)`: Downloads the specific file FOR THIS TASK ONLY. Use the task_id '{task_id}'. Returns the local file path needed for 'read_file_content'.
378
- * `read_file_content(file_path: str)`: Reads text from a file previously downloaded with 'download_task_file'. Requires the exact file path returned by that tool.
379
-
380
- **Current Task:**
381
- * Task ID: {task_id}
382
- * Question: {question_text}
383
-
384
- **Instructions & Output Format:**
385
- 1. Carefully analyze the question.
386
- 2. Think step-by-step. Outline your plan if needed.
387
- 3. Execute tools sequentially if information depends on previous steps (e.g., download then read).
388
- 4. Review the gathered information and your reasoning.
389
- 5. **Crucially**: Provide ONLY the final answer. Do not include your reasoning, steps, tool calls, introductions (like "The answer is..."), or any other conversational text in the final output. The answer must be exact and stand-alone. Format it as requested by the question (e.g., just a number, a comma-separated list 'apple,banana,orange', etc.).
390
-
391
- Let's begin the thinking process for Task {task_id}.
392
- """
393
- submitted_answer = f"Error: Agent failed for task {task_id}"
394
  try:
395
- # Führe den Agenten aus
396
- agent_response = agent_instance.run(prompt=agent_prompt) # Der Aufruf bleibt gleich
397
-
398
- if agent_response:
399
- cleaned_response = re.sub(r"^(Final Answer:|Answer:|The answer is:|Here is the answer:)\s*", "", agent_response.strip(), flags=re.IGNORECASE)
400
- cleaned_response = cleaned_response.strip('"').strip("'")
401
- submitted_answer = cleaned_response if cleaned_response else "Error: Agent returned empty response after cleaning."
402
- else:
403
- submitted_answer = "Error: Agent returned an empty or None response."
404
- # Kurze Pause nach jedem Agentenlauf, um Rate Limits etc. zu vermeiden (optional)
405
- # time.sleep(0.5)
406
-
407
  except Exception as e:
408
- error_msg = f"AGENT_RUN_ERROR on task {task_id} ({type(e).__name__}): {e}"
409
- print(error_msg)
410
- # Hier könnte man spezifischere Fehler von HfApiModel abfangen, falls bekannt
411
- submitted_answer = f"ERROR: Agent failed ({type(e).__name__})"
412
- # Bei API Fehlern ggf. kurz warten und erneut versuchen (nicht implementiert)
413
-
414
- finally:
415
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
416
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
417
- # Logge die konkrete Antwort, die hinzugefügt wird
418
- print(f"Task {task_id} logged. Answer added: '{submitted_answer[:100]}...'")
419
-
420
-
421
- end_time = datetime.now(); duration = end_time - start_time
422
- print(f"Agent processing finished in {duration}.")
423
- progress(0.9, desc="Submitting answers...")
424
-
425
- # 4. Prepare Submission (unverändert)
426
- if not answers_payload:
427
- print("Agent did not produce any answers to submit."); cleanup_temp_files()
428
- return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
429
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
430
- print(f"Submitting {len(answers_payload)} answers for user '{username}'...")
431
- # Debug: Zeige die ersten paar Antworten vor dem Senden
432
- print(f"Sample answers payload: {answers_payload[:2]}")
433
-
434
-
435
- # 5. Submit (unverändert)
436
- final_status = "Submission attempt finished."
437
- results_df = pd.DataFrame(results_log)
438
  try:
439
- response = requests.post(submit_url, json=submission_data, timeout=180) # Längeres Timeout für Submit
440
- response.raise_for_status()
441
- result_data = response.json()
442
- final_status = (f"Submission Successful!\nUser: {result_data.get('username')}\n"
443
- f"Score: {result_data.get('score', 'N/A'):.2f}% ({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
444
- f"Message: {result_data.get('message', 'No message received.')}")
445
- print("Submission successful.")
446
- except requests.exceptions.HTTPError as e:
447
- error_detail = f"Server responded with status {e.response.status_code}."
448
- try:
449
- error_json = e.response.json(); api_error = error_json.get('detail', e.response.text)
450
- # Verbesserte Fehleranzeige für Validierungsfehler
451
- if isinstance(api_error, list) and api_error and isinstance(api_error[0], dict):
452
- error_msgs = [f"{err.get('loc', ['unknown'])[-1]}: {err.get('msg', '')}" for err in api_error]
453
- error_detail += f" Details: {'; '.join(error_msgs)}"
454
- elif isinstance(api_error, str):
455
- error_detail += f" Detail: {api_error[:500]}" # Begrenze Länge
456
- else:
457
- error_detail += f" Detail: {str(api_error)[:500]}"
458
- except requests.exceptions.JSONDecodeError:
459
- error_detail += f" Raw Response: {e.response.text[:500]}" # Begrenze Länge
460
- final_status = f"Submission Failed: {error_detail}"; print(final_status)
461
- except requests.exceptions.Timeout: final_status = "Submission Failed: The request timed out after 180 seconds."; print(final_status)
462
- except requests.exceptions.RequestException as e: final_status = f"Submission Failed: Network error - {e}"; print(final_status)
463
- except Exception as e: final_status = f"Submission Failed: Unexpected error during submission ({type(e).__name__}): {e}"; print(final_status)
464
- finally: cleanup_temp_files()
465
-
466
- progress(1, desc="Done.")
467
- return final_status, results_df
468
-
469
 
470
- # --- Gradio Interface (Angepasster Button Click) ---
471
  with gr.Blocks() as demo:
472
- gr.Markdown("# Smol CodeAgent Evaluation Runner (Hugging Face)") # Titel angepasst
473
- gr.Markdown(f"""
474
- **Instructions:**
475
- 1. Ensure `HUGGINGFACE_TOKEN` is a Secret. Add `TAVILY_API_KEY` if using Tavily.
476
- 2. Verify `requirements.txt` includes `smolagents[huggingface]`, etc.
477
- 3. Agent uses `CodeAgent` with `HfApiModel`. Target Model (via env var or default): **{HF_MODEL_ID}**.
478
- 4. Log in below.
479
- 5. Click 'Run Evaluation & Submit'. Expect a potentially long runtime.
480
- ---
481
- **Agent Details:** Uses `smolagents.CodeAgent`. Search: {'Tavily' if USE_TAVILY else 'DuckDuckGo' if USE_DUCKDUCKGO else 'Disabled'}.
482
- """)
483
- with gr.Row(): login_button = gr.LoginButton()
484
  run_button = gr.Button("Run Evaluation & Submit All Answers")
 
 
485
 
486
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=10, interactive=False)
487
- results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True) # Ohne 'height'
488
-
489
- # KORREKTUR: run_and_submit_all direkt aufrufen
490
- # inputs=[] damit Gradio den request Parameter injiziert.
491
  run_button.click(
492
  fn=run_and_submit_all,
493
- inputs=[], # Wichtig: Keine Inputs hier angeben
494
  outputs=[status_output, results_table],
495
  api_name="run_evaluation_smol_codeagent"
496
  )
497
 
498
- # --- App Start (unverändert) ---
499
  if __name__ == "__main__":
500
- print("\n" + "-"*30 + " App Starting (Smol CodeAgent Version - Request Fix) " + "-"*30)
501
- # (Rest des Startblocks unverändert)
502
- space_host_startup = os.getenv("SPACE_HOST")
503
- space_id_startup = os.getenv("SPACE_ID")
504
- if space_host_startup: print(f"✅ SPACE_HOST found: {space_host_startup} -> Runtime URL: https://{space_host_startup}.hf.space")
505
- else: print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
506
- if space_id_startup: print(f"✅ SPACE_ID found: {space_id_startup} -> Repo: https://huggingface.co/spaces/{space_id_startup}")
507
- else: print("ℹ️ SPACE_ID environment variable not found (running locally?).")
508
- print(f" Using Smol CodeAgent with HfApiModel.")
509
- print(f" Target HF Model (via env var or default): {HF_MODEL_ID}")
510
- search_tool_status = 'Disabled';
511
- if USE_TAVILY: search_tool_status = 'Tavily'
512
- elif USE_DUCKDUCKGO: search_tool_status = 'DuckDuckGo'
513
- # Check search client status based on initialization logic
514
- if search_client is None and (USE_TAVILY or USE_DUCKDUCKGO): search_tool_status += " (Initialization Pending)"
515
- elif search_client is False: search_tool_status += " (Failed to Initialize / Disabled)"
516
-
517
- print(f" Search Tool: {search_tool_status}")
518
- print(f" PDF Reading: {'Enabled' if PDF_READER_AVAILABLE else 'Disabled (PyPDF2 missing)'}")
519
- print("-"*(60 + len(" App Starting (Smol CodeAgent Version - Request Fix) ")) + "\n")
520
- print("Launching Gradio Interface for Smol CodeAgent Evaluation...")
521
- demo.queue().launch(debug=False, share=False) # queue() ist wichtig
 
7
  import time
8
  import tempfile
9
  import atexit
10
+ import sys # Für sys.exit bei Importfehlern
11
 
12
  # --- Smol Agents und HF Imports (angepasst an Beispiel) ---
13
  try:
 
14
  from smolagents import CodeAgent, tool, HfApiModel
15
  print("Successfully imported CodeAgent, tool, HfApiModel from 'smolagents'")
16
  except ImportError as e:
 
18
  print("Please ensure 'smolagents[huggingface]' is listed correctly in requirements.txt")
19
  sys.exit(f"Fatal Error: Could not import smolagents components. Check requirements.txt and rebuild/restart the Space. Original error: {e}")
20
 
 
21
  from huggingface_hub import HfApi
22
 
23
+ # --- Suchtool Imports ---
24
+ USE_TAVILY = False
25
+ USE_DUCKDUCKGO = True
 
26
  if USE_TAVILY:
27
  try:
28
  from tavily import TavilyClient
29
  except ImportError:
30
  print("WARNUNG: TavilyClient nicht installiert.")
31
  USE_TAVILY = False
32
+ USE_DUCKDUCKGO = True
33
  if USE_DUCKDUCKGO:
34
  try:
35
  from duckduckgo_search import DDGS
36
  except ImportError:
37
+ print("WARNUNG: duckduckgo-search nicht installiert.")
38
+ USE_DUCKDUCKGO = False
39
 
40
+ # --- PDF Reader Import ---
41
  try:
42
  import PyPDF2
43
  PDF_READER_AVAILABLE = True
 
47
 
48
  # --- Konstanten ---
49
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
 
50
  HF_MODEL_ID = os.getenv("HF_MODEL_ID", "meta-llama/Meta-Llama-3-8B-Instruct")
51
 
52
  # --- Globale Variablen ---
 
53
  search_client = None
 
54
  agent_instance = None
55
 
 
56
  temp_files_to_clean = set()
57
def cleanup_temp_files():
    """Delete every tracked temporary file and forget it afterwards.

    Works on a snapshot of ``temp_files_to_clean`` so the set can be
    modified while the loop runs. A path is dropped from the set once it has
    been removed (or turned out not to exist); if deletion raises, the error
    is printed and the path stays registered.
    """
    print("Cleaning up temporary files...")
    pending = list(temp_files_to_clean)
    for tmp_path in pending:
        try:
            still_on_disk = os.path.exists(tmp_path)
            if still_on_disk:
                os.remove(tmp_path)
                print(f"Removed temporary file: {tmp_path}")
            temp_files_to_clean.discard(tmp_path)
        except Exception as err:
            print(f"Error removing temporary file {tmp_path}: {err}")
 
 
 
68
 
69
+ atexit.register(cleanup_temp_files)
 
70
 
71
+ # --- Tool Definitionen ---
72
  @tool
73
  def search_web(query: str, max_results: int = 3) -> str:
 
 
 
 
 
 
 
 
 
 
74
  print(f"Tool: search_web(query='{query}', max_results={max_results})")
75
  if not search_client:
76
+ return "Search tool is not available/configured."
 
 
 
 
 
 
77
  try:
78
  if USE_TAVILY and isinstance(search_client, TavilyClient):
79
  response = search_client.search(query=query, search_depth="basic", max_results=max_results)
80
+ context = response.get('results', [])
81
+ if not context:
82
+ return "No search results found."
83
+ return "\n".join(
84
+ [f"URL: {c['url']}\nContent: {c['content'][:500]}..." for c in context]
85
+ )
86
  elif USE_DUCKDUCKGO and isinstance(search_client, DDGS):
87
+ results = search_client.text(query, max_results=max_results)
88
+ if not results:
89
+ return "No search results found."
90
+ return "\n".join(
91
+ [f"Title: {r['title']}\nURL: {r['href']}\nSnippet: {r['body'][:500]}..." for r in results]
92
+ )
93
  else:
94
  return "No compatible search client configured or available."
95
  except Exception as e:
 
98
 
99
  @tool
100
  def download_task_file(task_id: str) -> str:
 
 
 
 
 
 
 
 
 
 
101
  print(f"Tool: download_task_file(task_id='{task_id}')")
102
  file_url = f"{DEFAULT_API_URL}/files/{task_id}"
103
  try:
 
109
  elif 'png' in content_type: suffix = ".png"
110
  elif 'jpeg' in content_type or 'jpg' in content_type: suffix = ".jpg"
111
  elif 'csv' in content_type: suffix = ".csv"
 
112
  temp_dir = tempfile.gettempdir()
113
+ safe_id = re.sub(r'[^\w\-]+', '_', task_id)
114
  timestamp = datetime.now().strftime("%Y%m%d%H%M%S%f")
115
+ path = os.path.join(temp_dir, f"gaia_task_{safe_id}_{timestamp}{suffix}")
116
+ with open(path, 'wb') as f:
117
+ for chunk in response.iter_content(8192):
118
  f.write(chunk)
119
+ temp_files_to_clean.add(path)
120
+ return path
 
121
  except requests.exceptions.HTTPError as e:
122
  if e.response.status_code == 404:
 
123
  return "Error: No file found for this task ID."
124
+ return f"Error: Failed to download file (HTTP {e.response.status_code})."
 
 
 
 
 
125
  except Exception as e:
 
126
  return f"Error: Unexpected error during file download: {e}"
127
 
128
  @tool
129
  def read_file_content(file_path: str) -> str:
 
 
 
 
 
 
 
 
 
130
  print(f"Tool: read_file_content(file_path='{file_path}')")
131
+ if not os.path.isabs(file_path) or not file_path.startswith(tempfile.gettempdir()):
132
+ return "Error: Invalid file path provided."
 
 
 
 
133
  if not os.path.exists(file_path):
134
+ return f"Error: File not found '{file_path}'."
 
135
  try:
136
+ if file_path.lower().endswith('.pdf'):
137
+ if not PDF_READER_AVAILABLE:
138
+ return "Error: PyPDF2 not installed."
 
 
 
 
139
  text = ""
140
  with open(file_path, 'rb') as f:
141
  reader = PyPDF2.PdfReader(f)
142
+ for p in reader.pages:
143
+ text += p.extract_text() or ''
 
 
 
 
144
  if len(text) > 7000:
145
+ text = text[:7000] + "\n... (truncated)"
 
146
  break
 
 
 
 
147
  return f"Content of '{os.path.basename(file_path)}':\n{text}"
 
 
 
148
  else:
149
+ with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
150
+ content = f.read(7000)
151
+ return f"Content of '{os.path.basename(file_path)}':\n{content}"
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  except Exception as e:
153
+ return f"Error: Failed to read file: {e}"
 
154
 
155
# --- Agent initialisation ---
def initialize_agent():
    """Set up the search client (once) and build a fresh ``CodeAgent``.

    Side effects:
        Assigns the module globals ``search_client`` and ``agent_instance``.
        ``search_client`` is set to ``False`` when search is disabled or its
        client could not be created; in that case ``search_web`` is left out
        of the agent's tool list.

    Raises:
        ValueError: If the ``HUGGINGFACE_TOKEN`` environment variable is unset.
    """
    global search_client, agent_instance
    # None means "not attempted yet"; False records a failed/disabled setup
    # so later calls do not retry.
    if search_client is None:
        if USE_TAVILY:
            key = os.getenv("TAVILY_API_KEY")
            if key:
                try:
                    search_client = TavilyClient(api_key=key)
                except Exception as e:
                    # Was a bare `except:`, which also swallowed
                    # KeyboardInterrupt/SystemExit and hid the cause.
                    print(f"Warning: could not create Tavily client: {e}")
                    search_client = False
            else:
                print("Warning: TAVILY_API_KEY not set; web search disabled.")
                search_client = False
        elif USE_DUCKDUCKGO:
            try:
                search_client = DDGS()
            except Exception as e:
                print(f"Warning: could not create DuckDuckGo client: {e}")
                search_client = False
        else:
            search_client = False

    token = os.getenv("HUGGINGFACE_TOKEN")
    if not token:
        raise ValueError("HUGGINGFACE_TOKEN Secret nicht gefunden!")
    # Hand the validated token to the model; previously it was checked and
    # then discarded.
    # NOTE(review): HF_MODEL_ID is defined at module level but is not
    # forwarded here, so the model class default is used - confirm intent.
    hf_model = HfApiModel(token=token)

    tools = [search_web, download_task_file, read_file_content]
    if search_client is False:
        tools = [t for t in tools if t != search_web]
    agent_instance = CodeAgent(tools=tools, model=hf_model)
179
 
180
# --- Main function: run_and_submit_all ---
def run_and_submit_all(profile: gr.OAuthProfile | None, progress=gr.Progress(track_tqdm=True)):
    """Run the agent on every fetched question and submit the answers.

    Args:
        profile: OAuth profile of the signed-in Hugging Face user, or ``None``
            when nobody is logged in. It is supplied by Gradio through the
            type annotation and must not be listed in the event's ``inputs``.
        progress: Gradio progress tracker wrapped around the question loop.

    Returns:
        A ``(status, results)`` tuple: ``status`` is a message string and
        ``results`` is a ``pandas.DataFrame`` with the columns "Task ID",
        "Question" and "Submitted Answer", or ``None`` when the run stopped
        before any question was processed.
    """
    if not profile:
        return "Bitte zuerst mit Hugging Face einloggen.", None
    username = profile.username
    api_url = DEFAULT_API_URL

    try:
        initialize_agent()
    except Exception as e:
        return f"Fehler bei der Agent-Initialisierung: {e}", None

    # Fetch the questions defensively: a network error, a non-2xx status or a
    # non-JSON body is reported in the status box instead of raising.
    try:
        response = requests.get(f"{api_url}/questions", timeout=30)
        response.raise_for_status()
        questions = response.json()
    except Exception as e:
        return f"Fehler beim Abrufen der Fragen: {e}", None
    if not isinstance(questions, list) or not questions:
        return "Fehler beim Abrufen der Fragen: leere oder ungültige Antwort.", None

    answers_log = []
    payload = []
    for item in progress.tqdm(questions, desc="Bearbeite Fragen"):
        task_id = item.get("task_id")
        question = item.get("question")
        if not task_id or question is None:
            continue
        prompt = f"... Task {task_id}: {question}"
        try:
            # Passed positionally: the agent's run() names its first
            # parameter `task`, so `prompt=` is rejected as unknown.
            resp = agent_instance.run(prompt)
            # The agent may return non-string values (e.g. a number);
            # normalise to text so values like 0 are not lost.
            raw = "" if resp is None else str(resp).strip()
            ans = re.sub(r"^(Answer:|Final Answer:)", "", raw).strip()
        except Exception as e:
            ans = f"ERROR: {e}"
        answers_log.append({"Task ID": task_id, "Question": question, "Submitted Answer": ans})
        payload.append({"task_id": task_id, "submitted_answer": ans})

    df = pd.DataFrame(answers_log)
    if not payload:
        cleanup_temp_files()
        return "Der Agent hat keine Antworten erzeugt.", df

    # Link to this Space's code instead of submitting a "..." placeholder.
    space_id = os.getenv("SPACE_ID")
    agent_code = (
        f"https://huggingface.co/spaces/{space_id}/tree/main"
        if space_id
        else "Local Run - Code Link N/A"
    )
    submission = {"username": username, "agent_code": agent_code, "answers": payload}
    try:
        r = requests.post(f"{api_url}/submit", json=submission, timeout=180)
        r.raise_for_status()
        res = r.json()
        status = f"Erfolg! Score: {res.get('score', 0):.2f}%"
    except Exception as e:
        status = f"Fehler bei der Submission: {e}"
    finally:
        # Downloaded task files are no longer needed once the run is over.
        cleanup_temp_files()
    return status, df


# --- Gradio Interface ---
with gr.Blocks() as demo:
    gr.Markdown("# Smol CodeAgent Evaluation Runner")
    gr.Markdown("Bitte einloggen und dann auf Ausführen klicken.")
    with gr.Row():
        login_button = gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Status", lines=5)
    results_table = gr.DataFrame(label="Ergebnisse")

    # No `inputs`: the OAuth profile is supplied via the handler's type
    # annotation. Listing the login button would pass its value instead.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table],
        api_name="run_evaluation_smol_codeagent"
    )


if __name__ == "__main__":
    demo.queue().launch(debug=False, share=False)