pmeyhoefer committed on
Commit
70658cb
·
verified ·
1 Parent(s): c081c12

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +528 -104
app.py CHANGED
@@ -2,128 +2,552 @@ import os
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
5
- from smolagents import CodeAgent, InferenceClientModel
6
-
7
- # --- Constants ---
8
- # API-URL deines Spaces (ohne "/api"-Suffix)
9
- DEFAULT_API_URL = "https://pmeyhoefer-final-assignment-template.hf.space"
10
- # Modell-ID und HF-Token (bitte hier deinen HF Access Token einfügen)
11
- MODEL_ID = os.getenv("SMOL_MODEL_ID", "meta-llama/Llama-3.3-70B-Instruct")
12
- HF_TOKEN = "<DEIN_HF_HUB_TOKEN>" # Ersetze durch deinen echten Hugging Face Token
13
-
14
- # --- Agent-Implementierung mit smolagents ---
15
- class BasicAgent:
16
- def __init__(self):
17
- if not HF_TOKEN or HF_TOKEN.startswith("<"):
18
- raise ValueError("Kein gültiger HF_HUB_TOKEN im Code gesetzt!")
19
- # InferenceClientModel initialisieren
20
- self.model = InferenceClientModel(
21
- model_id=MODEL_ID,
22
- token=HF_TOKEN
23
- )
24
- # CodeAgent mit Basis-Tools
25
- self.agent = CodeAgent(
26
- tools=[],
27
- model=self.model,
28
- add_base_tools=True
29
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
- def __call__(self, question: str) -> str:
 
 
32
  try:
33
- return self.agent.run(question)
34
- except Exception as e:
35
- return f"AGENT ERROR: {e}"
36
-
37
- # --- Evaluation & Submission ---
38
- def run_and_submit_all(profile: gr.OAuthProfile | None):
39
- # 1. Authentifizierung
40
- if not profile:
41
- return "Bitte logge dich zuerst bei Hugging Face ein.", None
42
- username = profile.username
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  space_id = os.getenv("SPACE_ID")
44
 
45
- # 2. Endpunkte
46
- questions_url = f"{DEFAULT_API_URL}/questions"
47
- submit_url = f"{DEFAULT_API_URL}/submit"
 
 
 
 
 
 
 
48
 
49
- # 3. Agent instanziieren
50
  try:
51
- agent = BasicAgent()
 
 
 
 
 
52
  except Exception as e:
53
- return f"Fehler beim Initialisieren des Agents: {e}", None
 
54
 
55
- # 4. Fragen abrufen
 
 
 
 
56
  try:
57
- resp = requests.get(questions_url, timeout=15)
58
- resp.raise_for_status()
59
- questions = resp.json()
 
 
 
 
60
  except Exception as e:
61
- return f"Fehler beim Abrufen der Fragen: {e}", None
62
-
63
- # 5. Antworten generieren
64
- records = []
65
- answers = []
66
- for item in questions:
67
- task_id = item.get("task_id")
68
- question_txt = item.get("question") or item.get("instruction", "")
69
- if not task_id or not question_txt:
 
 
 
 
 
 
 
 
 
70
  continue
71
- ans = agent(question_txt)
72
- answers.append({
73
- "task_id": task_id,
74
- "submitted_answer": ans
75
- })
76
- records.append({
77
- "Task ID": task_id,
78
- "Question": question_txt,
79
- "Antwort": ans
80
- })
81
-
82
- if not answers:
83
- return "Der Agent hat keine Antworten produziert.", pd.DataFrame(records)
84
-
85
- # 6. Submission
86
- submission = {
87
- "username": username.strip(),
88
- "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
89
- "answers": answers
90
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  try:
92
- resp = requests.post(submit_url, json=submission, timeout=60)
93
- resp.raise_for_status()
94
- result = resp.json()
95
- status = (
96
- f"Erfolgreich eingereicht!\n"
97
- f"User: {result.get('username')}\n"
98
- f"Score: {result.get('score')}% "
99
- f"({result.get('correct_count')}/{result.get('total_attempted')})\n"
100
- f"Nachricht: {result.get('message')}"
101
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  except Exception as e:
103
- status = f"Fehler bei der Einreichung: {e}"
 
 
 
 
 
 
104
 
105
- df = pd.DataFrame(records)
106
- return status, df
107
 
108
- # --- Gradio UI ---
109
  with gr.Blocks() as demo:
110
- gr.Markdown("# GAIA Agent Evaluation Runner")
111
- gr.Markdown("""
112
- 1. Füge in den Space-Secrets deinen `HF_HUB_TOKEN` ein (oder setze ihn direkt im Code oben).
113
- 2. Optional: Lege `SMOL_MODEL_ID` in den Secrets an (Standard: meta-llama/Llama-3.3-70B-Instruct).
114
- 3. Aktualisiere `requirements.txt` mit den benötigten Abhängigkeiten.
115
- 4. Commit & Push, warte auf Deployment.
116
- 5. Logge dich mit dem Hugging Face Button ein.
117
- 6. Klicke auf **Run Evaluation & Submit All Answers**.
118
- """)
 
 
 
 
 
 
 
 
 
 
 
119
  gr.LoginButton()
120
- run_btn = gr.Button("Run Evaluation & Submit All Answers")
121
- status_out = gr.Textbox(label="Status / Ergebnis", lines=5, interactive=False)
122
- result_table = gr.DataFrame(label="Fragen & Antworten", wrap=True)
123
- run_btn.click(fn=run_and_submit_all, inputs=[], outputs=[status_out, result_table])
124
 
125
- if __name__ == "__main__":
126
- demo.launch(debug=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
 
 
 
 
 
 
 
 
 
 
128
  if __name__ == "__main__":
129
- demo.launch(debug=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
5
+ import re
6
+ from datetime import datetime
7
+ import time
8
+ import tempfile # Für temporäre Dateien
9
+ import atexit # Zum Aufräumen beim Beenden
10
+
11
+ # --- Smol Agent und HF Imports ---
12
+ from smol_agent import Agent
13
+ from smol_agent.llm.huggingface import InferenceAPI
14
+ from smol_agent.tools import tool
15
+ from huggingface_hub import HfApi, InferenceClient
16
+
17
+ # --- Suchtool Imports (wähle eins) ---
18
+ USE_TAVILY = False # Setze auf True, wenn du Tavily bevorzugst (benötigt TAVILY_API_KEY)
19
+ USE_DUCKDUCKGO = True # Setze auf True für DuckDuckGo (kein Key nötig)
20
+
21
+ if USE_TAVILY:
22
+ try:
23
+ from tavily import TavilyClient
24
+ except ImportError:
25
+ print("WARNUNG: TavilyClient nicht installiert. Führe 'pip install tavily-python' aus.")
26
+ USE_TAVILY = False
27
+ USE_DUCKDUCKGO = True # Fallback
28
+ if USE_DUCKDUCKGO:
29
+ try:
30
+ from duckduckgo_search import DDGS
31
+ except ImportError:
32
+ print("WARNUNG: duckduckgo-search nicht installiert. Führe 'pip install duckduckgo-search' aus.")
33
+ USE_DUCKDUCKGO = False
34
+
35
+ # --- PDF Reader Import ---
36
+ try:
37
+ import PyPDF2
38
+ PDF_READER_AVAILABLE = True
39
+ except ImportError:
40
+ PDF_READER_AVAILABLE = False
41
+ print("WARNUNG: PyPDF2 nicht installiert. PDF-Lesefunktion ist deaktiviert.")
42
+
43
+ # --- Konstanten ---
44
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
45
+ # Wähle ein Instruction-Following Modell von Hugging Face Hub
46
+ # Beispiele: "meta-llama/Meta-Llama-3-8B-Instruct", "mistralai/Mixtral-8x7B-Instruct-v0.1", "google/gemma-1.1-7b-it"
47
+ # Stelle sicher, dass das Modell über die kostenlose Inference API verfügbar ist oder du Inference Endpoints verwendest.
48
+ HF_MODEL_ID = os.getenv("HF_MODEL_ID", "meta-llama/Meta-Llama-3-8B-Instruct") # Standardmodell, kann über Env Var überschrieben werden
49
+
50
+ # --- Globale Variablen für Clients (werden in initialize_agent gesetzt) ---
51
+ hf_token = None
52
+ search_client = None
53
+ agent_instance = None # Wird pro Lauf initialisiert
54
+
55
+ # --- Temporäre Datei Verwaltung ---
56
+ temp_files_to_clean = set()
57
 
58
+ def cleanup_temp_files():
59
+ print("Cleaning up temporary files...")
60
+ for file_path in list(temp_files_to_clean):
61
  try:
62
+ if os.path.exists(file_path):
63
+ os.remove(file_path)
64
+ print(f"Removed temporary file: {file_path}")
65
+ temp_files_to_clean.remove(file_path)
66
+ except OSError as e:
67
+ print(f"Error removing temporary file {file_path}: {e}")
68
+
69
+ # Registriere die Cleanup-Funktion für das Beenden des Skripts
70
+ atexit.register(cleanup_temp_files)
71
+
72
+
73
+ # --- Tool Definitionen für smol-agent ---
74
+
75
+ @tool
76
+ def search_web(query: str, max_results: int = 3) -> str:
77
+ """
78
+ Searches the web for the given query and returns a summary of the top results.
79
+ Use this to find recent information or facts not readily available.
80
+ Args:
81
+ query (str): The search query.
82
+ max_results (int): The maximum number of results to return (default 3).
83
+ Returns:
84
+ str: A string containing the search results, or an error message.
85
+ """
86
+ print(f"Tool: search_web(query='{query}', max_results={max_results})")
87
+ if not search_client:
88
+ return "Search tool is not available/configured."
89
+ try:
90
+ if USE_TAVILY and isinstance(search_client, TavilyClient):
91
+ response = search_client.search(query=query, search_depth="basic", max_results=max_results)
92
+ context = [{"url": obj["url"], "content": obj["content"]} for obj in response.get('results', [])]
93
+ if not context: return "No search results found."
94
+ return "\n".join([f"URL: {c['url']}\nContent: {c['content'][:500]}..." for c in context])
95
+ elif USE_DUCKDUCKGO and isinstance(search_client, DDGS):
96
+ results = search_client.text(query, max_results=max_results)
97
+ if not results: return "No search results found."
98
+ return "\n".join([f"Title: {r['title']}\nURL: {r['href']}\nSnippet: {r['body'][:500]}..." for r in results])
99
+ else:
100
+ return "No compatible search client configured."
101
+ except Exception as e:
102
+ print(f"Search API Error: {e}")
103
+ return f"Error during search: {e}"
104
+
105
+ @tool
106
+ def download_task_file(task_id: str) -> str:
107
+ """
108
+ Downloads a file associated with a specific task ID from the evaluation server.
109
+ Use this ONLY if the question requires information from a specific file linked to the task.
110
+ Args:
111
+ task_id (str): The unique identifier for the task whose file needs to be downloaded.
112
+ Returns:
113
+ str: The local path to the downloaded file (e.g., '/tmp/tmpXYZ.pdf') if successful,
114
+ otherwise an error message or 'No file found'.
115
+ """
116
+ print(f"Tool: download_task_file(task_id='{task_id}')")
117
+ file_url = f"{DEFAULT_API_URL}/files/{task_id}"
118
+
119
+ try:
120
+ response = requests.get(file_url, stream=True, timeout=30) # Erhöhtes Timeout
121
+ response.raise_for_status() # Löst Fehler für 4xx/5xx aus
122
+
123
+ # Bestimme Dateiendung aus Content-Type
124
+ content_type = response.headers.get('content-type', '').lower()
125
+ suffix = ".tmp" # Standard-Suffix
126
+ if 'pdf' in content_type:
127
+ suffix = ".pdf"
128
+ elif 'png' in content_type:
129
+ suffix = ".png"
130
+ elif 'jpeg' in content_type or 'jpg' in content_type:
131
+ suffix = ".jpg"
132
+ elif 'csv' in content_type:
133
+ suffix = ".csv"
134
+ elif 'plain' in content_type or 'text' in content_type:
135
+ suffix = ".txt"
136
+
137
+ # Erstelle eine sichere temporäre Datei
138
+ temp_dir = tempfile.gettempdir()
139
+ # Verwende task_id im Dateinamen für bessere Nachverfolgbarkeit (optional)
140
+ safe_task_id = re.sub(r'[^\w\-]+', '_', task_id) # Bereinige task_id für Dateinamen
141
+ temp_file_path = os.path.join(temp_dir, f"gaia_task_{safe_task_id}{suffix}")
142
+
143
+ with open(temp_file_path, 'wb') as f:
144
+ for chunk in response.iter_content(chunk_size=8192):
145
+ f.write(chunk)
146
+
147
+ print(f"File downloaded successfully to {temp_file_path}")
148
+ temp_files_to_clean.add(temp_file_path) # Füge zur Cleanup-Liste hinzu
149
+ return temp_file_path # Gib den Pfad zurück
150
+
151
+ except requests.exceptions.HTTPError as e:
152
+ if e.response.status_code == 404:
153
+ print(f"No file found on server for task_id {task_id}.")
154
+ return "Error: No file found for this task ID."
155
+ else:
156
+ print(f"HTTP Error downloading file for task {task_id}: {e}")
157
+ return f"Error: Failed to download file (HTTP {e.response.status_code})."
158
+ except requests.exceptions.RequestException as e:
159
+ print(f"Network Error downloading file for task {task_id}: {e}")
160
+ return f"Error: Failed to download file due to network issue: {e}"
161
+ except Exception as e:
162
+ print(f"Unexpected error downloading file for task {task_id}: {e}")
163
+ return f"Error: Unexpected error during file download: {e}"
164
+
165
+ @tool
166
+ def read_file_content(file_path: str) -> str:
167
+ """
168
+ Reads the text content of a previously downloaded file (PDF or plain text).
169
+ Use this tool AFTER 'download_task_file' has successfully returned a file path.
170
+ Args:
171
+ file_path (str): The local path to the file (must be a path returned by 'download_task_file').
172
+ Returns:
173
+ str: The extracted text content (truncated if very long), or an error message.
174
+ """
175
+ print(f"Tool: read_file_content(file_path='{file_path}')")
176
+
177
+ # Sicherheitscheck: Erlaube nur Lesen aus dem Temp-Verzeichnis
178
+ if not file_path or not file_path.startswith(tempfile.gettempdir()):
179
+ print(f"Security Alert: Attempted to read file outside temp directory: {file_path}")
180
+ return "Error: Invalid file path provided. Only downloaded files can be read."
181
+
182
+ if not os.path.exists(file_path):
183
+ print(f"Error: File not found at path: {file_path}")
184
+ return "Error: File not found at the specified path."
185
+
186
+ try:
187
+ if file_path.lower().endswith(".pdf"):
188
+ if not PDF_READER_AVAILABLE:
189
+ return "Error: Cannot read PDF file because PyPDF2 library is not installed."
190
+ text = ""
191
+ with open(file_path, 'rb') as f:
192
+ reader = PyPDF2.PdfReader(f)
193
+ for page_num in range(len(reader.pages)):
194
+ page = reader.pages[page_num]
195
+ text += page.extract_text() or "" # Füge leeren String hinzu, falls extract_text None zurückgibt
196
+ if len(text) > 7000: # Begrenze die Länge stärker
197
+ text = text[:7000] + "\n... (content truncated)"
198
+ break
199
+ print(f"Successfully read ~{len(text)} chars from PDF: {os.path.basename(file_path)}")
200
+ return f"Content of '{os.path.basename(file_path)}':\n{text}"
201
+
202
+ elif file_path.lower().endswith((".png", ".jpg", ".jpeg")):
203
+ print(f"File is an image: {os.path.basename(file_path)}. Reading image content not implemented.")
204
+ # Hier könnte man später ein VLM-Tool einbinden
205
+ return f"Observation: File '{os.path.basename(file_path)}' is an image. Cannot read text content."
206
+
207
+ else: # Versuche als Text zu lesen
208
+ with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
209
+ content = f.read(7000) # Begrenze auf 7000 Zeichen
210
+ if len(content) == 7000:
211
+ content += "\n... (content truncated)"
212
+ print(f"Successfully read ~{len(content)} chars from text file: {os.path.basename(file_path)}")
213
+ return f"Content of '{os.path.basename(file_path)}':\n{content}"
214
+
215
+ except Exception as e:
216
+ print(f"Error reading file {file_path}: {e}")
217
+ return f"Error: Failed to read file content: {e}"
218
+
219
+
220
+ # --- Agent Initialisierung ---
221
+ def initialize_agent():
222
+ """Initialisiert den smol-agent und die benötigten Clients."""
223
+ global hf_token, search_client, agent_instance
224
+ print("Initializing agent and clients...")
225
+
226
+ hf_token = os.getenv("HUGGINGFACE_TOKEN")
227
+ if not hf_token:
228
+ raise ValueError("HUGGINGFACE_TOKEN Secret nicht im Hugging Face Space gefunden!")
229
+
230
+ # --- Search Client ---
231
+ if USE_TAVILY:
232
+ tavily_key = os.getenv("TAVILY_API_KEY")
233
+ if tavily_key:
234
+ search_client = TavilyClient(api_key=tavily_key)
235
+ print("Using Tavily for search.")
236
+ else:
237
+ print("WARNUNG: TAVILY_API_KEY nicht gefunden, obwohl USE_TAVILY=True.")
238
+ # Fallback auf DuckDuckGo wenn möglich
239
+ if USE_DUCKDUCKGO:
240
+ try:
241
+ search_client = DDGS()
242
+ print("Falling back to DuckDuckGo for search.")
243
+ except NameError:
244
+ search_client = None
245
+ print("WARNUNG: DuckDuckGo auch nicht verfügbar. Suche deaktiviert.")
246
+ else:
247
+ search_client = None
248
+ print("WARNUNG: Suche deaktiviert.")
249
+ elif USE_DUCKDUCKGO:
250
+ try:
251
+ search_client = DDGS()
252
+ print("Using DuckDuckGo for search.")
253
+ except NameError:
254
+ search_client = None
255
+ print("WARNUNG: DuckDuckGo nicht verfügbar. Suche deaktiviert.")
256
+ else:
257
+ search_client = None
258
+ print("Web search is disabled.")
259
+
260
+
261
+ # --- LLM Client (Hugging Face Inference API) ---
262
+ llm = InferenceAPI(
263
+ model_id=HF_MODEL_ID,
264
+ token=hf_token,
265
+ max_new_tokens=1500, # Erhöhe ggf. die max. Token für komplexe Antworten
266
+ temperature=0.1, # Niedrige Temperatur für Fakten
267
+ # Weitere Parameter nach Bedarf: top_p, top_k, repetition_penalty etc.
268
+ )
269
+ print(f"LLM configured with model: {HF_MODEL_ID}")
270
+
271
+ # --- Agent Instanz ---
272
+ available_tools = [search_web, download_task_file, read_file_content]
273
+ agent_instance = Agent(
274
+ llm=llm,
275
+ # tools=available_tools, # Tools werden dynamisch in run() übergeben
276
+ # system_prompt=... (optional, kann auch im run() Prompt sein)
277
+ )
278
+ print(f"Smol Agent initialized with {len(available_tools)} tools.")
279
+
280
+
281
+ # --- Angepasste Hauptfunktion ---
282
+ def run_and_submit_all( profile: gr.OAuthProfile | None):
283
+ """
284
+ Fetches all questions, runs the smol-agent on them, submits all answers,
285
+ and displays the results.
286
+ """
287
  space_id = os.getenv("SPACE_ID")
288
 
289
+ if profile:
290
+ username= f"{profile.username}"
291
+ print(f"User logged in: {username}")
292
+ else:
293
+ print("User not logged in.")
294
+ return "Please Login to Hugging Face with the button.", None
295
+
296
+ api_url = DEFAULT_API_URL
297
+ questions_url = f"{api_url}/questions"
298
+ submit_url = f"{api_url}/submit"
299
 
300
+ # 1. Initialisiere Agent und Clients (bei jedem Lauf)
301
  try:
302
+ initialize_agent()
303
+ if not agent_instance: # Zusätzliche Prüfung
304
+ raise RuntimeError("Agent instance could not be initialized.")
305
+ except ValueError as e:
306
+ print(f"Error during initialization: {e}")
307
+ return f"Configuration Error: {e}", None
308
  except Exception as e:
309
+ print(f"Error initializing agent/clients: {e}")
310
+ return f"Error initializing agent: {e}", None
311
 
312
+ agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Local Run - Code Link N/A"
313
+ print(f"Agent Code Link: {agent_code}")
314
+
315
+ # 2. Fetch Questions (wie zuvor)
316
+ print(f"Fetching questions from: {questions_url}")
317
  try:
318
+ response = requests.get(questions_url, timeout=30)
319
+ response.raise_for_status()
320
+ questions_data = response.json()
321
+ if not questions_data or not isinstance(questions_data, list):
322
+ print(f"Fetched questions list is empty or invalid format: {questions_data}")
323
+ return "Fetched questions list is empty or invalid format.", None
324
+ print(f"Fetched {len(questions_data)} questions.")
325
  except Exception as e:
326
+ # Detailiertere Fehlermeldung
327
+ print(f"Error fetching questions ({type(e).__name__}): {e}")
328
+ return f"Error fetching questions: {e}", None
329
+
330
+
331
+ # 3. Run your Smol Agent
332
+ start_time = datetime.now()
333
+ results_log = []
334
+ answers_payload = []
335
+ print(f"Running smol-agent on {len(questions_data)} questions using {HF_MODEL_ID}...")
336
+ status_updates = []
337
+
338
+ for i, item in enumerate(questions_data):
339
+ task_id = item.get("task_id")
340
+ question_text = item.get("question")
341
+
342
+ if not task_id or question_text is None:
343
+ print(f"Skipping item with missing task_id or question: {item}")
344
  continue
345
+
346
+ current_status = f"Processing question {i+1}/{len(questions_data)} (Task ID: {task_id})..."
347
+ print(current_status)
348
+ status_updates.append(current_status)
349
+
350
+ # --- Prompt für smol-agent ---
351
+ # Wichtig: Klare Anweisung für das Endformat geben!
352
+ # Gib dem Agenten den Task-ID Kontext mit!
353
+ agent_prompt = f"""
354
+ You are an expert AI assistant solving a challenge question.
355
+ Your task is to answer the following question accurately and concisely.
356
+ Use the available tools ONLY when necessary to find information or access required files.
357
+
358
+ **Available Tools:**
359
+ * `search_web(query: str, max_results: int = 3)`: Searches the web.
360
+ * `download_task_file(task_id: str)`: Downloads the specific file for a task. Use the task_id '{task_id}' if you need the file for THIS question. Returns the local file path.
361
+ * `read_file_content(file_path: str)`: Reads text from a downloaded file using the path returned by download_task_file.
362
+
363
+ **Current Task:**
364
+ * Task ID: {task_id}
365
+ * Question: {question_text}
366
+
367
+ **Instructions:**
368
+ 1. Think step-by-step to break down the question.
369
+ 2. Use the tools provided if you need external information or file content. Make sure to use the correct task_id ('{task_id}') for `download_task_file`.
370
+ 3. Reason through the information obtained.
371
+ 4. Provide ONLY the final answer to the question, without any introductory phrases, explanations, or conversational text like "The answer is..." or "Based on my analysis...".
372
+ 5. Format the answer exactly as requested by the question (e.g., just a year, a comma-separated list, etc.).
373
+
374
+ Begin!
375
+ """
376
+
377
+ submitted_answer = f"Error: Agent failed to produce a result for task {task_id}" # Default error
378
+ try:
379
+ # Führe den Agenten aus
380
+ agent_response = agent_instance.run(
381
+ prompt=agent_prompt,
382
+ tools=[search_web, download_task_file, read_file_content] # Übergebe Tools hier
383
+ )
384
+
385
+ if agent_response:
386
+ # Versuche, die Antwort zu bereinigen (optional, je nach Modellverhalten)
387
+ # Einfache Bereinigung: Entferne häufige Präfixe
388
+ cleaned_response = re.sub(r"^(Final Answer:|The answer is:|Here is the answer:)\s*", "", agent_response.strip(), flags=re.IGNORECASE)
389
+ submitted_answer = cleaned_response
390
+ else:
391
+ submitted_answer = "Error: Agent returned an empty response."
392
+
393
+
394
+ print(f"Task {task_id} completed by agent. Raw Response: '{agent_response[:100]}...' | Submitted Answer: '{submitted_answer}'")
395
+
396
+ except Exception as e:
397
+ error_msg = f"SMOL_AGENT ERROR on task {task_id} ({type(e).__name__}): {e}"
398
+ print(error_msg)
399
+ # Gib einen Fehler als Antwort ein
400
+ submitted_answer = f"ERROR: {type(e).__name__}" # Kürzere Fehlermeldung für die Payload
401
+
402
+ finally:
403
+ # Füge das Ergebnis (oder den Fehler) hinzu
404
+ answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
405
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
406
+ # Bereinige temporäre Dateien *sofort* nach Bearbeitung der Aufgabe
407
+ # (Optional, atexit macht es auch am Ende)
408
+ # cleanup_temp_files() # Kann hier aufgerufen werden, wenn Ressourcen knapp sind
409
+
410
+ end_time = datetime.now()
411
+ duration = end_time - start_time
412
+ print(f"Agent processing finished in {duration}.")
413
+
414
+ # 4. Prepare Submission (wie zuvor)
415
+ if not answers_payload:
416
+ print("Agent did not produce any answers to submit.")
417
+ # Lösche übrig gebliebene Temp-Dateien
418
+ cleanup_temp_files()
419
+ return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
420
+
421
+ submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
422
+ status_update = f"Agent finished in {duration}. Submitting {len(answers_payload)} answers for user '{username}'..."
423
+ print(status_update)
424
+
425
+ # 5. Submit (wie zuvor, mit Timeout)
426
+ print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
427
  try:
428
+ response = requests.post(submit_url, json=submission_data, timeout=120)
429
+ response.raise_for_status()
430
+ result_data = response.json()
431
+ final_status = (
432
+ f"Submission Successful!\n"
433
+ f"User: {result_data.get('username')}\n"
434
+ f"Score: {result_data.get('score', 'N/A'):.2f}% "
435
+ f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
436
+ f"Message: {result_data.get('message', 'No message received.')}"
437
  )
438
+ print("Submission successful.")
439
+ results_df = pd.DataFrame(results_log)
440
+ return final_status + f"\n\nAgent ({HF_MODEL_ID}) Processing Log:\n" + "\n".join(status_updates[-5:]), results_df
441
+ except requests.exceptions.HTTPError as e:
442
+ # (Fehlerbehandlung wie zuvor)
443
+ error_detail = f"Server responded with status {e.response.status_code}."
444
+ try:
445
+ error_json = e.response.json()
446
+ api_error = error_json.get('detail', e.response.text)
447
+ if isinstance(api_error, list) and len(api_error) > 0 and isinstance(api_error[0], dict):
448
+ error_detail += f" Detail: {api_error[0].get('msg', str(api_error))}"
449
+ else:
450
+ error_detail += f" Detail: {str(api_error)}"
451
+ except requests.exceptions.JSONDecodeError:
452
+ error_detail += f" Response: {e.response.text[:500]}"
453
+ status_message = f"Submission Failed: {error_detail}"
454
+ print(status_message)
455
+ results_df = pd.DataFrame(results_log)
456
+ return status_message, results_df
457
+ except requests.exceptions.Timeout:
458
+ status_message = "Submission Failed: The request timed out."
459
+ # ... (Rest der Fehlerbehandlung wie zuvor) ...
460
+ print(status_message)
461
+ results_df = pd.DataFrame(results_log)
462
+ return status_message, results_df
463
+ except requests.exceptions.RequestException as e:
464
+ status_message = f"Submission Failed: Network error - {e}"
465
+ print(status_message)
466
+ results_df = pd.DataFrame(results_log)
467
+ return status_message, results_df
468
  except Exception as e:
469
+ status_message = f"An unexpected error occurred during submission: {e}"
470
+ print(status_message)
471
+ results_df = pd.DataFrame(results_log)
472
+ return status_message, results_df
473
+ finally:
474
+ # Stelle sicher, dass alle Temp-Dateien am Ende gelöscht werden
475
+ cleanup_temp_files()
476
 
 
 
477
 
478
+ # --- Gradio Interface (angepasst für smol-agent) ---
479
  with gr.Blocks() as demo:
480
+ gr.Markdown("# Smol Agent Evaluation Runner (Hugging Face)")
481
+ gr.Markdown(
482
+ f"""
483
+ **Instructions:**
484
+
485
+ 1. Ensure you have added your `HUGGINGFACE_TOKEN` (with write access) as a Secret in your Space settings. Optionally add `TAVILY_API_KEY` if using Tavily search.
486
+ 2. Make sure `requirements.txt` includes `smol-agent[huggingface]`, search libraries (`duckduckgo-search` or `tavily-python`), and `pypdf2`.
487
+ 3. The agent uses the Hugging Face Inference API with the model: **{HF_MODEL_ID}**. You can change this by setting the `HF_MODEL_ID` environment variable in your Space settings.
488
+ 4. Log in to your Hugging Face account below.
489
+ 5. Click 'Run Evaluation & Submit All Answers'. **This will take time** as the agent processes each question using the Inference API.
490
+
491
+ ---
492
+ **Agent Details:**
493
+ * Uses the `smol-agent` library.
494
+ * Leverages Hugging Face Inference API for LLM calls.
495
+ * Tools: Web Search ({'Tavily' if USE_TAVILY else 'DuckDuckGo' if USE_DUCKDUCKGO else 'Disabled'}), File Download, File Reading (PDF/Text).
496
+ * Check the Space console logs for detailed agent behavior.
497
+ """
498
+ )
499
+
500
  gr.LoginButton()
 
 
 
 
501
 
502
+ run_button = gr.Button("Run Evaluation & Submit All Answers")
503
+
504
+ status_output = gr.Textbox(label="Run Status / Submission Result", lines=10, interactive=False)
505
+ results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True, height=400)
506
+
507
+ # Verwende profile als Input für die Funktion
508
+ def get_profile(request: gr.Request):
509
+ # Helper function to potentially extract profile info if needed later,
510
+ # Gradio's LoginButton might handle profile implicitly now.
511
+ # For now, just pass None if not logged in via button state.
512
+ # This part might need adjustment based on how Gradio passes OAuthProfile.
513
+ # The current run_and_submit_all signature expects OAuthProfile | None
514
+ # which Gradio should provide when the button is clicked if logged in.
515
+ # If run_button.click doesn't automatically pass the profile,
516
+ # we might need a different setup using gr.State or gr.Variable.
517
+ # Let's assume Gradio handles passing the profile for now.
518
+ pass # Placeholder
519
 
520
+ run_button.click(
521
+ fn=run_and_submit_all,
522
+ inputs=[], # Gradio's LoginButton should implicitly provide profile context
523
+ # If this fails, might need inputs=gr.State(profile_info) setup
524
+ outputs=[status_output, results_table],
525
+ api_name="run_evaluation_smol"
526
+ )
527
+
528
+ # --- App Start (unverändert) ---
529
  if __name__ == "__main__":
530
+ print("\n" + "-"*30 + " App Starting (Smol Agent Version) " + "-"*30)
531
+ space_host_startup = os.getenv("SPACE_HOST")
532
+ space_id_startup = os.getenv("SPACE_ID")
533
+
534
+ # (Rest des Startblocks unverändert)
535
+ if space_host_startup:
536
+ print(f"✅ SPACE_HOST found: {space_host_startup}")
537
+ print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
538
+ else:
539
+ print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
540
+
541
+ if space_id_startup:
542
+ print(f"✅ SPACE_ID found: {space_id_startup}")
543
+ print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
544
+ print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
545
+ else:
546
+ print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
547
+
548
+ print(f" Using HF Model via Inference API: {HF_MODEL_ID}")
549
+ print(f" Search Tool: {'Tavily' if USE_TAVILY else 'DuckDuckGo' if USE_DUCKDUCKGO else 'Disabled'}")
550
+ print("-"*(60 + len(" App Starting (Smol Agent Version) ")) + "\n")
551
+
552
+ print("Launching Gradio Interface for Smol Agent Evaluation...")
553
+ demo.launch(debug=False, share=False) # Debug=False für normalen Betrieb