Final_Assignment_Template

Sleeping

App Files Files Community

pmeyhoefer committed on Apr 30, 2025

Commit

85e2c6c

verified ·

1 Parent(s): 70658cb

Update app.py

Browse files

Files changed (1) hide show

app.py +262 -204

app.py CHANGED Viewed

@@ -8,10 +8,19 @@ import time
 import tempfile # Für temporäre Dateien
 import atexit # Zum Aufräumen beim Beenden
-# --- Smol Agent und HF Imports ---
-from smol_agent import Agent
-from smol_agent.llm.huggingface import InferenceAPI
-from smol_agent.tools import tool
 from huggingface_hub import HfApi, InferenceClient
 # --- Suchtool Imports (wähle eins) ---
@@ -43,9 +52,7 @@ except ImportError:
 # --- Konstanten ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # Wähle ein Instruction-Following Modell von Hugging Face Hub
-# Beispiele: "meta-llama/Meta-Llama-3-8B-Instruct", "mistralai/Mixtral-8x7B-Instruct-v0.1", "google/gemma-1.1-7b-it"
-# Stelle sicher, dass das Modell über die kostenlose Inference API verfügbar ist oder du Inference Endpoints verwendest.
-HF_MODEL_ID = os.getenv("HF_MODEL_ID", "meta-llama/Meta-Llama-3-8B-Instruct") # Standardmodell, kann über Env Var überschrieben werden
 # --- Globale Variablen für Clients (werden in initialize_agent gesetzt) ---
 hf_token = None
@@ -57,20 +64,23 @@ temp_files_to_clean = set()
 def cleanup_temp_files():
     print("Cleaning up temporary files...")
-    for file_path in list(temp_files_to_clean):
         try:
             if os.path.exists(file_path):
                 os.remove(file_path)
                 print(f"Removed temporary file: {file_path}")
-            temp_files_to_clean.remove(file_path)
         except OSError as e:
             print(f"Error removing temporary file {file_path}: {e}")
 # Registriere die Cleanup-Funktion für das Beenden des Skripts
 atexit.register(cleanup_temp_files)
-# --- Tool Definitionen für smol-agent ---
 @tool
 def search_web(query: str, max_results: int = 3) -> str:
@@ -97,9 +107,10 @@ def search_web(query: str, max_results: int = 3) -> str:
              if not results: return "No search results found."
              return "\n".join([f"Title: {r['title']}\nURL: {r['href']}\nSnippet: {r['body'][:500]}..." for r in results])
         else:
-            return "No compatible search client configured."
     except Exception as e:
-        print(f"Search API Error: {e}")
         return f"Error during search: {e}"
 @tool
@@ -110,179 +121,231 @@ def download_task_file(task_id: str) -> str:
     Args:
         task_id (str): The unique identifier for the task whose file needs to be downloaded.
     Returns:
-        str: The local path to the downloaded file (e.g., '/tmp/tmpXYZ.pdf') if successful,
-             otherwise an error message or 'No file found'.
     """
     print(f"Tool: download_task_file(task_id='{task_id}')")
     file_url = f"{DEFAULT_API_URL}/files/{task_id}"
     try:
-        response = requests.get(file_url, stream=True, timeout=30) # Erhöhtes Timeout
-        response.raise_for_status() # Löst Fehler für 4xx/5xx aus
-        # Bestimme Dateiendung aus Content-Type
         content_type = response.headers.get('content-type', '').lower()
-        suffix = ".tmp" # Standard-Suffix
-        if 'pdf' in content_type:
-            suffix = ".pdf"
-        elif 'png' in content_type:
-            suffix = ".png"
-        elif 'jpeg' in content_type or 'jpg' in content_type:
-            suffix = ".jpg"
-        elif 'csv' in content_type:
-             suffix = ".csv"
-        elif 'plain' in content_type or 'text' in content_type:
-             suffix = ".txt"
-        # Erstelle eine sichere temporäre Datei
         temp_dir = tempfile.gettempdir()
-        # Verwende task_id im Dateinamen für bessere Nachverfolgbarkeit (optional)
-        safe_task_id = re.sub(r'[^\w\-]+', '_', task_id) # Bereinige task_id für Dateinamen
-        temp_file_path = os.path.join(temp_dir, f"gaia_task_{safe_task_id}{suffix}")
         with open(temp_file_path, 'wb') as f:
             for chunk in response.iter_content(chunk_size=8192):
                 f.write(chunk)
         print(f"File downloaded successfully to {temp_file_path}")
-        temp_files_to_clean.add(temp_file_path) # Füge zur Cleanup-Liste hinzu
-        return temp_file_path # Gib den Pfad zurück
     except requests.exceptions.HTTPError as e:
         if e.response.status_code == 404:
             print(f"No file found on server for task_id {task_id}.")
-            return "Error: No file found for this task ID."
         else:
             print(f"HTTP Error downloading file for task {task_id}: {e}")
-            return f"Error: Failed to download file (HTTP {e.response.status_code})."
     except requests.exceptions.RequestException as e:
         print(f"Network Error downloading file for task {task_id}: {e}")
-        return f"Error: Failed to download file due to network issue: {e}"
     except Exception as e:
-        print(f"Unexpected error downloading file for task {task_id}: {e}")
-        return f"Error: Unexpected error during file download: {e}"
 @tool
 def read_file_content(file_path: str) -> str:
     """
     Reads the text content of a previously downloaded file (PDF or plain text).
-    Use this tool AFTER 'download_task_file' has successfully returned a file path.
     Args:
         file_path (str): The local path to the file (must be a path returned by 'download_task_file').
     Returns:
-        str: The extracted text content (truncated if very long), or an error message.
     """
     print(f"Tool: read_file_content(file_path='{file_path}')")
-    # Sicherheitscheck: Erlaube nur Lesen aus dem Temp-Verzeichnis
-    if not file_path or not file_path.startswith(tempfile.gettempdir()):
          print(f"Security Alert: Attempted to read file outside temp directory: {file_path}")
          return "Error: Invalid file path provided. Only downloaded files can be read."
     if not os.path.exists(file_path):
         print(f"Error: File not found at path: {file_path}")
-        return "Error: File not found at the specified path."
     try:
         if file_path.lower().endswith(".pdf"):
             if not PDF_READER_AVAILABLE:
                 return "Error: Cannot read PDF file because PyPDF2 library is not installed."
             text = ""
             with open(file_path, 'rb') as f:
                 reader = PyPDF2.PdfReader(f)
-                for page_num in range(len(reader.pages)):
-                    page = reader.pages[page_num]
-                    text += page.extract_text() or "" # Füge leeren String hinzu, falls extract_text None zurückgibt
-                    if len(text) > 7000: # Begrenze die Länge stärker
                         text = text[:7000] + "\n... (content truncated)"
                         break
             print(f"Successfully read ~{len(text)} chars from PDF: {os.path.basename(file_path)}")
             return f"Content of '{os.path.basename(file_path)}':\n{text}"
-        elif file_path.lower().endswith((".png", ".jpg", ".jpeg")):
              print(f"File is an image: {os.path.basename(file_path)}. Reading image content not implemented.")
-             # Hier könnte man später ein VLM-Tool einbinden
-             return f"Observation: File '{os.path.basename(file_path)}' is an image. Cannot read text content."
-        else: # Versuche als Text zu lesen
-             with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
-                 content = f.read(7000) # Begrenze auf 7000 Zeichen
-                 if len(content) == 7000:
-                     content += "\n... (content truncated)"
-             print(f"Successfully read ~{len(content)} chars from text file: {os.path.basename(file_path)}")
-             return f"Content of '{os.path.basename(file_path)}':\n{content}"
     except Exception as e:
-        print(f"Error reading file {file_path}: {e}")
         return f"Error: Failed to read file content: {e}"
 # --- Agent Initialisierung ---
 def initialize_agent():
-    """Initialisiert den smol-agent und die benötigten Clients."""
     global hf_token, search_client, agent_instance
     print("Initializing agent and clients...")
-    hf_token = os.getenv("HUGGINGFACE_TOKEN")
     if not hf_token:
-        raise ValueError("HUGGINGFACE_TOKEN Secret nicht im Hugging Face Space gefunden!")
-    # --- Search Client ---
-    if USE_TAVILY:
-        tavily_key = os.getenv("TAVILY_API_KEY")
-        if tavily_key:
-            search_client = TavilyClient(api_key=tavily_key)
-            print("Using Tavily for search.")
-        else:
-            print("WARNUNG: TAVILY_API_KEY nicht gefunden, obwohl USE_TAVILY=True.")
-            # Fallback auf DuckDuckGo wenn möglich
-            if USE_DUCKDUCKGO:
                 try:
-                    search_client = DDGS()
-                    print("Falling back to DuckDuckGo for search.")
                 except NameError:
-                     search_client = None
-                     print("WARNUNG: DuckDuckGo auch nicht verfügbar. Suche deaktiviert.")
             else:
                 search_client = None
-                print("WARNUNG: Suche deaktiviert.")
-    elif USE_DUCKDUCKGO:
-        try:
-            search_client = DDGS()
-            print("Using DuckDuckGo for search.")
-        except NameError:
             search_client = None
-            print("WARNUNG: DuckDuckGo nicht verfügbar. Suche deaktiviert.")
-    else:
-        search_client = None
-        print("Web search is disabled.")
     # --- LLM Client (Hugging Face Inference API) ---
     llm = InferenceAPI(
         model_id=HF_MODEL_ID,
         token=hf_token,
-        max_new_tokens=1500, # Erhöhe ggf. die max. Token für komplexe Antworten
-        temperature=0.1,    # Niedrige Temperatur für Fakten
-        # Weitere Parameter nach Bedarf: top_p, top_k, repetition_penalty etc.
     )
     print(f"LLM configured with model: {HF_MODEL_ID}")
     # --- Agent Instanz ---
     available_tools = [search_web, download_task_file, read_file_content]
     agent_instance = Agent(
         llm=llm,
-        # tools=available_tools, # Tools werden dynamisch in run() übergeben
-        # system_prompt=... (optional, kann auch im run() Prompt sein)
     )
-    print(f"Smol Agent initialized with {len(available_tools)} tools.")
 # --- Angepasste Hauptfunktion ---
-def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
-    Fetches all questions, runs the smol-agent on them, submits all answers,
-    and displays the results.
     """
     space_id = os.getenv("SPACE_ID")
@@ -291,28 +354,31 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         print(f"User logged in: {username}")
     else:
         print("User not logged in.")
         return "Please Login to Hugging Face with the button.", None
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
-    # 1. Initialisiere Agent und Clients (bei jedem Lauf)
     try:
         initialize_agent()
-        if not agent_instance: # Zusätzliche Prüfung
              raise RuntimeError("Agent instance could not be initialized.")
     except ValueError as e:
          print(f"Error during initialization: {e}")
          return f"Configuration Error: {e}", None
     except Exception as e:
-        print(f"Error initializing agent/clients: {e}")
         return f"Error initializing agent: {e}", None
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Local Run - Code Link N/A"
     print(f"Agent Code Link: {agent_code}")
-    # 2. Fetch Questions (wie zuvor)
     print(f"Fetching questions from: {questions_url}")
     try:
         response = requests.get(questions_url, timeout=30)
@@ -321,109 +387,106 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         if not questions_data or not isinstance(questions_data, list):
              print(f"Fetched questions list is empty or invalid format: {questions_data}")
              return "Fetched questions list is empty or invalid format.", None
-        print(f"Fetched {len(questions_data)} questions.")
     except Exception as e:
-        # Detailiertere Fehlermeldung
         print(f"Error fetching questions ({type(e).__name__}): {e}")
         return f"Error fetching questions: {e}", None
-    # 3. Run your Smol Agent
     start_time = datetime.now()
     results_log = []
     answers_payload = []
-    print(f"Running smol-agent on {len(questions_data)} questions using {HF_MODEL_ID}...")
-    status_updates = []
-    for i, item in enumerate(questions_data):
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
-            print(f"Skipping item with missing task_id or question: {item}")
             continue
-        current_status = f"Processing question {i+1}/{len(questions_data)} (Task ID: {task_id})..."
-        print(current_status)
-        status_updates.append(current_status)
-        # --- Prompt für smol-agent ---
-        # Wichtig: Klare Anweisung für das Endformat geben!
-        # Gib dem Agenten den Task-ID Kontext mit!
         agent_prompt = f"""
-You are an expert AI assistant solving a challenge question.
 Your task is to answer the following question accurately and concisely.
 Use the available tools ONLY when necessary to find information or access required files.
-**Available Tools:**
-*   `search_web(query: str, max_results: int = 3)`: Searches the web.
-*   `download_task_file(task_id: str)`: Downloads the specific file for a task. Use the task_id '{task_id}' if you need the file for THIS question. Returns the local file path.
-*   `read_file_content(file_path: str)`: Reads text from a downloaded file using the path returned by download_task_file.
 **Current Task:**
 *   Task ID: {task_id}
 *   Question: {question_text}
-**Instructions:**
-1.  Think step-by-step to break down the question.
-2.  Use the tools provided if you need external information or file content. Make sure to use the correct task_id ('{task_id}') for `download_task_file`.
-3.  Reason through the information obtained.
-4.  Provide ONLY the final answer to the question, without any introductory phrases, explanations, or conversational text like "The answer is..." or "Based on my analysis...".
-5.  Format the answer exactly as requested by the question (e.g., just a year, a comma-separated list, etc.).
-Begin!
 """
-        submitted_answer = f"Error: Agent failed to produce a result for task {task_id}" # Default error
         try:
-            # Führe den Agenten aus
-            agent_response = agent_instance.run(
-                prompt=agent_prompt,
-                tools=[search_web, download_task_file, read_file_content] # Übergebe Tools hier
-            )
             if agent_response:
-                 # Versuche, die Antwort zu bereinigen (optional, je nach Modellverhalten)
-                 # Einfache Bereinigung: Entferne häufige Präfixe
-                 cleaned_response = re.sub(r"^(Final Answer:|The answer is:|Here is the answer:)\s*", "", agent_response.strip(), flags=re.IGNORECASE)
-                 submitted_answer = cleaned_response
             else:
-                 submitted_answer = "Error: Agent returned an empty response."
-            print(f"Task {task_id} completed by agent. Raw Response: '{agent_response[:100]}...' | Submitted Answer: '{submitted_answer}'")
         except Exception as e:
-             error_msg = f"SMOL_AGENT ERROR on task {task_id} ({type(e).__name__}): {e}"
              print(error_msg)
-             # Gib einen Fehler als Antwort ein
-             submitted_answer = f"ERROR: {type(e).__name__}" # Kürzere Fehlermeldung für die Payload
         finally:
             # Füge das Ergebnis (oder den Fehler) hinzu
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
-            # Bereinige temporäre Dateien *sofort* nach Bearbeitung der Aufgabe
-            # (Optional, atexit macht es auch am Ende)
-            # cleanup_temp_files() # Kann hier aufgerufen werden, wenn Ressourcen knapp sind
     end_time = datetime.now()
     duration = end_time - start_time
     print(f"Agent processing finished in {duration}.")
-    # 4. Prepare Submission (wie zuvor)
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
-        # Lösche übrig gebliebene Temp-Dateien
-        cleanup_temp_files()
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
     status_update = f"Agent finished in {duration}. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
-    # 5. Submit (wie zuvor, mit Timeout)
     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
         response = requests.post(submit_url, json=submission_data, timeout=120)
         response.raise_for_status()
@@ -436,10 +499,7 @@ Begin!
             f"Message: {result_data.get('message', 'No message received.')}"
         )
         print("Submission successful.")
-        results_df = pd.DataFrame(results_log)
-        return final_status + f"\n\nAgent ({HF_MODEL_ID}) Processing Log:\n" + "\n".join(status_updates[-5:]), results_df
     except requests.exceptions.HTTPError as e:
-        # (Fehlerbehandlung wie zuvor)
         error_detail = f"Server responded with status {e.response.status_code}."
         try:
             error_json = e.response.json()
@@ -450,88 +510,81 @@ Begin!
                  error_detail += f" Detail: {str(api_error)}"
         except requests.exceptions.JSONDecodeError:
             error_detail += f" Response: {e.response.text[:500]}"
-        status_message = f"Submission Failed: {error_detail}"
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
     except requests.exceptions.Timeout:
-        status_message = "Submission Failed: The request timed out."
-        # ... (Rest der Fehlerbehandlung wie zuvor) ...
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
     except requests.exceptions.RequestException as e:
-        status_message = f"Submission Failed: Network error - {e}"
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
     except Exception as e:
-        status_message = f"An unexpected error occurred during submission: {e}"
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
     finally:
-         # Stelle sicher, dass alle Temp-Dateien am Ende gelöscht werden
-         cleanup_temp_files()
-# --- Gradio Interface (angepasst für smol-agent) ---
 with gr.Blocks() as demo:
-    gr.Markdown("# Smol Agent Evaluation Runner (Hugging Face)")
     gr.Markdown(
         f"""
         **Instructions:**
-        1.  Ensure you have added your `HUGGINGFACE_TOKEN` (with write access) as a Secret in your Space settings. Optionally add `TAVILY_API_KEY` if using Tavily search.
-        2.  Make sure `requirements.txt` includes `smol-agent[huggingface]`, search libraries (`duckduckgo-search` or `tavily-python`), and `pypdf2`.
-        3.  The agent uses the Hugging Face Inference API with the model: **{HF_MODEL_ID}**. You can change this by setting the `HF_MODEL_ID` environment variable in your Space settings.
-        4.  Log in to your Hugging Face account below.
-        5.  Click 'Run Evaluation & Submit All Answers'. **This will take time** as the agent processes each question using the Inference API.
         ---
         **Agent Details:**
-        *   Uses the `smol-agent` library.
-        *   Leverages Hugging Face Inference API for LLM calls.
         *   Tools: Web Search ({'Tavily' if USE_TAVILY else 'DuckDuckGo' if USE_DUCKDUCKGO else 'Disabled'}), File Download, File Reading (PDF/Text).
-        *   Check the Space console logs for detailed agent behavior.
         """
     )
-    gr.LoginButton()
     run_button = gr.Button("Run Evaluation & Submit All Answers")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=10, interactive=False)
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True, height=400)
-    # Verwende profile als Input für die Funktion
-    def get_profile(request: gr.Request):
-        # Helper function to potentially extract profile info if needed later,
-        # Gradio's LoginButton might handle profile implicitly now.
-        # For now, just pass None if not logged in via button state.
-        # This part might need adjustment based on how Gradio passes OAuthProfile.
-        # The current run_and_submit_all signature expects OAuthProfile | None
-        # which Gradio should provide when the button is clicked if logged in.
-        # If run_button.click doesn't automatically pass the profile,
-        # we might need a different setup using gr.State or gr.Variable.
-        # Let's assume Gradio handles passing the profile for now.
-        pass # Placeholder
     run_button.click(
-        fn=run_and_submit_all,
-        inputs=[], # Gradio's LoginButton should implicitly provide profile context
-                   # If this fails, might need inputs=gr.State(profile_info) setup
         outputs=[status_output, results_table],
         api_name="run_evaluation_smol"
     )
 # --- App Start (unverändert) ---
 if __name__ == "__main__":
-    print("\n" + "-"*30 + " App Starting (Smol Agent Version) " + "-"*30)
     space_host_startup = os.getenv("SPACE_HOST")
     space_id_startup = os.getenv("SPACE_ID")
-    # (Rest des Startblocks unverändert)
     if space_host_startup:
         print(f"✅ SPACE_HOST found: {space_host_startup}")
         print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
@@ -546,8 +599,13 @@ if __name__ == "__main__":
         print("ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
     print(f"   Using HF Model via Inference API: {HF_MODEL_ID}")
-    print(f"   Search Tool: {'Tavily' if USE_TAVILY else 'DuckDuckGo' if USE_DUCKDUCKGO else 'Disabled'}")
-    print("-"*(60 + len(" App Starting (Smol Agent Version) ")) + "\n")
-    print("Launching Gradio Interface for Smol Agent Evaluation...")
-    demo.launch(debug=False, share=False) # Debug=False für normalen Betrieb

 import tempfile # Für temporäre Dateien
 import atexit # Zum Aufräumen beim Beenden
+# --- Smol Agents und HF Imports (KORRIGIERT) ---
+try:
+    from smolagents import Agent
+    from smolagents.llm.huggingface import InferenceAPI
+    from smolagents.tools import tool
+    print("Successfully imported from 'smolagents'")
+except ImportError as e:
+    print(f"Error importing from smolagents: {e}")
+    print("Please ensure 'smolagents[huggingface]' is listed correctly in requirements.txt")
+    # Exit if core library is missing
+    import sys
+    sys.exit(f"Fatal Error: Could not import smolagents. Check requirements.txt and rebuild/restart the Space. Original error: {e}")
 from huggingface_hub import HfApi, InferenceClient
 # --- Suchtool Imports (wähle eins) ---
 # --- Konstanten ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # Wähle ein Instruction-Following Modell von Hugging Face Hub
+HF_MODEL_ID = os.getenv("HF_MODEL_ID", "meta-llama/Meta-Llama-3-8B-Instruct") # Standardmodell
 # --- Globale Variablen für Clients (werden in initialize_agent gesetzt) ---
 hf_token = None
 def cleanup_temp_files():
     print("Cleaning up temporary files...")
+    for file_path in list(temp_files_to_clean): # Iteriere über Kopie, da Set verändert wird
         try:
             if os.path.exists(file_path):
                 os.remove(file_path)
                 print(f"Removed temporary file: {file_path}")
+            if file_path in temp_files_to_clean: # Prüfe erneut, falls Fehler auftrat
+                 temp_files_to_clean.remove(file_path)
         except OSError as e:
             print(f"Error removing temporary file {file_path}: {e}")
+        except KeyError:
+             print(f"Warning: File path {file_path} already removed from cleanup set.")
 # Registriere die Cleanup-Funktion für das Beenden des Skripts
 atexit.register(cleanup_temp_files)
+# --- Tool Definitionen für smolagents ---
 @tool
 def search_web(query: str, max_results: int = 3) -> str:
              if not results: return "No search results found."
              return "\n".join([f"Title: {r['title']}\nURL: {r['href']}\nSnippet: {r['body'][:500]}..." for r in results])
         else:
+            # Dies sollte nicht passieren, wenn search_client gesetzt ist, aber als Absicherung
+            return "No compatible search client configured or available."
     except Exception as e:
+        print(f"Search API Error ({type(e).__name__}): {e}")
         return f"Error during search: {e}"
 @tool
     Args:
         task_id (str): The unique identifier for the task whose file needs to be downloaded.
     Returns:
+        str: The local path to the downloaded file (e.g., '/tmp/gaia_task_abc-123.pdf') if successful,
+             otherwise an error message starting with 'Error:'.
     """
     print(f"Tool: download_task_file(task_id='{task_id}')")
     file_url = f"{DEFAULT_API_URL}/files/{task_id}"
     try:
+        response = requests.get(file_url, stream=True, timeout=30)
+        response.raise_for_status()
         content_type = response.headers.get('content-type', '').lower()
+        suffix = ".tmp"
+        if 'pdf' in content_type: suffix = ".pdf"
+        elif 'png' in content_type: suffix = ".png"
+        elif 'jpeg' in content_type or 'jpg' in content_type: suffix = ".jpg"
+        elif 'csv' in content_type: suffix = ".csv"
+        elif 'plain' in content_type or 'text' in content_type: suffix = ".txt"
+        # Füge ggf. weitere Mappings hinzu
         temp_dir = tempfile.gettempdir()
+        safe_task_id = re.sub(r'[^\w\-]+', '_', task_id)
+        # Erzeuge eindeutigeren Dateinamen, um Konflikte bei schnellen Läufen zu minimieren
+        timestamp = datetime.now().strftime("%Y%m%d%H%M%S%f")
+        temp_file_path = os.path.join(temp_dir, f"gaia_task_{safe_task_id}_{timestamp}{suffix}")
         with open(temp_file_path, 'wb') as f:
             for chunk in response.iter_content(chunk_size=8192):
                 f.write(chunk)
         print(f"File downloaded successfully to {temp_file_path}")
+        temp_files_to_clean.add(temp_file_path)
+        return temp_file_path # Erfolg: Gib Pfad zurück
     except requests.exceptions.HTTPError as e:
         if e.response.status_code == 404:
             print(f"No file found on server for task_id {task_id}.")
+            return "Error: No file found for this task ID." # Fehler: Gib Fehlermeldung zurück
         else:
             print(f"HTTP Error downloading file for task {task_id}: {e}")
+            return f"Error: Failed to download file (HTTP {e.response.status_code})." # Fehler
     except requests.exceptions.RequestException as e:
         print(f"Network Error downloading file for task {task_id}: {e}")
+        return f"Error: Failed to download file due to network issue: {e}" # Fehler
     except Exception as e:
+        print(f"Unexpected error downloading file for task {task_id} ({type(e).__name__}): {e}")
+        return f"Error: Unexpected error during file download: {e}" # Fehler
 @tool
 def read_file_content(file_path: str) -> str:
     """
     Reads the text content of a previously downloaded file (PDF or plain text).
+    Use this tool AFTER 'download_task_file' has successfully returned a file path (not an error message).
     Args:
         file_path (str): The local path to the file (must be a path returned by 'download_task_file').
     Returns:
+        str: The extracted text content (truncated if very long), or an error message starting with 'Error:'.
     """
     print(f"Tool: read_file_content(file_path='{file_path}')")
+    # Überprüfung des Inputs: Ist es überhaupt ein Pfad?
+    if not isinstance(file_path, str) or not os.path.isabs(file_path):
+         print(f"Invalid input for read_file_content: '{file_path}'. Expected an absolute file path.")
+         return "Error: Invalid input. Provide the absolute file path returned by download_task_file."
+    # Sicherheitscheck: Erlaube nur Lesen aus dem Temp-Verzeichnis (bleibt wichtig)
+    if not file_path.startswith(tempfile.gettempdir()):
          print(f"Security Alert: Attempted to read file outside temp directory: {file_path}")
          return "Error: Invalid file path provided. Only downloaded files can be read."
     if not os.path.exists(file_path):
         print(f"Error: File not found at path: {file_path}")
+        return f"Error: File not found at the specified path '{os.path.basename(file_path)}'." # Gib Dateinamen im Fehler an
     try:
+        file_size = os.path.getsize(file_path)
+        print(f"Reading file: {os.path.basename(file_path)}, Size: {file_size} bytes")
+        if file_size == 0:
+             print(f"Warning: File {os.path.basename(file_path)} is empty.")
+             return f"Observation: The file '{os.path.basename(file_path)}' is empty."
         if file_path.lower().endswith(".pdf"):
             if not PDF_READER_AVAILABLE:
                 return "Error: Cannot read PDF file because PyPDF2 library is not installed."
             text = ""
             with open(file_path, 'rb') as f:
                 reader = PyPDF2.PdfReader(f)
+                num_pages = len(reader.pages)
+                print(f"Reading {num_pages} pages from PDF...")
+                for page_num in range(num_pages):
+                    # Prüfe ob Seite Text enthält bevor Extraktion versucht wird
+                    if reader.pages[page_num].extract_text():
+                         page_text = reader.pages[page_num].extract_text()
+                         text += page_text + "\n" # Füge Zeilenumbruch zwischen Seiten hinzu
+                    if len(text) > 7000: # Begrenze die Länge
                         text = text[:7000] + "\n... (content truncated)"
+                        print(f"Text truncated at {len(text)} chars.")
                         break
+            if not text:
+                 print(f"Warning: Could not extract text from PDF: {os.path.basename(file_path)}")
+                 return f"Observation: Could not extract any text content from the PDF file '{os.path.basename(file_path)}'."
             print(f"Successfully read ~{len(text)} chars from PDF: {os.path.basename(file_path)}")
             return f"Content of '{os.path.basename(file_path)}':\n{text}"
+        elif file_path.lower().endswith((".png", ".jpg", ".jpeg", ".gif", ".bmp")):
              print(f"File is an image: {os.path.basename(file_path)}. Reading image content not implemented.")
+             return f"Observation: File '{os.path.basename(file_path)}' is an image. Cannot read text content with this tool."
+        else: # Versuche als Text zu lesen (TXT, CSV, etc.)
+             # Lese in Chunks um Speicher zu schonen bei großen Textdateien
+             content = ""
+             chunk_size = 4096
+             max_len = 7000
+             truncated = False
+             try:
+                 with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
+                     while len(content) < max_len:
+                         chunk = f.read(chunk_size)
+                         if not chunk:
+                             break
+                         content += chunk
+                 if len(content) > max_len:
+                      content = content[:max_len]
+                      truncated = True
+                 print(f"Successfully read ~{len(content)} chars from text file: {os.path.basename(file_path)}")
+                 result = f"Content of '{os.path.basename(file_path)}':\n{content}"
+                 if truncated:
+                      result += "\n... (content truncated)"
+                 return result
+             except Exception as read_err: # Fange Lesefehler ab
+                 print(f"Error reading file {file_path} as text: {read_err}")
+                 return f"Error: Failed to read file '{os.path.basename(file_path)}' as text: {read_err}"
     except Exception as e:
+        print(f"Error reading file {file_path} ({type(e).__name__}): {e}")
         return f"Error: Failed to read file content: {e}"
 # --- Agent initialization ---
 def initialize_agent():
     """Prepare the Hugging Face token, the search client and the agent instance.
     Reads the token from the HUGGINGFACE_TOKEN environment variable the first time
     it is needed, picks a web-search client if none is set yet, and then builds a
     fresh LLM client and Agent on every call (stored in the module-level
     ``agent_instance``).
     Raises:
         ValueError: if HUGGINGFACE_TOKEN is not set.
         NameError: if the name ``tool`` is not present in the module globals.
     """
     global hf_token, search_client, agent_instance
     print("Initializing agent and clients...")
     # Resolve the token only when no truthy token has been stored yet.
     if not hf_token:
         hf_token = os.getenv("HUGGINGFACE_TOKEN")
     if not hf_token:
         raise ValueError("HUGGINGFACE_TOKEN Secret nicht im Hugging Face Space gefunden!")
     # Re-evaluated on each call for as long as no (truthy) client exists.
     if not search_client:
         search_client = _build_search_client()
     # --- LLM client (Hugging Face Inference API) ---
     llm = InferenceAPI(
         model_id=HF_MODEL_ID,
         token=hf_token,
         max_new_tokens=1500,  # upper bound on tokens the model may generate
         temperature=0.1,
     )
     print(f"LLM configured with model: {HF_MODEL_ID}")
     # --- Agent instance (always rebuilt so it carries the current tool list) ---
     available_tools = [search_web, download_task_file, read_file_content]
     active_tools = [candidate for candidate in available_tools if candidate is not None]
     # Guard against a missing smolagents 'tool' import.
     if "tool" not in globals():
         raise NameError("Die 'tool' Funktion von smolagents konnte nicht importiert werden.")
     agent_instance = Agent(llm=llm, tools=active_tools)
     tool_count = len(active_tools)
     print(f"Smol Agent initialized with {tool_count} tools.")
     if tool_count < len(available_tools):
         print("Warning: Some tools might be inactive due to configuration or missing libraries.")
 def _build_search_client():
     """Return a search client chosen from USE_TAVILY / USE_DUCKDUCKGO, or None.
     DuckDuckGo is used as a fallback only when Tavily is enabled but its API key
     is missing; a missing client class (NameError) disables search.
     """
     if not USE_TAVILY:
         if not USE_DUCKDUCKGO:
             print("Web search is disabled by configuration.")
             return None
         try:
             client = DDGS()
         except NameError:
             print("WARNUNG: duckduckgo-search nicht installiert/verfügbar. Suche deaktiviert.")
             return None
         print("Using DuckDuckGo for search.")
         return client
     api_key = os.getenv("TAVILY_API_KEY")
     if api_key:
         try:
             client = TavilyClient(api_key=api_key)
         except NameError:
             print("WARNUNG: TavilyClient Klasse nicht gefunden, obwohl USE_TAVILY=True.")
             return None
         print("Using Tavily for search.")
         return client
     print("WARNUNG: TAVILY_API_KEY nicht gefunden, obwohl USE_TAVILY=True.")
     if not USE_DUCKDUCKGO:
         print("WARNUNG: Suche deaktiviert (Tavily Key fehlt).")
         return None
     try:
         client = DDGS()
     except NameError:
         print("WARNUNG: DuckDuckGo auch nicht verfügbar. Suche deaktiviert.")
         return None
     print("Falling back to DuckDuckGo for search.")
     return client
 # --- Adapted main function ---
+def run_and_submit_all( profile: gr.OAuthProfile | None, progress=gr.Progress(track_tqdm=True)):
     """
+    Fetches all questions, runs the smolagents agent on them, submits all answers,
+    and displays the results. Includes Gradio progress tracking.
     """
     # NOTE(review): several unchanged regions of this function are not shown in this view
     # (e.g. where 'username', 'questions_data' and 'result_data' are assigned); verify against the full file.
     space_id = os.getenv("SPACE_ID")
         print(f"User logged in: {username}")
     else:
         print("User not logged in.")
+        # Return None for the DataFrame to avoid errors in Gradio
         return "Please Login to Hugging Face with the button.", None
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
+    # 1. Initialize agent and clients
+    progress(0, desc="Initializing Agent...")
     try:
         initialize_agent()
+        if not agent_instance:
              raise RuntimeError("Agent instance could not be initialized.")
     except ValueError as e:
          print(f"Error during initialization: {e}")
          return f"Configuration Error: {e}", None
     except Exception as e:
+        print(f"Error initializing agent/clients ({type(e).__name__}): {e}")
         return f"Error initializing agent: {e}", None
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Local Run - Code Link N/A"
     print(f"Agent Code Link: {agent_code}")
+    # 2. Fetch Questions
+    progress(0.1, desc="Fetching questions...")
     print(f"Fetching questions from: {questions_url}")
     try:
         response = requests.get(questions_url, timeout=30)
         if not questions_data or not isinstance(questions_data, list):
              print(f"Fetched questions list is empty or invalid format: {questions_data}")
              return "Fetched questions list is empty or invalid format.", None
+        num_questions = len(questions_data)
+        print(f"Fetched {num_questions} questions.")
     except Exception as e:
         print(f"Error fetching questions ({type(e).__name__}): {e}")
         return f"Error fetching questions: {e}", None
+    # 3. Run your Smol Agent with progress tracking
     start_time = datetime.now()
     results_log = []
     answers_payload = []
+    print(f"Running smolagents on {num_questions} questions using {HF_MODEL_ID}...")
+    # Use tqdm for the iteration so Gradio can display progress
+    # for i, item in enumerate(tqdm(questions_data, desc="Processing Questions")): # Standard tqdm
+    for i, item in enumerate(progress.tqdm(questions_data, desc="Processing Questions")): # Gradio tqdm
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
+            print(f"Skipping item {i+1} with missing task_id or question: {item}")
             continue
+        # --- Prompt for smolagents ---
         agent_prompt = f"""
+You are an expert AI assistant solving a challenge question based on the GAIA benchmark.
 Your task is to answer the following question accurately and concisely.
 Use the available tools ONLY when necessary to find information or access required files.
+Think step-by-step before deciding on an action or the final answer.
+**Available Tools:** (These are the functions you can call)
+*   `search_web(query: str, max_results: int = 3)`: Searches the web for information.
+*   `download_task_file(task_id: str)`: Downloads the specific file FOR THIS TASK ONLY. Use the task_id '{task_id}'. Returns the local file path needed for 'read_file_content'.
+*   `read_file_content(file_path: str)`: Reads text from a file previously downloaded with 'download_task_file'. Requires the exact file path returned by that tool.
 **Current Task:**
 *   Task ID: {task_id}
 *   Question: {question_text}
+**Instructions & Output Format:**
+1.  Carefully analyze the question.
+2.  Think step-by-step. Outline your plan if needed.
+3.  Execute tools sequentially if information depends on previous steps (e.g., download then read).
+4.  Review the gathered information and your reasoning.
+5.  **Crucially**: Provide ONLY the final answer. Do not include your reasoning, steps, tool calls, introductions (like "The answer is..."), or any other conversational text in the final output. The answer must be exact and stand-alone. Format it as requested by the question (e.g., just a number, a comma-separated list 'apple,banana,orange', etc.).
+Let's begin the thinking process for Task {task_id}.
 """
+        submitted_answer = f"Error: Agent failed for task {task_id}" # Default error
         try:
+            # Run the agent (tools are not passed again because they were given to the constructor)
+            agent_response = agent_instance.run(prompt=agent_prompt)
             if agent_response:
+                 # Simple cleanup: remove common prefixes and surplus whitespace
+                 # Models sometimes emit prefixes despite the instructions.
+                 cleaned_response = re.sub(r"^(Final Answer:|Answer:|The answer is:|Here is the answer:)\s*", "", agent_response.strip(), flags=re.IGNORECASE)
+                 # Also strip quotes at the start/end in case the model adds them
+                 cleaned_response = cleaned_response.strip('"').strip("'")
+                 submitted_answer = cleaned_response if cleaned_response else "Error: Agent returned empty response after cleaning."
             else:
+                 submitted_answer = "Error: Agent returned an empty or None response."
+            print(f"Task {task_id} completed. Submitted Answer: '{submitted_answer}'")
+        # Handle specific exceptions if needed, e.g., RateLimitError from HF
         except Exception as e:
+             error_msg = f"AGENT_RUN_ERROR on task {task_id} ({type(e).__name__}): {e}"
              print(error_msg)
+             submitted_answer = f"ERROR: Agent failed ({type(e).__name__})" # Shorter error message
         finally:
             # Record the result (or the error)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
+            # Optional: immediate cleanup (may lengthen the run time)
+            # cleanup_temp_files()
     end_time = datetime.now()
     duration = end_time - start_time
     print(f"Agent processing finished in {duration}.")
+    progress(0.9, desc="Submitting answers...")
+    # 4. Prepare Submission
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
+        cleanup_temp_files() # Clean up
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
     status_update = f"Agent finished in {duration}. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
+    # 5. Submit
     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
+    final_status = "Submission attempt finished." # Default status
+    results_df = pd.DataFrame(results_log) # Build the DataFrame before the try block
     try:
         response = requests.post(submit_url, json=submission_data, timeout=120)
         response.raise_for_status()
             f"Message: {result_data.get('message', 'No message received.')}"
         )
         print("Submission successful.")
     except requests.exceptions.HTTPError as e:
         error_detail = f"Server responded with status {e.response.status_code}."
         try:
             error_json = e.response.json()
                  error_detail += f" Detail: {str(api_error)}"
         except requests.exceptions.JSONDecodeError:
             error_detail += f" Response: {e.response.text[:500]}"
+        final_status = f"Submission Failed: {error_detail}"
+        print(final_status)
     except requests.exceptions.Timeout:
+        final_status = "Submission Failed: The request timed out."
+        print(final_status)
     except requests.exceptions.RequestException as e:
+        final_status = f"Submission Failed: Network error - {e}"
+        print(final_status)
     except Exception as e:
+        final_status = f"An unexpected error occurred during submission ({type(e).__name__}): {e}"
+        print(final_status)
     finally:
+         cleanup_temp_files() # Make sure cleanup always happens
+    progress(1, desc="Done.")
+    return final_status, results_df
+# --- Gradio interface (with progress bar) ---
 with gr.Blocks() as demo:
     gr.Markdown("# Smol Agents Evaluation Runner (Hugging Face)")
     gr.Markdown(
         f"""
         **Instructions:**
         1.  Ensure `HUGGINGFACE_TOKEN` (write access) is a Secret in Space settings. Add `TAVILY_API_KEY` if using Tavily.
         2.  Verify `requirements.txt` includes `smolagents[huggingface]`, search libs, `pypdf2`.
         3.  Agent uses HF Inference API model: **{HF_MODEL_ID}** (change via `HF_MODEL_ID` env var).
         4.  Log in below.
         5.  Click 'Run Evaluation & Submit'. **This will take time.** Monitor progress below and console logs.
         ---
         **Agent Details:**
         *   Uses `smolagents` library.
         *   Tools: Web Search ({'Tavily' if USE_TAVILY else 'DuckDuckGo' if USE_DUCKDUCKGO else 'Disabled'}), File Download, File Reading (PDF/Text).
         """
     )
     # Gradio's LoginButton drives the Hugging Face OAuth flow; no separate
     # logout control is implemented here (users can log out on HF itself).
     with gr.Row():
         login_button = gr.LoginButton()
     run_button = gr.Button("Run Evaluation & Submit All Answers")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=10, interactive=False)
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True, height=400)
     # --- Event handler for the button ---
     def handle_run(profile: gr.OAuthProfile | None, progress=gr.Progress(track_tqdm=True)):
         """Run the full evaluation for the logged-in user and return (status, results).
         Gradio injects both arguments itself: ``profile`` because of its
         ``gr.OAuthProfile | None`` annotation (None when nobody is logged in) and
         ``progress`` because its default value is a ``gr.Progress`` instance.
         The previous version read ``request.profile`` from a ``gr.Request``, which
         does not carry the OAuth profile, so the user always appeared logged out.
         """
         return run_and_submit_all(profile, progress=progress)
     run_button.click(
         fn=handle_run,  # wrapper so Gradio can inject profile and progress
         inputs=[],      # no component inputs; injected arguments are not listed here
         outputs=[status_output, results_table],
         api_name="run_evaluation_smol"
     )
 # --- App start (unchanged) ---
 if __name__ == "__main__":
     # Script entry point: print a startup banner describing the runtime configuration, then launch the Gradio app.
     # NOTE(review): the unchanged lines handling SPACE_ID appear to be omitted from this view — verify against the full file.
+    print("\n" + "-"*30 + " App Starting (Smol Agents Version - Corrected Imports) " + "-"*30)
     space_host_startup = os.getenv("SPACE_HOST")
     space_id_startup = os.getenv("SPACE_ID")
     if space_host_startup:
         print(f"✅ SPACE_HOST found: {space_host_startup}")
         print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
         print("ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
     print(f"   Using HF Model via Inference API: {HF_MODEL_ID}")
+    search_tool_status = 'Disabled'
+    if USE_TAVILY: search_tool_status = 'Tavily'
+    elif USE_DUCKDUCKGO: search_tool_status = 'DuckDuckGo'
+    print(f"   Search Tool: {search_tool_status}")
+    print(f"   PDF Reading: {'Enabled' if PDF_READER_AVAILABLE else 'Disabled (PyPDF2 missing)'}")
+    print("-"*(60 + len(" App Starting (Smol Agents Version - Corrected Imports) ")) + "\n")
+    print("Launching Gradio Interface for Smol Agents Evaluation...")
+    # Enable the request queue for better handling of long runs
+    demo.queue().launch(debug=False, share=False)