Update app.py
Browse files
app.py
CHANGED
|
@@ -2,128 +2,552 @@ import os
|
|
| 2 |
import gradio as gr
|
| 3 |
import requests
|
| 4 |
import pandas as pd
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
-
|
|
|
|
|
|
|
| 32 |
try:
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
space_id = os.getenv("SPACE_ID")
|
| 44 |
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
-
#
|
| 50 |
try:
|
| 51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
except Exception as e:
|
| 53 |
-
|
|
|
|
| 54 |
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
try:
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
except Exception as e:
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
continue
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
try:
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
f"
|
| 97 |
-
f"User: {
|
| 98 |
-
f"Score: {
|
| 99 |
-
f"({
|
| 100 |
-
f"
|
| 101 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
except Exception as e:
|
| 103 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
|
| 105 |
-
df = pd.DataFrame(records)
|
| 106 |
-
return status, df
|
| 107 |
|
| 108 |
-
# --- Gradio
|
| 109 |
with gr.Blocks() as demo:
|
| 110 |
-
gr.Markdown("#
|
| 111 |
-
gr.Markdown(
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
gr.LoginButton()
|
| 120 |
-
run_btn = gr.Button("Run Evaluation & Submit All Answers")
|
| 121 |
-
status_out = gr.Textbox(label="Status / Ergebnis", lines=5, interactive=False)
|
| 122 |
-
result_table = gr.DataFrame(label="Fragen & Antworten", wrap=True)
|
| 123 |
-
run_btn.click(fn=run_and_submit_all, inputs=[], outputs=[status_out, result_table])
|
| 124 |
|
| 125 |
-
|
| 126 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
if __name__ == "__main__":
|
| 129 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import gradio as gr
|
| 3 |
import requests
|
| 4 |
import pandas as pd
|
| 5 |
+
import re
|
| 6 |
+
from datetime import datetime
|
| 7 |
+
import time
|
| 8 |
+
import tempfile # Für temporäre Dateien
|
| 9 |
+
import atexit # Zum Aufräumen beim Beenden
|
| 10 |
+
|
| 11 |
+
# --- Smol Agent und HF Imports ---
|
| 12 |
+
from smol_agent import Agent
|
| 13 |
+
from smol_agent.llm.huggingface import InferenceAPI
|
| 14 |
+
from smol_agent.tools import tool
|
| 15 |
+
from huggingface_hub import HfApi, InferenceClient
|
| 16 |
+
|
| 17 |
+
# --- Suchtool Imports (wähle eins) ---
|
| 18 |
+
USE_TAVILY = False # Setze auf True, wenn du Tavily bevorzugst (benötigt TAVILY_API_KEY)
|
| 19 |
+
USE_DUCKDUCKGO = True # Setze auf True für DuckDuckGo (kein Key nötig)
|
| 20 |
+
|
| 21 |
+
if USE_TAVILY:
|
| 22 |
+
try:
|
| 23 |
+
from tavily import TavilyClient
|
| 24 |
+
except ImportError:
|
| 25 |
+
print("WARNUNG: TavilyClient nicht installiert. Führe 'pip install tavily-python' aus.")
|
| 26 |
+
USE_TAVILY = False
|
| 27 |
+
USE_DUCKDUCKGO = True # Fallback
|
| 28 |
+
if USE_DUCKDUCKGO:
|
| 29 |
+
try:
|
| 30 |
+
from duckduckgo_search import DDGS
|
| 31 |
+
except ImportError:
|
| 32 |
+
print("WARNUNG: duckduckgo-search nicht installiert. Führe 'pip install duckduckgo-search' aus.")
|
| 33 |
+
USE_DUCKDUCKGO = False
|
| 34 |
+
|
| 35 |
+
# --- PDF Reader Import ---
|
| 36 |
+
try:
|
| 37 |
+
import PyPDF2
|
| 38 |
+
PDF_READER_AVAILABLE = True
|
| 39 |
+
except ImportError:
|
| 40 |
+
PDF_READER_AVAILABLE = False
|
| 41 |
+
print("WARNUNG: PyPDF2 nicht installiert. PDF-Lesefunktion ist deaktiviert.")
|
| 42 |
+
|
| 43 |
+
# --- Konstanten ---
|
| 44 |
+
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 45 |
+
# Wähle ein Instruction-Following Modell von Hugging Face Hub
|
| 46 |
+
# Beispiele: "meta-llama/Meta-Llama-3-8B-Instruct", "mistralai/Mixtral-8x7B-Instruct-v0.1", "google/gemma-1.1-7b-it"
|
| 47 |
+
# Stelle sicher, dass das Modell über die kostenlose Inference API verfügbar ist oder du Inference Endpoints verwendest.
|
| 48 |
+
HF_MODEL_ID = os.getenv("HF_MODEL_ID", "meta-llama/Meta-Llama-3-8B-Instruct") # Standardmodell, kann über Env Var überschrieben werden
|
| 49 |
+
|
| 50 |
+
# --- Globale Variablen für Clients (werden in initialize_agent gesetzt) ---
|
| 51 |
+
hf_token = None
|
| 52 |
+
search_client = None
|
| 53 |
+
agent_instance = None # Wird pro Lauf initialisiert
|
| 54 |
+
|
| 55 |
+
# --- Temporäre Datei Verwaltung ---
|
| 56 |
+
temp_files_to_clean = set()


def cleanup_temp_files():
    """Delete every registered temporary file and forget it.

    Paths are taken from the module-level ``temp_files_to_clean`` set. A path
    is dropped from the set once its file is gone (deleted here, or already
    missing). A path whose deletion fails with another ``OSError`` stays
    registered so that a later call can retry it.
    """
    print("Cleaning up temporary files...")
    # Iterate over a snapshot: the set is mutated inside the loop.
    for file_path in list(temp_files_to_clean):
        try:
            # EAFP: removing directly avoids the exists()/remove() race.
            os.remove(file_path)
        except FileNotFoundError:
            # Already gone (never written, or removed by another cleanup run).
            pass
        except OSError as e:
            print(f"Error removing temporary file {file_path}: {e}")
            continue
        else:
            print(f"Removed temporary file: {file_path}")
        # discard() rather than remove(): no KeyError if an overlapping
        # cleanup run already dropped this entry.
        temp_files_to_clean.discard(file_path)


# Run the cleanup once more when the interpreter exits.
atexit.register(cleanup_temp_files)
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
# --- Tool Definitionen für smol-agent ---
|
| 74 |
+
|
| 75 |
+
@tool
def search_web(query: str, max_results: int = 3) -> str:
    """
    Searches the web for the given query and returns a summary of the top results.
    Use this to find recent information or facts not readily available.
    Args:
        query (str): The search query.
        max_results (int): The maximum number of results to return (default 3).
    Returns:
        str: A string containing the search results, or an error message.
    """
    print(f"Tool: search_web(query='{query}', max_results={max_results})")

    # Guard clause: nothing to do until initialize_agent() has set a client.
    if not search_client:
        return "Search tool is not available/configured."

    try:
        # The flag is tested first so the client class name is only looked up
        # when its import was actually attempted.
        if USE_TAVILY and isinstance(search_client, TavilyClient):
            response = search_client.search(query=query, search_depth="basic", max_results=max_results)
            # Each snippet is capped at 500 characters.
            lines = [
                f"URL: {hit['url']}\nContent: {hit['content'][:500]}..."
                for hit in response.get('results', [])
            ]
            if not lines:
                return "No search results found."
            return "\n".join(lines)

        if USE_DUCKDUCKGO and isinstance(search_client, DDGS):
            hits = search_client.text(query, max_results=max_results)
            if not hits:
                return "No search results found."
            lines = [
                f"Title: {hit['title']}\nURL: {hit['href']}\nSnippet: {hit['body'][:500]}..."
                for hit in hits
            ]
            return "\n".join(lines)

        return "No compatible search client configured."
    except Exception as e:
        # Any failure is reported back to the agent as text instead of raising.
        print(f"Search API Error: {e}")
        return f"Error during search: {e}"
|
| 104 |
+
|
| 105 |
+
@tool
def download_task_file(task_id: str) -> str:
    """
    Downloads a file associated with a specific task ID from the evaluation server.
    Use this ONLY if the question requires information from a specific file linked to the task.
    Args:
        task_id (str): The unique identifier for the task whose file needs to be downloaded.
    Returns:
        str: The local path to the downloaded file (e.g., '/tmp/tmpXYZ.pdf') if successful,
             otherwise an error message or 'No file found'.
    """
    print(f"Tool: download_task_file(task_id='{task_id}')")
    file_url = f"{DEFAULT_API_URL}/files/{task_id}"

    # (content-type markers, file suffix); the first matching entry wins.
    suffix_rules = (
        (('pdf',), ".pdf"),
        (('png',), ".png"),
        (('jpeg', 'jpg'), ".jpg"),
        (('csv',), ".csv"),
        (('plain', 'text'), ".txt"),
    )

    try:
        # Using the response as a context manager releases the streamed
        # connection on every exit path, including errors.
        with requests.get(file_url, stream=True, timeout=30) as response:
            response.raise_for_status()  # raises HTTPError for 4xx/5xx

            # Derive the file suffix from the Content-Type header.
            content_type = response.headers.get('content-type', '').lower()
            suffix = ".tmp"  # default when no rule matches
            for markers, candidate in suffix_rules:
                if any(marker in content_type for marker in markers):
                    suffix = candidate
                    break

            # Keep the task id in the file name for traceability, restricted
            # to characters that are safe in a file name.
            safe_task_id = re.sub(r'[^\w\-]+', '_', task_id)
            temp_file_path = os.path.join(
                tempfile.gettempdir(), f"gaia_task_{safe_task_id}{suffix}"
            )

            # Register before writing so that a partially written file is
            # still removed by cleanup_temp_files() if the download fails.
            temp_files_to_clean.add(temp_file_path)
            with open(temp_file_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # skip keep-alive chunks
                        f.write(chunk)

        print(f"File downloaded successfully to {temp_file_path}")
        return temp_file_path

    except requests.exceptions.HTTPError as e:
        if e.response.status_code == 404:
            print(f"No file found on server for task_id {task_id}.")
            return "Error: No file found for this task ID."
        print(f"HTTP Error downloading file for task {task_id}: {e}")
        return f"Error: Failed to download file (HTTP {e.response.status_code})."
    except requests.exceptions.RequestException as e:
        print(f"Network Error downloading file for task {task_id}: {e}")
        return f"Error: Failed to download file due to network issue: {e}"
    except Exception as e:
        # Errors are returned as text so the agent can react to them.
        print(f"Unexpected error downloading file for task {task_id}: {e}")
        return f"Error: Unexpected error during file download: {e}"
|
| 164 |
+
|
| 165 |
+
@tool
def read_file_content(file_path: str) -> str:
    """
    Reads the text content of a previously downloaded file (PDF or plain text).
    Use this tool AFTER 'download_task_file' has successfully returned a file path.
    Args:
        file_path (str): The local path to the file (must be a path returned by 'download_task_file').
    Returns:
        str: The extracted text content (truncated if very long), or an error message.
    """
    print(f"Tool: read_file_content(file_path='{file_path}')")

    max_chars = 7000  # upper bound on the text handed back to the agent

    # Security check: only files inside the temp directory may be read.
    # Both sides are resolved first so that '..' segments, symlinks and
    # sibling directories sharing the prefix (e.g. '/tmpfoo') cannot pass,
    # which a plain startswith() comparison would allow.
    temp_root = os.path.realpath(tempfile.gettempdir())
    resolved_path = os.path.realpath(file_path) if file_path else ""
    try:
        inside_temp = bool(resolved_path) and (
            os.path.commonpath([temp_root, resolved_path]) == temp_root
        )
    except ValueError:
        # commonpath() rejects paths that cannot be compared (e.g. different drives).
        inside_temp = False
    if not inside_temp:
        print(f"Security Alert: Attempted to read file outside temp directory: {file_path}")
        return "Error: Invalid file path provided. Only downloaded files can be read."

    if not os.path.exists(resolved_path):
        print(f"Error: File not found at path: {file_path}")
        return "Error: File not found at the specified path."

    display_name = os.path.basename(file_path)
    lowered = resolved_path.lower()

    try:
        if lowered.endswith(".pdf"):
            if not PDF_READER_AVAILABLE:
                return "Error: Cannot read PDF file because PyPDF2 library is not installed."
            text = ""
            with open(resolved_path, 'rb') as f:
                reader = PyPDF2.PdfReader(f)
                for page in reader.pages:
                    # extract_text() may return None for pages without text.
                    text += page.extract_text() or ""
                    if len(text) > max_chars:
                        text = text[:max_chars] + "\n... (content truncated)"
                        break
            print(f"Successfully read ~{len(text)} chars from PDF: {display_name}")
            return f"Content of '{display_name}':\n{text}"

        if lowered.endswith((".png", ".jpg", ".jpeg")):
            # Images are reported but not interpreted; a vision tool could be
            # plugged in here later.
            print(f"File is an image: {display_name}. Reading image content not implemented.")
            return f"Observation: File '{display_name}' is an image. Cannot read text content."

        # Anything else is read as UTF-8 text, ignoring undecodable bytes.
        with open(resolved_path, 'r', encoding='utf-8', errors='ignore') as f:
            # Read one extra character so a file of exactly max_chars is not
            # mislabelled as truncated.
            content = f.read(max_chars + 1)
        if len(content) > max_chars:
            content = content[:max_chars] + "\n... (content truncated)"
        print(f"Successfully read ~{len(content)} chars from text file: {display_name}")
        return f"Content of '{display_name}':\n{content}"

    except Exception as e:
        # Errors are returned as text so the agent can react to them.
        print(f"Error reading file {file_path}: {e}")
        return f"Error: Failed to read file content: {e}"
|
| 218 |
+
|
| 219 |
+
|
| 220 |
+
# --- Agent Initialisierung ---
|
| 221 |
+
def initialize_agent():
    """Create the search client, the LLM wrapper and the agent instance.

    Results are stored in the module-level ``hf_token``, ``search_client``
    and ``agent_instance`` variables.

    Raises:
        ValueError: If the ``HUGGINGFACE_TOKEN`` environment variable is unset
            or empty.
    """
    global hf_token, search_client, agent_instance
    print("Initializing agent and clients...")

    hf_token = os.getenv("HUGGINGFACE_TOKEN")
    if not hf_token:
        raise ValueError("HUGGINGFACE_TOKEN Secret nicht im Hugging Face Space gefunden!")

    # --- Search client -----------------------------------------------------
    api_key = os.getenv("TAVILY_API_KEY") if USE_TAVILY else None
    if USE_TAVILY and api_key:
        search_client = TavilyClient(api_key=api_key)
        print("Using Tavily for search.")
    elif USE_TAVILY:
        # Tavily requested but no key available: try DuckDuckGo instead.
        print("WARNUNG: TAVILY_API_KEY nicht gefunden, obwohl USE_TAVILY=True.")
        if not USE_DUCKDUCKGO:
            search_client = None
            print("WARNUNG: Suche deaktiviert.")
        else:
            try:
                search_client = DDGS()
                print("Falling back to DuckDuckGo for search.")
            except NameError:
                # DDGS is undefined when its import did not happen.
                search_client = None
                print("WARNUNG: DuckDuckGo auch nicht verfügbar. Suche deaktiviert.")
    elif not USE_DUCKDUCKGO:
        search_client = None
        print("Web search is disabled.")
    else:
        try:
            search_client = DDGS()
            print("Using DuckDuckGo for search.")
        except NameError:
            search_client = None
            print("WARNUNG: DuckDuckGo nicht verfügbar. Suche deaktiviert.")

    # --- LLM client (Hugging Face Inference API) ---------------------------
    inference_llm = InferenceAPI(
        model_id=HF_MODEL_ID,
        token=hf_token,
        max_new_tokens=1500,
        temperature=0.1,  # low temperature for factual answers
    )
    print(f"LLM configured with model: {HF_MODEL_ID}")

    # --- Agent instance ----------------------------------------------------
    # The tools are not bound here; they are passed on each run() call. The
    # list is only used for the log message below.
    registered_tools = [search_web, download_task_file, read_file_content]
    agent_instance = Agent(llm=inference_llm)
    print(f"Smol Agent initialized with {len(registered_tools)} tools.")
|
| 279 |
+
|
| 280 |
+
|
| 281 |
+
# --- Angepasste Hauptfunktion ---
|
| 282 |
+
def _build_agent_prompt(task_id, question_text):
    """Return the instruction prompt handed to the agent for one question."""
    return f"""
You are an expert AI assistant solving a challenge question.
Your task is to answer the following question accurately and concisely.
Use the available tools ONLY when necessary to find information or access required files.

**Available Tools:**
* `search_web(query: str, max_results: int = 3)`: Searches the web.
* `download_task_file(task_id: str)`: Downloads the specific file for a task. Use the task_id '{task_id}' if you need the file for THIS question. Returns the local file path.
* `read_file_content(file_path: str)`: Reads text from a downloaded file using the path returned by download_task_file.

**Current Task:**
* Task ID: {task_id}
* Question: {question_text}

**Instructions:**
1. Think step-by-step to break down the question.
2. Use the tools provided if you need external information or file content. Make sure to use the correct task_id ('{task_id}') for `download_task_file`.
3. Reason through the information obtained.
4. Provide ONLY the final answer to the question, without any introductory phrases, explanations, or conversational text like "The answer is..." or "Based on my analysis...".
5. Format the answer exactly as requested by the question (e.g., just a year, a comma-separated list, etc.).

Begin!
"""


def _clean_agent_answer(agent_response):
    """Strip common answer prefixes; map an empty response to an error string."""
    if not agent_response:
        return "Error: Agent returned an empty response."
    # str() keeps this working if the agent returns a non-string object.
    return re.sub(
        r"^(Final Answer:|The answer is:|Here is the answer:)\s*",
        "",
        str(agent_response).strip(),
        flags=re.IGNORECASE,
    )


def _format_success_status(result_data):
    """Build the status text shown after a successful submission."""
    score = result_data.get('score', 'N/A')
    # Only numbers take the '.2f' format; the 'N/A' default would raise.
    score_text = f"{score:.2f}" if isinstance(score, (int, float)) else str(score)
    return (
        f"Submission Successful!\n"
        f"User: {result_data.get('username')}\n"
        f"Score: {score_text}% "
        f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
        f"Message: {result_data.get('message', 'No message received.')}"
    )


def _describe_http_error(error):
    """Summarise an HTTP error response from the submit endpoint."""
    response = error.response
    detail = f"Server responded with status {response.status_code}."
    try:
        payload = response.json()
    except ValueError:
        # JSON decode errors raised by requests derive from ValueError.
        return detail + f" Response: {response.text[:500]}"
    # A non-dict JSON body is reported as-is instead of raising on .get().
    api_error = payload.get('detail', response.text) if isinstance(payload, dict) else payload
    if isinstance(api_error, list) and len(api_error) > 0 and isinstance(api_error[0], dict):
        return detail + f" Detail: {api_error[0].get('msg', str(api_error))}"
    return detail + f" Detail: {str(api_error)}"


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the smol-agent on them, submits all answers,
    and displays the results.

    Args:
        profile: The logged-in user's OAuth profile, or None when nobody is
            logged in.

    Returns:
        A ``(status_text, results)`` tuple. ``results`` is a DataFrame of
        task id / question / submitted answer once the agent has run, and
        None when the run stops earlier (no login, initialisation failure,
        questions could not be fetched).
    """
    space_id = os.getenv("SPACE_ID")

    if not profile:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None
    username = f"{profile.username}"
    print(f"User logged in: {username}")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Initialise agent and clients (on every run).
    try:
        initialize_agent()
        if not agent_instance:
            raise RuntimeError("Agent instance could not be initialized.")
    except ValueError as e:
        print(f"Error during initialization: {e}")
        return f"Configuration Error: {e}", None
    except Exception as e:
        print(f"Error initializing agent/clients: {e}")
        return f"Error initializing agent: {e}", None

    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Local Run - Code Link N/A"
    print(f"Agent Code Link: {agent_code}")

    # 2. Fetch questions.
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=30)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data or not isinstance(questions_data, list):
            print(f"Fetched questions list is empty or invalid format: {questions_data}")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    except Exception as e:
        print(f"Error fetching questions ({type(e).__name__}): {e}")
        return f"Error fetching questions: {e}", None

    # 3. Run the agent on every question.
    start_time = datetime.now()
    results_log = []
    answers_payload = []
    print(f"Running smol-agent on {len(questions_data)} questions using {HF_MODEL_ID}...")
    status_updates = []

    for i, item in enumerate(questions_data):
        task_id = item.get("task_id")
        question_text = item.get("question")

        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue

        current_status = f"Processing question {i+1}/{len(questions_data)} (Task ID: {task_id})..."
        print(current_status)
        status_updates.append(current_status)

        try:
            agent_response = agent_instance.run(
                prompt=_build_agent_prompt(task_id, question_text),
                tools=[search_web, download_task_file, read_file_content],  # tools are supplied per run
            )
            submitted_answer = _clean_agent_answer(agent_response)
            # str() before slicing: an empty/None response must not raise
            # here and overwrite the answer chosen above.
            print(f"Task {task_id} completed by agent. Raw Response: '{str(agent_response)[:100]}...' | Submitted Answer: '{submitted_answer}'")
        except Exception as e:
            print(f"SMOL_AGENT ERROR on task {task_id} ({type(e).__name__}): {e}")
            # A failure on one question is submitted as a short error marker
            # so the remaining questions are still processed.
            submitted_answer = f"ERROR: {type(e).__name__}"

        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
        results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})

    duration = datetime.now() - start_time
    print(f"Agent processing finished in {duration}.")

    # 4. Prepare submission.
    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        cleanup_temp_files()
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished in {duration}. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit.
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=120)
        response.raise_for_status()
        final_status = _format_success_status(response.json())
        print("Submission successful.")
        # Append the last five progress lines to the status text.
        status_message = final_status + f"\n\nAgent ({HF_MODEL_ID}) Processing Log:\n" + "\n".join(status_updates[-5:])
    except requests.exceptions.HTTPError as e:
        status_message = f"Submission Failed: {_describe_http_error(e)}"
        print(status_message)
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
        print(status_message)
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
        print(status_message)
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"
        print(status_message)
    finally:
        # Downloaded task files are removed whatever the submission outcome.
        cleanup_temp_files()

    return status_message, pd.DataFrame(results_log)
|
| 476 |
|
|
|
|
|
|
|
| 477 |
|
| 478 |
+
# --- Gradio Interface (angepasst für smol-agent) ---
|
| 479 |
# --- Gradio interface ---
with gr.Blocks() as demo:
    # Name of the search backend shown in the instructions below.
    if USE_TAVILY:
        search_backend_label = 'Tavily'
    elif USE_DUCKDUCKGO:
        search_backend_label = 'DuckDuckGo'
    else:
        search_backend_label = 'Disabled'

    gr.Markdown("# Smol Agent Evaluation Runner (Hugging Face)")
    gr.Markdown(
        f"""
        **Instructions:**

        1. Ensure you have added your `HUGGINGFACE_TOKEN` (with write access) as a Secret in your Space settings. Optionally add `TAVILY_API_KEY` if using Tavily search.
        2. Make sure `requirements.txt` includes `smol-agent[huggingface]`, search libraries (`duckduckgo-search` or `tavily-python`), and `pypdf2`.
        3. The agent uses the Hugging Face Inference API with the model: **{HF_MODEL_ID}**. You can change this by setting the `HF_MODEL_ID` environment variable in your Space settings.
        4. Log in to your Hugging Face account below.
        5. Click 'Run Evaluation & Submit All Answers'. **This will take time** as the agent processes each question using the Inference API.

        ---
        **Agent Details:**
        * Uses the `smol-agent` library.
        * Leverages Hugging Face Inference API for LLM calls.
        * Tools: Web Search ({search_backend_label}), File Download, File Reading (PDF/Text).
        * Check the Space console logs for detailed agent behavior.
        """
    )

    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    status_output = gr.Textbox(label="Run Status / Submission Result", lines=10, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True, height=400)

    def get_profile(request: gr.Request):
        """Unused placeholder; it is not wired to any event and returns None.

        Kept in case the profile ever has to be extracted from the request
        by hand instead of being supplied to the click handler by Gradio.
        """
        pass

    # No explicit inputs: the handler's `profile` parameter is expected to be
    # filled in by Gradio from the login state.
    # NOTE(review): if that does not happen, pass the profile through a
    # gr.State input instead.
    run_button.click(
        fn=run_and_submit_all,
        inputs=[],
        outputs=[status_output, results_table],
        api_name="run_evaluation_smol"
    )
|
| 527 |
+
|
| 528 |
+
# --- App Start (unverändert) ---
|
| 529 |
if __name__ == "__main__":
    # Startup banner: report the runtime environment before launching the UI.
    banner_title = " App Starting (Smol Agent Version) "
    print("\n" + "-"*30 + banner_title + "-"*30)

    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if not space_host_startup:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
    else:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        print(f" Runtime URL should be: https://{space_host_startup}.hf.space")

    if not space_id_startup:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
    else:
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")

    # Same precedence as the rest of the file: Tavily, then DuckDuckGo.
    if USE_TAVILY:
        search_label = 'Tavily'
    elif USE_DUCKDUCKGO:
        search_label = 'DuckDuckGo'
    else:
        search_label = 'Disabled'

    print(f" Using HF Model via Inference API: {HF_MODEL_ID}")
    print(f" Search Tool: {search_label}")
    # Closing rule as wide as the opening banner (30 + title + 30).
    print("-"*(60 + len(banner_title)) + "\n")

    print("Launching Gradio Interface for Smol Agent Evaluation...")
    demo.launch(debug=False, share=False)  # debug off for normal operation
|