Update app.py
Browse files
app.py
CHANGED
|
@@ -66,36 +66,56 @@ atexit.register(cleanup_temp_files)
|
|
| 66 |
@tool
|
| 67 |
def search_web(query: str, max_results: int = 3) -> str:
|
| 68 |
"""
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
"""
|
| 71 |
if not search_client:
|
| 72 |
-
return "Search tool not configured."
|
| 73 |
try:
|
| 74 |
if USE_TAVILY and isinstance(search_client, TavilyClient):
|
| 75 |
res = search_client.search(query=query, search_depth="basic", max_results=max_results)
|
| 76 |
items = res.get('results', [])
|
| 77 |
-
|
|
|
|
|
|
|
| 78 |
elif USE_DUCKDUCKGO and isinstance(search_client, DDGS):
|
| 79 |
results = search_client.text(query, max_results=max_results)
|
| 80 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
except Exception as e:
|
| 82 |
return f"Error during search: {e}"
|
| 83 |
-
return "No results."
|
| 84 |
|
| 85 |
@tool
|
| 86 |
def download_task_file(task_id: str) -> str:
|
| 87 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
url = f"{DEFAULT_API_URL}/files/{task_id}"
|
| 89 |
try:
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
suffix = '.pdf' if 'pdf' in
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
path = os.path.join(
|
| 97 |
with open(path, 'wb') as f:
|
| 98 |
-
for chunk in
|
| 99 |
temp_files_to_clean.add(path)
|
| 100 |
return path
|
| 101 |
except Exception as e:
|
|
@@ -103,21 +123,35 @@ def download_task_file(task_id: str) -> str:
|
|
| 103 |
|
| 104 |
@tool
|
| 105 |
def read_file_content(file_path: str) -> str:
|
| 106 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
if not file_path.startswith(tempfile.gettempdir()):
|
| 108 |
-
return "Error: Invalid path."
|
|
|
|
|
|
|
| 109 |
try:
|
| 110 |
-
if file_path.endswith('.pdf'):
|
| 111 |
-
if not PDF_READER_AVAILABLE:
|
| 112 |
-
|
|
|
|
| 113 |
with open(file_path, 'rb') as f:
|
| 114 |
-
|
| 115 |
-
for
|
| 116 |
-
|
| 117 |
-
if len(
|
| 118 |
-
|
|
|
|
|
|
|
| 119 |
else:
|
| 120 |
-
|
|
|
|
| 121 |
except Exception as e:
|
| 122 |
return f"Error: {e}"
|
| 123 |
|
|
@@ -129,63 +163,79 @@ def initialize_agent():
|
|
| 129 |
key = os.getenv('TAVILY_API_KEY')
|
| 130 |
search_client = TavilyClient(api_key=key) if key else False
|
| 131 |
elif USE_DUCKDUCKGO:
|
| 132 |
-
|
|
|
|
|
|
|
|
|
|
| 133 |
else:
|
| 134 |
search_client = False
|
| 135 |
token = os.getenv('HUGGINGFACE_TOKEN')
|
| 136 |
if not token:
|
| 137 |
-
raise ValueError("HUGGINGFACE_TOKEN
|
| 138 |
hf_model = HfApiModel()
|
| 139 |
tools = [search_web, download_task_file, read_file_content]
|
| 140 |
-
if not search_client:
|
|
|
|
| 141 |
agent_instance = CodeAgent(tools=tools, model=hf_model)
|
| 142 |
|
| 143 |
-
# ---
|
| 144 |
def run_and_submit_all(profile, progress=gr.Progress(track_tqdm=True)):
|
| 145 |
-
#
|
| 146 |
if isinstance(profile, str):
|
| 147 |
-
try:
|
| 148 |
-
|
|
|
|
|
|
|
| 149 |
if not profile or 'username' not in profile:
|
| 150 |
return "Bitte zuerst einloggen.", None
|
| 151 |
username = profile['username']
|
|
|
|
| 152 |
try:
|
| 153 |
initialize_agent()
|
| 154 |
except Exception as e:
|
| 155 |
-
return f"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
|
| 157 |
-
# Fragen holen
|
| 158 |
-
questions = requests.get(f"{DEFAULT_API_URL}/questions").json()
|
| 159 |
logs, payload = [], []
|
| 160 |
-
for item in progress.tqdm(questions, desc="Bearbeite"
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
|
|
|
|
|
|
| 164 |
try:
|
| 165 |
-
|
| 166 |
-
|
| 167 |
except Exception as e:
|
| 168 |
-
|
| 169 |
-
logs.append({'Task ID':
|
| 170 |
-
payload.append({'task_id':
|
|
|
|
| 171 |
df = pd.DataFrame(logs)
|
| 172 |
|
| 173 |
-
#
|
| 174 |
-
|
| 175 |
try:
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
|
|
|
| 179 |
except Exception as e:
|
| 180 |
-
status = f"
|
|
|
|
| 181 |
cleanup_temp_files()
|
| 182 |
return status, df
|
| 183 |
|
| 184 |
# --- Gradio UI ---
|
| 185 |
with gr.Blocks() as demo:
|
| 186 |
gr.Markdown("# Smol CodeAgent Evaluation Runner")
|
| 187 |
-
gr.Markdown("Bitte einloggen und dann auf
|
| 188 |
-
with gr.Row():
|
|
|
|
| 189 |
run_btn = gr.Button("Run Evaluation & Submit All Answers")
|
| 190 |
out_status = gr.Textbox(label="Status", lines=5)
|
| 191 |
out_table = gr.DataFrame(label="Ergebnisse")
|
|
|
|
| 66 |
@tool
def search_web(query: str, max_results: int = 3) -> str:
    """
    Look up a query on the web and return the top hits as plain text.

    Args:
        query (str): The search query to look up online.
        max_results (int): The maximum number of search results to retrieve.

    Returns:
        str: One entry per hit, joined by newlines. Each entry holds the URL and a snippet cut to 200 characters (DuckDuckGo entries also carry the title). An explanatory message is returned instead when no client is configured, nothing is found, or the search raises.
    """
    # Guard clause: the module-level client is falsy when no backend was set up.
    if not search_client:
        return "Error: Search tool not configured."

    try:
        if USE_TAVILY and isinstance(search_client, TavilyClient):
            response = search_client.search(query=query, search_depth="basic", max_results=max_results)
            hits = response.get('results', [])
            if not hits:
                return "No search results found."
            entries = []
            for hit in hits:
                entries.append(f"URL: {hit['url']}\nContent: {hit.get('content','')[:200]}...")
            return "\n".join(entries)

        if USE_DUCKDUCKGO and isinstance(search_client, DDGS):
            hits = search_client.text(query, max_results=max_results)
            if not hits:
                return "No search results found."
            entries = []
            for hit in hits:
                entries.append(f"Title: {hit['title']}\nURL: {hit['href']}\nSnippet: {hit['body'][:200]}...")
            return "\n".join(entries)

        # A client object exists but matches neither supported backend.
        return "Error: No compatible search client available."
    except Exception as e:
        # Any backend failure is reported as text so the calling agent can react to it.
        return f"Error during search: {e}"
|
|
|
|
| 96 |
|
| 97 |
@tool
|
| 98 |
def download_task_file(task_id: str) -> str:
|
| 99 |
+
"""
|
| 100 |
+
Download the file associated with a specific task ID from the evaluation server.
|
| 101 |
+
|
| 102 |
+
Args:
|
| 103 |
+
task_id (str): Unique identifier of the task whose file should be downloaded.
|
| 104 |
+
|
| 105 |
+
Returns:
|
| 106 |
+
str: Local filesystem path to the downloaded file, or an error message.
|
| 107 |
+
"""
|
| 108 |
url = f"{DEFAULT_API_URL}/files/{task_id}"
|
| 109 |
try:
|
| 110 |
+
response = requests.get(url, stream=True, timeout=30)
|
| 111 |
+
response.raise_for_status()
|
| 112 |
+
content_type = response.headers.get('content-type', '')
|
| 113 |
+
suffix = '.pdf' if 'pdf' in content_type else '.tmp'
|
| 114 |
+
tmp_dir = tempfile.gettempdir()
|
| 115 |
+
filename = f"task_{task_id}_{datetime.now().strftime('%Y%m%d%H%M%S')}{suffix}"
|
| 116 |
+
path = os.path.join(tmp_dir, filename)
|
| 117 |
with open(path, 'wb') as f:
|
| 118 |
+
for chunk in response.iter_content(8192): f.write(chunk)
|
| 119 |
temp_files_to_clean.add(path)
|
| 120 |
return path
|
| 121 |
except Exception as e:
|
|
|
|
| 123 |
|
| 124 |
@tool
def read_file_content(file_path: str) -> str:
    """
    Read the text content of a previously downloaded file (PDF or plain text).

    Args:
        file_path (str): Absolute local path to the file to read (from download_task_file). After resolving symlinks and ".." segments it must lie inside the system temporary directory.

    Returns:
        str: Extracted text content, cut to 7000 characters with a truncation marker appended when the file is longer, or an error message.
    """
    max_chars = 7000
    truncation_note = '\n... (truncated)'

    # Resolve both paths before comparing. A plain startswith() on the raw
    # string accepts "<tmp>/../etc/passwd", sibling directories such as
    # "<tmp>foo/x", and symlinks that point outside the temp directory.
    temp_root = os.path.normcase(os.path.realpath(tempfile.gettempdir()))
    resolved = os.path.realpath(file_path)
    try:
        inside_temp = os.path.commonpath([temp_root, os.path.normcase(resolved)]) == temp_root
    except ValueError:
        # commonpath() raises when the paths cannot share a prefix
        # (e.g. different drives on Windows).
        inside_temp = False
    if not inside_temp:
        return "Error: Invalid file path."
    if not os.path.exists(resolved):
        return "Error: File does not exist."

    try:
        if resolved.lower().endswith('.pdf'):
            if not PDF_READER_AVAILABLE:
                return "Error: PyPDF2 not installed."
            text = ''
            with open(resolved, 'rb') as f:
                reader = PyPDF2.PdfReader(f)
                for page in reader.pages:
                    # extract_text() may yield None for pages without text.
                    text += page.extract_text() or ''
                    if len(text) > max_chars:
                        text = text[:max_chars] + truncation_note
                        break
            return text

        # Read one character beyond the limit so truncation can be detected
        # and flagged the same way as in the PDF branch.
        with open(resolved, 'r', encoding='utf-8', errors='ignore') as f:
            text = f.read(max_chars + 1)
        if len(text) > max_chars:
            return text[:max_chars] + truncation_note
        return text
    except Exception as e:
        return f"Error: {e}"
|
| 157 |
|
|
|
|
| 163 |
key = os.getenv('TAVILY_API_KEY')
|
| 164 |
search_client = TavilyClient(api_key=key) if key else False
|
| 165 |
elif USE_DUCKDUCKGO:
|
| 166 |
+
try:
|
| 167 |
+
search_client = DDGS()
|
| 168 |
+
except:
|
| 169 |
+
search_client = False
|
| 170 |
else:
|
| 171 |
search_client = False
|
| 172 |
token = os.getenv('HUGGINGFACE_TOKEN')
|
| 173 |
if not token:
|
| 174 |
+
raise ValueError("HUGGINGFACE_TOKEN environment variable is required.")
|
| 175 |
hf_model = HfApiModel()
|
| 176 |
tools = [search_web, download_task_file, read_file_content]
|
| 177 |
+
if not search_client:
|
| 178 |
+
tools.remove(search_web)
|
| 179 |
agent_instance = CodeAgent(tools=tools, model=hf_model)
|
| 180 |
|
| 181 |
+
# --- Main Logic ---
|
| 182 |
def run_and_submit_all(profile, progress=gr.Progress(track_tqdm=True)):
    """Run the agent on every evaluation question and submit the answers.

    Args:
        profile: Login profile: either a mapping containing a 'username'
            key, or a JSON string that decodes to such a mapping.
        progress: Gradio progress tracker; its tqdm wrapper drives the
            question loop.

    Returns:
        tuple: (status, df). status is a human-readable message. df is a
        pandas DataFrame with one row per processed task, or None when the
        run aborted before any question was processed.
    """
    # Parse profile if passed as JSON string
    if isinstance(profile, str):
        try:
            profile = json.loads(profile)
        except json.JSONDecodeError:
            return "Error: Invalid profile format.", None
        # Valid JSON that is not an object (e.g. "5") cannot carry a username
        # and would make the membership test below raise TypeError.
        if not isinstance(profile, dict):
            return "Error: Invalid profile format.", None
    if not profile or 'username' not in profile:
        return "Bitte zuerst einloggen.", None
    username = profile['username']

    try:
        initialize_agent()
    except Exception as e:
        return f"Initialization Error: {e}", None

    # Fetch questions. Network/HTTP/JSON failures are reported as a status
    # message, like every other failure in this function, instead of escaping
    # as an unhandled exception.
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
        resp.raise_for_status()
        questions = resp.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    logs, payload = [], []
    for item in progress.tqdm(questions, desc="Bearbeite Fragen"):
        task_id = item.get('task_id')
        question = item.get('question')
        if not task_id or question is None:
            continue
        prompt = f"Task {task_id}: {question}"
        try:
            # Pass the task positionally: the agent's run() does not define a
            # `prompt` keyword, so run(prompt=...) fails with TypeError.
            result = agent_instance.run(prompt)
            # The agent may return non-string values (e.g. numbers); convert
            # explicitly so 0/False are kept and re.sub gets a string.
            raw_answer = "" if result is None else str(result).strip()
            answer = re.sub(r"^(Answer:|Final Answer:)", "", raw_answer).strip()
        except Exception as e:
            # A failing task is recorded and submitted as an error string so
            # the remaining tasks still run.
            answer = f"ERROR: {e}"
        logs.append({'Task ID': task_id, 'Question': question, 'Submitted Answer': answer})
        payload.append({'task_id': task_id, 'submitted_answer': answer})

    df = pd.DataFrame(logs)

    # Submit answers
    submission = {'username': username, 'agent_code': '...', 'answers': payload}
    try:
        post = requests.post(f"{DEFAULT_API_URL}/submit", json=submission, timeout=180)
        post.raise_for_status()
        score = post.json().get('score', 0)
        status = f"Erfolg! Score: {score:.2f}%"
    except Exception as e:
        status = f"Submission Error: {e}"

    cleanup_temp_files()
    return status, df
|
| 232 |
|
| 233 |
# --- Gradio UI ---
|
| 234 |
with gr.Blocks() as demo:
|
| 235 |
gr.Markdown("# Smol CodeAgent Evaluation Runner")
|
| 236 |
+
gr.Markdown("Bitte einloggen und dann auf "Run Evaluation & Submit All Answers" klicken.")
|
| 237 |
+
with gr.Row():
|
| 238 |
+
login_btn = gr.LoginButton()
|
| 239 |
run_btn = gr.Button("Run Evaluation & Submit All Answers")
|
| 240 |
out_status = gr.Textbox(label="Status", lines=5)
|
| 241 |
out_table = gr.DataFrame(label="Ergebnisse")
|