pmeyhoefer committed on
Commit
ad7b1a7
·
verified ·
1 Parent(s): 0cf07a2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -52
app.py CHANGED
@@ -66,36 +66,56 @@ atexit.register(cleanup_temp_files)
66
  @tool
67
  def search_web(query: str, max_results: int = 3) -> str:
68
  """
69
- Websuche via Tavily oder DuckDuckGo.
 
 
 
 
 
 
 
70
  """
71
  if not search_client:
72
- return "Search tool not configured."
73
  try:
74
  if USE_TAVILY and isinstance(search_client, TavilyClient):
75
  res = search_client.search(query=query, search_depth="basic", max_results=max_results)
76
  items = res.get('results', [])
77
- return "\n".join([f"URL: {i['url']}\n{ i['content'][:200] }..." for i in items])
 
 
78
  elif USE_DUCKDUCKGO and isinstance(search_client, DDGS):
79
  results = search_client.text(query, max_results=max_results)
80
- return "\n".join([f"Title: {r['title']}\nURL: {r['href']}\n{r['body'][:200]}..." for r in results])
 
 
 
 
81
  except Exception as e:
82
  return f"Error during search: {e}"
83
- return "No results."
84
 
85
  @tool
86
  def download_task_file(task_id: str) -> str:
87
- """Download einer Datei zur Task ID vom Server."""
 
 
 
 
 
 
 
 
88
  url = f"{DEFAULT_API_URL}/files/{task_id}"
89
  try:
90
- r = requests.get(url, stream=True, timeout=30)
91
- r.raise_for_status()
92
- ct = r.headers.get('content-type', '')
93
- suffix = '.pdf' if 'pdf' in ct else '.tmp'
94
- tmp = tempfile.gettempdir()
95
- name = f"task_{task_id}_{datetime.now().strftime('%Y%m%d%H%M%S')}{suffix}"
96
- path = os.path.join(tmp, name)
97
  with open(path, 'wb') as f:
98
- for chunk in r.iter_content(8192): f.write(chunk)
99
  temp_files_to_clean.add(path)
100
  return path
101
  except Exception as e:
@@ -103,21 +123,35 @@ def download_task_file(task_id: str) -> str:
103
 
104
  @tool
105
  def read_file_content(file_path: str) -> str:
106
- """Liest Text aus einer heruntergeladenen Datei."""
 
 
 
 
 
 
 
 
107
  if not file_path.startswith(tempfile.gettempdir()):
108
- return "Error: Invalid path."
 
 
109
  try:
110
- if file_path.endswith('.pdf'):
111
- if not PDF_READER_AVAILABLE: return "Error: PyPDF2 fehlt."
112
- txt = ''
 
113
  with open(file_path, 'rb') as f:
114
- rdr = PyPDF2.PdfReader(f)
115
- for p in rdr.pages:
116
- txt += p.extract_text() or ''
117
- if len(txt) > 5000: break
118
- return txt
 
 
119
  else:
120
- return open(file_path, 'r', encoding='utf-8', errors='ignore').read(5000)
 
121
  except Exception as e:
122
  return f"Error: {e}"
123
 
@@ -129,63 +163,79 @@ def initialize_agent():
129
  key = os.getenv('TAVILY_API_KEY')
130
  search_client = TavilyClient(api_key=key) if key else False
131
  elif USE_DUCKDUCKGO:
132
- search_client = DDGS()
 
 
 
133
  else:
134
  search_client = False
135
  token = os.getenv('HUGGINGFACE_TOKEN')
136
  if not token:
137
- raise ValueError("HUGGINGFACE_TOKEN fehlt.")
138
  hf_model = HfApiModel()
139
  tools = [search_web, download_task_file, read_file_content]
140
- if not search_client: tools = [t for t in tools if t != search_web]
 
141
  agent_instance = CodeAgent(tools=tools, model=hf_model)
142
 
143
- # --- Hauptfunktion ---
144
  def run_and_submit_all(profile, progress=gr.Progress(track_tqdm=True)):
145
- # Profil parsen (evtl. JSON-String)
146
  if isinstance(profile, str):
147
- try: profile = json.loads(profile)
148
- except: return "Ungültiges Profilformat.", None
 
 
149
  if not profile or 'username' not in profile:
150
  return "Bitte zuerst einloggen.", None
151
  username = profile['username']
 
152
  try:
153
  initialize_agent()
154
  except Exception as e:
155
- return f"Init-Error: {e}", None
 
 
 
 
 
156
 
157
- # Fragen holen
158
- questions = requests.get(f"{DEFAULT_API_URL}/questions").json()
159
  logs, payload = [], []
160
- for item in progress.tqdm(questions, desc="Bearbeite" ):
161
- tid, q = item.get('task_id'), item.get('question')
162
- if not tid or not q: continue
163
- prompt = f"Task {tid}: {q}"
 
 
164
  try:
165
- res = agent_instance.run(prompt=prompt)
166
- ans = re.sub(r"^(Answer:|Final Answer:)", "", res or "").strip()
167
  except Exception as e:
168
- ans = f"ERROR: {e}"
169
- logs.append({'Task ID': tid, 'Question': q, 'Submitted Answer': ans})
170
- payload.append({'task_id': tid, 'submitted_answer': ans})
 
171
  df = pd.DataFrame(logs)
172
 
173
- # Submission
174
- sub = {'username': username, 'agent_code': '...', 'answers': payload}
175
  try:
176
- r = requests.post(f"{DEFAULT_API_URL}/submit", json=sub, timeout=180)
177
- r.raise_for_status()
178
- status = f"Erfolg: {r.json().get('score',0):.2f}%"
 
179
  except Exception as e:
180
- status = f"Submit-Error: {e}"
 
181
  cleanup_temp_files()
182
  return status, df
183
 
184
  # --- Gradio UI ---
185
  with gr.Blocks() as demo:
186
  gr.Markdown("# Smol CodeAgent Evaluation Runner")
187
- gr.Markdown("Bitte einloggen und dann auf Ausführen klicken.")
188
- with gr.Row(): login_btn = gr.LoginButton()
 
189
  run_btn = gr.Button("Run Evaluation & Submit All Answers")
190
  out_status = gr.Textbox(label="Status", lines=5)
191
  out_table = gr.DataFrame(label="Ergebnisse")
 
@tool
def search_web(query: str, max_results: int = 3) -> str:
    """
    Search the web for the given query and return a summary of the top results.

    Args:
        query (str): The search query to look up online.
        max_results (int): The maximum number of search results to retrieve.

    Returns:
        str: A concatenated string summarizing the URLs and snippets of the results,
            or a message starting with "Error" / "No search results found.".
    """
    def clip(value, limit=200):
        # Result fields can be missing or None; treat both as empty text so a
        # single malformed hit does not discard the whole result set.
        return f"{str(value or '')[:limit]}..."

    if not search_client:
        return "Error: Search tool not configured."
    try:
        if USE_TAVILY and isinstance(search_client, TavilyClient):
            res = search_client.search(query=query, search_depth="basic", max_results=max_results)
            items = (res or {}).get('results') or []
            if not items:
                return "No search results found."
            return "\n".join(
                f"URL: {item.get('url', '')}\nContent: {clip(item.get('content'))}"
                for item in items
            )
        if USE_DUCKDUCKGO and isinstance(search_client, DDGS):
            # Materialize first: a generator is always truthy, which would make
            # the emptiness check below meaningless.
            results = list(search_client.text(query, max_results=max_results) or [])
            if not results:
                return "No search results found."
            return "\n".join(
                f"Title: {r.get('title', '')}\nURL: {r.get('href', '')}\nSnippet: {clip(r.get('body'))}"
                for r in results
            )
        return "Error: No compatible search client available."
    except Exception as e:
        # Tool boundary: report the failure as text so the agent can react to it.
        return f"Error during search: {e}"
 
96
 
97
  @tool
98
  def download_task_file(task_id: str) -> str:
99
+ """
100
+ Download the file associated with a specific task ID from the evaluation server.
101
+
102
+ Args:
103
+ task_id (str): Unique identifier of the task whose file should be downloaded.
104
+
105
+ Returns:
106
+ str: Local filesystem path to the downloaded file, or an error message.
107
+ """
108
  url = f"{DEFAULT_API_URL}/files/{task_id}"
109
  try:
110
+ response = requests.get(url, stream=True, timeout=30)
111
+ response.raise_for_status()
112
+ content_type = response.headers.get('content-type', '')
113
+ suffix = '.pdf' if 'pdf' in content_type else '.tmp'
114
+ tmp_dir = tempfile.gettempdir()
115
+ filename = f"task_{task_id}_{datetime.now().strftime('%Y%m%d%H%M%S')}{suffix}"
116
+ path = os.path.join(tmp_dir, filename)
117
  with open(path, 'wb') as f:
118
+ for chunk in response.iter_content(8192): f.write(chunk)
119
  temp_files_to_clean.add(path)
120
  return path
121
  except Exception as e:
 
123
 
@tool
def read_file_content(file_path: str) -> str:
    """
    Read the text content of a previously downloaded file (PDF or plain text).

    Args:
        file_path (str): Absolute local path to the file to read (from download_task_file).

    Returns:
        str: Extracted text content truncated if necessary, or an error message.
    """
    max_chars = 7000
    truncation_note = '\n... (truncated)'

    if not isinstance(file_path, str) or not file_path:
        return "Error: Invalid file path."

    # Resolve symlinks and ".." before checking containment; a plain prefix
    # test would accept "/tmp/../etc/passwd" or a sibling like "/tmpfoo/x".
    tmp_root = os.path.realpath(tempfile.gettempdir())
    resolved = os.path.realpath(file_path)
    try:
        inside_tmp = os.path.commonpath([tmp_root, resolved]) == tmp_root
    except ValueError:
        # Raised e.g. for paths on different drives.
        inside_tmp = False
    if not inside_tmp:
        return "Error: Invalid file path."
    if not os.path.exists(resolved):
        return "Error: File does not exist."

    try:
        if resolved.lower().endswith('.pdf'):
            if not PDF_READER_AVAILABLE:
                return "Error: PyPDF2 not installed."
            pieces = []
            collected = 0
            with open(resolved, 'rb') as f:
                reader = PyPDF2.PdfReader(f)
                for page in reader.pages:
                    page_text = page.extract_text() or ''
                    pieces.append(page_text)
                    collected += len(page_text)
                    if collected > max_chars:
                        # Enough text gathered; skip the remaining pages.
                        break
            text = ''.join(pieces)
        else:
            with open(resolved, 'r', encoding='utf-8', errors='ignore') as f:
                # Read one extra character so truncation can be detected.
                text = f.read(max_chars + 1)
        if len(text) > max_chars:
            text = text[:max_chars] + truncation_note
        return text
    except Exception as e:
        # Tool boundary: surface the failure as text for the agent.
        return f"Error: {e}"
157
 
 
163
  key = os.getenv('TAVILY_API_KEY')
164
  search_client = TavilyClient(api_key=key) if key else False
165
  elif USE_DUCKDUCKGO:
166
+ try:
167
+ search_client = DDGS()
168
+ except:
169
+ search_client = False
170
  else:
171
  search_client = False
172
  token = os.getenv('HUGGINGFACE_TOKEN')
173
  if not token:
174
+ raise ValueError("HUGGINGFACE_TOKEN environment variable is required.")
175
  hf_model = HfApiModel()
176
  tools = [search_web, download_task_file, read_file_content]
177
+ if not search_client:
178
+ tools.remove(search_web)
179
  agent_instance = CodeAgent(tools=tools, model=hf_model)
180
 
# --- Main Logic ---
def run_and_submit_all(profile, progress=gr.Progress(track_tqdm=True)):
    """
    Run the agent on every question from the evaluation server and submit the answers.

    Args:
        profile: Login profile. May be a JSON string, a mapping with a
            'username' key, or an object exposing a ``username`` attribute.
        progress: Gradio progress tracker used to wrap the question loop.

    Returns:
        tuple: ``(status_message, results)`` where ``results`` is a pandas
        DataFrame of the submitted answers, or ``None`` when the run aborted
        before any question was processed.
    """
    # Parse profile if passed as JSON string
    if isinstance(profile, str):
        try:
            profile = json.loads(profile)
        except json.JSONDecodeError:
            return "Error: Invalid profile format.", None

    # Accept both mapping-style and attribute-style profiles; anything without
    # a usable username counts as "not logged in" instead of raising.
    if isinstance(profile, dict):
        username = profile.get('username')
    else:
        username = getattr(profile, 'username', None)
    if not username:
        return "Bitte zuerst einloggen.", None

    try:
        initialize_agent()
    except Exception as e:
        return f"Initialization Error: {e}", None

    # Fetch questions; report failures as a status like every other step does.
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
        resp.raise_for_status()
        questions = resp.json()
    except (requests.RequestException, ValueError) as e:
        return f"Error fetching questions: {e}", None
    if not isinstance(questions, list) or not questions:
        return "Error: No questions received.", None

    logs, payload = [], []
    for item in progress.tqdm(questions, desc="Bearbeite Fragen"):
        if not isinstance(item, dict):
            continue
        task_id = item.get('task_id')
        question = item.get('question')
        if not task_id or question is None:
            continue
        prompt = f"Task {task_id}: {question}"
        try:
            # Positional argument: independent of the keyword name the agent's
            # run() uses for the task text.
            result = agent_instance.run(prompt)
            # The agent may return non-string values (numbers, lists); convert
            # explicitly so valid answers such as 0 are neither lost nor
            # turned into a regex TypeError.
            raw_answer = "" if result is None else str(result)
            answer = re.sub(r"^(Answer:|Final Answer:)", "", raw_answer).strip()
        except Exception as e:
            answer = f"ERROR: {e}"
        logs.append({'Task ID': task_id, 'Question': question, 'Submitted Answer': answer})
        payload.append({'task_id': task_id, 'submitted_answer': answer})

    df = pd.DataFrame(logs)

    # Submit answers
    submission = {'username': username, 'agent_code': '...', 'answers': payload}
    try:
        post = requests.post(f"{DEFAULT_API_URL}/submit", json=submission, timeout=180)
        post.raise_for_status()
        score = post.json().get('score', 0)
        try:
            status = f"Erfolg! Score: {float(score):.2f}%"
        except (TypeError, ValueError):
            # Submission succeeded; only the score is not numeric.
            status = f"Erfolg! Score: {score}%"
    except Exception as e:
        status = f"Submission Error: {e}"

    cleanup_temp_files()
    return status, df
232
 
# --- Gradio UI ---
with gr.Blocks() as demo:
    gr.Markdown("# Smol CodeAgent Evaluation Runner")
    # Single-quoted literal: the text itself contains double quotes, which
    # would otherwise terminate the string early and break parsing.
    gr.Markdown('Bitte einloggen und dann auf "Run Evaluation & Submit All Answers" klicken.')
    with gr.Row():
        login_btn = gr.LoginButton()
    run_btn = gr.Button("Run Evaluation & Submit All Answers")
    out_status = gr.Textbox(label="Status", lines=5)
    out_table = gr.DataFrame(label="Ergebnisse")