dmfelder committed on
Commit
a13b02c
·
verified ·
1 Parent(s): 5259aba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -4
app.py CHANGED
@@ -146,6 +146,82 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
146
  return status_message, results_df
147
 
148
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  # --- Build Gradio Interface using Blocks ---
150
  with gr.Blocks() as demo:
151
  gr.Markdown("# Basic Agent Evaluation Runner")
@@ -166,16 +242,15 @@ with gr.Blocks() as demo:
166
 
167
  run_button = gr.Button("Run Evaluation & Submit All Answers")
168
 
169
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
170
- # Removed max_rows=10 from DataFrame constructor
171
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
172
 
173
  run_button.click(
174
- fn=run_and_submit_all,
 
175
  outputs=[status_output, results_table]
176
  )
177
 
178
-
179
  def compute_question_with_attachment(question: str, task_id: str, file_name: str) -> str:
180
  if file_name:
181
  return f"{question}\n\nAttached file: https://agents-course-unit4-scoring.hf.space/files/{task_id}"
 
146
  return status_message, results_df
147
 
148
 
149
def run_evaluation_and_submit_streaming():
    """Fetch the questions, answer them with the agent, submit, and stream progress.

    Generator intended as a Gradio event handler bound to two outputs
    (status textbox, results table). Every yield is a
    ``(status_text, results_table)`` tuple:

    * while the run is in progress, ``status_text`` is the complete log so
      far and ``results_table`` is ``None``;
    * the last yield carries the complete log together with a
      ``pandas.DataFrame`` holding one row per attempted question.

    The whole log is re-sent on every yield (instead of only the newest
    message) so earlier lines, including the final score, stay visible in
    the status box rather than being overwritten by the next update.

    Yields:
        tuple[str, pandas.DataFrame | None]: status text and results table.
    """
    log_lines = []
    results_log = []

    def progress(message):
        # Append to the running log and build the tuple for an in-progress update.
        log_lines.append(message)
        return "\n".join(log_lines), None

    # --- 1. Fetch questions -------------------------------------------------
    yield progress("⏳ Fetching evaluation questions...")
    try:
        response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:  # UI boundary: report any failure instead of crashing
        yield progress(f"❌ Failed to fetch questions: {e}")
        return

    if not questions_data:
        yield progress("❌ No questions received from server.")
        return
    yield progress(f"✅ Fetched {len(questions_data)} questions.")

    # --- 2. Build the agent -------------------------------------------------
    try:
        agent = ShrewdAgent()
    except Exception as e:
        yield progress(f"❌ Failed to initialize agent: {e}")
        return
    yield progress("✅ Agent initialized.")

    # --- 3. Answer every question -------------------------------------------
    answers = []
    total = len(questions_data)
    for i, item in enumerate(questions_data, start=1):
        task_id = item.get("task_id")
        question = item.get("question")
        file_name = item.get("file_name")

        # A missing question would previously crash on ``question.strip()``.
        if not task_id or question is None:
            yield progress(f"⚠️ Skipping Q{i}: missing task_id or question.")
            continue

        yield progress(f"\n---\nQ{i}: {str(question).strip()[:100]}...")
        try:
            full_q = compute_question_with_attachment(question, task_id, file_name)
            answer = agent(full_q)
        except Exception as e:
            err = f"❌ Error answering Q{i}: {e}"
            results_log.append({
                "Task ID": task_id,
                "Question": question,
                "Submitted Answer": err,
            })
            yield progress(err)
            continue

        # Record the answer only after the agent call succeeded, so a failure
        # while formatting the preview can no longer log the same task twice.
        answers.append({"task_id": task_id, "submitted_answer": answer})
        results_log.append({
            "Task ID": task_id,
            "Question": question,
            "Submitted Answer": answer,
        })
        yield progress(f"A{i}: {str(answer).strip()[:200]}")

        # Pause between questions; nothing follows the last one, so skip it there.
        if i < total:
            time.sleep(5)

    # --- 4. Submit ------------------------------------------------------------
    if not answers:
        log_lines.append("❌ No answers were produced; nothing to submit.")
        yield "\n".join(log_lines), pd.DataFrame(results_log)
        return

    space_id = os.getenv("SPACE_ID")
    if not space_id:
        # Payload is unchanged from before; just make the situation visible.
        yield progress("⚠️ SPACE_ID is not set; submitting as 'anonymous'.")
    username = (space_id or "anonymous").split("/")[0]
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    submission_data = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers,
    }

    yield progress("\n📤 Submitting answers...")
    try:
        resp = requests.post(f"{DEFAULT_API_URL}/submit", json=submission_data, timeout=60)
        resp.raise_for_status()
        result = resp.json()
    except Exception as e:
        log_lines.append(f"❌ Submission failed: {e}")
    else:
        log_lines.append(
            f"✅ Submission Complete!\n"
            f"Score: {result.get('score', 'N/A')}%\n"
            f"Correct: {result.get('correct_count')}/{result.get('total_attempted')}\n"
            f"Message: {result.get('message', 'No message')}"
        )

    # Final update: keep the full log on screen and populate the results table.
    yield "\n".join(log_lines), pd.DataFrame(results_log)
222
+
223
+
224
+
225
  # --- Build Gradio Interface using Blocks ---
226
  with gr.Blocks() as demo:
227
  gr.Markdown("# Basic Agent Evaluation Runner")
 
242
 
243
  run_button = gr.Button("Run Evaluation & Submit All Answers")
244
 
245
+ status_output = gr.Textbox(label="Run Status / Submission Result", lines=25, interactive=False, autoscroll=True)
 
246
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
247
 
248
  run_button.click(
249
+ fn=run_evaluation_and_submit_streaming,
250
+ inputs=[],
251
  outputs=[status_output, results_table]
252
  )
253
 
 
254
  def compute_question_with_attachment(question: str, task_id: str, file_name: str) -> str:
255
  if file_name:
256
  return f"{question}\n\nAttached file: https://agents-course-unit4-scoring.hf.space/files/{task_id}"