dmfelder committed on
Commit
04b7101
·
verified ·
1 Parent(s): 403c9cc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +137 -475
app.py CHANGED
@@ -1,463 +1,113 @@
1
  import os
2
- import gradio as gr
3
- import requests
4
- import inspect # This was missing in your latest provided code but is needed for Agent.create_tool
5
- import pandas as pd
6
- import logging
7
  import time
8
- from datetime import datetime
9
- from typing import Dict, List, Optional, Any, Generator, Tuple
10
- from dataclasses import dataclass
11
- from pathlib import Path
12
- import hashlib
13
- import re
14
- import tempfile
15
- from PIL import Image
16
- import soundfile as sf
17
- import numpy as np
18
-
19
- # Core Hugging Face Imports for Agents and Inference
20
- from huggingface_hub import InferenceClient
21
- from transformers import pipeline
22
- from transformers.agents import Agent # This is the main Agent class
23
-
24
- # --- Logging Setup (from previous working version) ---
25
- log_file_name = f"agent_log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
26
- log_path = os.path.join(os.getcwd(), log_file_name)
27
-
28
- print(f"[INFO] Log file will be created at: {log_path}")
29
 
30
- logging.basicConfig(
31
- filename=log_path,
32
- filemode='w',
33
- level=logging.DEBUG,
34
- format='%(asctime)s | %(levelname)-8s | %(funcName)-15s | %(message)s',
35
- datefmt='%Y-%m-%d %H:%M:%S'
36
- )
37
-
38
- console_handler = logging.StreamHandler()
39
- console_handler.setLevel(logging.INFO)
40
- console_formatter = logging.Formatter('%(levelname)s: %(message)s')
41
- console_handler.setFormatter(console_formatter)
42
- logging.getLogger().addHandler(console_handler)
43
-
44
- logging.info(f"===== Application Startup at {datetime.now().isoformat()} =====")
45
- logging.info(f"📂 Log file configured at: {log_path}")
46
 
 
47
 
48
- # --- Constants (from template and my previous suggestions) ---
49
- # This API URL is for fetching questions and submitting answers
50
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
51
 
52
- # Patterns for extracting the final answer from agent output (crucial for scoring)
53
- ANSWER_PATTERNS = [
54
- r"the answer is:\s*(.*)", r"final_answer\((.*)\)", r"final_answer\(\s*\"(.*?)\"\s*\)",
55
- r"final_answer\(\s*'(.*?)'\s*\)", r"final_answer\(\s*```(.*?)```\s*\)",
56
- r"final_answer\(\s*`(.*?)`\s*\)", r"final_answer\(\s*\[(.*?)\]\s*\)",
57
- r"final_answer\(\s*\{(.*?)\}\s*\)", r"final_answer\(\s*(.*?)\s*\)",
58
- r"answer is\s*(.*)", r"final answer is\s*(.*)", r"final answer:\s*(.*)",
59
- r"final answer of the question is:\s*(.*)", r"the final answer is:\s*(.*)",
60
- r"final answer\s*:\s*(.*)", r"final_answer\s*:\s*(.*)", r"final_answer is\s*(.*)",
61
- r"the value is:\s*(.*)", r" (.*)", r"The final answer is: (.*)",
62
- r"final answer of the question is (.*)", r"final answer to the question is (.*)",
63
- r"final response: (.*)", r"Final Response: (.*)", r"the final response is (.*)",
64
- r"Final Response is: (.*)", r"Final Answer:\s*(.*)", r"Final Answer is:\s*(.*)",
65
- r"Answer:\s*(.*)", r"The answer is:\s*(.*)", r"Final Answer\s*\[([^\]]+)\]",
66
- r"The final answer is\s*\[([^\]]+)\]", r"The answer is\s*\[([^\]]+)\]",
67
- r"Answer\s*\[([^\]]+)\]", r"```json\n\{\"answer\":\s*\"(.*?)\"\n\}```",
68
- r"```json\n\{\"answer\":\s*(.*?)\n\}```", r"\"answer\":\s*\"(.*?)\"",
69
- r"\"answer\":\s*(.*)", r"(\w[\w\s\.\-,\/]*)\s*$", # Broad pattern to catch simple answers at the end
70
- ]
71
-
72
- @dataclass
73
- class QuestionLog:
74
- question_num: int
75
- question_preview: str
76
- question_type: str
77
- answer: str
78
- processing_time: float
79
- status: str
80
-
81
- # --- SmartAgent Class (Replaces BasicAgent and your current rule-based one) ---
82
- class SmartAgent:
83
- def __init__(self, username: str, http_session: requests.Session):
84
- self.username = username
85
- self.http_session = http_session
86
- self.agent_id = None
87
- self.agent: Optional[Agent] = None
88
- self.pipelines = {}
89
-
90
- self.tool_code_cache: Dict[str, str] = {}
91
- logging.info("SmartAgent initialized.")
92
-
93
- def setup(self):
94
- logging.info("Setting up agent...")
95
- try:
96
- self.agent = self._initialize_agent()
97
- logging.info("Agent setup complete.")
98
- except Exception as e:
99
- logging.exception(f"Error during agent setup: {e}")
100
- raise
101
-
102
- def _initialize_agent(self) -> Agent:
103
- logging.info("Initializing Hugging Face Agent...")
104
- try:
105
- client = InferenceClient() # HF_TOKEN is picked up from environment/secrets
106
-
107
- # The course API manages agent IDs. We check if one exists for the username.
108
- # This is the endpoint that previously gave a 404, because BasicAgent
109
- # didn't interact with the Agent API side. Now SmartAgent does.
110
- # DEFAULT_API_URL handles both questions/submit AND agent creation/tools.
111
- list_agents_resp = self.http_session.get(f"{DEFAULT_API_URL}/agents")
112
- list_agents_resp.raise_for_status()
113
- existing_agents = list_agents_resp.json()
114
- logging.debug(f"Existing agents: {existing_agents}")
115
-
116
- for agent_info in existing_agents:
117
- if agent_info.get("username") == self.username:
118
- self.agent_id = agent_info["agent_id"]
119
- logging.info(f"Re-using existing agent with ID: {self.agent_id}")
120
- return Agent(id=self.agent_id, client=client)
121
-
122
- # If no existing agent, create a new one
123
- create_agent_resp = self.http_session.post(f"{DEFAULT_API_URL}/agents", json={"username": self.username})
124
- create_agent_resp.raise_for_status()
125
- created_agent_info = create_agent_resp.json()
126
- self.agent_id = created_agent_info["agent_id"]
127
- logging.info(f"Created new agent with ID: {self.agent_id}")
128
- return Agent(id=self.agent_id, client=client)
129
-
130
- except requests.exceptions.RequestException as req_e:
131
- logging.error(f"Network or API error during agent initialization: {req_e}")
132
- raise
133
- except Exception as e:
134
- logging.error(f"Unexpected error during agent initialization: {e}")
135
- raise
136
-
137
- def _get_tool_code(self, tool_code_hash: str) -> str:
138
- if tool_code_hash in self.tool_code_cache:
139
- return self.tool_code_cache[tool_code_hash]
140
-
141
- logging.info(f"Fetching tool code for hash: {tool_code_hash}")
142
- try:
143
- resp = self.http_session.get(f"{DEFAULT_API_URL}/tool_code/{tool_code_hash}")
144
- resp.raise_for_status()
145
- tool_code = resp.json().get("tool_code", "")
146
- if not tool_code:
147
- raise ValueError(f"Tool code for hash {tool_code_hash} is empty.")
148
- self.tool_code_cache[tool_code_hash] = tool_code
149
- return tool_code
150
- except requests.exceptions.RequestException as req_e:
151
- logging.error(f"Failed to fetch tool code for {tool_code_hash}: {req_e}")
152
- raise
153
- except Exception as e:
154
- logging.error(f"Error getting tool code: {e}")
155
- raise
156
-
157
- def __call__(self, question: str, question_type: str, tools_code: Optional[List[Dict]] = None) -> str:
158
- # This __call__ method wraps the _execute_agent and _extract_answer
159
- # to fit how the main run_and_submit_all expects the agent to be called.
160
- if not self.agent:
161
- raise ValueError("Agent not initialized. Call setup() first.")
162
-
163
- agent_raw_output = self._execute_agent(question, question_type, tools_code)
164
- extracted_answer = self._extract_answer(agent_raw_output, question_type)
165
- return extracted_answer
166
-
167
- def _execute_agent(self, question: str, question_type: str, tools_code: Optional[List[Dict]] = None) -> str:
168
- if not self.agent:
169
- raise ValueError("Agent not initialized. Call setup() first.")
170
-
171
- logging.info(f"Executing agent for question type '{question_type}': {question[:50]}...")
172
- try:
173
- special_tools = []
174
- if tools_code:
175
- for tool_def in tools_code:
176
- tool_code_hash = tool_def.get("tool_code_hash")
177
- tool_name = tool_def.get("tool_name")
178
- if tool_code_hash and tool_name:
179
- tool_code_str = self._get_tool_code(tool_code_hash)
180
- unique_func_name = f"dynamic_tool_func_{hashlib.md5(tool_code_str.encode()).hexdigest()}"
181
- tool_code_str = tool_code_str.replace("def run_tool", f"def {unique_func_name}")
182
-
183
- global_vars = {}
184
- local_vars = {"inputs": None, "tool_code_hash": tool_code_hash} # 'inputs' needed for exec context
185
- # Inject self._run_tool into the execution context so dynamic tools can call it
186
- global_vars['run_tool'] = self._run_tool
187
-
188
- exec(tool_code_str, global_vars, local_vars)
189
-
190
- if unique_func_name not in global_vars:
191
- raise ValueError(f"Function {unique_func_name} not found after executing tool code.")
192
-
193
- special_tools.append(
194
- Agent.create_tool(
195
- name=tool_name,
196
- description=f"Dynamically loaded tool for {tool_name}",
197
- function=global_vars[unique_func_name]
198
- )
199
- )
200
- logging.debug(f"Added dynamic tool: {tool_name}")
201
-
202
- agent_output = self.agent.run(question, additional_tools=special_tools if special_tools else None)
203
- raw_answer = agent_output.chat_history[-1].response
204
- logging.debug(f"Agent raw output: {raw_answer}")
205
- return raw_answer
206
- except Exception as e:
207
- logging.error(f"Error during agent execution: {e}")
208
- raise
209
-
210
- def _extract_answer(self, raw_answer: str, question_type: str) -> str:
211
- logging.debug(f"Extracting answer from raw_answer: {raw_answer}")
212
- answer = "ERROR"
213
- for pattern in ANSWER_PATTERNS:
214
- match = re.search(pattern, raw_answer, re.IGNORECASE | re.DOTALL)
215
- if match:
216
- extracted_content = match.group(1).strip()
217
- extracted_content = extracted_content.replace("\\n", "").replace("\\", "")
218
- if extracted_content.startswith('"') and extracted_content.endswith('"'):
219
- extracted_content = extracted_content[1:-1]
220
- if extracted_content.startswith("'") and extracted_content.endswith("'"):
221
- extracted_content = extracted_content[1:-1]
222
- answer = extracted_content
223
- logging.debug(f"Extracted answer using pattern '{pattern}': {answer}")
224
- break
225
-
226
- if answer == "ERROR" and raw_answer:
227
- # Fallback: if no specific pattern matches, but the raw answer is short and doesn't look like agent internal monologue
228
- if len(raw_answer) < 200 and not any(kw in raw_answer.lower() for kw in ["thought", "tool", "action", "observation"]):
229
- answer = raw_answer.strip()
230
- logging.debug(f"No pattern matched, using raw answer directly: {answer}")
231
-
232
- if not answer: # Ensure 'answer' is not an empty string
233
- answer = "ERROR"
234
- return answer
235
-
236
- def _load_pipeline(self, pipeline_name: str, **kwargs):
237
- if pipeline_name not in self.pipelines:
238
- logging.info(f"Loading pipeline: {pipeline_name}")
239
- self.pipelines[pipeline_name] = pipeline(pipeline_name, **kwargs)
240
- return self.pipelines[pipeline_name]
241
-
242
- def _run_tool(self, tool_name: str, inputs: Dict[str, Any]) -> Any:
243
- # This method is called by the dynamically loaded tool code
244
- logging.info(f"Running internal tool: {tool_name} with inputs: {inputs}")
245
- result = None
246
- temp_file_paths = [] # To keep track of temporary files for cleanup
247
-
248
- try:
249
- if tool_name == "image-to-text":
250
- # Assuming 'image' in inputs is a URL
251
- image_url = inputs.get("image")
252
- if not image_url:
253
- raise ValueError("Image URL not provided for image-to-text tool.")
254
-
255
- # Fetch image bytes using the session
256
- image_bytes = self.http_session.get(image_url).content
257
-
258
- # Save to a temporary file
259
- with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp_img_file:
260
- tmp_img_file.write(image_bytes)
261
- image_path = tmp_img_file.name
262
- temp_file_paths.append(image_path) # Add to cleanup list
263
-
264
- image = Image.open(image_path)
265
- image_to_text_pipeline = self._load_pipeline("image-to-text")
266
- result = image_to_text_pipeline(image)[0]["generated_text"]
267
- logging.info(f"Image-to-text result: {result[:50]}...")
268
-
269
- elif tool_name == "text-to-image":
270
- text_to_image_pipeline = self._load_pipeline("text-to-image", model="runwayml/stable-diffusion-v1-5")
271
- images = text_to_image_pipeline(inputs["text"])
272
- if images and images.images:
273
- # Save the generated image to a temporary file
274
- with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp_output_img_file:
275
- images.images[0].save(tmp_output_img_file.name)
276
- result = tmp_output_img_file.name # Return the path to the image
277
- temp_file_paths.append(result)
278
- logging.info(f"Text-to-image result saved to: {result}")
279
- else:
280
- logging.warning("Text-to-image pipeline returned no images.")
281
- result = None
282
-
283
- elif tool_name == "speech-to-text":
284
- # Assuming 'audio' in inputs is a URL
285
- audio_url = inputs.get("audio")
286
- if not audio_url:
287
- raise ValueError("Audio URL not provided for speech-to-text tool.")
288
-
289
- audio_bytes = self.http_session.get(audio_url).content
290
- with tempfile.NamedTemporaryFile(delete=False, suffix=".flac") as tmp_audio_file:
291
- tmp_audio_file.write(audio_bytes)
292
- audio_path = tmp_audio_file.name
293
- temp_file_paths.append(audio_path)
294
-
295
- speech_to_text_pipeline = self._load_pipeline("automatic-speech-recognition", model="openai/whisper-tiny.en")
296
- result = speech_to_text_pipeline(audio_path)["text"]
297
- logging.info(f"Speech-to-text result: {result[:50]}...")
298
-
299
- elif tool_name == "text-to-speech":
300
- text_to_speech_pipeline = self._load_pipeline("text-to-speech", model="suno/bark-small")
301
- speech = text_to_speech_pipeline(inputs["text"])
302
- if speech and speech.audio is not None:
303
- # Save the generated audio to a temporary file
304
- with tempfile.NamedTemporaryFile(delete=False, suffix=".flac") as tmp_output_audio_file:
305
- sf.write(tmp_output_audio_file.name, speech.audio.numpy(), samplerate=speech.sampling_rate)
306
- result = tmp_output_audio_file.name # Return the path to the audio
307
- temp_file_paths.append(result)
308
- logging.info(f"Text-to-speech result saved to: {result}")
309
- else:
310
- logging.warning("Text-to-speech pipeline returned no audio.")
311
- result = None
312
- else:
313
- logging.warning(f"Unknown tool: {tool_name}. Skipping execution.")
314
- return f"Error: Unknown tool {tool_name}"
315
-
316
- return result
317
-
318
- except Exception as e:
319
- logging.error(f"Error running tool '{tool_name}': {e}", exc_info=True)
320
- return f"Error running tool {tool_name}: {e}"
321
- finally:
322
- # Clean up temporary files
323
- for fp in temp_file_paths:
324
- if os.path.exists(fp):
325
- try:
326
- os.unlink(fp)
327
- logging.debug(f"Cleaned up temporary file: {fp}")
328
- except OSError as e:
329
- logging.warning(f"Could not delete temporary file {fp}: {e}")
330
-
331
- def cleanup(self):
332
- logging.info("Cleaning up agent resources...")
333
- if self.agent and self.agent_id:
334
- try:
335
- # The agent API handles cleanup, we don't explicitly delete here.
336
- logging.info(f"Agent with ID {self.agent_id} is conceptually deleted (or will expire).")
337
- except Exception as e:
338
- logging.warning(f"Failed to delete agent or clean up its remote state: {e}")
339
- self.pipelines.clear()
340
- self.tool_code_cache.clear()
341
- logging.info("SmartAgent resources cleaned up.")
342
-
343
- # --- Main Run and Submit Function (Modified to use SmartAgent) ---
344
- def run_and_submit_all(profile: gr.OAuthProfile | None):
345
  """
346
- Fetches all questions, runs the SmartAgent on them, submits all answers,
347
  and displays the results.
348
  """
349
- if not profile:
350
- logging.info("User not logged in.")
351
- yield "Please Login to Hugging Face with the button.", None
352
- return
353
-
354
- username = f"{profile.username}"
355
- logging.info(f"User logged in: {username}")
356
-
357
- api_url = DEFAULT_API_URL # This is used for questions, submit, agent management, and tool code
358
 
359
- # Determine HF Space Runtime URL and Repo URL for submission
360
- space_id = os.getenv("SPACE_ID")
361
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "N/A_LOCAL_RUN"
362
- logging.info(f"Agent code URL for submission: {agent_code}")
 
 
363
 
364
- # Configure HTTP session with retries for robustness
365
- from requests.adapters import HTTPAdapter
366
- from urllib3.util.retry import Retry
367
- retry_strategy = Retry(
368
- total=5,
369
- backoff_factor=1,
370
- status_forcelist=[429, 500, 502, 503, 504],
371
- allowed_methods=["HEAD", "GET", "POST", "PUT", "DELETE", "OPTIONS", "TRACE"]
372
- )
373
-
374
- adapter = HTTPAdapter(max_retries=retry_strategy)
375
-
376
- http_session = requests.Session()
377
- http_session.mount("https://", adapter)
378
- http_session.mount("http://", adapter)
379
-
380
- agent = None # Initialize agent to None for finally block
381
 
 
382
  try:
383
- # 1. Instantiate and Setup SmartAgent
384
- agent = SmartAgent(username=username, http_session=http_session)
385
- agent.setup() # This connects to the /agents endpoint
386
-
387
- # Initial yield for Gradio progress display
388
- yield "🚀 Initializing and fetching questions...", pd.DataFrame([])
389
-
390
- # 2. Fetch Questions
391
- questions_url = f"{api_url}/questions"
392
- logging.info(f"Fetching questions from: {questions_url}")
393
- response = http_session.get(questions_url, timeout=15) # Use http_session here
 
394
  response.raise_for_status()
395
  questions_data = response.json()
396
  if not questions_data:
397
- logging.warning("Fetched questions list is empty.")
398
- yield "Fetched questions list is empty or invalid format.", pd.DataFrame([])
399
- return
400
- logging.info(f"Fetched {len(questions_data)} questions.")
401
-
402
- # 3. Run your Agent on each question
403
- results_log = []
404
- answers_payload = []
405
- logging.info(f"Running agent on {len(questions_data)} questions...")
406
-
407
- for i, item in enumerate(questions_data, 1):
408
- task_id = item.get("task_id")
409
- question_text = item.get("question")
410
- question_type = item.get("question_type")
411
- question_preview = item.get("question_preview", question_text[:50] + "...")
412
- tools_code = item.get("tools_code") # Dynamic tool code
413
-
414
- if not all([task_id, question_text is not None, question_type]): # question_text can be empty string
415
- logging.warning(f"Skipping item with missing task_id, question, or type: {item}")
416
- results_log.append({"Task ID": task_id, "Question": question_preview, "Submitted Answer": "ERROR: Malformed Question", "Status": "ERROR"})
417
- continue
418
-
419
- start_time = time.time()
420
- submitted_answer = "ERROR"
421
- status = "ERROR"
422
-
423
- try:
424
- # Call the SmartAgent's __call__ method
425
- submitted_answer = agent(question_text, question_type, tools_code)
426
- processing_time = time.time() - start_time
427
- status = "OK" if submitted_answer != "ERROR" else "ERROR"
428
- logging.info(f"Q{i} [{question_type}] Answer: {submitted_answer} (Took {processing_time:.2f}s)")
429
-
430
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer}) # 'submitted_answer' key for submission
431
- results_log.append({"Task ID": task_id, "Question": question_preview, "Submitted Answer": submitted_answer, "Status": status})
432
-
433
- except Exception as e:
434
- processing_time = time.time() - start_time
435
- logging.exception(f"Error running agent on task {task_id}: {e}")
436
- submitted_answer = f"AGENT ERROR: {e}"
437
- status = "ERROR"
438
- answers_payload.append({"task_id": task_id, "submitted_answer": "ERROR"}) # Submit "ERROR"
439
- results_log.append({"Task ID": task_id, "Question": question_preview, "Submitted Answer": submitted_answer, "Status": status})
440
-
441
- # Yield progressive updates to Gradio UI
442
- yield (
443
- f"Processing Q{i}/{len(questions_data)}. Last Answer: {submitted_answer[:100]}",
444
- pd.DataFrame(results_log)
445
- )
446
 
447
- if not answers_payload:
448
- logging.info("Agent did not produce any answers to submit.")
449
- yield "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
450
- return
451
 
452
- # 4. Prepare Submission and 5. Submit
453
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
454
- submit_url = f"{api_url}/submit"
455
- logging.info(f"Submitting {len(answers_payload)} answers to: {submit_url}")
456
 
457
- response = http_session.post(submit_url, json=submission_data, timeout=120) # Use http_session here
 
 
 
458
  response.raise_for_status()
459
  result_data = response.json()
460
-
461
  final_status = (
462
  f"Submission Successful!\n"
463
  f"User: {result_data.get('username')}\n"
@@ -465,10 +115,9 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
465
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
466
  f"Message: {result_data.get('message', 'No message received.')}"
467
  )
468
- logging.info("Submission successful.")
469
-
470
- yield final_status, pd.DataFrame(results_log)
471
-
472
  except requests.exceptions.HTTPError as e:
473
  error_detail = f"Server responded with status {e.response.status_code}."
474
  try:
@@ -477,70 +126,83 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
477
  except requests.exceptions.JSONDecodeError:
478
  error_detail += f" Response: {e.response.text[:500]}"
479
  status_message = f"Submission Failed: {error_detail}"
480
- logging.error(status_message)
481
- yield status_message, pd.DataFrame(results_log)
 
482
  except requests.exceptions.Timeout:
483
  status_message = "Submission Failed: The request timed out."
484
- logging.error(status_message)
485
- yield status_message, pd.DataFrame(results_log)
 
486
  except requests.exceptions.RequestException as e:
487
  status_message = f"Submission Failed: Network error - {e}"
488
- logging.error(status_message)
489
- yield status_message, pd.DataFrame(results_log)
 
490
  except Exception as e:
491
- status_message = f"An unexpected error occurred during run or submission: {e}"
492
- logging.exception(status_message)
493
- yield status_message, pd.DataFrame(results_log)
494
- finally:
495
- if agent:
496
- agent.cleanup()
497
 
498
 
499
- # --- Build Gradio Interface using Blocks (as per template) ---
500
  with gr.Blocks() as demo:
501
- gr.Markdown("# Hugging Face Agent Certification Runner")
502
  gr.Markdown(
503
  """
504
  **Instructions:**
505
- 1. This Space uses the official template's structure. Make sure you've cloned it!
506
- 2. **Log in to your Hugging Face account using the button below.** This is crucial for authentication and uses your HF username for submission.
507
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your advanced agent, submit answers, and see the score.
508
  ---
509
  **Disclaimers:**
510
- The evaluation process can take significant time as your agent goes through all questions. Progress updates will be shown below.
 
511
  """
512
  )
513
-
514
- # Store the LoginButton in a variable FIRST
515
- login_button_component = gr.LoginButton()
516
 
517
- run_button = gr.Button("🚀 Run Evaluation & Submit All Answers")
 
 
 
518
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
 
519
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
520
 
521
- # Now, use the variable for the input
522
  run_button.click(
523
  fn=run_and_submit_all,
524
- inputs=[login_button_component], # <<< THIS IS THE KEY CHANGE >>>
525
- outputs=[status_output, results_table],
526
- show_progress=True, # Show Gradio's internal progress bar
527
  )
528
 
 
 
 
 
 
 
 
 
529
  if __name__ == "__main__":
530
  print("\n" + "-"*30 + " App Starting " + "-"*30)
 
531
  space_host_startup = os.getenv("SPACE_HOST")
532
- space_id_startup = os.getenv("SPACE_ID")
 
533
  if space_host_startup:
534
  print(f"✅ SPACE_HOST found: {space_host_startup}")
535
- print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
536
- else:
537
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
538
- if space_id_startup:
 
539
  print(f"✅ SPACE_ID found: {space_id_startup}")
540
- print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
541
- print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
542
  else:
543
  print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
 
544
  print("-"*(60 + len(" App Starting ")) + "\n")
545
- print("Launching Gradio Interface for Advanced Agent Evaluation...")
 
546
  demo.launch(debug=True, share=False)
 
1
  import os
 
 
 
 
 
2
  import time
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
+ import gradio as gr
5
+ import pandas as pd
6
+ import requests
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
+ from ShrewdAgent import ShrewdAgent
9
 
10
+ # (Keep Constants as is)
11
+ # --- Constants ---
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
 
14
+ # --- Basic Agent Definition ---
15
+ # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
16
class BasicAgent:
    """Stub agent that answers every question with the same canned string.

    It performs no reasoning: it only logs what it received and what it
    returns. Intended as a placeholder to be replaced by a real agent.
    """

    def __init__(self) -> None:
        """Announce construction on stdout; the agent keeps no state."""
        print("BasicAgent initialized.")

    def __call__(self, question: str) -> str:
        """Log a preview of ``question`` and return the fixed default answer.

        Args:
            question: The question text. Only its first 50 characters are
                echoed to stdout; the content does not influence the result.

        Returns:
            The constant string ``"This is a default answer."``.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        fixed_answer = "This is a default answer."
        print(f"Agent returning fixed answer: {fixed_answer}")
        return fixed_answer
24
+
25
+ def run_and_submit_all( profile: gr.OAuthProfile | None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  """
27
+ Fetches all questions, runs the BasicAgent on them, submits all answers,
28
  and displays the results.
29
  """
30
+ # --- Determine HF Space Runtime URL and Repo URL ---
31
+ space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
 
 
 
 
 
 
 
32
 
33
+ if profile:
34
+ username= f"{profile.username}"
35
+ print(f"User logged in: {username}")
36
+ else:
37
+ print("User not logged in.")
38
+ return "Please Login to Hugging Face with the button.", None
39
 
40
+ api_url = DEFAULT_API_URL
41
+ questions_url = f"{api_url}/questions"
42
+ submit_url = f"{api_url}/submit"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
+ # 1. Instantiate Agent (modify this part to create your agent)
45
  try:
46
+ agent = ShrewdAgent()
47
+ except Exception as e:
48
+ print(f"Error instantiating agent: {e}")
49
+ return f"Error initializing agent: {e}", None
50
+ # When the app runs as a Hugging Face Space, this link points to your codebase (useful for others, so please keep it public)
51
+ agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
52
+ print(agent_code)
53
+
54
+ # 2. Fetch Questions
55
+ print(f"Fetching questions from: {questions_url}")
56
+ try:
57
+ response = requests.get(questions_url, timeout=15)
58
  response.raise_for_status()
59
  questions_data = response.json()
60
  if not questions_data:
61
+ print("Fetched questions list is empty.")
62
+ return "Fetched questions list is empty or invalid format.", None
63
+ print(f"Fetched {len(questions_data)} questions.")
64
+ except requests.exceptions.RequestException as e:
65
+ print(f"Error fetching questions: {e}")
66
+ return f"Error fetching questions: {e}", None
67
+ except requests.exceptions.JSONDecodeError as e:
68
+ print(f"Error decoding JSON response from questions endpoint: {e}")
69
+ print(f"Response text: {response.text[:500]}")
70
+ return f"Error decoding server response for questions: {e}", None
71
+ except Exception as e:
72
+ print(f"An unexpected error occurred fetching questions: {e}")
73
+ return f"An unexpected error occurred fetching questions: {e}", None
74
+
75
+ # 3. Run your Agent
76
+ results_log = []
77
+ answers_payload = []
78
+ print(f"Running agent on {len(questions_data)} questions...")
79
+ for item in questions_data:
80
+ task_id = item.get("task_id")
81
+ question_text = item.get("question")
82
+ file_name = item.get("file_name")
83
+ if not task_id or question_text is None:
84
+ print(f"Skipping item with missing task_id or question: {item}")
85
+ continue
86
+ try:
87
+ question_with_attachment = compute_question_with_attachment(question_text, task_id, file_name)
88
+ submitted_answer = agent(question_with_attachment)
89
+ answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
90
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
91
+ time.sleep(70) # wait for reducing rate limit errors
92
+ except Exception as e:
93
+ print(f"Error running agent on task {task_id}: {e}")
94
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
+ if not answers_payload:
97
+ print("Agent did not produce any answers to submit.")
98
+ return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
 
99
 
100
+ # 4. Prepare Submission
101
+ submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
102
+ status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
103
+ print(status_update)
104
 
105
+ # 5. Submit
106
+ print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
107
+ try:
108
+ response = requests.post(submit_url, json=submission_data, timeout=60)
109
  response.raise_for_status()
110
  result_data = response.json()
 
111
  final_status = (
112
  f"Submission Successful!\n"
113
  f"User: {result_data.get('username')}\n"
 
115
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
116
  f"Message: {result_data.get('message', 'No message received.')}"
117
  )
118
+ print("Submission successful.")
119
+ results_df = pd.DataFrame(results_log)
120
+ return final_status, results_df
 
121
  except requests.exceptions.HTTPError as e:
122
  error_detail = f"Server responded with status {e.response.status_code}."
123
  try:
 
126
  except requests.exceptions.JSONDecodeError:
127
  error_detail += f" Response: {e.response.text[:500]}"
128
  status_message = f"Submission Failed: {error_detail}"
129
+ print(status_message)
130
+ results_df = pd.DataFrame(results_log)
131
+ return status_message, results_df
132
  except requests.exceptions.Timeout:
133
  status_message = "Submission Failed: The request timed out."
134
+ print(status_message)
135
+ results_df = pd.DataFrame(results_log)
136
+ return status_message, results_df
137
  except requests.exceptions.RequestException as e:
138
  status_message = f"Submission Failed: Network error - {e}"
139
+ print(status_message)
140
+ results_df = pd.DataFrame(results_log)
141
+ return status_message, results_df
142
  except Exception as e:
143
+ status_message = f"An unexpected error occurred during submission: {e}"
144
+ print(status_message)
145
+ results_df = pd.DataFrame(results_log)
146
+ return status_message, results_df
 
 
147
 
148
 
149
+ # --- Build Gradio Interface using Blocks ---
150
  with gr.Blocks() as demo:
151
+ gr.Markdown("# Basic Agent Evaluation Runner")
152
  gr.Markdown(
153
  """
154
  **Instructions:**
155
+ 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
156
+ 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
157
+ 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
158
  ---
159
  **Disclaimers:**
160
+ Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
161
+ This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
162
  """
163
  )
 
 
 
164
 
165
+ gr.LoginButton()
166
+
167
+ run_button = gr.Button("Run Evaluation & Submit All Answers")
168
+
169
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
170
+ # Removed max_rows=10 from DataFrame constructor
171
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
172
 
 
173
  run_button.click(
174
  fn=run_and_submit_all,
175
+ outputs=[status_output, results_table]
 
 
176
  )
177
 
178
+
179
+ def compute_question_with_attachment(question: str, task_id: str, file_name: str) -> str:
180
+ if file_name:
181
+ return f"{question}\n\nAttached file: https://agents-course-unit4-scoring.hf.space/files/{task_id}"
182
+ else:
183
+ return question
184
+
185
+
186
if __name__ == "__main__":
    # Script entry point: print an informational banner describing the
    # Space-related environment variables, then launch the Gradio app.
    print("\n" + "-"*30 + " App Starting " + "-"*30)

    # Both variables are read for display only; nothing below depends on them.
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        # SPACE_HOST normally already ends in ".hf.space"; strip it first so
        # the printed URL is correct whether or not the suffix is present
        # (previously the suffix could be doubled).
        host_label = space_host_startup.removesuffix(".hf.space")
        print(f" Runtime URL should be: https://{host_label}.hf.space")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

    # Closing rule sized to match the width of the opening banner line.
    print("-"*(60 + len(" App Starting ")) + "\n")

    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)