Update app.py
Browse files
app.py
CHANGED
|
@@ -1,652 +1,194 @@
|
|
| 1 |
import os
|
| 2 |
import gradio as gr
|
| 3 |
-
import io
|
| 4 |
-
import contextlib
|
| 5 |
-
import re
|
| 6 |
-
import uuid
|
| 7 |
-
try:
|
| 8 |
-
import spaces # type: ignore
|
| 9 |
-
except ImportError:
|
| 10 |
-
# Create a dummy spaces class for local development compatibility
|
| 11 |
-
class spaces:
|
| 12 |
-
@staticmethod
|
| 13 |
-
def GPU():
|
| 14 |
-
def decorator(func):
|
| 15 |
-
return func
|
| 16 |
-
return decorator
|
| 17 |
-
print("Warning: `spaces` module not found. Using dummy implementation for local execution.")
|
| 18 |
-
|
| 19 |
import requests
|
| 20 |
import inspect
|
| 21 |
import pandas as pd
|
| 22 |
-
import json
|
| 23 |
-
from typing import List, Dict, Tuple, Union, Optional, TypedDict, Literal
|
| 24 |
-
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, ToolMessage
|
| 25 |
-
from langchain_core.agents import AgentAction, AgentFinish
|
| 26 |
-
from langchain_core.tools import tool
|
| 27 |
-
from langchain_community.tools.ddg_search import DuckDuckGoSearchRun
|
| 28 |
-
from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline
|
| 29 |
-
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
|
| 30 |
-
from langgraph.prebuilt import ToolNode
|
| 31 |
-
from langgraph.graph import StateGraph, END
|
| 32 |
-
from functools import partial
|
| 33 |
-
from transformers import pipeline
|
| 34 |
|
|
|
|
| 35 |
# --- Constants ---
|
| 36 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 37 |
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
- Information about recent developments, trends, or changes.
|
| 52 |
-
* If asked for specific, verifiable facts (names, dates, statistics, events) that are not common knowledge, you MUST use the 'web_search' tool.
|
| 53 |
-
* When using web_search, provide SPECIFIC search queries focused on exactly what you need. Target the precise piece of information.
|
| 54 |
-
* DO NOT use broad, general searches like "climate change". Target the exact information needed (e.g., "current CO2 levels 2023 global average ppm").
|
| 55 |
-
3. 'download_file' TOOL USAGE:
|
| 56 |
-
* Use ONLY when:
|
| 57 |
-
- The question EXPLICITLY mentions a file, document, or attachment (e.g., "read the file", "in the attached document").
|
| 58 |
-
- The question refers to information that MUST logically come from a file associated with the provided task_id.
|
| 59 |
-
- The question contains phrases like "in the document", "from the file", "according to the text provided".
|
| 60 |
-
* IMPORTANT: Pass ONLY the task_id string to this tool. Nothing else.
|
| 61 |
-
* DO NOT attempt to download a file if the question doesn't clearly indicate one exists or is necessary.
|
| 62 |
-
4. PROCESS AFTER TOOL USE:
|
| 63 |
-
* WAIT for the tool's response before proceeding.
|
| 64 |
-
* EXTRACT only the specific piece of information required by the question from the tool's response.
|
| 65 |
-
* DO NOT include the full tool response or any commentary about the tool use in your final answer.
|
| 66 |
-
* FORMULATE your final answer based SOLELY on the relevant information extracted from the tool, adhering strictly to the format rules.
|
| 67 |
-
FINAL ANSWER FORMATTING (CRITICAL):
|
| 68 |
-
-----------------------------------
|
| 69 |
-
* Your final response *must* contain ONLY the answer itself, exactly as requested.
|
| 70 |
-
* Do NOT include any introductory phrases like "The answer is:", "Based on my search:", "Here is the file content:", "The result is:", etc.
|
| 71 |
-
* If the question asks for a number, respond with ONLY the number (e.g., `42`).
|
| 72 |
-
* If the question asks for a name, respond with ONLY the name (e.g., `Paris`).
|
| 73 |
-
* If the question asks for a date, respond with ONLY the date in the requested format (e.g., `2023-10-26`).
|
| 74 |
-
* If the answer is derived from a downloaded file, extract the specific piece of information requested and return *only* that information.
|
| 75 |
-
* If you determine you cannot answer the question accurately after using tools or reasoning, respond with only the text: `I cannot answer this question.`
|
| 76 |
-
Example Interaction 1:
|
| 77 |
-
User Question: What is the boiling point of water in Celsius?
|
| 78 |
-
Your Final Answer: 100
|
| 79 |
-
Example Interaction 2:
|
| 80 |
-
User Question: Calculate (5 * 3) + 2
|
| 81 |
-
Your Final Answer: 17
|
| 82 |
-
Example Interaction 3:
|
| 83 |
-
User Question: Read the document associated with task_id 'abc-123' and tell me the value mentioned for 'Project Alpha'.
|
| 84 |
-
(Tool: download_file(task_id='abc-123') -> Returns: "File content: ... Project Alpha: Complete ...")
|
| 85 |
-
Your Final Answer: Complete
|
| 86 |
-
Example Interaction 4 (Requires Web Search):
|
| 87 |
-
User Question: What is the capital of France?
|
| 88 |
-
(LLM decides tool is needed: Request Tool Call `web_search(query='capital of France')`)
|
| 89 |
-
(Tool Result: "Paris is the capital and most populous city of France...")
|
| 90 |
-
(LLM extracts answer from tool result)
|
| 91 |
-
Your Final Answer: Paris
|
| 92 |
-
"""
|
| 93 |
-
|
| 94 |
-
# --- Agent State Definition ---
|
| 95 |
-
class AgentState(TypedDict):
|
| 96 |
-
"""Represents the state of our agent graph."""
|
| 97 |
-
question: str # The initial question from the API
|
| 98 |
-
task_id: str # Task ID associated with the question
|
| 99 |
-
agent_outcome: Optional[Union[AgentAction, AgentFinish]] # The latest decision from the agent node
|
| 100 |
-
intermediate_steps: List[Tuple[AgentAction, str]] # List of (tool action, tool observation) tuples
|
| 101 |
-
chat_history: List[BaseMessage] # History of messages (human, ai, tool)
|
| 102 |
-
downloaded_files: Dict[str, str] # Maps task_id to downloaded file content (string)
|
| 103 |
-
tool_cache: Dict[str, str] # Cache for tool results {cache_key: result}
|
| 104 |
-
error: Optional[str] # To capture any errors during execution
|
| 105 |
-
max_iterations: int # Iteration limit
|
| 106 |
-
current_iteration: int # Current iteration count
|
| 107 |
-
|
| 108 |
-
# --- Tool Definitions ---
|
| 109 |
@tool
|
| 110 |
-
def
|
| 111 |
-
"""
|
| 112 |
-
|
| 113 |
-
|
|
|
|
|
|
|
| 114 |
"""
|
| 115 |
-
file_url = f"{DEFAULT_API_URL}/files/{task_id}"
|
| 116 |
-
print(f"Attempting to download file from: {file_url}")
|
| 117 |
try:
|
| 118 |
-
|
| 119 |
-
|
|
|
|
|
|
|
|
|
|
| 120 |
|
| 121 |
-
try:
|
| 122 |
-
content = response.content.decode('utf-8')
|
| 123 |
-
print(f"Successfully downloaded and decoded file for task {task_id}. Content length: {len(content)}")
|
| 124 |
-
summary = f"Successfully downloaded file for task {task_id}. Content starts: {content[:500]}..."
|
| 125 |
-
return summary
|
| 126 |
-
except UnicodeDecodeError:
|
| 127 |
-
print(f"Warning: Could not decode file content as UTF-8 for task {task_id}. Returning raw bytes summary.")
|
| 128 |
-
return f"Successfully downloaded file for task {task_id}, but it may not be text. Raw content (first 500 bytes): {response.content[:500]}..."
|
| 129 |
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
web_search = DuckDuckGoSearchRun()
|
| 145 |
-
agent_tools = [download_file, web_search]
|
| 146 |
-
|
| 147 |
-
# --- Helper Functions for Graph Nodes ---
|
| 148 |
-
def agent_node(state: AgentState, llm_with_tools, prompt):
|
| 149 |
-
"""Runs the LLM to determine the next action or finish."""
|
| 150 |
-
print(f"Running agent_node (Iteration {state.get('current_iteration', 0)})" )
|
| 151 |
-
agent_input_dict = {
|
| 152 |
-
"input": state['question'],
|
| 153 |
-
"chat_history": state['chat_history'],
|
| 154 |
-
}
|
| 155 |
-
|
| 156 |
-
# Format the prompt explicitly first
|
| 157 |
-
# The prompt template takes chat_history and input
|
| 158 |
-
formatted_prompt = prompt.invoke(agent_input_dict)
|
| 159 |
-
print("Formatted prompt generated for LLM.")
|
| 160 |
-
|
| 161 |
-
# --- Add GAIA-specific instructions ---
|
| 162 |
-
# Convert ChatPromptValue to messages, modify the last HumanMessage
|
| 163 |
-
messages = formatted_prompt.to_messages()
|
| 164 |
-
# Use triple quotes for the instruction string
|
| 165 |
-
gaia_instructions = """\n\nWhen answering, provide ONLY the precise answer requested. Do not include explanations, steps, reasoning, or additional text. Be direct and specific. GAIA benchmark requires exact matching answers. For example, if asked "What is the capital of France?", respond simply with "Paris"."""
|
| 166 |
-
|
| 167 |
-
if messages and isinstance(messages[-1], HumanMessage):
|
| 168 |
-
messages[-1].content += gaia_instructions
|
| 169 |
-
print("Appended GAIA formatting instructions to the last HumanMessage.")
|
| 170 |
-
else:
|
| 171 |
-
# If the last message isn't Human (unexpected), add instructions as a new Human message
|
| 172 |
-
messages.append(HumanMessage(content=gaia_instructions))
|
| 173 |
-
print("Warning: Appended GAIA instructions as a new HumanMessage.")
|
| 174 |
-
|
| 175 |
-
# Pass the modified messages list to the LLM
|
| 176 |
-
response = llm_with_tools.invoke(messages)
|
| 177 |
-
|
| 178 |
-
# --- Tool Call Handling ---
|
| 179 |
-
agent_outcome = None # Initialize agent_outcome
|
| 180 |
-
raw_content = response.content # Get raw content once
|
| 181 |
-
|
| 182 |
-
# Check for automatic tool calls first (ideal case)
|
| 183 |
-
if hasattr(response, 'tool_calls') and response.tool_calls:
|
| 184 |
-
print(f"Agent decided to call tools (structured): {response.tool_calls}")
|
| 185 |
-
actions = [
|
| 186 |
-
# Ensure tool_call_id is included if available directly from the model
|
| 187 |
-
AgentAction(
|
| 188 |
-
tool=call['name'],
|
| 189 |
-
tool_input=call['args'],
|
| 190 |
-
log=str(call),
|
| 191 |
-
tool_call_id=call.get('id') # Get ID if model provides it
|
| 192 |
-
)
|
| 193 |
-
for call in response.tool_calls
|
| 194 |
-
]
|
| 195 |
-
if actions:
|
| 196 |
-
# If model provides multiple calls, we might need to handle them.
|
| 197 |
-
# For now, just take the first action if it exists.
|
| 198 |
-
agent_outcome = actions[0]
|
| 199 |
-
|
| 200 |
-
# If no structured tool call, try manual parsing on the stripped content
|
| 201 |
-
if agent_outcome is None:
|
| 202 |
-
# --- Strip prompt echo FIRST ---
|
| 203 |
-
marker = "<|im_start|>assistant"
|
| 204 |
-
marker_pos = raw_content.rfind(marker)
|
| 205 |
-
if marker_pos != -1:
|
| 206 |
-
final_content = raw_content[marker_pos + len(marker):].lstrip()
|
| 207 |
-
print(f"Stripped prompt echo for parsing. Content: {final_content[:150]}...")
|
| 208 |
-
else:
|
| 209 |
-
final_content = raw_content.strip()
|
| 210 |
-
print("Assistant marker not found. Parsing raw content.")
|
| 211 |
-
# --- End Stripping ---
|
| 212 |
-
|
| 213 |
-
# --- Manual Parsing on final_content ---
|
| 214 |
-
search_match = re.search(r"web_search\((.*?)\)", final_content)
|
| 215 |
-
download_match = re.search(r"download_file\((.*?)\)", final_content)
|
| 216 |
-
manual_tool_call_id = f"tool_{uuid.uuid4()}" # Generate unique ID
|
| 217 |
-
|
| 218 |
-
if search_match:
|
| 219 |
-
tool_input_str = search_match.group(1).strip()
|
| 220 |
-
try:
|
| 221 |
-
# Try parsing as dict {'query': '...'} first
|
| 222 |
-
parsed_input = json.loads(tool_input_str.replace("'", '"')) # Replace single quotes for JSON
|
| 223 |
-
if isinstance(parsed_input, dict) and 'query' in parsed_input:
|
| 224 |
-
tool_input = parsed_input
|
| 225 |
-
else:
|
| 226 |
-
# If not a dict with 'query', assume the string itself is the query
|
| 227 |
-
tool_input = {"query": tool_input_str}
|
| 228 |
-
except json.JSONDecodeError:
|
| 229 |
-
# Handle plain string query: web_search("the query") or web_search('the query')
|
| 230 |
-
if (tool_input_str.startswith("'") and tool_input_str.endswith("'")) or \
|
| 231 |
-
(tool_input_str.startswith('"') and tool_input_str.endswith('"')):
|
| 232 |
-
tool_input = {"query": tool_input_str[1:-1]}
|
| 233 |
-
else: # Assume raw string is the query
|
| 234 |
-
tool_input = {"query": tool_input_str}
|
| 235 |
-
print(f"Agent decided to call tool (MANUALLY PARSED): web_search, Input: {tool_input}")
|
| 236 |
-
agent_outcome = AgentAction(
|
| 237 |
-
tool="duckduckgo_search",
|
| 238 |
-
tool_input=tool_input,
|
| 239 |
-
log=f"Manually Parsed from content: {search_match.group(0)}",
|
| 240 |
-
tool_call_id=manual_tool_call_id
|
| 241 |
-
)
|
| 242 |
-
elif download_match:
|
| 243 |
-
tool_input_str = download_match.group(1).strip()
|
| 244 |
-
if (tool_input_str.startswith("'") and tool_input_str.endswith("'")) or \
|
| 245 |
-
(tool_input_str.startswith('"') and tool_input_str.endswith('"')):
|
| 246 |
-
tool_input = tool_input_str[1:-1]
|
| 247 |
-
else:
|
| 248 |
-
tool_input = tool_input_str
|
| 249 |
-
print(f"Agent decided to call tool (MANUALLY PARSED): download_file, Input: {tool_input}")
|
| 250 |
-
agent_outcome = AgentAction(
|
| 251 |
-
tool="download_file",
|
| 252 |
-
tool_input=tool_input,
|
| 253 |
-
log=f"Manually Parsed from content: {download_match.group(0)}",
|
| 254 |
-
tool_call_id=manual_tool_call_id
|
| 255 |
-
)
|
| 256 |
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
marker = "<|im_start|>assistant"
|
| 262 |
-
marker_pos = raw_content.rfind(marker)
|
| 263 |
-
if marker_pos != -1:
|
| 264 |
-
final_content = raw_content[marker_pos + len(marker):].lstrip()
|
| 265 |
-
print(f"Stripped prompt echo for final answer. Using: {final_content[:100]}...")
|
| 266 |
-
else:
|
| 267 |
-
final_content = raw_content.strip()
|
| 268 |
-
print("Assistant marker not found for final answer. Using raw content.")
|
| 269 |
-
# --- End Workaround ---
|
| 270 |
-
agent_outcome = AgentFinish(return_values={"output": final_content}, log=raw_content)
|
| 271 |
-
|
| 272 |
-
# Add the original response (including potential tool calls) to history for context
|
| 273 |
-
new_history = state['chat_history'] + [response]
|
| 274 |
-
return {"agent_outcome": agent_outcome, "chat_history": new_history}
|
| 275 |
-
|
| 276 |
-
def tool_node(state: AgentState, tool_executor):
|
| 277 |
-
"""Executes tools and returns the results, using a cache."""
|
| 278 |
-
print("Running tool_node")
|
| 279 |
-
agent_action = state['agent_outcome']
|
| 280 |
-
if not isinstance(agent_action, AgentAction):
|
| 281 |
-
print("Warning: tool_node called without AgentAction in state.")
|
| 282 |
-
return {}
|
| 283 |
-
|
| 284 |
-
# Construct a unique cache key for this tool call
|
| 285 |
-
tool_name = agent_action.tool
|
| 286 |
-
tool_input = str(agent_action.tool_input) # Ensure input is string for dict key
|
| 287 |
-
cache_key = f"{tool_name}::{tool_input}"
|
| 288 |
-
tool_cache = state.get('tool_cache', {})
|
| 289 |
-
|
| 290 |
-
# Check cache first
|
| 291 |
-
if cache_key in tool_cache:
|
| 292 |
-
observation = tool_cache[cache_key]
|
| 293 |
-
print(f"Cache hit for tool {tool_name} with input {tool_input[:50]}... Returning cached result.")
|
| 294 |
-
else:
|
| 295 |
-
print(f"Cache miss for tool {tool_name} with input {tool_input[:50]}... Executing tool.")
|
| 296 |
-
observation = tool_executor.invoke(agent_action)
|
| 297 |
-
print(f"Tool {tool_name} executed. Observation: {str(observation)[:200]}...")
|
| 298 |
-
# Update cache
|
| 299 |
-
tool_cache[cache_key] = str(observation)
|
| 300 |
-
|
| 301 |
-
new_intermediate_steps = state['intermediate_steps'] + [(agent_action, str(observation))]
|
| 302 |
-
# Directly use the tool_call_id from the AgentAction
|
| 303 |
-
# Assumes agent_action WILL have tool_call_id if it's an AgentAction leading here
|
| 304 |
-
tool_message = ToolMessage(
|
| 305 |
-
content=str(observation),
|
| 306 |
-
tool_call_id=agent_action.tool_call_id
|
| 307 |
-
)
|
| 308 |
-
new_history = state['chat_history'] + [tool_message]
|
| 309 |
-
|
| 310 |
-
# Return updated state including the potentially modified cache
|
| 311 |
-
return {
|
| 312 |
-
"intermediate_steps": new_intermediate_steps,
|
| 313 |
-
"chat_history": new_history,
|
| 314 |
-
"tool_cache": tool_cache # Ensure cache updates are propagated
|
| 315 |
-
}
|
| 316 |
-
|
| 317 |
-
def should_continue(state: AgentState) -> Literal["tools", "__end__"]:
|
| 318 |
-
"""Determines whether to continue the loop or end."""
|
| 319 |
-
print("Running should_continue")
|
| 320 |
-
outcome = state['agent_outcome']
|
| 321 |
-
current_iter = state.get('current_iteration', 0)
|
| 322 |
-
max_iter = state.get('max_iterations', 10)
|
| 323 |
-
|
| 324 |
-
if isinstance(outcome, AgentFinish):
|
| 325 |
-
print("Decision: End (AgentFinish)")
|
| 326 |
-
return "__end__"
|
| 327 |
-
elif current_iter >= max_iter:
|
| 328 |
-
print("Decision: End (Max Iterations Reached)")
|
| 329 |
-
return "__end__"
|
| 330 |
-
elif isinstance(outcome, AgentAction):
|
| 331 |
-
print("Decision: Continue (Tools)")
|
| 332 |
-
return "tools"
|
| 333 |
-
else:
|
| 334 |
-
print("Decision: End (Unexpected State)")
|
| 335 |
-
return "__end__"
|
| 336 |
-
|
| 337 |
-
# Add a dummy function decorated for the Spaces platform GPU check
|
| 338 |
-
@spaces.GPU()
|
| 339 |
-
def gpu_check():
|
| 340 |
-
"""Dummy function to signal GPU usage to the Hugging Face Spaces platform."""
|
| 341 |
-
print("GPU check function called (decorator signals usage).")
|
| 342 |
-
|
| 343 |
-
# --- Agent Definition ---
|
| 344 |
-
class LangGraphAgent:
|
| 345 |
-
def __init__(self, max_iterations=10):
|
| 346 |
-
print("Initializing LangGraphAgent...")
|
| 347 |
-
self.max_iterations = max_iterations
|
| 348 |
-
|
| 349 |
-
# 1. Define LLM
|
| 350 |
-
print("Loading Hugging Face pipeline...")
|
| 351 |
-
try:
|
| 352 |
-
# Explicitly create the transformers pipeline first
|
| 353 |
-
hf_pipeline = pipeline(
|
| 354 |
-
"text-generation", # Task for instruct models
|
| 355 |
-
model="Qwen/Qwen2-72B-Instruct", # Use Qwen2 72B Instruct model
|
| 356 |
-
# Group model-specific args into model_kwargs
|
| 357 |
-
model_kwargs={"torch_dtype": "auto", "device_map": "auto"},
|
| 358 |
-
# Keep pipeline-specific args separate
|
| 359 |
-
max_new_tokens=1024,
|
| 360 |
-
add_special_tokens=False # Let ChatHuggingFace handle templating/special tokens
|
| 361 |
-
)
|
| 362 |
-
print("Hugging Face transformers pipeline loaded successfully.")
|
| 363 |
-
|
| 364 |
-
# Wrap the transformers pipeline with LangChain's HuggingFacePipeline
|
| 365 |
-
print("Creating HuggingFacePipeline wrapper...")
|
| 366 |
-
lc_pipeline = HuggingFacePipeline(pipeline=hf_pipeline)
|
| 367 |
-
print("HuggingFacePipeline wrapper created successfully.")
|
| 368 |
-
|
| 369 |
-
print("Initializing ChatHuggingFace wrapper...")
|
| 370 |
-
# Pass the LangChain pipeline wrapper to ChatHuggingFace
|
| 371 |
-
self.llm = ChatHuggingFace(llm=lc_pipeline)
|
| 372 |
-
# Pass the raw transformers pipeline directly to ChatHuggingFace
|
| 373 |
-
# self.llm = ChatHuggingFace(pipeline=hf_pipeline)
|
| 374 |
-
print("ChatHuggingFace wrapper initialized successfully.")
|
| 375 |
|
| 376 |
-
except Exception as e:
|
| 377 |
-
print(f"FATAL: Error loading Hugging Face pipeline or ChatHuggingFace: {e}")
|
| 378 |
-
raise
|
| 379 |
-
|
| 380 |
-
# 2. Define Tools
|
| 381 |
-
self.tools = agent_tools
|
| 382 |
-
# Use ToolNode
|
| 383 |
-
self.tool_executor = ToolNode(self.tools)
|
| 384 |
-
print(f"Tools initialized: {[tool.name for tool in self.tools]}")
|
| 385 |
-
|
| 386 |
-
# 3. Create Prompt Template
|
| 387 |
-
self.prompt = ChatPromptTemplate.from_messages(
|
| 388 |
-
[
|
| 389 |
-
("system", SYSTEM_PROMPT),
|
| 390 |
-
MessagesPlaceholder(variable_name="chat_history"),
|
| 391 |
-
("human", "{input}"),
|
| 392 |
-
]
|
| 393 |
-
)
|
| 394 |
-
print("Chat prompt template created.")
|
| 395 |
-
|
| 396 |
-
# Bind tools to the LLM
|
| 397 |
-
print("Binding tools to the LLM...")
|
| 398 |
-
self.llm_with_tools = self.llm.bind_tools(self.tools)
|
| 399 |
-
print("Tools bound successfully.")
|
| 400 |
-
|
| 401 |
-
# 4. Define Graph Nodes
|
| 402 |
-
agent_node_partial = partial(agent_node, llm_with_tools=self.llm_with_tools, prompt=self.prompt)
|
| 403 |
-
tool_node_partial = partial(tool_node, tool_executor=self.tool_executor)
|
| 404 |
-
|
| 405 |
-
# 5. Define Graph Structure
|
| 406 |
-
print("Defining LangGraph workflow...")
|
| 407 |
-
workflow = StateGraph(AgentState)
|
| 408 |
-
workflow.add_node("agent", agent_node_partial)
|
| 409 |
-
workflow.add_node("tools", tool_node_partial)
|
| 410 |
-
workflow.set_entry_point("agent")
|
| 411 |
-
workflow.add_conditional_edges(
|
| 412 |
-
"agent",
|
| 413 |
-
should_continue,
|
| 414 |
-
{"tools": "tools", "__end__": END},
|
| 415 |
-
)
|
| 416 |
-
workflow.add_edge("tools", "agent")
|
| 417 |
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
|
| 430 |
-
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
)
|
| 444 |
|
| 445 |
-
final_state_result = None
|
| 446 |
-
try:
|
| 447 |
-
print(f"Invoking graph for task {task_id}...")
|
| 448 |
-
final_state_result = self.graph.invoke(
|
| 449 |
-
initial_state,
|
| 450 |
-
config={"recursion_limit": self.max_iterations + 10}
|
| 451 |
-
)
|
| 452 |
-
print(f"Graph invocation complete for task {task_id}.")
|
| 453 |
-
|
| 454 |
-
if final_state_result and isinstance(final_state_result.get('agent_outcome'), AgentFinish):
|
| 455 |
-
final_answer = final_state_result['agent_outcome'].return_values['output']
|
| 456 |
-
print(f"Agent finished successfully. Final Answer: {final_answer[:200]}...")
|
| 457 |
-
return final_answer
|
| 458 |
-
else:
|
| 459 |
-
error_msg = "Agent did not finish with a final answer (AgentFinish)."
|
| 460 |
-
print(f"{error_msg} Final State: {str(final_state_result)[:500]}")
|
| 461 |
-
last_message = "No message found in history."
|
| 462 |
-
if isinstance(final_state_result, dict) and 'chat_history' in final_state_result and final_state_result['chat_history']:
|
| 463 |
-
last_message = final_state_result['chat_history'][-1].content
|
| 464 |
-
return f"AGENT_ERROR: {error_msg} Last Message: {last_message[:200]}..."
|
| 465 |
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
if final_state_result:
|
| 470 |
-
error_msg += f" | Final State (partial): {str(final_state_result)[:500]}"
|
| 471 |
-
return f"AGENT_ERROR: {error_msg}"
|
| 472 |
-
finally:
|
| 473 |
-
print(f"--- Finished Agent Run for Task {task_id} ---")
|
| 474 |
-
|
| 475 |
-
# --- Gradio App Logic ---
|
| 476 |
-
# We need to make run_and_submit_all a generator to yield updates
|
| 477 |
-
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
| 478 |
"""
|
| 479 |
-
|
| 480 |
-
and displays the results. Yields status updates. Requires user login.
|
| 481 |
"""
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
|
| 486 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 487 |
|
| 488 |
-
|
| 489 |
-
|
| 490 |
-
|
| 491 |
-
|
| 492 |
-
|
|
|
|
|
|
|
| 493 |
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
|
|
|
|
|
|
| 498 |
|
| 499 |
api_url = DEFAULT_API_URL
|
| 500 |
questions_url = f"{api_url}/questions"
|
| 501 |
submit_url = f"{api_url}/submit"
|
| 502 |
|
| 503 |
-
# 1. Instantiate Agent
|
| 504 |
-
log_capture = io.StringIO()
|
| 505 |
try:
|
| 506 |
-
|
| 507 |
-
full_activity_log += initial_agent_log
|
| 508 |
-
yield initial_agent_log.strip(), full_activity_log, results_df
|
| 509 |
-
with contextlib.redirect_stdout(log_capture): # Capture prints during init
|
| 510 |
-
agent = LangGraphAgent(max_iterations=15)
|
| 511 |
-
print("Agent instantiation successful.")
|
| 512 |
-
init_log_output = log_capture.getvalue()
|
| 513 |
-
full_activity_log += init_log_output
|
| 514 |
-
status_update = "Agent Initialized. Fetching questions..."
|
| 515 |
-
yield status_update, full_activity_log, results_df
|
| 516 |
except Exception as e:
|
| 517 |
-
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
finally:
|
| 523 |
-
# Ensure any captured init log is added even if exception occurs later
|
| 524 |
-
init_log_output = log_capture.getvalue()
|
| 525 |
-
if init_log_output not in full_activity_log: # Avoid duplication
|
| 526 |
-
full_activity_log += init_log_output
|
| 527 |
-
|
| 528 |
-
# Construct agent_code link
|
| 529 |
-
if space_id:
|
| 530 |
-
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
| 531 |
-
code_link_log = f"Agent code link: {agent_code}\n"
|
| 532 |
-
print(code_link_log.strip())
|
| 533 |
-
full_activity_log += code_link_log
|
| 534 |
-
else:
|
| 535 |
-
agent_code = "local_run_no_code_link"
|
| 536 |
-
code_link_log = "Warning: SPACE_ID not found. Using placeholder for agent_code link.\n"
|
| 537 |
-
print(code_link_log.strip())
|
| 538 |
-
full_activity_log += code_link_log
|
| 539 |
|
| 540 |
# 2. Fetch Questions
|
| 541 |
-
|
| 542 |
-
print(fetch_log_start.strip())
|
| 543 |
-
full_activity_log += fetch_log_start
|
| 544 |
-
questions_data = None
|
| 545 |
try:
|
| 546 |
response = requests.get(questions_url, timeout=15)
|
| 547 |
response.raise_for_status()
|
| 548 |
questions_data = response.json()
|
| 549 |
if not questions_data:
|
| 550 |
-
|
| 551 |
-
|
| 552 |
-
|
| 553 |
-
yield empty_q_log.strip(), full_activity_log, results_df
|
| 554 |
-
return
|
| 555 |
-
q_fetch_success = f"Fetched {len(questions_data)} questions.\n"
|
| 556 |
-
print(q_fetch_success.strip())
|
| 557 |
-
full_activity_log += q_fetch_success
|
| 558 |
-
status_update = f"Fetched {len(questions_data)} questions. Running agent..."
|
| 559 |
-
yield status_update, full_activity_log, results_df
|
| 560 |
except requests.exceptions.RequestException as e:
|
| 561 |
-
|
| 562 |
-
|
| 563 |
-
|
| 564 |
-
|
| 565 |
-
|
| 566 |
-
|
| 567 |
-
response_text = response.text if response else "No response object"
|
| 568 |
-
error_msg = f"Error decoding JSON response from questions endpoint: {e}"
|
| 569 |
-
print(error_msg)
|
| 570 |
-
print(f"Response text: {response_text[:500]}")
|
| 571 |
-
full_activity_log += f"\nERROR: {error_msg}\nResponse text: {response_text[:500]}\n"
|
| 572 |
-
yield f"Error decoding server response for questions: {e}", full_activity_log, results_df
|
| 573 |
-
return
|
| 574 |
except Exception as e:
|
| 575 |
-
|
| 576 |
-
|
| 577 |
-
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
| 581 |
-
|
| 582 |
-
|
| 583 |
-
run_start_log = f"Running agent on {total_questions} questions...\n"
|
| 584 |
-
print(run_start_log.strip())
|
| 585 |
-
full_activity_log += run_start_log
|
| 586 |
-
for i, item in enumerate(questions_data):
|
| 587 |
task_id = item.get("task_id")
|
| 588 |
question_text = item.get("question")
|
| 589 |
-
|
| 590 |
-
status_update = f"Running agent... Processing question {i+1}/{total_questions} (Task ID: {task_id})"
|
| 591 |
-
# Yield progress update with the current log
|
| 592 |
-
yield status_update, full_activity_log, pd.DataFrame(results_log)
|
| 593 |
-
|
| 594 |
if not task_id or question_text is None:
|
| 595 |
-
|
| 596 |
-
print(skip_log.strip())
|
| 597 |
-
full_activity_log += skip_log
|
| 598 |
-
results_log.append({"Task ID": task_id or "Missing", "Question": question_text or "Missing", "Submitted Answer": "SKIPPED - Invalid item data"})
|
| 599 |
continue
|
| 600 |
-
|
| 601 |
-
task_start_log = f"--- Running Task {task_id} ---\n"
|
| 602 |
-
print(task_start_log.strip())
|
| 603 |
-
full_activity_log += task_start_log
|
| 604 |
-
log_capture = io.StringIO() # Reset buffer for each agent call
|
| 605 |
try:
|
| 606 |
-
|
| 607 |
-
submitted_answer = agent(question=question_text, task_id=task_id)
|
| 608 |
-
# Also capture the print right after the call if successful
|
| 609 |
-
print(f"Agent returned answer for task {task_id}: {submitted_answer[:100]}...\n")
|
| 610 |
-
|
| 611 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
| 612 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
|
| 613 |
except Exception as e:
|
| 614 |
-
|
| 615 |
-
|
| 616 |
-
# Add error to main log even if not captured by redirect_stdout
|
| 617 |
-
if error_log not in log_capture.getvalue():
|
| 618 |
-
full_activity_log += f"ERROR: {error_log}"
|
| 619 |
-
error_answer = f"AGENT_ERROR: {e}"
|
| 620 |
-
answers_payload.append({"task_id": task_id, "submitted_answer": error_answer})
|
| 621 |
-
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": error_answer})
|
| 622 |
-
finally:
|
| 623 |
-
# Always append the captured log for this task
|
| 624 |
-
current_task_log = log_capture.getvalue()
|
| 625 |
-
full_activity_log += current_task_log
|
| 626 |
-
task_end_log = f"--- Finished Task {task_id} ---\n\n"
|
| 627 |
-
print(task_end_log.strip())
|
| 628 |
-
full_activity_log += task_end_log
|
| 629 |
|
| 630 |
if not answers_payload:
|
| 631 |
-
|
| 632 |
-
|
| 633 |
-
full_activity_log += no_answer_log
|
| 634 |
-
yield no_answer_log.strip(), full_activity_log, pd.DataFrame(results_log)
|
| 635 |
-
return
|
| 636 |
|
| 637 |
# 4. Prepare Submission
|
| 638 |
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
| 639 |
-
status_update = f"Agent finished
|
| 640 |
-
prep_log = status_update + "\n"
|
| 641 |
print(status_update)
|
| 642 |
-
|
| 643 |
-
|
| 644 |
-
|
| 645 |
-
|
| 646 |
-
# 5. Submit Results
|
| 647 |
-
submit_start_log = f"Submitting {len(answers_payload)} answers to: {submit_url}\n"
|
| 648 |
-
print(submit_start_log.strip())
|
| 649 |
-
full_activity_log += submit_start_log
|
| 650 |
try:
|
| 651 |
response = requests.post(submit_url, json=submission_data, timeout=60)
|
| 652 |
response.raise_for_status()
|
|
@@ -658,85 +200,71 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
| 658 |
f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
|
| 659 |
f"Message: {result_data.get('message', 'No message received.')}"
|
| 660 |
)
|
| 661 |
-
|
| 662 |
-
|
| 663 |
-
|
| 664 |
-
yield final_status, full_activity_log, results_df # Final status yield
|
| 665 |
except requests.exceptions.HTTPError as e:
|
| 666 |
error_detail = f"Server responded with status {e.response.status_code}."
|
| 667 |
try:
|
| 668 |
error_json = e.response.json()
|
| 669 |
error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
|
| 670 |
-
except
|
| 671 |
error_detail += f" Response: {e.response.text[:500]}"
|
| 672 |
status_message = f"Submission Failed: {error_detail}"
|
| 673 |
print(status_message)
|
| 674 |
-
|
| 675 |
-
|
| 676 |
except requests.exceptions.Timeout:
|
| 677 |
status_message = "Submission Failed: The request timed out."
|
| 678 |
print(status_message)
|
| 679 |
-
|
| 680 |
-
|
| 681 |
except requests.exceptions.RequestException as e:
|
| 682 |
status_message = f"Submission Failed: Network error - {e}"
|
| 683 |
print(status_message)
|
| 684 |
-
|
| 685 |
-
|
| 686 |
except Exception as e:
|
| 687 |
status_message = f"An unexpected error occurred during submission: {e}"
|
| 688 |
print(status_message)
|
| 689 |
-
|
| 690 |
-
|
|
|
|
| 691 |
|
| 692 |
-
# --- Build Gradio Interface ---
|
| 693 |
with gr.Blocks() as demo:
|
| 694 |
-
gr.Markdown("#
|
| 695 |
-
|
| 696 |
-
|
| 697 |
-
|
| 698 |
-
|
| 699 |
-
|
| 700 |
-
|
| 701 |
-
|
| 702 |
-
|
| 703 |
-
|
| 704 |
-
|
| 705 |
-
|
| 706 |
-
|
| 707 |
-
3. **See Results:** The final score and a table showing each question, the agent's answer, and whether it was correct will be displayed below. You'll also see status updates during the run.
|
| 708 |
-
**Technical Details (for the curious):**
|
| 709 |
-
* **Agent Framework:** LangGraph (helps orchestrate the agent's steps)
|
| 710 |
-
* **Language Model:** `Qwen/Qwen2-72B-Instruct`
|
| 711 |
-
* **Tools Available to Agent:** DuckDuckGo Web Search, File Downloader
|
| 712 |
-
* **Key Feature:** The agent caches results from tools, so if it needs the same information again, it can retrieve it quickly without re-running the tool.
|
| 713 |
-
**(Optional) For Developers:** You can clone this Space and modify the `app.py` file to experiment with different prompts, agent logic, or even different language models or tools.
|
| 714 |
-
"""
|
| 715 |
-
gr.Markdown(instructions_markdown)
|
| 716 |
|
| 717 |
gr.LoginButton()
|
| 718 |
|
| 719 |
run_button = gr.Button("Run Evaluation & Submit All Answers")
|
| 720 |
|
| 721 |
-
status_output = gr.Textbox(label="Run Status / Submission Result", lines=
|
| 722 |
-
|
| 723 |
results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
|
| 724 |
|
| 725 |
run_button.click(
|
| 726 |
fn=run_and_submit_all,
|
| 727 |
-
|
| 728 |
-
outputs=[status_output, agent_activity_feed, results_table]
|
| 729 |
)
|
| 730 |
|
| 731 |
-
# --- Main Execution Block ---
|
| 732 |
if __name__ == "__main__":
|
| 733 |
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
| 734 |
-
|
| 735 |
-
# Call the GPU check function early during startup
|
| 736 |
-
gpu_check()
|
| 737 |
-
|
| 738 |
space_host_startup = os.getenv("SPACE_HOST")
|
| 739 |
-
space_id_startup = os.getenv("SPACE_ID")
|
| 740 |
|
| 741 |
if space_host_startup:
|
| 742 |
print(f"✅ SPACE_HOST found: {space_host_startup}")
|
|
@@ -744,7 +272,7 @@ if __name__ == "__main__":
|
|
| 744 |
else:
|
| 745 |
print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
|
| 746 |
|
| 747 |
-
if space_id_startup:
|
| 748 |
print(f"✅ SPACE_ID found: {space_id_startup}")
|
| 749 |
print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
|
| 750 |
print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
|
|
@@ -753,7 +281,5 @@ if __name__ == "__main__":
|
|
| 753 |
|
| 754 |
print("-"*(60 + len(" App Starting ")) + "\n")
|
| 755 |
|
| 756 |
-
print("Launching Gradio Interface for
|
| 757 |
-
# Set share=False for security unless needed
|
| 758 |
-
# Set debug=True for more detailed logs during development
|
| 759 |
demo.launch(debug=True, share=False)
|
|
|
|
| 1 |
import os
|
| 2 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
import requests
|
| 4 |
import inspect
|
| 5 |
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
+
# (Keep Constants as is)
|
| 8 |
# --- Constants ---
|
| 9 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 10 |
|
| 11 |
+
# --- Basic Agent Definition ---
|
| 12 |
+
# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
|
| 13 |
+
|
| 14 |
+
import requests
|
| 15 |
+
import json
|
| 16 |
+
from typing import List, Dict, Any
|
| 17 |
+
from smolagents import CodeAgent, DuckDuckGoSearchTool, FinalAnswerTool, tool
|
| 18 |
+
from smolagents.models import ChatMessage
|
| 19 |
+
import datetime
|
| 20 |
+
import pytz
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
@tool
def get_current_time_in_timezone(timezone: str) -> str:
    """
    A tool that fetches the current local time in a specified timezone.

    Args:
        timezone: A string representing a valid timezone (e.g., 'America/New_York').
    """
    try:
        # Resolve the zone name first so an invalid name raises before formatting.
        zone = pytz.timezone(timezone)
        stamp = datetime.datetime.now(zone).strftime("%Y-%m-%d %H:%M:%S")
        return f"The current local time in {timezone} is: {stamp}"
    except Exception as e:
        # pytz raises UnknownTimeZoneError for bad names; report it as text
        # so the agent can read the failure instead of crashing.
        return f"Error fetching time for timezone '{timezone}': {str(e)}"
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
+
@tool
def calculate(expression: str) -> str:
    """
    A tool that evaluates mathematical expressions safely.

    Args:
        expression: A mathematical expression (e.g., '2 + 2 * 3').
    """
    import ast
    import operator

    # Whitelisted operators only. Note: '**' is deliberately NOT supported —
    # the old character filter let it through and eval('9**9**9') could hang
    # the process with an enormous exponentiation.
    _BIN_OPS = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
    }
    _UNARY_OPS = {ast.UAdd: operator.pos, ast.USub: operator.neg}

    def _eval_node(node):
        # Recursively evaluate a whitelisted arithmetic AST node.
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in _BIN_OPS:
            return _BIN_OPS[type(node.op)](_eval_node(node.left), _eval_node(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in _UNARY_OPS:
            return _UNARY_OPS[type(node.op)](_eval_node(node.operand))
        raise ValueError("unsupported operation in expression")

    try:
        # Keep the original cheap pre-filter as a first line of defense.
        allowed_chars = set('0123456789+-*/(). ')
        if not all(c in allowed_chars for c in expression):
            return "Error: Expression contains invalid characters"
        # Evaluate via a restricted AST walk instead of eval(). eval() runs
        # with full builtins and, combined with the permissive char filter,
        # was exploitable / DoS-able by agent-generated input.
        result = _eval_node(ast.parse(expression, mode="eval").body)
        return f"The result of {expression} is: {result}"
    except Exception as e:
        return f"Error evaluating expression: {str(e)}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
|
| 60 |
+
@tool
def search_web(query: str) -> str:
    """
    A tool that searches the web for information using DuckDuckGo.

    Args:
        query: The search query string.
    """
    try:
        # Imported lazily so the module still loads if the package is absent;
        # a missing dependency is reported through the except branch instead.
        from duckduckgo_search import DDGS

        hits = list(DDGS().text(query, max_results=5))

        if not hits:
            return f"No results found for: {query}"

        # Build one formatted entry per hit, then join everything once.
        parts = [f"Search results for '{query}':\n\n"]
        for rank, hit in enumerate(hits, 1):
            parts.append(f"{rank}. {hit.get('title', 'N/A')}\n")
            parts.append(f" {hit.get('body', 'N/A')[:200]}...\n")
            parts.append(f" URL: {hit.get('href', 'N/A')}\n\n")

        return "".join(parts)
    except Exception as e:
        return f"Error searching web: {str(e)}"
|
|
|
|
| 85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
|
| 87 |
+
def create_agent(model_name: str = "llama3.2:3b-instruct-q4_K_M",
                 max_steps: int = 6,
                 verbosity: int = 1) -> CodeAgent:
    """
    Create a CodeAgent with the specified model and tools.

    Args:
        model_name: Label printed at startup. NOTE(review): the actual model
            is hard-coded to Qwen2.5-Coder-32B below; wire this parameter
            into InferenceClientModel if per-call model selection is wanted.
        max_steps: Maximum reasoning steps the agent may take per question.
        verbosity: Verbosity level forwarded to CodeAgent.

    Returns:
        A configured CodeAgent with the time, calculator, and web-search
        tools plus FinalAnswerTool.
    """
    # Bug fix: InferenceClientModel was used below but never imported
    # anywhere in this file, so every call raised NameError. Import it
    # locally to keep the fix self-contained.
    from smolagents import InferenceClientModel

    print(f"\nCreating agent with model: {model_name}")
    print(f"Max steps: {max_steps}")
    print(f"Verbosity: {verbosity}\n")
    model = InferenceClientModel(
        "Qwen/Qwen2.5-Coder-32B-Instruct", provider="together", max_tokens=8096)

    agent = CodeAgent(
        model=model,
        tools=[
            get_current_time_in_timezone,
            calculate,
            search_web,
            FinalAnswerTool()
        ],
        max_steps=max_steps,
        verbosity_level=verbosity
    )

    return agent
|
| 112 |
|
| 113 |
+
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
| 114 |
+
"""
|
| 115 |
+
Fetches all questions, runs the BasicAgent on them, submits all answers,
|
| 116 |
+
and displays the results.
|
| 117 |
+
"""
|
| 118 |
+
# --- Determine HF Space Runtime URL and Repo URL ---
|
| 119 |
+
space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
|
| 120 |
|
| 121 |
+
if profile:
|
| 122 |
+
username= f"{profile.username}"
|
| 123 |
+
print(f"User logged in: {username}")
|
| 124 |
+
else:
|
| 125 |
+
print("User not logged in.")
|
| 126 |
+
return "Please Login to Hugging Face with the button.", None
|
| 127 |
|
| 128 |
api_url = DEFAULT_API_URL
|
| 129 |
questions_url = f"{api_url}/questions"
|
| 130 |
submit_url = f"{api_url}/submit"
|
| 131 |
|
| 132 |
+
# 1. Instantiate Agent ( modify this part to create your agent)
|
|
|
|
| 133 |
try:
|
| 134 |
+
agent = create_agent()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
except Exception as e:
|
| 136 |
+
print(f"Error instantiating agent: {e}")
|
| 137 |
+
return f"Error initializing agent: {e}", None
|
| 138 |
+
    # In the case of an app running as a Hugging Face space, this link points toward your codebase (useful for others, so please keep it public)
|
| 139 |
+
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
| 140 |
+
print(agent_code)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
|
| 142 |
# 2. Fetch Questions
|
| 143 |
+
print(f"Fetching questions from: {questions_url}")
|
|
|
|
|
|
|
|
|
|
| 144 |
try:
|
| 145 |
response = requests.get(questions_url, timeout=15)
|
| 146 |
response.raise_for_status()
|
| 147 |
questions_data = response.json()
|
| 148 |
if not questions_data:
|
| 149 |
+
print("Fetched questions list is empty.")
|
| 150 |
+
return "Fetched questions list is empty or invalid format.", None
|
| 151 |
+
print(f"Fetched {len(questions_data)} questions.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
except requests.exceptions.RequestException as e:
|
| 153 |
+
print(f"Error fetching questions: {e}")
|
| 154 |
+
return f"Error fetching questions: {e}", None
|
| 155 |
+
except requests.exceptions.JSONDecodeError as e:
|
| 156 |
+
print(f"Error decoding JSON response from questions endpoint: {e}")
|
| 157 |
+
print(f"Response text: {response.text[:500]}")
|
| 158 |
+
return f"Error decoding server response for questions: {e}", None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
except Exception as e:
|
| 160 |
+
print(f"An unexpected error occurred fetching questions: {e}")
|
| 161 |
+
return f"An unexpected error occurred fetching questions: {e}", None
|
| 162 |
+
|
| 163 |
+
# 3. Run your Agent
|
| 164 |
+
results_log = []
|
| 165 |
+
answers_payload = []
|
| 166 |
+
print(f"Running agent on {len(questions_data)} questions...")
|
| 167 |
+
for item in questions_data:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
task_id = item.get("task_id")
|
| 169 |
question_text = item.get("question")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
if not task_id or question_text is None:
|
| 171 |
+
print(f"Skipping item with missing task_id or question: {item}")
|
|
|
|
|
|
|
|
|
|
| 172 |
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
try:
|
| 174 |
+
submitted_answer = agent(question_text)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
| 176 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
|
| 177 |
except Exception as e:
|
| 178 |
+
print(f"Error running agent on task {task_id}: {e}")
|
| 179 |
+
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
|
| 181 |
if not answers_payload:
|
| 182 |
+
print("Agent did not produce any answers to submit.")
|
| 183 |
+
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
|
|
|
|
|
|
|
|
|
| 184 |
|
| 185 |
# 4. Prepare Submission
|
| 186 |
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
| 187 |
+
status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
|
|
|
|
| 188 |
print(status_update)
|
| 189 |
+
|
| 190 |
+
# 5. Submit
|
| 191 |
+
print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
try:
|
| 193 |
response = requests.post(submit_url, json=submission_data, timeout=60)
|
| 194 |
response.raise_for_status()
|
|
|
|
| 200 |
f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
|
| 201 |
f"Message: {result_data.get('message', 'No message received.')}"
|
| 202 |
)
|
| 203 |
+
print("Submission successful.")
|
| 204 |
+
results_df = pd.DataFrame(results_log)
|
| 205 |
+
return final_status, results_df
|
|
|
|
| 206 |
except requests.exceptions.HTTPError as e:
|
| 207 |
error_detail = f"Server responded with status {e.response.status_code}."
|
| 208 |
try:
|
| 209 |
error_json = e.response.json()
|
| 210 |
error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
|
| 211 |
+
except requests.exceptions.JSONDecodeError:
|
| 212 |
error_detail += f" Response: {e.response.text[:500]}"
|
| 213 |
status_message = f"Submission Failed: {error_detail}"
|
| 214 |
print(status_message)
|
| 215 |
+
results_df = pd.DataFrame(results_log)
|
| 216 |
+
return status_message, results_df
|
| 217 |
except requests.exceptions.Timeout:
|
| 218 |
status_message = "Submission Failed: The request timed out."
|
| 219 |
print(status_message)
|
| 220 |
+
results_df = pd.DataFrame(results_log)
|
| 221 |
+
return status_message, results_df
|
| 222 |
except requests.exceptions.RequestException as e:
|
| 223 |
status_message = f"Submission Failed: Network error - {e}"
|
| 224 |
print(status_message)
|
| 225 |
+
results_df = pd.DataFrame(results_log)
|
| 226 |
+
return status_message, results_df
|
| 227 |
except Exception as e:
|
| 228 |
status_message = f"An unexpected error occurred during submission: {e}"
|
| 229 |
print(status_message)
|
| 230 |
+
results_df = pd.DataFrame(results_log)
|
| 231 |
+
return status_message, results_df
|
| 232 |
+
|
| 233 |
|
| 234 |
+
# --- Build Gradio Interface using Blocks ---
with gr.Blocks() as demo:
    # Page title and usage instructions shown above the controls.
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**
        1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
        ---
        **Disclaimers:**
        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
        """
    )

    # OAuth login widget; supplies the profile consumed by run_and_submit_all.
    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    # Read-only status box for progress messages and the submission result.
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    # Removed max_rows=10 from DataFrame constructor
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # No explicit inputs: Gradio injects the OAuthProfile (or None) because
    # run_and_submit_all annotates its parameter as gr.OAuthProfile | None.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )
|
| 262 |
|
|
|
|
| 263 |
if __name__ == "__main__":
|
| 264 |
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
| 265 |
+
# Check for SPACE_HOST and SPACE_ID at startup for information
|
|
|
|
|
|
|
|
|
|
| 266 |
space_host_startup = os.getenv("SPACE_HOST")
|
| 267 |
+
space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
|
| 268 |
|
| 269 |
if space_host_startup:
|
| 270 |
print(f"✅ SPACE_HOST found: {space_host_startup}")
|
|
|
|
| 272 |
else:
|
| 273 |
print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
|
| 274 |
|
| 275 |
+
if space_id_startup: # Print repo URLs if SPACE_ID is found
|
| 276 |
print(f"✅ SPACE_ID found: {space_id_startup}")
|
| 277 |
print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
|
| 278 |
print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
|
|
|
|
| 281 |
|
| 282 |
print("-"*(60 + len(" App Starting ")) + "\n")
|
| 283 |
|
| 284 |
+
print("Launching Gradio Interface for Basic Agent Evaluation...")
|
|
|
|
|
|
|
| 285 |
demo.launch(debug=True, share=False)
|