Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,7 +6,8 @@ import pandas as pd
|
|
| 6 |
import time
|
| 7 |
import re
|
| 8 |
from markdownify import markdownify
|
| 9 |
-
from smolagents import Tool, DuckDuckGoSearchTool, CodeAgent, WikipediaSearchTool
|
|
|
|
| 10 |
from datetime import datetime, timedelta
|
| 11 |
import threading
|
| 12 |
|
|
@@ -14,10 +15,10 @@ import threading
|
|
| 14 |
# --- Constants ---
|
| 15 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 16 |
|
| 17 |
-
# Rate limiting configuration
|
| 18 |
-
RATE_LIMIT_REQUESTS =
|
| 19 |
RATE_LIMIT_WINDOW = 60 # 60 seconds
|
| 20 |
-
REQUEST_DELAY =
|
| 21 |
|
| 22 |
class RateLimiter:
|
| 23 |
def __init__(self, max_requests=RATE_LIMIT_REQUESTS, window_seconds=RATE_LIMIT_WINDOW):
|
|
@@ -109,17 +110,27 @@ class VisitWebpageTool(Tool):
|
|
| 109 |
def __init__(self, *args, **kwargs):
|
| 110 |
self.is_initialized = False
|
| 111 |
|
| 112 |
-
# ---
|
| 113 |
class BasicAgent:
|
| 114 |
def __init__(self):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
self.rate_limiter = RateLimiter()
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
|
|
|
|
|
|
|
|
|
| 123 |
|
| 124 |
def __call__(self, question: str, max_retries: int = 3) -> str:
|
| 125 |
print(f"Agent received question (first 50 chars): {question[:50]}...")
|
|
@@ -129,9 +140,14 @@ class BasicAgent:
|
|
| 129 |
# Apply rate limiting
|
| 130 |
self.rate_limiter.wait_if_needed()
|
| 131 |
|
| 132 |
-
#
|
| 133 |
-
|
| 134 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
return agent_answer
|
| 136 |
|
| 137 |
except Exception as e:
|
|
@@ -152,6 +168,30 @@ class BasicAgent:
|
|
| 152 |
return f"AGENT_ERROR: {error_msg}"
|
| 153 |
|
| 154 |
return "MAX_RETRIES_EXCEEDED"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
|
| 156 |
def download_file(self, task_id: str) -> str:
|
| 157 |
"""
|
|
@@ -196,7 +236,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None, progress=gr.Progress()):
|
|
| 196 |
submit_url = f"{api_url}/submit"
|
| 197 |
|
| 198 |
# 1. Instantiate Agent
|
| 199 |
-
progress(0, desc="Initializing agent...")
|
| 200 |
try:
|
| 201 |
agent = BasicAgent()
|
| 202 |
except Exception as e:
|
|
@@ -231,7 +271,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None, progress=gr.Progress()):
|
|
| 231 |
results_log = []
|
| 232 |
answers_payload = []
|
| 233 |
total_questions = len(questions_data)
|
| 234 |
-
print(f"Running agent on {total_questions} questions...")
|
| 235 |
|
| 236 |
for i, item in enumerate(questions_data):
|
| 237 |
progress((0.1 + 0.8 * i / total_questions), desc=f"Processing question {i+1}/{total_questions}")
|
|
@@ -251,7 +291,15 @@ def run_and_submit_all(profile: gr.OAuthProfile | None, progress=gr.Progress()):
|
|
| 251 |
if requires_file:
|
| 252 |
file_path = agent.download_file(task_id)
|
| 253 |
print(f"File for task {task_id} saved at: {file_path}")
|
| 254 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 255 |
else:
|
| 256 |
submitted_answer = agent(question_text)
|
| 257 |
|
|
@@ -280,7 +328,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None, progress=gr.Progress()):
|
|
| 280 |
# 4. Prepare Submission
|
| 281 |
progress(0.9, desc="Submitting answers...")
|
| 282 |
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
| 283 |
-
status_update = f"
|
| 284 |
print(status_update)
|
| 285 |
|
| 286 |
# 5. Submit
|
|
@@ -296,6 +344,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None, progress=gr.Progress()):
|
|
| 296 |
f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
|
| 297 |
f"Processed: {len(results_log)} questions\n"
|
| 298 |
f"Successfully submitted: {len(answers_payload)} answers\n"
|
|
|
|
| 299 |
f"Message: {result_data.get('message', 'No message received.')}"
|
| 300 |
)
|
| 301 |
print("Submission successful.")
|
|
@@ -331,24 +380,24 @@ def run_and_submit_all(profile: gr.OAuthProfile | None, progress=gr.Progress()):
|
|
| 331 |
|
| 332 |
# --- Build Gradio Interface using Blocks ---
|
| 333 |
with gr.Blocks() as demo:
|
| 334 |
-
gr.Markdown("#
|
| 335 |
gr.Markdown(
|
| 336 |
"""
|
| 337 |
**Instructions:**
|
| 338 |
|
| 339 |
1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc.
|
| 340 |
-
2.
|
| 341 |
-
3.
|
|
|
|
| 342 |
|
| 343 |
---
|
| 344 |
-
**
|
| 345 |
-
-
|
| 346 |
-
-
|
| 347 |
-
-
|
| 348 |
-
-
|
| 349 |
-
- ✅ Detailed status reporting
|
| 350 |
|
| 351 |
-
**Note:** This
|
| 352 |
"""
|
| 353 |
)
|
| 354 |
|
|
@@ -367,6 +416,14 @@ with gr.Blocks() as demo:
|
|
| 367 |
|
| 368 |
if __name__ == "__main__":
|
| 369 |
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 370 |
space_host_startup = os.getenv("SPACE_HOST")
|
| 371 |
space_id_startup = os.getenv("SPACE_ID")
|
| 372 |
|
|
@@ -385,5 +442,5 @@ if __name__ == "__main__":
|
|
| 385 |
|
| 386 |
print("-"*(60 + len(" App Starting ")) + "\n")
|
| 387 |
|
| 388 |
-
print("Launching Gradio Interface for
|
| 389 |
demo.launch(debug=True, share=False)
|
|
|
|
| 6 |
import time
|
| 7 |
import re
|
| 8 |
from markdownify import markdownify
|
| 9 |
+
from smolagents import Tool, DuckDuckGoSearchTool, CodeAgent, WikipediaSearchTool
|
| 10 |
+
from langchain_anthropic import ChatAnthropic
|
| 11 |
from datetime import datetime, timedelta
|
| 12 |
import threading
|
| 13 |
|
|
|
|
| 15 |
# --- Constants ---
|
| 16 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 17 |
|
| 18 |
+
# Rate limiting configuration for Anthropic (more generous limits)
|
| 19 |
+
RATE_LIMIT_REQUESTS = 50 # Anthropic has higher rate limits
|
| 20 |
RATE_LIMIT_WINDOW = 60 # 60 seconds
|
| 21 |
+
REQUEST_DELAY = 1 # Reduced delay since Anthropic has better rate limits
|
| 22 |
|
| 23 |
class RateLimiter:
|
| 24 |
def __init__(self, max_requests=RATE_LIMIT_REQUESTS, window_seconds=RATE_LIMIT_WINDOW):
|
|
|
|
| 110 |
def __init__(self, *args, **kwargs):
|
| 111 |
self.is_initialized = False
|
| 112 |
|
| 113 |
+
# --- Custom Agent using Claude directly ---
|
| 114 |
class BasicAgent:
|
| 115 |
def __init__(self):
|
| 116 |
+
# Initialize Anthropic Claude model
|
| 117 |
+
API_KEY = os.getenv("ANTHROPIC_API_KEY")
|
| 118 |
+
if not API_KEY:
|
| 119 |
+
raise ValueError("ANTHROPIC_API_KEY not found in environment variables.")
|
| 120 |
+
|
| 121 |
+
self.model_name = "claude-3-haiku-20240307"
|
| 122 |
+
self.chat_model = ChatAnthropic(model=self.model_name, anthropic_api_key=API_KEY)
|
| 123 |
self.rate_limiter = RateLimiter()
|
| 124 |
+
|
| 125 |
+
# Initialize tools
|
| 126 |
+
self.tools = {
|
| 127 |
+
'search': DuckDuckGoSearchTool(),
|
| 128 |
+
'wikipedia': WikipediaSearchTool(),
|
| 129 |
+
'webpage': VisitWebpageTool(),
|
| 130 |
+
'download': DownloadTaskAttachmentTool()
|
| 131 |
+
}
|
| 132 |
+
|
| 133 |
+
print(f"BasicAgent initialized with Claude model: {self.model_name}")
|
| 134 |
|
| 135 |
def __call__(self, question: str, max_retries: int = 3) -> str:
|
| 136 |
print(f"Agent received question (first 50 chars): {question[:50]}...")
|
|
|
|
| 140 |
# Apply rate limiting
|
| 141 |
self.rate_limiter.wait_if_needed()
|
| 142 |
|
| 143 |
+
# Create a comprehensive prompt for Claude
|
| 144 |
+
prompt = self._create_prompt(question)
|
| 145 |
+
|
| 146 |
+
# Get response from Claude
|
| 147 |
+
response = self.chat_model.invoke(prompt)
|
| 148 |
+
agent_answer = response.content
|
| 149 |
+
|
| 150 |
+
print(f"Agent returning answer: {agent_answer[:100]}...")
|
| 151 |
return agent_answer
|
| 152 |
|
| 153 |
except Exception as e:
|
|
|
|
| 168 |
return f"AGENT_ERROR: {error_msg}"
|
| 169 |
|
| 170 |
return "MAX_RETRIES_EXCEEDED"
|
| 171 |
+
|
| 172 |
+
def _create_prompt(self, question: str) -> str:
|
| 173 |
+
"""Create a comprehensive prompt for Claude to answer the question"""
|
| 174 |
+
prompt = f"""You are a helpful AI agent tasked with answering questions accurately and comprehensively.
|
| 175 |
+
|
| 176 |
+
You have access to the following tools if needed:
|
| 177 |
+
- Web search for current information
|
| 178 |
+
- Wikipedia search for factual information
|
| 179 |
+
- Webpage visiting for detailed content
|
| 180 |
+
- File downloading for task-specific files
|
| 181 |
+
|
| 182 |
+
Question: {question}
|
| 183 |
+
|
| 184 |
+
Please provide a clear, accurate, and comprehensive answer. If you need to use external tools or resources, describe what you would do, but provide your best direct answer based on your training data.
|
| 185 |
+
|
| 186 |
+
If the question involves:
|
| 187 |
+
- Current events or recent information: Mention that you would use web search
|
| 188 |
+
- Specific factual lookups: Mention that you would use Wikipedia or web search
|
| 189 |
+
- File analysis: Mention that you would download and analyze the file
|
| 190 |
+
- Code or technical problems: Provide working solutions with explanations
|
| 191 |
+
|
| 192 |
+
Answer:"""
|
| 193 |
+
|
| 194 |
+
return prompt
|
| 195 |
|
| 196 |
def download_file(self, task_id: str) -> str:
|
| 197 |
"""
|
|
|
|
| 236 |
submit_url = f"{api_url}/submit"
|
| 237 |
|
| 238 |
# 1. Instantiate Agent
|
| 239 |
+
progress(0, desc="Initializing Claude agent...")
|
| 240 |
try:
|
| 241 |
agent = BasicAgent()
|
| 242 |
except Exception as e:
|
|
|
|
| 271 |
results_log = []
|
| 272 |
answers_payload = []
|
| 273 |
total_questions = len(questions_data)
|
| 274 |
+
print(f"Running Claude agent on {total_questions} questions...")
|
| 275 |
|
| 276 |
for i, item in enumerate(questions_data):
|
| 277 |
progress((0.1 + 0.8 * i / total_questions), desc=f"Processing question {i+1}/{total_questions}")
|
|
|
|
| 291 |
if requires_file:
|
| 292 |
file_path = agent.download_file(task_id)
|
| 293 |
print(f"File for task {task_id} saved at: {file_path}")
|
| 294 |
+
# Read file content and include in question
|
| 295 |
+
try:
|
| 296 |
+
with open(file_path, 'r', encoding='utf-8') as f:
|
| 297 |
+
file_content = f.read()
|
| 298 |
+
enhanced_question = f"{question_text}\n\nFile content:\n{file_content}"
|
| 299 |
+
except:
|
| 300 |
+
# If can't read as text, just mention the file path
|
| 301 |
+
enhanced_question = f"{question_text}\n\nFile downloaded to: {file_path}"
|
| 302 |
+
submitted_answer = agent(enhanced_question)
|
| 303 |
else:
|
| 304 |
submitted_answer = agent(question_text)
|
| 305 |
|
|
|
|
| 328 |
# 4. Prepare Submission
|
| 329 |
progress(0.9, desc="Submitting answers...")
|
| 330 |
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
| 331 |
+
status_update = f"Claude agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
|
| 332 |
print(status_update)
|
| 333 |
|
| 334 |
# 5. Submit
|
|
|
|
| 344 |
f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
|
| 345 |
f"Processed: {len(results_log)} questions\n"
|
| 346 |
f"Successfully submitted: {len(answers_payload)} answers\n"
|
| 347 |
+
f"Model used: Claude 3 Haiku\n"
|
| 348 |
f"Message: {result_data.get('message', 'No message received.')}"
|
| 349 |
)
|
| 350 |
print("Submission successful.")
|
|
|
|
| 380 |
|
| 381 |
# --- Build Gradio Interface using Blocks ---
|
| 382 |
with gr.Blocks() as demo:
|
| 383 |
+
gr.Markdown("# Claude Agent Evaluation Runner")
|
| 384 |
gr.Markdown(
|
| 385 |
"""
|
| 386 |
**Instructions:**
|
| 387 |
|
| 388 |
1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc.
|
| 389 |
+
2. Make sure you have set your `ANTHROPIC_API_KEY` environment variable.
|
| 390 |
+
3. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
|
| 391 |
+
4. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your Claude agent, submit answers, and see the score.
|
| 392 |
|
| 393 |
---
|
| 394 |
+
**Model Configuration:**
|
| 395 |
+
- 🤖 Using Claude 3 Haiku via Anthropic API
|
| 396 |
+
- ⚡ Higher rate limits compared to free tier models
|
| 397 |
+
- 🛠️ Custom prompt engineering for better responses
|
| 398 |
+
- 📁 Enhanced file handling for task attachments
|
|
|
|
| 399 |
|
| 400 |
+
**Note:** This version uses your Anthropic Claude model directly instead of smolagents CodeAgent.
|
| 401 |
"""
|
| 402 |
)
|
| 403 |
|
|
|
|
| 416 |
|
| 417 |
if __name__ == "__main__":
|
| 418 |
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
| 419 |
+
|
| 420 |
+
# Check for required API key
|
| 421 |
+
api_key_check = os.getenv("ANTHROPIC_API_KEY")
|
| 422 |
+
if api_key_check:
|
| 423 |
+
print("✅ ANTHROPIC_API_KEY found")
|
| 424 |
+
else:
|
| 425 |
+
print("❌ ANTHROPIC_API_KEY not found - please set this environment variable")
|
| 426 |
+
|
| 427 |
space_host_startup = os.getenv("SPACE_HOST")
|
| 428 |
space_id_startup = os.getenv("SPACE_ID")
|
| 429 |
|
|
|
|
| 442 |
|
| 443 |
print("-"*(60 + len(" App Starting ")) + "\n")
|
| 444 |
|
| 445 |
+
print("Launching Gradio Interface for Claude Agent Evaluation...")
|
| 446 |
demo.launch(debug=True, share=False)
|