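# Source: app.py as shown in a Hugging Face Space file view (page chrome: "raw", "history", "blame").
# Last commit: "Update app.py" (f1253fd, verified) by 0f3dy; file size 19.1 kB.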
import os
import gradio as gr
import requests
import inspect
import pandas as pd
import time
import re
from markdownify import markdownify
from smolagents import Tool, DuckDuckGoSearchTool, CodeAgent, WikipediaSearchTool
from langchain_anthropic import ChatAnthropic
from datetime import datetime, timedelta
import threading
# (Keep Constants as is)
# --- Constants ---
# Base URL of the scoring service. The rest of this file appends
# "/questions", "/submit" and "/files/{task_id}" to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Rate limiting configuration for Anthropic (more generous limits)
RATE_LIMIT_REQUESTS = 50  # Anthropic has higher rate limits
RATE_LIMIT_WINDOW = 60  # 60 seconds
REQUEST_DELAY = 1  # Reduced delay since Anthropic has better rate limits


class RateLimiter:
    """Thread-safe sliding-window limiter: at most ``max_requests`` per window.

    Timestamps are taken from ``time.monotonic()`` so that wall-clock jumps
    (DST changes, NTP corrections) can neither produce huge waits nor let
    requests slip through early.

    Attributes:
        max_requests: Number of requests allowed inside one window.
        window_seconds: Length of the sliding window, in seconds.
        requests: Monotonic timestamps (floats, oldest first) of the requests
            that are still inside the window.
        lock: Guards ``requests``; it is held while sleeping so that waiting
            callers are admitted one at a time.
    """

    def __init__(self, max_requests=RATE_LIMIT_REQUESTS, window_seconds=RATE_LIMIT_WINDOW):
        self.max_requests = max_requests
        self.window_seconds = window_seconds
        self.requests = []
        self.lock = threading.Lock()

    def _prune(self, now):
        """Drop timestamps that have fallen out of the window ending at *now*."""
        cutoff = now - self.window_seconds
        self.requests = [stamp for stamp in self.requests if stamp > cutoff]

    def wait_if_needed(self):
        """Block until a request may be sent, then record that request.

        When the window is full, sleeps until the oldest recorded request
        expires (plus a one second buffer) before recording the new one.
        """
        with self.lock:
            now = time.monotonic()
            self._prune(now)

            # ``self.requests`` is checked for emptiness first so that a
            # non-positive ``max_requests`` cannot index an empty list.
            if self.requests and len(self.requests) >= self.max_requests:
                # Timestamps are appended in monotonic order under the lock,
                # so the first entry is always the oldest one.
                wait_time = self.requests[0] + self.window_seconds - now
                if wait_time > 0:
                    print(f"Rate limit reached. Waiting {wait_time:.1f} seconds...")
                    time.sleep(wait_time + 1)  # Add 1 second buffer
                    # Re-read the clock: the request happens *after* the sleep,
                    # so recording the pre-sleep time would under-count the
                    # window for subsequent callers.
                    now = time.monotonic()
                    self._prune(now)

            # Record this request
            self.requests.append(now)
class DownloadTaskAttachmentTool(Tool):
    """Tool that saves the attachment of a task to ``downloads/<task_id>.file``."""

    name = "download_file"
    description = "Downloads the file attached to the task ID"
    inputs = {'task_id': {'type': 'string', 'description': 'The task id to download attachment from.'}}
    output_type = "string"

    def forward(self, task_id: str) -> str:
        """
        Downloads a file associated with the given task ID.
        Returns the file path where the file is saved locally.

        Args:
            task_id: Identifier appended to ``{DEFAULT_API_URL}/files/``.

        Returns:
            The relative path ``downloads/<task_id>.file``.

        Raises:
            requests.exceptions.RequestException: On any network or HTTP
                error; the error is logged and re-raised.
        """
        file_url = f"{DEFAULT_API_URL}/files/{task_id}"
        local_file_path = f"downloads/{task_id}.file"

        print(f"Downloading file for task ID {task_id} from {file_url}...")
        try:
            # A streamed response keeps its connection checked out until it is
            # fully read or closed; the context manager guarantees the release
            # even when the status check or the copy loop raises.
            with requests.get(file_url, stream=True, timeout=15) as response:
                response.raise_for_status()
                os.makedirs("downloads", exist_ok=True)
                with open(local_file_path, "wb") as file:
                    for chunk in response.iter_content(chunk_size=8192):
                        file.write(chunk)
            print(f"File downloaded successfully: {local_file_path}")
            return local_file_path
        except requests.exceptions.RequestException as e:
            print(f"Error downloading file for task {task_id}: {e}")
            raise

    def __init__(self, *args, **kwargs):
        # Only marks the tool as not initialized; the base-class initializer
        # is not invoked. NOTE(review): confirm skipping it is intended.
        self.is_initialized = False
class VisitWebpageTool(Tool):
    """Tool that fetches a URL and returns the page as truncated Markdown."""

    name = "visit_webpage"
    description = "Visits a webpage at the given url and reads its content as a markdown string. Use this to browse webpages."
    inputs = {'url': {'type': 'string', 'description': 'The url of the webpage to visit.'}}
    output_type = "string"

    def __init__(self, *args, **kwargs):
        # Only flags the tool as not yet initialized.
        self.is_initialized = False

    def forward(self, url: str) -> str:
        """Fetch *url* and return its content converted to Markdown.

        Failures while fetching or converting are reported as a message
        string rather than raised; only a missing dependency raises
        ``ImportError``.
        """
        try:
            import requests
            from markdownify import markdownify
            from requests.exceptions import RequestException

            from smolagents.utils import truncate_content
        except ImportError as e:
            raise ImportError(
                "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
            ) from e

        try:
            page = requests.get(url, timeout=20)
            page.raise_for_status()

            as_markdown = markdownify(page.text).strip()
            # Squash runs of three or more newlines down to one blank line.
            collapsed = re.sub(r"\n{3,}", "\n\n", as_markdown)

            return truncate_content(collapsed, 10000)
        except requests.exceptions.Timeout:
            return "The request timed out. Please try again later or check the URL."
        except RequestException as err:
            return f"Error fetching the webpage: {str(err)}"
        except Exception as err:
            return f"An unexpected error occurred: {str(err)}"
# --- Custom Agent using Claude directly ---
class BasicAgent:
    """Question-answering agent that sends one prompt per question to Claude.

    The tool objects created in ``__init__`` are stored in ``self.tools`` but
    are not invoked by ``__call__``; the model is only told about them in the
    prompt text.
    """

    def __init__(self):
        """Create the chat model, the rate limiter and the tool instances.

        Raises:
            ValueError: If ``ANTHROPIC_API_KEY`` is not set in the environment.
        """
        # Initialize Anthropic Claude model
        api_key = os.getenv("ANTHROPIC_API_KEY")
        if not api_key:
            raise ValueError("ANTHROPIC_API_KEY not found in environment variables.")

        self.model_name = "claude-3-haiku-20240307"
        self.chat_model = ChatAnthropic(model=self.model_name, anthropic_api_key=api_key)
        self.rate_limiter = RateLimiter()

        # Initialize tools
        self.tools = {
            'search': DuckDuckGoSearchTool(),
            'wikipedia': WikipediaSearchTool(),
            'webpage': VisitWebpageTool(),
            'download': DownloadTaskAttachmentTool()
        }

        print(f"BasicAgent initialized with Claude model: {self.model_name}")

    @staticmethod
    def _content_to_text(content) -> str:
        """Flatten a chat-message ``content`` value into plain text.

        Message content may be a plain string or a list of blocks (strings or
        dicts carrying a ``"text"`` entry). Callers of the agent treat the
        answer as ``str``, so list content is joined and anything else is
        stringified.
        """
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            pieces = []
            for block in content:
                if isinstance(block, str):
                    pieces.append(block)
                elif isinstance(block, dict) and isinstance(block.get("text"), str):
                    pieces.append(block["text"])
            return "".join(pieces)
        return str(content)

    def __call__(self, question: str, max_retries: int = 3) -> str:
        """Answer *question*, retrying on rate-limit errors.

        Args:
            question: The question text (already including any file content).
            max_retries: Maximum number of attempts.

        Returns:
            The model's answer, or a string starting with ``RATE_LIMIT_ERROR``,
            ``AGENT_ERROR`` or ``MAX_RETRIES_EXCEEDED`` when no answer could be
            produced. Exceptions from the model call are never propagated.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")

        for attempt in range(max_retries):
            try:
                # Apply rate limiting
                self.rate_limiter.wait_if_needed()

                # Create a comprehensive prompt for Claude
                prompt = self._create_prompt(question)

                # Get response from Claude
                response = self.chat_model.invoke(prompt)
                # Normalise to str so slicing below and ``startswith`` in the
                # caller behave the same for block-list content.
                agent_answer = self._content_to_text(response.content)

                print(f"Agent returning answer: {agent_answer[:100]}...")
                return agent_answer

            except Exception as e:
                error_msg = str(e)
                print(f"Attempt {attempt + 1} failed: {error_msg}")

                # Anything that is not a rate-limit error is returned immediately.
                is_rate_limit = "rate limit" in error_msg.lower() or "429" in error_msg
                if not is_rate_limit:
                    return f"AGENT_ERROR: {error_msg}"

                # Out of attempts: report the rate-limit failure.
                if attempt >= max_retries - 1:
                    return f"RATE_LIMIT_ERROR: {error_msg}"

                wait_time = (attempt + 1) * 30  # Progressive backoff
                print(f"Rate limit hit. Waiting {wait_time} seconds before retry...")
                time.sleep(wait_time)

        # Only reached when max_retries is zero or negative.
        return "MAX_RETRIES_EXCEEDED"

    def _create_prompt(self, question: str) -> str:
        """Create a comprehensive prompt for Claude to answer the question"""
        prompt = f"""You are a helpful AI agent tasked with answering questions accurately and comprehensively.
You have access to the following tools if needed:
- Web search for current information
- Wikipedia search for factual information
- Webpage visiting for detailed content
- File downloading for task-specific files
Question: {question}
Please provide a clear, accurate, and comprehensive answer. If you need to use external tools or resources, describe what you would do, but provide your best direct answer based on your training data.
If the question involves:
- Current events or recent information: Mention that you would use web search
- Specific factual lookups: Mention that you would use Wikipedia or web search
- File analysis: Mention that you would download and analyze the file
- Code or technical problems: Provide working solutions with explanations
Answer:"""
        return prompt

    def download_file(self, task_id: str) -> str:
        """
        Downloads a file associated with the given task ID.
        Returns the file path where the file is saved locally.

        Raises:
            requests.exceptions.RequestException: On any network or HTTP
                error; the error is logged and re-raised.
        """
        file_url = f"{DEFAULT_API_URL}/files/{task_id}"
        local_file_path = f"downloads/{task_id}.file"

        print(f"Downloading file for task ID {task_id} from {file_url}...")
        try:
            # The context manager closes the streamed response (and releases
            # its connection) even if the download fails part-way through.
            with requests.get(file_url, stream=True, timeout=15) as response:
                response.raise_for_status()
                os.makedirs("downloads", exist_ok=True)
                with open(local_file_path, "wb") as file:
                    for chunk in response.iter_content(chunk_size=8192):
                        file.write(chunk)
            print(f"File downloaded successfully: {local_file_path}")
            return local_file_path
        except requests.exceptions.RequestException as e:
            print(f"Error downloading file for task {task_id}: {e}")
            raise
def run_and_submit_all(profile: gr.OAuthProfile | None, progress=gr.Progress()):
    """
    Fetches all questions, runs the BasicAgent on them, submits all answers,
    and displays the results with progress tracking.

    Args:
        profile: OAuth profile of the logged-in user, or None when nobody is
            logged in. Its ``username`` is sent with the submission.
        progress: Gradio progress tracker, called with a fraction and a
            description at each stage.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a DataFrame with
        one row per processed question, or None when the run stops before any
        question is processed.
    """
    space_id = os.getenv("SPACE_ID")

    if profile:
        username = f"{profile.username}"
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent
    progress(0, desc="Initializing Claude agent...")
    try:
        agent = BasicAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None

    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # 2. Fetch Questions
    progress(0.1, desc="Fetching questions...")
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    # JSONDecodeError is a subclass of RequestException, so it has to be
    # listed first or its dedicated message can never be produced.
    except requests.exceptions.JSONDecodeError as e:
        print(f"Error decoding JSON response from questions endpoint: {e}")
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run your Agent
    results_log = []
    answers_payload = []
    total_questions = len(questions_data)

    print(f"Running Claude agent on {total_questions} questions...")

    for i, item in enumerate(questions_data):
        progress((0.1 + 0.8 * i / total_questions), desc=f"Processing question {i+1}/{total_questions}")

        task_id = item.get("task_id")
        question_text = item.get("question")
        # NOTE(review): attachments are detected only through a
        # "requires_file" key; confirm the questions endpoint really uses
        # this key to signal an attachment.
        requires_file = item.get("requires_file", False)

        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue

        print(f"Processing task {task_id} ({i+1}/{total_questions})")

        try:
            # Download file if required
            if requires_file:
                file_path = agent.download_file(task_id)
                print(f"File for task {task_id} saved at: {file_path}")

                # Read file content and include in question
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        file_content = f.read()
                except (OSError, UnicodeError):
                    # Binary or unreadable attachment: mention only the path.
                    # Narrow exception types keep Ctrl-C / SystemExit working.
                    enhanced_question = f"{question_text}\n\nFile downloaded to: {file_path}"
                else:
                    enhanced_question = f"{question_text}\n\nFile content:\n{file_content}"

                submitted_answer = agent(enhanced_question)
            else:
                submitted_answer = agent(question_text)

            # Check if the answer indicates an error
            if submitted_answer.startswith(("RATE_LIMIT_ERROR", "AGENT_ERROR", "MAX_RETRIES_EXCEEDED")):
                print(f"Error processing task {task_id}: {submitted_answer}")
                results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
                # Don't add to answers_payload for submission if it's an error
                continue

            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})

            # Add delay between requests
            time.sleep(REQUEST_DELAY)

        except Exception as e:
            # Keep going: one failing task must not abort the whole run.
            error_msg = f"PROCESSING_ERROR: {e}"
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": error_msg})

    # The results table is the same for every outcome below, so build it once.
    results_df = pd.DataFrame(results_log)

    if not answers_payload:
        print("Agent did not produce any valid answers to submit.")
        return "Agent did not produce any valid answers to submit. Check the results table for errors.", results_df

    # 4. Prepare Submission
    progress(0.9, desc="Submitting answers...")
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Claude agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()

        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Processed: {len(results_log)} questions\n"
            f"Successfully submitted: {len(answers_payload)} answers\n"
            f"Model used: Claude 3 Haiku\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        progress(1.0, desc="Complete!")
        return final_status, results_df

    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
        print(status_message)
        return status_message, results_df

    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
        print(status_message)
        return status_message, results_df

    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
        print(status_message)
        return status_message, results_df

    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"
        print(status_message)
        return status_message, results_df
# --- Build Gradio Interface using Blocks ---
# Layout, top to bottom: title, instructions, login button, run button,
# status box, results table. The run button is wired to run_and_submit_all.
demo = gr.Blocks()
with demo:
    gr.Markdown("# Claude Agent Evaluation Runner")
    gr.Markdown(
        """
**Instructions:**
1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc.
2. Make sure you have set your `ANTHROPIC_API_KEY` environment variable.
3. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
4. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your Claude agent, submit answers, and see the score.
---
**Model Configuration:**
- 🤖 Using Claude 3 Haiku via Anthropic API
- ⚡ Higher rate limits compared to free tier models
- 🛠️ Custom prompt engineering for better responses
- 📁 Enhanced file handling for task attachments
**Note:** This version uses your Anthropic Claude model directly instead of smolagents CodeAgent.
"""
    )

    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    status_output = gr.Textbox(
        label="Run Status / Submission Result",
        lines=8,
        interactive=False,
    )
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # The callback's two return values map onto the status box and the table.
    run_button.click(
        show_progress=True,
        outputs=[status_output, results_table],
        fn=run_and_submit_all,
    )
if __name__ == "__main__":
    # Startup diagnostics: report which environment variables are available,
    # then launch the Gradio app.
    print("\n" + "-"*30 + " App Starting " + "-"*30)

    # Check for required API key
    api_key_check = os.getenv("ANTHROPIC_API_KEY")
    if api_key_check:
        print("✅ ANTHROPIC_API_KEY found")
    else:
        print("❌ ANTHROPIC_API_KEY not found - please set this environment variable")

    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        # NOTE(review): SPACE_HOST is expected to already be a full host name
        # ending in ".hf.space" (confirm against the Spaces docs); append the
        # suffix only when it is missing so the printed URL is never doubled.
        if space_host_startup.endswith(".hf.space"):
            runtime_host_startup = space_host_startup
        else:
            runtime_host_startup = f"{space_host_startup}.hf.space"
        print(f" Runtime URL should be: https://{runtime_host_startup}")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

    print("-"*(60 + len(" App Starting ")) + "\n")

    print("Launching Gradio Interface for Claude Agent Evaluation...")
    demo.launch(debug=True, share=False)