Shivangsinha's picture
Update app.py
4e66a6c verified
import os
import time
import gradio as gr
import requests
import pandas as pd
from smolagents import (
CodeAgent,
InferenceClientModel,
DuckDuckGoSearchTool,
WikipediaSearchTool,
PythonInterpreterTool,
VisitWebpageTool,
tool,
)
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
@tool
def get_current_date_time() -> str:
"""Returns the current date and time in ISO format."""
from datetime import datetime
return datetime.now().isoformat()
class StrictHuggingFaceAgent:
def __init__(self):
print("Initializing Strict Hugging Face Agent with Few-Shot Prompting...")
hf_token = os.getenv("HF_TOKEN")
if not hf_token:
raise ValueError("HF_TOKEN environment variable not set in Space Secrets.")
self.model = InferenceClientModel(
model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
token=hf_token,
)
self.tools = [
DuckDuckGoSearchTool(),
WikipediaSearchTool(),
VisitWebpageTool(),
PythonInterpreterTool(),
get_current_date_time,
]
self.agent = CodeAgent(
tools=self.tools,
model=self.model,
max_steps=7,
additional_authorized_imports=["datetime", "re", "json", "math", "collections", "pandas", "requests", "bs4"],
)
print("Agent ready.")
def __call__(self, question: str) -> str:
print(f"\nAgent received question: {question[:80]}...")
max_retries = 3
for attempt in range(max_retries):
try:
time.sleep(2)
answer = self.agent.run(question)
# Clean up any accidental leading/trailing whitespace or quotes the agent might slip in
clean_answer = str(answer).strip(" '\"\n\t.")
print(f"Agent answer: {clean_answer}")
return clean_answer
except Exception as e:
err_msg = str(e).lower()
if "429" in err_msg or "rate limit" in err_msg or "too many requests" in err_msg:
wait_time = 20 * (attempt + 1)
print(f"Rate limit hit! Pausing for {wait_time} seconds before retrying...")
time.sleep(wait_time)
else:
print(f"Agent error processing question: {e}")
return f"Error: {str(e)}"
return "Error: Rate limit exceeded after maximum retries."
# --- App Runner ---
def run_and_submit_all(profile: gr.OAuthProfile | None):
space_id = os.getenv("SPACE_ID")
if profile:
username = f"{profile.username}"
print(f"User logged in: {username}")
else:
print("User not logged in.")
return "Please Login to Hugging Face with the button.", None
api_url = DEFAULT_API_URL
questions_url = f"{api_url}/questions"
submit_url = f"{api_url}/submit"
try:
agent = StrictHuggingFaceAgent()
except Exception as e:
print(f"Error instantiating agent: {e}")
return f"Error initializing agent: {e}", None
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
try:
response = requests.get(questions_url, timeout=15)
response.raise_for_status()
questions_data = response.json()
if not questions_data:
return "No questions.", None
except Exception as e:
return f"Error fetching questions: {e}", None
results_log = []
answers_payload = []
print(f"Running agent on {len(questions_data)} questions...")
for i, item in enumerate(questions_data):
task_id = item.get("task_id")
question_text = item.get("question")
file_url = item.get("file_url")
if not task_id or not question_text:
continue
# Inject the file URL if it exists
if file_url:
question_text += f"\n\n[IMPORTANT: This task requires analyzing an attached file. You MUST download or read it directly from this URL: {file_url} using your Python tool.]"
# The ultimate, unbreakable strict prompt WITH few-shot examples
ultra_strict_prompt = (
f"{question_text}\n\n"
"=== CRITICAL OUTPUT INSTRUCTIONS ===\n"
"You are being evaluated by a strict programmatic regex parser.\n"
"Your final answer MUST consist of ONLY the exact requested name, number, or string.\n"
"DO NOT wrap your answer in quotes, DO NOT add a trailing period, and DO NOT provide any explanation or conversational filler.\n\n"
"Here are examples of perfect submissions:\n"
"Example 1\n"
"Question: What is the first name of the only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists?\n"
"Answer: Claus\n\n"
"Example 2\n"
"Question: How many at bats did the Yankee with the most walks in the 1977 regular season have that same season?\n"
"Answer: 519\n\n"
"Example 3\n"
"Question: .rewsna eht sa \"tfel\" drow eht fo etisoppo eht etirw ,ecnetnes siht dnatsrednu uoy fI\n"
"Answer: right\n\n"
"Failure to follow these instructions perfectly will result in an immediate score of 0."
)
try:
submitted_answer = agent(ultra_strict_prompt)
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
except Exception as e:
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"ERROR: {e}"})
# 15 second cooldown to protect your new Hugging Face token limits
print("Cooling down for 15 seconds to protect quotas...")
time.sleep(15)
if not answers_payload:
return "No answers.", pd.DataFrame(results_log)
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
print(f"Submitting {len(answers_payload)} answers...")
try:
# INCREASED TIMEOUT TO 300 SECONDS (5 Minutes) to allow the scoring server to wake up!
response = requests.post(submit_url, json=submission_data, timeout=300)
response.raise_for_status()
result_data = response.json()
final_status = (
f"Submission Successful!\n"
f"User: {result_data.get('username')}\n"
f"Score: {result_data.get('score')}%\n"
f"Correct: {result_data.get('correct_count')}/{result_data.get('total_attempted')}\n"
f"Message: {result_data.get('message')}"
)
print("\n" + "="*40)
print(final_status)
print("="*40 + "\n")
return final_status, pd.DataFrame(results_log)
except Exception as e:
# ADDED PRINT STATEMENT so you can actually see the error in the logs!
error_msg = f"Submission failed: {e}"
print(f"\n🚨 {error_msg} 🚨\n")
return error_msg, pd.DataFrame(results_log)
# --- Build Gradio UI ---
with gr.Blocks() as demo:
gr.Markdown("# Strict Hugging Face Evaluation Runner (Few-Shot Edition)")
gr.Markdown(
"""
**Instructions:**
1. Ensure your fresh `HF_TOKEN` is set in Space Secrets.
2. Log in below.
3. Click 'Run Evaluation & Submit' to start.
"""
)
gr.LoginButton()
run_button = gr.Button("Run Evaluation & Submit All Answers")
status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
run_button.click(
fn=run_and_submit_all,
outputs=[status_output, results_table]
)
if __name__ == "__main__":
print("Starting Gradio app...")
demo.launch(debug=True, share=False)