Spaces:
Sleeping
Sleeping
File size: 8,241 Bytes
10e9b7d 4c50fee 10e9b7d eccf8e4 3c4371f 12c29d2 488b7fa 24811b1 12c29d2 37790b8 12c29d2 10e9b7d 3db6293 e80aab9 12c29d2 70d2572 31243f4 70d2572 fb7db6d 70d2572 37790b8 70d2572 693dcc6 12c29d2 70d2572 12c29d2 70d2572 12c29d2 31243f4 567c67d 70d2572 6767692 70d2572 6767692 70d2572 12c29d2 37790b8 46df6c3 7e4a06b 46df6c3 3c4371f 7e4a06b 3c4371f 7d65c66 fb7db6d 7e4a06b 31243f4 fb7db6d 31243f4 70d2572 31243f4 3c4371f 31243f4 fb7db6d 36ed51a fb7db6d eccf8e4 31243f4 7d65c66 31243f4 693dcc6 46df6c3 31243f4 fb7db6d 7d65c66 3c4371f fb7db6d 693dcc6 31243f4 3098349 693dcc6 31243f4 fb7db6d 70d2572 3098349 70d2572 3098349 70d2572 4e66a6c 70d2572 3098349 31243f4 70d2572 7d65c66 31243f4 693dcc6 fb7db6d 70d2572 6767692 31243f4 693dcc6 fb7db6d 7d65c66 693dcc6 fb7db6d e80aab9 4e66a6c e80aab9 31243f4 e80aab9 693dcc6 e80aab9 4e66a6c 693dcc6 4e66a6c 693dcc6 7d65c66 4e66a6c e80aab9 693dcc6 e80aab9 70d2572 0ee0419 e514fd7 693dcc6 70d2572 3098349 567c67d 693dcc6 e80aab9 7e4a06b 31243f4 9088b99 7d65c66 fb7db6d 31243f4 e80aab9 693dcc6 e80aab9 693dcc6 fb7db6d | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 | import os
import time
import gradio as gr
import requests
import pandas as pd
from smolagents import (
CodeAgent,
InferenceClientModel,
DuckDuckGoSearchTool,
WikipediaSearchTool,
PythonInterpreterTool,
VisitWebpageTool,
tool,
)
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
@tool
def get_current_date_time() -> str:
"""Returns the current date and time in ISO format."""
from datetime import datetime
return datetime.now().isoformat()
class StrictHuggingFaceAgent:
def __init__(self):
print("Initializing Strict Hugging Face Agent with Few-Shot Prompting...")
hf_token = os.getenv("HF_TOKEN")
if not hf_token:
raise ValueError("HF_TOKEN environment variable not set in Space Secrets.")
self.model = InferenceClientModel(
model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
token=hf_token,
)
self.tools = [
DuckDuckGoSearchTool(),
WikipediaSearchTool(),
VisitWebpageTool(),
PythonInterpreterTool(),
get_current_date_time,
]
self.agent = CodeAgent(
tools=self.tools,
model=self.model,
max_steps=7,
additional_authorized_imports=["datetime", "re", "json", "math", "collections", "pandas", "requests", "bs4"],
)
print("Agent ready.")
def __call__(self, question: str) -> str:
print(f"\nAgent received question: {question[:80]}...")
max_retries = 3
for attempt in range(max_retries):
try:
time.sleep(2)
answer = self.agent.run(question)
# Clean up any accidental leading/trailing whitespace or quotes the agent might slip in
clean_answer = str(answer).strip(" '\"\n\t.")
print(f"Agent answer: {clean_answer}")
return clean_answer
except Exception as e:
err_msg = str(e).lower()
if "429" in err_msg or "rate limit" in err_msg or "too many requests" in err_msg:
wait_time = 20 * (attempt + 1)
print(f"Rate limit hit! Pausing for {wait_time} seconds before retrying...")
time.sleep(wait_time)
else:
print(f"Agent error processing question: {e}")
return f"Error: {str(e)}"
return "Error: Rate limit exceeded after maximum retries."
# --- App Runner ---
def run_and_submit_all(profile: gr.OAuthProfile | None):
space_id = os.getenv("SPACE_ID")
if profile:
username = f"{profile.username}"
print(f"User logged in: {username}")
else:
print("User not logged in.")
return "Please Login to Hugging Face with the button.", None
api_url = DEFAULT_API_URL
questions_url = f"{api_url}/questions"
submit_url = f"{api_url}/submit"
try:
agent = StrictHuggingFaceAgent()
except Exception as e:
print(f"Error instantiating agent: {e}")
return f"Error initializing agent: {e}", None
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
try:
response = requests.get(questions_url, timeout=15)
response.raise_for_status()
questions_data = response.json()
if not questions_data:
return "No questions.", None
except Exception as e:
return f"Error fetching questions: {e}", None
results_log = []
answers_payload = []
print(f"Running agent on {len(questions_data)} questions...")
for i, item in enumerate(questions_data):
task_id = item.get("task_id")
question_text = item.get("question")
file_url = item.get("file_url")
if not task_id or not question_text:
continue
# Inject the file URL if it exists
if file_url:
question_text += f"\n\n[IMPORTANT: This task requires analyzing an attached file. You MUST download or read it directly from this URL: {file_url} using your Python tool.]"
# The ultimate, unbreakable strict prompt WITH few-shot examples
ultra_strict_prompt = (
f"{question_text}\n\n"
"=== CRITICAL OUTPUT INSTRUCTIONS ===\n"
"You are being evaluated by a strict programmatic regex parser.\n"
"Your final answer MUST consist of ONLY the exact requested name, number, or string.\n"
"DO NOT wrap your answer in quotes, DO NOT add a trailing period, and DO NOT provide any explanation or conversational filler.\n\n"
"Here are examples of perfect submissions:\n"
"Example 1\n"
"Question: What is the first name of the only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists?\n"
"Answer: Claus\n\n"
"Example 2\n"
"Question: How many at bats did the Yankee with the most walks in the 1977 regular season have that same season?\n"
"Answer: 519\n\n"
"Example 3\n"
"Question: .rewsna eht sa \"tfel\" drow eht fo etisoppo eht etirw ,ecnetnes siht dnatsrednu uoy fI\n"
"Answer: right\n\n"
"Failure to follow these instructions perfectly will result in an immediate score of 0."
)
try:
submitted_answer = agent(ultra_strict_prompt)
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
except Exception as e:
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"ERROR: {e}"})
# 15 second cooldown to protect your new Hugging Face token limits
print("Cooling down for 15 seconds to protect quotas...")
time.sleep(15)
if not answers_payload:
return "No answers.", pd.DataFrame(results_log)
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
print(f"Submitting {len(answers_payload)} answers...")
try:
# INCREASED TIMEOUT TO 300 SECONDS (5 Minutes) to allow the scoring server to wake up!
response = requests.post(submit_url, json=submission_data, timeout=300)
response.raise_for_status()
result_data = response.json()
final_status = (
f"Submission Successful!\n"
f"User: {result_data.get('username')}\n"
f"Score: {result_data.get('score')}%\n"
f"Correct: {result_data.get('correct_count')}/{result_data.get('total_attempted')}\n"
f"Message: {result_data.get('message')}"
)
print("\n" + "="*40)
print(final_status)
print("="*40 + "\n")
return final_status, pd.DataFrame(results_log)
except Exception as e:
# ADDED PRINT STATEMENT so you can actually see the error in the logs!
error_msg = f"Submission failed: {e}"
print(f"\n🚨 {error_msg} 🚨\n")
return error_msg, pd.DataFrame(results_log)
# --- Build Gradio UI ---
with gr.Blocks() as demo:
gr.Markdown("# Strict Hugging Face Evaluation Runner (Few-Shot Edition)")
gr.Markdown(
"""
**Instructions:**
1. Ensure your fresh `HF_TOKEN` is set in Space Secrets.
2. Log in below.
3. Click 'Run Evaluation & Submit' to start.
"""
)
gr.LoginButton()
run_button = gr.Button("Run Evaluation & Submit All Answers")
status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
run_button.click(
fn=run_and_submit_all,
outputs=[status_output, results_table]
)
if __name__ == "__main__":
print("Starting Gradio app...")
demo.launch(debug=True, share=False) |