# Scraped from a Hugging Face Space file view; page header, commit-hash column
# and line-number gutter removed.
import os
import gradio as gr
import requests
import pandas as pd
from smolagents import CodeAgent, DuckDuckGoSearchTool, LiteLLMModel, VisitWebpageTool
# --- Constants ---
# Base URL of the scoring service; run_and_submit_all appends "/questions" and
# "/submit" to it to fetch the tasks and to post the answers.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# --- Agent Definition ---
class AgentArchitect:
    """Callable question-answering agent built on a smolagents ``CodeAgent``.

    The agent is driven by OpenAI's ``gpt-4o-mini`` through ``LiteLLMModel``
    (so no Hugging Face inference billing is involved) and is given a
    DuckDuckGo search tool and a web-page visiting tool.

    Attributes set by ``__init__``:
        model: the ``LiteLLMModel`` wrapping ``gpt-4o-mini``.
        tools: the list of tool instances handed to the agent.
        agent: the ``CodeAgent`` that actually answers questions.
    """

    def __init__(self):
        """Build the model, the tools and the agent.

        The OpenAI key is read from the ``OPENAI_API_KEY`` environment
        variable (a Space Secret). A missing key is only reported on stdout;
        construction continues with whatever value was read.
        """
        # Read the key from the environment so it never lives in the repository.
        if not (api_key := os.getenv("OPENAI_API_KEY")):
            print("CRITICAL: OPENAI_API_KEY is missing. Please add it to your Space Secrets!")

        # gpt-4o-mini: capable at writing code and cheap to run.
        self.model = LiteLLMModel(model_id="gpt-4o-mini", api_key=api_key)
        self.tools = [DuckDuckGoSearchTool(), VisitWebpageTool()]
        # add_base_tools=True additionally requests smolagents' default tool set.
        self.agent = CodeAgent(tools=self.tools, model=self.model, add_base_tools=True)

    def __call__(self, question: str) -> str:
        """Answer ``question`` and return the result as a string.

        The question is followed by a fixed instruction asking for only the
        final, concise answer (the scoring is exact-match). Any exception
        raised while the agent runs is turned into the string
        ``"Error: <message>"`` instead of propagating.
        """
        instructions = (
            "Instructions: Think step-by-step. Solve the problem using your tools. "
            "Provide ONLY the final, concise answer."
        )
        try:
            return str(self.agent.run(f"{question}\n\n{instructions}"))
        except Exception as exc:
            return f"Error: {exc}"
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Run the agent on every question from the scoring service and submit the answers.

    Args:
        profile: OAuth profile of the logged-in Hugging Face user, or ``None``
            when nobody is logged in.

    Returns:
        A ``(status, table)`` tuple matching the two Gradio outputs.
        ``status`` is a human-readable message. ``table`` is a
        ``pandas.DataFrame`` with one row per answered question (columns
        ``Task ID``, ``Question``, ``Submitted Answer``), or ``None`` when no
        question was answered. No exception escapes: every failure is
        reported through ``status`` as ``"Submission Failed: ..."``.
    """
    if not profile:
        return "Please Login to Hugging Face with the button.", None

    username = profile.username
    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"

    # Kept outside the ``try`` so answers that were already produced (and paid
    # for) can still be shown to the user if a later step fails.
    results_log = []

    try:
        agent_instance = AgentArchitect()

        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            return "Submission Failed: the scoring service returned no questions.", None

        answers_payload = []
        for item in questions_data:
            task_id = item.get("task_id")
            question_text = item.get("question")
            # Skip malformed entries instead of prompting the agent with
            # "None" or submitting an answer that has no task id.
            if not task_id or question_text is None:
                continue
            submitted_answer = agent_instance(question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append(
                {"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer}
            )

        if not answers_payload:
            return "Submission Failed: no valid questions to answer.", None

        # SPACE_ID is read from the environment; the link tells the scoring
        # service where this agent's code can be found.
        space_id = os.getenv("SPACE_ID")
        submission_data = {
            "username": username.strip(),
            "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
            "answers": answers_payload,
        }
        submit_response = requests.post(submit_url, json=submission_data, timeout=60)
        submit_response.raise_for_status()
        result_data = submit_response.json()

        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Final Score: {result_data.get('score')}% \n"
            f"Message: {result_data.get('message')}"
        )
        return final_status, pd.DataFrame(results_log)
    except Exception as e:
        # Top-level UI boundary: report the failure, but keep any answers
        # gathered so far visible in the results table.
        partial_results = pd.DataFrame(results_log) if results_log else None
        return f"Submission Failed: {e}", partial_results
# --- Gradio UI ---
# Build the UI at import time so that ``demo`` exists as a module-level object.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🚀 Professional Agent Evaluator (OpenAI Edition)")
    # Login is required: run_and_submit_all refuses to run without a profile.
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary")
    status_output = gr.Textbox(label="Leaderboard Status", lines=4)
    results_table = gr.DataFrame(label="Agent Reasoning Trace", wrap=True)
    # No ``inputs`` are wired here; the handler's only parameter is the
    # ``gr.OAuthProfile | None`` profile (see Gradio's OAuth documentation for
    # how it is supplied). The two return values map onto the two outputs.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])

if __name__ == "__main__":
    demo.launch()