# Provenance: app.py uploaded by Raj989898 (commit 07ed52a, verified), 4.52 kB.
import os
import gradio as gr
import requests
import pandas as pd
from smolagents import CodeAgent
from smolagents import DuckDuckGoSearchTool
from smolagents import PythonInterpreterTool
from smolagents import InferenceClientModel
# Base URL of the scoring service. run_and_submit_all appends "/questions"
# and "/submit" to it to build the fetch and submission endpoints.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# ---------------- AGENT ---------------- #
class SmartAgent:
    """Callable question-answering agent built on a smolagents ``CodeAgent``.

    The agent is given a DuckDuckGo search tool and a Python interpreter
    tool. Calling the instance with a question returns the agent's answer as
    a stripped string; any failure is reported on stdout and yields ``""`` so
    that one bad question never aborts a whole evaluation run.
    """

    def __init__(
        self,
        model_id: str = "meta-llama/Meta-Llama-3-8B-Instruct",
        max_steps: int = 8,
    ) -> None:
        """Create the model wrapper and the tool-equipped agent.

        Args:
            model_id: Identifier handed to ``InferenceClientModel``.
            max_steps: Step budget handed to ``CodeAgent``.
        """
        print("Initializing SmartAgent")
        self.model = InferenceClientModel(model_id=model_id)
        self.agent = CodeAgent(
            tools=[
                DuckDuckGoSearchTool(),
                PythonInterpreterTool(),
            ],
            model=self.model,
            max_steps=max_steps,
        )

    def __call__(self, question: str) -> str:
        """Answer *question*, returning ``""`` when no answer is available.

        Args:
            question: The task text to hand to the agent.

        Returns:
            The agent's answer converted to ``str`` and stripped, or ``""``
            if the question is blank, the agent returns ``None``, or the
            agent raises.
        """
        print("Question received:", question)
        # A blank question cannot produce a meaningful answer; skip the
        # (slow, billable) model call entirely.
        if question is None or not str(question).strip():
            return ""
        try:
            answer = self.agent.run(question)
            if answer is None:
                return ""
            return str(answer).strip()
        except Exception as e:
            # Deliberate best-effort: log and fall back to an empty answer.
            print("Agent error:", e)
            return ""
# ---------------- RUN EVALUATION ---------------- #
def run_and_submit_all(
    profile: gr.OAuthProfile | None,
) -> tuple[str, pd.DataFrame | None]:
    """Fetch the questions, run the agent on each, and submit the answers.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status_message, results_table)`` pair. ``results_table`` is a
        DataFrame with one row per attempted question, or ``None`` when the
        run stopped before any question was attempted.
    """
    if not profile:
        return "Please login first.", None
    username = profile.username

    space_id = os.getenv("SPACE_ID")
    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    try:
        agent = SmartAgent()
    except Exception as e:
        return f"Agent initialization error: {e}", None

    # NOTE: when SPACE_ID is unset this link contains the literal "None".
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # ---------------- GET QUESTIONS ---------------- #
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # An empty or non-list payload would otherwise fall through to an empty
    # submission (or crash while iterating).
    if not isinstance(questions, list) or not questions:
        return "Fetched questions list is empty or invalid format.", None

    answers_payload = []
    results_log = []

    # ---------------- RUN AGENT ---------------- #
    for item in questions:
        if not isinstance(item, dict):
            # Malformed entry: nothing to look up, skip it.
            continue
        task_id = item.get("task_id")
        question = item.get("question")
        if not task_id or not question:
            continue
        try:
            answer = agent(question)
        except Exception as e:
            # Failed questions are logged for the user but not submitted.
            results_log.append(
                {
                    "Task ID": task_id,
                    "Question": question,
                    "Submitted Answer": f"ERROR: {e}",
                }
            )
            continue
        answers_payload.append(
            {
                "task_id": task_id,
                "submitted_answer": answer,
            }
        )
        results_log.append(
            {
                "Task ID": task_id,
                "Question": question,
                "Submitted Answer": answer,
            }
        )

    # Do not POST an empty submission; report it and show whatever was logged.
    if not answers_payload:
        return (
            "Agent did not produce any answers to submit.",
            pd.DataFrame(results_log),
        )

    # ---------------- SUBMIT ---------------- #
    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result.get('username')}\n"
            f"Score: {result.get('score')}%\n"
            f"Correct: {result.get('correct_count')}/{result.get('total_attempted')}"
        )
        return final_status, pd.DataFrame(results_log)
    except Exception as e:
        return f"Submission failed: {e}", pd.DataFrame(results_log)
# ---------------- UI ---------------- #
# Page layout: heading, instructions, login button, run button, and the two
# outputs (status text and results table) that run_and_submit_all fills in.
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        "\n"
        "Instructions:\n"
        "1. Login to Hugging Face\n"
        "2. Click **Run Evaluation & Submit All Answers**\n"
        "3. The agent will answer 20 GAIA questions\n"
        "4. Your score will appear when finished\n"
    )

    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")

    status_output = gr.Textbox(
        label="Run Status / Submission Result", lines=5, interactive=False
    )
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # No explicit inputs: the handler's only parameter is the OAuth profile.
    run_button.click(
        fn=run_and_submit_all, outputs=[status_output, results_table]
    )


# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    print("Starting Agent Evaluation App")
    demo.launch(debug=True)