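# NOTE: the next three lines are not Python; they appear to be page residue
# (avatar caption, commit message, short commit hash) and are kept as comments.
# micposso's picture
# ddd
# 10d4601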
import os
import gradio as gr
import requests
import pandas as pd
import json
from openai import OpenAI
from dotenv import load_dotenv
# Load variables from a .env file (if one is found) into the process
# environment before anything reads OPENAI_API_KEY / SPACE_* via os.getenv.
load_dotenv()
# --- Constants ---
# Base URL of the scoring service; "/questions" and "/submit" are appended to
# it when fetching questions and posting answers.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# --- Agent Using OpenAI GPT-4o ---
class BasicAgent:
    """Question-answering agent backed by an OpenAI chat-completion model.

    Instances are callable: ``agent(question)`` sends the question to the
    model together with a fixed system prompt and returns a dict holding the
    parsed final answer and the raw model output.
    """

    # Marker the system prompt asks the model to place before its final answer.
    FINAL_ANSWER_MARKER = "FINAL ANSWER:"

    # The prompt never changes, so it is built once here instead of per call.
    SYSTEM_PROMPT = (
        "You are a general AI assistant. I will ask you a question. "
        "Report your thoughts, and finish your answer with the following template: "
        "FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible "
        "OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma "
        "to write your number neither use units such as $ or percent sign unless specified otherwise. "
        "If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits "
        "in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules "
        "depending on whether the element to be put in the list is a number or a string."
    )

    def __init__(self, model="gpt-4o"):
        """Create the OpenAI client used for all subsequent calls.

        Args:
            model: Name of the chat model to query.

        Raises:
            ValueError: If the ``OPENAI_API_KEY`` environment variable is
                unset or empty.
        """
        self.model = model
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError("OPENAI_API_KEY not set.")
        self.client = OpenAI(api_key=api_key)
        print(f"BasicAgent initialized using model: {self.model}")

    def __call__(self, question: str) -> dict:
        """Ask the model one question and parse its reply.

        Args:
            question: The question text, sent as the user message.

        Returns:
            A dict with two string entries:

            * ``"model_answer"`` - ``"FINAL ANSWER: "`` followed by the text
              after the last ``FINAL ANSWER:`` marker in the reply, or by the
              whole reply when the marker is missing.
            * ``"reasoning_trace"`` - the full, whitespace-stripped reply.

            This method does not raise: on any failure ``"model_answer"`` is
            ``"FINAL ANSWER: AGENT ERROR"`` and ``"reasoning_trace"`` holds a
            description of the problem.
        """
        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": self.SYSTEM_PROMPT},
                    {"role": "user", "content": question},
                ],
                temperature=0.2,
                max_tokens=500,
            )
            content = response.choices[0].message.content
        except Exception as e:
            # Deliberately broad: one failed question must not abort a batch.
            return {
                "model_answer": "FINAL ANSWER: AGENT ERROR",
                "reasoning_trace": str(e),
            }

        if content is None:
            # The reply carried no text. Report that explicitly rather than
            # letting `.strip()` fail with an opaque AttributeError message.
            return {
                "model_answer": "FINAL ANSWER: AGENT ERROR",
                "reasoning_trace": "Model returned no text content.",
            }

        full_answer = content.strip()
        # rpartition yields the text after the LAST marker, or the whole
        # string when the marker is absent, so no separate fallback is needed.
        final = full_answer.rpartition(self.FINAL_ANSWER_MARKER)[2].strip()
        return {
            "model_answer": f"FINAL ANSWER: {final}",
            "reasoning_trace": full_answer,
        }
# --- Evaluation and Submission ---
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the questions, answer each with ``BasicAgent``, and submit the answers.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in. Only ``profile.username`` is read.

    Returns:
        A ``(status, results)`` tuple. ``status`` is a human-readable message
        describing success or the step that failed. ``results`` is a
        ``pandas.DataFrame`` with one row per processed question, or ``None``
        when the run stopped before any question was processed.
    """
    if profile:
        print(f"User logged in: {profile.username}")
    else:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"

    # The broad handlers below sit at the UI boundary: every failure is turned
    # into a status message instead of an unhandled exception.
    try:
        agent = BasicAgent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # A non-list payload (for example a JSON object) would pass a plain
    # truthiness check and then fail on `item.get` in the loop below.
    if not isinstance(questions_data, list) or not questions_data:
        return "Fetched questions list is empty or invalid.", None
    print(f"Fetched {len(questions_data)} questions.")

    results_log = []
    answers_payload = []
    for item in questions_data:
        if not isinstance(item, dict):
            continue  # malformed entry; nothing to answer
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or not question_text:
            continue
        try:
            result = agent(question_text)
            model_answer = result["model_answer"]
            trace = result["reasoning_trace"]
        except Exception as e:
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Model Answer": f"AGENT ERROR: {e}",
            })
            continue
        answers_payload.append({
            "task_id": task_id,
            "model_answer": model_answer,
            "reasoning_trace": trace,
        })
        results_log.append({
            "Task ID": task_id,
            "Question": question_text,
            "Model Answer": model_answer,
        })

    if not answers_payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # Optional debug dump. It is best-effort: a read-only or full filesystem
    # must not prevent the submission itself.
    try:
        with open("submission.jsonl", "w", encoding="utf-8") as f:
            f.writelines(json.dumps(ans) + "\n" for ans in answers_payload)
    except OSError as e:
        print(f"Could not write submission.jsonl: {e}")

    # The request body is a bare list of
    # {"task_id", "model_answer", "reasoning_trace"} objects.
    # NOTE(review): confirm this shape against the scoring API's /submit
    # schema. The course's reference client is believed to post an object with
    # "username", "agent_code" and "answers" (items keyed "submitted_answer").
    try:
        response = requests.post(submit_url, json=answers_payload, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {profile.username}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        return final_status, pd.DataFrame(results_log)
    except Exception as e:
        return f"Submission Failed: {e}", pd.DataFrame(results_log)
# --- Gradio UI ---
# Single-page UI: a title, usage steps, a login button, one action button,
# and two read-only outputs (status text and the per-question results table).
with gr.Blocks() as demo:
    gr.Markdown(value="# GPT-4o Agent Evaluation Runner")
    gr.Markdown(
        value="\n".join(
            [
                "",
                "1. Log into your Hugging Face account.",
                "2. Click the button to fetch questions, generate answers using GPT-4o, and submit.",
                "3. You will see your score and submitted answers below.",
                "",
            ]
        )
    )
    gr.LoginButton()

    run_button = gr.Button(value="Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(
        label="Run Status / Submission Result",
        lines=5,
        interactive=False,
    )
    results_table = gr.DataFrame(
        label="Questions and Agent Answers",
        wrap=True,
    )

    # No explicit inputs are wired; the handler's only parameter is the OAuth
    # profile (presumably supplied by Gradio from the type annotation).
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table],
    )
# --- Entry Point ---
if __name__ == "__main__":
    # Startup banner: report which Space-related environment variables are
    # visible, then launch the Gradio app.
    rule = "-" * 30
    title = " App Starting "
    print(f"\n{rule}{title}{rule}")

    host = os.getenv("SPACE_HOST")
    print(
        f"✅ SPACE_HOST found: https://{host}.hf.space"
        if host
        else "ℹ️ SPACE_HOST not found."
    )

    repo = os.getenv("SPACE_ID")
    print(
        f"✅ SPACE_ID found: https://huggingface.co/spaces/{repo}"
        if repo
        else "ℹ️ SPACE_ID not found."
    )

    # Closing rule spans the same width as the opening banner line.
    print("-" * (2 * len(rule) + len(title)) + "\n")
    demo.launch(debug=True, share=False)