Agents-course-final-project-api

Sleeping

App Files Files Community

Agents-course-final-project-api / app.py

micposso

ddd

10d4601 8 months ago

raw

history blame contribute delete

6.58 kB

	import os
	import gradio as gr
	import requests
	import pandas as pd
	import json
	from openai import OpenAI
	from dotenv import load_dotenv

	load_dotenv()

	# --- Constants ---
	DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

	# --- Agent Using OpenAI GPT-4o ---
	class BasicAgent:
	    """Question-answering agent backed by an OpenAI chat-completions model.

	    Calling an instance with a question returns a dict with two keys:
	    ``model_answer`` (the extracted answer, prefixed with ``FINAL ANSWER: ``)
	    and ``reasoning_trace`` (the full model output, or the error text).
	    """

	    def __init__(self, model="gpt-4o"):
	        """Create the OpenAI client used for all later calls.

	        Args:
	            model: Name of the chat model to query.

	        Raises:
	            ValueError: If the ``OPENAI_API_KEY`` environment variable is
	                unset or empty.
	        """
	        self.model = model
	        api_key = os.getenv("OPENAI_API_KEY")
	        if not api_key:
	            raise ValueError("OPENAI_API_KEY not set.")
	        self.client = OpenAI(api_key=api_key)
	        print(f"BasicAgent initialized using model: {self.model}")

	    def __call__(self, question: str) -> dict:
	        """Ask the model one question and extract its final answer.

	        Args:
	            question: The question text sent as the user message.

	        Returns:
	            A dict with ``model_answer`` and ``reasoning_trace``. Any failure
	            while querying or parsing is reported in the same shape, with
	            ``model_answer`` set to ``FINAL ANSWER: AGENT ERROR`` and the
	            error text in ``reasoning_trace``; this method does not raise.
	        """
	        system_prompt = (
	            "You are a general AI assistant. I will ask you a question. "
	            "Report your thoughts, and finish your answer with the following template: "
	            "FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible "
	            "OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma "
	            "to write your number neither use units such as $ or percent sign unless specified otherwise. "
	            "If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits "
	            "in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules "
	            "depending on whether the element to be put in the list is a number or a string."
	        )

	        try:
	            response = self.client.chat.completions.create(
	                model=self.model,
	                messages=[
	                    {"role": "system", "content": system_prompt},
	                    {"role": "user", "content": question},
	                ],
	                temperature=0.2,
	                max_tokens=500,
	            )

	            content = response.choices[0].message.content
	            if content is None:
	                # Surface an empty completion with a readable message instead
	                # of an AttributeError from calling .strip() on None.
	                raise ValueError("Model returned no text content.")
	            full_answer = content.strip()

	            # Text after the LAST marker; when the marker is absent,
	            # rpartition leaves the whole answer in the final slot (fallback).
	            final = full_answer.rpartition("FINAL ANSWER:")[2].strip()

	            return {
	                "model_answer": f"FINAL ANSWER: {final}",
	                "reasoning_trace": full_answer,
	            }

	        except Exception as e:
	            # Deliberate catch-all: one failed question must not abort the
	            # caller's run, so the error is returned as an ordinary result.
	            return {
	                "model_answer": "FINAL ANSWER: AGENT ERROR",
	                "reasoning_trace": str(e),
	            }

	# --- Evaluation and Submission ---
	def run_and_submit_all(profile: gr.OAuthProfile | None):
	    """Fetch the questions, answer each with BasicAgent, and submit the answers.

	    Args:
	        profile: OAuth profile of the logged-in Hugging Face user, or None
	            when nobody is logged in.

	    Returns:
	        A ``(status_message, results)`` tuple. ``results`` is a DataFrame of
	        the per-question log, or None when the run stops before any question
	        is attempted (not logged in, agent init failure, fetch failure).
	    """
	    if not profile:
	        print("User not logged in.")
	        return "Please Login to Hugging Face with the button.", None
	    print(f"User logged in: {profile.username}")

	    questions_url = f"{DEFAULT_API_URL}/questions"
	    submit_url = f"{DEFAULT_API_URL}/submit"

	    try:
	        agent = BasicAgent()
	    except Exception as e:
	        return f"Error initializing agent: {e}", None

	    try:
	        response = requests.get(questions_url, timeout=15)
	        response.raise_for_status()
	        questions_data = response.json()
	    except Exception as e:
	        return f"Error fetching questions: {e}", None

	    # The loop below calls .get() on every element, so anything other than a
	    # non-empty list of records is treated as an invalid payload.
	    if not isinstance(questions_data, list) or not questions_data:
	        return "Fetched questions list is empty or invalid.", None
	    print(f"Fetched {len(questions_data)} questions.")

	    results_log = []
	    answers_payload = []

	    for item in questions_data:
	        if not isinstance(item, dict):
	            continue
	        task_id = item.get("task_id")
	        question_text = item.get("question")
	        if not task_id or not question_text:
	            continue

	        try:
	            result = agent(question_text)
	            model_answer = result["model_answer"]
	            trace = result["reasoning_trace"]
	        except Exception as e:
	            # Keep the failed question visible in the results table, but do
	            # not submit an answer for it.
	            results_log.append({
	                "Task ID": task_id,
	                "Question": question_text,
	                "Model Answer": f"AGENT ERROR: {e}",
	            })
	            continue

	        answers_payload.append({
	            "task_id": task_id,
	            "model_answer": model_answer,
	            "reasoning_trace": trace,
	        })
	        results_log.append({
	            "Task ID": task_id,
	            "Question": question_text,
	            "Model Answer": model_answer,
	        })

	    if not answers_payload:
	        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

	    # Optional debug dump. Best-effort: a failed write (e.g. read-only
	    # filesystem) must not prevent the submission below.
	    try:
	        with open("submission.jsonl", "w", encoding="utf-8") as f:
	            f.writelines(json.dumps(ans) + "\n" for ans in answers_payload)
	    except OSError as e:
	        print(f"Could not write submission.jsonl: {e}")

	    # Submit a bare list of {"task_id", "model_answer", "reasoning_trace"}.
	    # NOTE(review): confirm this request shape against the scoring API's
	    # documented /submit schema.
	    try:
	        response = requests.post(submit_url, json=answers_payload, timeout=60)
	        response.raise_for_status()
	        result_data = response.json()

	        final_status = (
	            f"Submission Successful!\n"
	            f"User: {profile.username}\n"
	            f"Overall Score: {result_data.get('score', 'N/A')}% "
	            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
	            f"Message: {result_data.get('message', 'No message received.')}"
	        )
	        return final_status, pd.DataFrame(results_log)

	    except Exception as e:
	        return f"Submission Failed: {e}", pd.DataFrame(results_log)

	# --- Gradio UI ---
	with gr.Blocks() as demo:
	    # Page header and usage instructions.
	    gr.Markdown("# GPT-4o Agent Evaluation Runner")
	    gr.Markdown("""
	    1. Log into your Hugging Face account.
	    2. Click the button to fetch questions, generate answers using GPT-4o, and submit.
	    3. You will see your score and submitted answers below.
	    """)

	    # Components are created in display order: login, trigger, then outputs.
	    gr.LoginButton()
	    run_button = gr.Button("Run Evaluation & Submit All Answers")
	    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
	    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

	    # No explicit inputs are wired; the handler's two return values fill the
	    # status box and the results table respectively.
	    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])

	# --- Entry Point ---
	if __name__ == "__main__":
	    # Startup banner: report which Space-related environment variables are
	    # visible, then launch the Gradio app.
	    print("\n" + "-" * 30 + " App Starting " + "-" * 30)
	    space_host = os.getenv("SPACE_HOST")
	    space_id = os.getenv("SPACE_ID")

	    if space_host:
	        print(f"✅ SPACE_HOST found: https://{space_host}.hf.space")
	    else:
	        print("ℹ️ SPACE_HOST not found.")

	    if space_id:
	        print(f"✅ SPACE_ID found: https://huggingface.co/spaces/{space_id}")
	    else:
	        print("ℹ️ SPACE_ID not found.")

	    # Closing rule is as wide as the opening banner (30 + title + 30).
	    print("-" * (60 + len(" App Starting ")) + "\n")
	    demo.launch(debug=True, share=False)