Final_Assignment_Template2

Sleeping

App Files Files Community

Final_Assignment_Template2 / app.py

lethaq

Update app.py

bc80784 verified 7 months ago

raw

history blame contribute delete

4.11 kB


	"""Enhanced Agent Evaluation Runner with simplified Agent integration"""
	import os
	import time
	import gradio as gr
	import requests
	import pandas as pd
	from dotenv import load_dotenv
	from agent import Agent
	# Load variables from a local .env file BEFORE the agent is built, so that any
	# configuration the Agent constructor reads from the environment is present.
	# NOTE(review): Agent is defined in agent.py, which is not visible here -
	# confirm that it reads its API key at construction time (and not at import).
	load_dotenv()

	# Single shared Agent instance; run_and_submit_all calls it once per question.
	agent = Agent()

	# Constants
	DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

	def run_and_submit_all(profile: gr.OAuthProfile | None):
	    """Fetch all questions, run the agent on each one, and submit the answers.

	    Args:
	        profile: The Hugging Face OAuth profile of the logged-in user, or
	            ``None`` when nobody is logged in. The click handler wires no
	            explicit inputs, so the ``gr.OAuthProfile | None`` annotation
	            must stay valid for Gradio to supply this argument.

	    Returns:
	        A ``(status, table)`` tuple. ``status`` is a human-readable message.
	        ``table`` is a ``pandas.DataFrame`` with one row per answered
	        question, or ``None`` when the run stopped before any question was
	        processed (not logged in, fetch failure, empty question list).
	    """
	    # Login check: nothing is fetched or submitted for anonymous users.
	    if not profile:
	        return "Please Login to Hugging Face with the button.", None
	    username = profile.username

	    # Build the URLs used for fetching questions and submitting answers.
	    space_id = os.getenv("SPACE_ID")
	    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Unknown"
	    questions_url = f"{DEFAULT_API_URL}/questions"
	    submit_url = f"{DEFAULT_API_URL}/submit"

	    # 1. Fetch the questions.
	    try:
	        resp = requests.get(questions_url, timeout=20)
	        resp.raise_for_status()
	        questions_data = resp.json()
	    except Exception as e:
	        # UI boundary: report any network / HTTP / JSON failure as a status.
	        return f"Error fetching questions: {e}", None
	    if not questions_data:
	        return "No questions received from server.", None
	    if not isinstance(questions_data, list):
	        # Iterating e.g. a dict error payload would yield keys, not questions.
	        kind = type(questions_data).__name__
	        return f"Error fetching questions: unexpected response format ({kind}).", None

	    # 2. Run the agent on every question.
	    results_log = []
	    answers_payload = []
	    for item in questions_data:
	        # Skip malformed entries instead of crashing on item.get().
	        if not isinstance(item, dict):
	            continue
	        task_id = item.get("task_id")
	        question = item.get("question")
	        if not task_id or question is None:
	            continue

	        try:
	            # Call the agent exactly once per question, passing the task_id.
	            answer = agent(question, task_id=task_id)
	        except Exception as e:
	            # A failing question is still submitted, with the error as answer.
	            answer = f"ERROR: {e}"
	        else:
	            # Small delay after a successful call to stay under rate limits.
	            time.sleep(0.3)

	        answers_payload.append({
	            "task_id": task_id,
	            "submitted_answer": answer,
	        })
	        results_log.append({
	            "Task ID": task_id,
	            "Question": question,
	            "Submitted Answer": answer,
	        })

	    if not answers_payload:
	        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

	    # 3. Submit the answers.
	    submission_data = {
	        "username": username.strip(),
	        "agent_code": agent_code,
	        "answers": answers_payload,
	    }
	    try:
	        post = requests.post(submit_url, json=submission_data, timeout=60)
	        post.raise_for_status()
	        data = post.json()
	        # Kept inside the try so an unexpected response shape is reported as
	        # a failed submission rather than raising out of the handler.
	        status = (
	            f"✅ Submission Successful!\n"
	            f"User: {data.get('username')}\n"
	            f"Score: {data.get('score','N/A')}% "
	            f"({data.get('correct_count','?')}/{data.get('total_attempted','?')})\n"
	            f"Message: {data.get('message','No additional message.')}"
	        )
	        return status, pd.DataFrame(results_log)
	    except Exception as e:
	        return f"❌ Submission Failed: {e}", pd.DataFrame(results_log)

	# --- Gradio UI ---
	# Builds the page top to bottom: title, instructions, login button,
	# run button, then the status box and the results table.
	with gr.Blocks(title="Simplified GAIA Agent Evaluation") as demo:
	    gr.Markdown("# Simplified GAIA Agent Evaluation Runner")
	    gr.Markdown("""
	Instructions:
	1. Set your `GOOGLE_API_KEY` in the environment variables.
	2. Log in to your Hugging Face account using the button below.
	3. Click Run Evaluation & Submit All Answers to start.

	This runner uses:
	- A custom `agent.py` for answering GAIA questions.
	- Gradio for UI.
	- HTTP requests to fetch & submit answers.
	""")
	    # Login button; run_and_submit_all refuses to run without a profile.
	    gr.LoginButton()

	    run_btn = gr.Button("Run Evaluation & Submit All Answers", variant="primary")
	    status_out = gr.Textbox(label="Status / Results", lines=6, interactive=False)
	    table_out = gr.DataFrame(label="Questions and Answers", wrap=True)

	    # No `inputs` are wired: the handler's only parameter is `profile`.
	    # NOTE(review): presumably Gradio injects it from the handler's
	    # OAuthProfile type annotation - confirm against the Gradio OAuth docs.
	    # The returned (status, table) pair fills status_out and table_out.
	    run_btn.click(fn=run_and_submit_all, outputs=[status_out, table_out])

	# Launch the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch(debug=True, share=False)