|
|
import os |
|
|
import logging |
|
|
import traceback |
|
|
import gradio as gr |
|
|
import requests |
|
|
import pandas as pd |
|
|
from smolagents import CodeAgent, tool |
|
|
from smolagents.models import OpenAIServerModel |
|
|
|
|
|
|
|
|
# Configure application-wide logging once at import time so every record
# carries a timestamp and level name.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
)
logger = logging.getLogger(__name__)  # module-scoped logger, stdlib convention
|
|
|
|
|
|
|
|
# Base URL of the scoring service used to fetch questions and submit answers.
SUBMISSION_URL = "https://agents-course-unit4-scoring.hf.space"

# Token for the GitHub Models inference endpoint; required for the LLM client.
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
if not GITHUB_TOKEN:
    # Fail fast at import time: nothing below can work without credentials.
    raise ValueError("CRITICAL: GITHUB_TOKEN environment variable not set.")

# OpenAI-compatible inference endpoint (GitHub Models).
GITHUB_ENDPOINT = "https://models.github.ai/inference"
# Model identifier; overridable via the MODEL_ID environment variable.
MODEL_ID = os.getenv("MODEL_ID", "openai/gpt-4o-mini")
|
|
|
|
|
@tool
def wikipedia_lookup(page_title: str) -> str:
    """
    Fetches the summary intro text of an English Wikipedia page. Use exact titles.

    Args:
        page_title (str): The exact title of the Wikipedia page (e.g., 'Albert Einstein').
    """
    # Wikipedia REST API expects underscores in place of spaces.
    safe_title = page_title.replace(" ", "_")
    logger.info(f"Wikipedia lookup: '{page_title}'")
    try:
        endpoint = f"https://en.wikipedia.org/api/rest_v1/page/summary/{safe_title}"
        # Wikipedia's API policy asks for an identifying User-Agent.
        request_headers = {'User-Agent': f'GAIAgent/1.2 ({os.getenv("SPACE_ID", "unknown")})'}
        response = requests.get(endpoint, headers=request_headers, timeout=15)
        response.raise_for_status()
        payload = response.json()

        summary = payload.get("extract", "")
        if summary:
            return summary

        # No extract: distinguish disambiguation pages from genuinely empty ones.
        resolved_title = payload.get("title", page_title)
        if payload.get("type") == "disambiguation":
            return f"Wikipedia Error: '{resolved_title}' is a disambiguation page. Try a more specific title."
        return f"Wikipedia Error: Page '{resolved_title}' found but has no summary."
    except requests.exceptions.HTTPError as e:
        code = e.response.status_code
        return f"Wikipedia Error: {'Page not found' if code == 404 else f'HTTP {code}'} for '{page_title}'."
    except Exception as e:
        # Tool errors are returned as strings so the agent can observe them.
        return f"Wikipedia Error: {e}"
|
|
|
|
|
|
|
|
# ReAct-style instruction prompt prepended to every question before it is
# handed to the agent. Downstream code parses the agent output for a line
# beginning with "FINAL ANSWER: ", so that contract must stay in sync here.
REACT_INSTRUCTION_PROMPT = """You are a helpful assistant using tools to answer questions.
Available Tools:
- wikipedia_lookup(page_title: str): Looks up a specific English Wikipedia page. Use exact titles (e.g., 'Berlin').
Follow these steps:
1. Thought: Plan which tool to use and why.
2. Action: Call the tool (e.g., wikipedia_lookup(page_title="...")).
3. Observation: Record the result.
4. Thought: Analyze result. If answered, prepare final answer. If not, plan next step.
5. Repeat Action/Observation/Thought until answered or determined impossible.
6. Thought: Summarize findings based ONLY on observations.
7. Final Answer: Provide the answer starting exactly with "FINAL ANSWER: " using the required format (number, short string, or comma-separated list).
Formatting Rules for FINAL ANSWER:
- Numbers: Just the number (e.g., `42`).
- Strings: Minimal words, no articles. Digits as words (e.g., `seven`).
- Lists: Comma-separated (e.g., `paris,london,three`).
Let's begin!
"""
|
|
|
|
|
|
|
|
logger.info(f"Initializing LLM and agent: {MODEL_ID}")
try:
    # OpenAI-compatible client pointed at the GitHub Models endpoint.
    llm_model = OpenAIServerModel(
        model_id=MODEL_ID,
        api_key=GITHUB_TOKEN,
        api_base=GITHUB_ENDPOINT,
    )
    # CodeAgent with the single Wikipedia tool defined above.
    agent = CodeAgent(tools=[wikipedia_lookup], model=llm_model)
    logger.info("Agent initialization complete")
except Exception as e:
    # Abort startup: the app is useless without a working agent.
    logger.exception("CRITICAL: Agent initialization failed")
    raise RuntimeError(f"Agent initialization failed: {e}") from e
|
|
|
|
|
def run_agent_on_question(question: str) -> str:
    """Run the agent on a single question and return its raw output.

    Blank questions and any exception raised during the run are reported
    as a string prefixed with ``AGENT_ERROR:`` rather than raised.
    """
    cleaned = question.strip()
    if not cleaned:
        return "AGENT_ERROR: Empty question"

    logger.info(f"Running agent on: '{cleaned}'")
    try:
        # Prepend the ReAct instructions so the agent follows the protocol.
        prompt = f"{REACT_INSTRUCTION_PROMPT.strip()}\n\nQUESTION: {cleaned}"
        return agent.run(prompt)
    except Exception as e:
        logger.exception("Agent run failed")
        return f"AGENT_ERROR: {e}\n{traceback.format_exc()}"
|
|
|
|
|
def _extract_final_answer(raw_output: str) -> str:
    """Extract the text after 'FINAL ANSWER:' from the agent's raw output.

    Agent errors are passed through verbatim; any other output lacking the
    marker is flagged so the submission payload is always well-formed.
    """
    if "FINAL ANSWER:" in raw_output:
        return raw_output.split("FINAL ANSWER:", 1)[1].strip()
    if "AGENT_ERROR:" in raw_output:
        return raw_output
    return "AGENT_ERROR: No final answer found"


def evaluate_and_submit():
    """Fetch all questions, run the agent on each, and submit the answers.

    Returns:
        tuple[str, pd.DataFrame]: A human-readable status message and a
        DataFrame logging task id, question, submitted answer, and the
        agent's full raw output for every processed question.
    """
    logger.info("🚀 Starting evaluation...")
    username = os.getenv("HF_USERNAME", "unknown_user")

    # --- Fetch questions -------------------------------------------------
    try:
        resp = requests.get(f"{SUBMISSION_URL}/questions", timeout=20)
        # Fail on HTTP errors instead of trying to JSON-decode an error page.
        resp.raise_for_status()
        questions = resp.json()
        if not isinstance(questions, list):
            raise ValueError("Invalid response format")
        logger.info(f"✅ Fetched {len(questions)} questions")
    except Exception as e:
        logger.exception("Failed to fetch questions")
        return f"❌ Error fetching questions: {e}", pd.DataFrame()

    if not questions:
        return "ℹ️ No questions received", pd.DataFrame()

    # --- Run the agent on every question ---------------------------------
    results_log = []
    answers_payload = []
    for i, item in enumerate(questions):
        task_id, question_text = item.get("task_id"), item.get("question")
        if not task_id or not question_text:
            # Skip malformed entries rather than failing the whole run.
            continue

        logger.info(f"Processing Q{i+1}/{len(questions)}: ID={task_id}")
        raw_output = run_agent_on_question(question_text)
        final_answer = _extract_final_answer(raw_output)

        results_log.append({
            "Task ID": task_id,
            "Question": question_text,
            "Submitted Answer": final_answer,
            "Full Output": raw_output,
        })
        answers_payload.append({"task_id": task_id, "submitted_answer": final_answer})

    results_df = pd.DataFrame(results_log)
    if not answers_payload:
        return "⚠️ No answers generated", results_df

    # --- Submit ----------------------------------------------------------
    logger.info(f"Submitting {len(answers_payload)} answers...")
    space_id = os.getenv("SPACE_ID", "NA")
    agent_code_url = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id != "NA" else "NA"

    try:
        submit_resp = requests.post(
            f"{SUBMISSION_URL}/submit",
            json={"username": username, "agent_code": agent_code_url, "answers": answers_payload},
            timeout=90,
        )
        # Surface HTTP failures as HTTPError (caught below) rather than
        # returning a confusing JSON-decode error.
        submit_resp.raise_for_status()
        response = submit_resp.json()

        score = response.get('score', 'N/A')
        score_str = f"{float(score):.2f}%" if isinstance(score, (int, float)) else str(score)
        return (f"✅ Success! Score: {score_str} "
                f"({response.get('correct_count','?')}/{response.get('total_attempted','?')}). "
                f"Msg: {response.get('message','')}"), results_df
    except Exception as e:
        err_msg = f"❌ Submission Failed: {e}"
        # NOTE: requests.Response is falsy for 4xx/5xx status codes, so a
        # plain truthiness check would drop the body exactly when it matters.
        if getattr(e, 'response', None) is not None:
            err_msg += f" | Response: {e.response.text[:300]}"
        return err_msg, results_df
|
|
|
|
|
|
|
|
# --- Gradio UI: one button triggers the full evaluate-and-submit run ------
# (Original labels contained mojibake-garbled emoji; repaired here.)
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🚀 Agent Evaluation Runner 🚀\nEnsure `GITHUB_TOKEN` secret is set. Click Run to start.")
    run_button = gr.Button("▶️ Run Evaluation & Submit All Answers", variant="primary")
    # Status message returned as the first element of evaluate_and_submit().
    status_box = gr.Textbox(label="📊 Status", lines=4, interactive=False)
    # Per-question log DataFrame returned as the second element.
    results_display = gr.DataFrame(
        label="📋 Detailed Log",
        headers=["Task ID", "Question", "Submitted Answer", "Full Output"],
        wrap=True,
        column_widths=["10%", "25%", "20%", "45%"],
    )
    run_button.click(fn=evaluate_and_submit, outputs=[status_box, results_display])
|
|
|
|
|
if __name__ == "__main__":
    # Entry point: start the Gradio server (blocking call).
    logger.info("Launching Gradio application...")
    demo.launch(share=False, debug=True)
|
|
|