Spaces:

agents-course
/

Final_Assignment_Template

Running

App Files Files Community

472

Final_Assignment_Template / app.py

divya1308

Update app.py

48979bf verified 11 days ago

Raw

History Blame

11.8 kB

	import os
	import re
	import json
	import tempfile
	from pathlib import Path

	import gradio as gr
	import requests
	import pandas as pd

	from smolagents import CodeAgent, DuckDuckGoSearchTool, VisitWebpageTool, tool
	from smolagents.models import InferenceClientModel


	# ============================================================
	# Constants
	# ============================================================
	# Base URL of the scoring service. It is the fallback used whenever the
	# SCORING_API_URL environment variable is not set.
	DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"


	# ============================================================
	# Helper tools
	# ============================================================

	@tool
	def download_task_file(task_id: str) -> str:
	    """
	    Download the file attached to a GAIA task and return the local file path.
	    Use this when the question references an attached file/document/image/data file.
	    Args:
	        task_id: The task id of the GAIA question.
	    Returns:
	        Local file path of the downloaded file, or a message if no file is available.
	    """
	    api_url = os.getenv("SCORING_API_URL", DEFAULT_API_URL)
	    file_url = f"{api_url}/files/{task_id}"

	    # Content-type fragments and the extension each one implies, checked in
	    # order. The generic "text" fragment comes last so that specific text
	    # subtypes such as "text/html" keep their own extension instead of all
	    # collapsing to ".txt".
	    extension_by_fragment = (
	        ("pdf", ".pdf"),
	        ("json", ".json"),
	        ("csv", ".csv"),
	        ("html", ".html"),
	        ("png", ".png"),
	        ("jpeg", ".jpg"),
	        ("jpg", ".jpg"),
	        ("excel", ".xlsx"),
	        ("spreadsheet", ".xlsx"),
	        ("text", ".txt"),
	    )

	    try:
	        response = requests.get(file_url, timeout=60)
	        if response.status_code != 200:
	            return f"No downloadable file found for task {task_id}. HTTP {response.status_code}"

	        content_type = response.headers.get("content-type", "").lower()
	        ext = next(
	            (suffix for fragment, suffix in extension_by_fragment if fragment in content_type),
	            "",
	        )

	        if not ext:
	            # The content type gave no hint (e.g. audio or source files), so
	            # fall back to the suffix of the filename advertised in the
	            # Content-Disposition header, when the server sends one.
	            disposition = response.headers.get("content-disposition", "")
	            found = re.search(r"filename\*?=([^;]+)", disposition, flags=re.IGNORECASE)
	            if found:
	                # Drop optional quotes and an RFC 5987 "charset''" prefix;
	                # only the trailing suffix is of interest here.
	                filename = found.group(1).strip().strip('"').rpartition("''")[2]
	                suffix = Path(filename).suffix
	                # Accept only short alphanumeric suffixes so a header value
	                # can never inject odd characters into the local path.
	                if re.fullmatch(r"\.[A-Za-z0-9]{1,10}", suffix):
	                    ext = suffix.lower()

	        # Each download gets its own fresh temporary directory.
	        tmp_dir = tempfile.mkdtemp(prefix="gaia_task_")
	        file_path = os.path.join(tmp_dir, f"{task_id}{ext}")

	        with open(file_path, "wb") as f:
	            f.write(response.content)

	        return file_path
	    except Exception as e:
	        # Deliberately broad: failures are reported back to the agent as a
	        # message instead of raising out of the tool call.
	        return f"Error downloading file for task {task_id}: {e}"


	@tool
	def read_local_text_file(file_path: str) -> str:
	    """
	    Read a local text-like file and return its contents.
	    Use this only for local TXT/JSON/CSV/HTML-like files after downloading them.
	    Args:
	        file_path: Path to a local file.
	    Returns:
	        File contents as text.
	    """
	    try:
	        target = Path(file_path)
	        if target.exists():
	            # Strict UTF-8 is attempted first; if that read fails for any
	            # reason, retry with the default encoding and skip undecodable
	            # bytes.
	            try:
	                contents = target.read_text(encoding="utf-8")
	            except Exception:
	                contents = target.read_text(errors="ignore")
	            return contents
	        return f"File not found: {file_path}"
	    except Exception as exc:
	        # Any remaining failure is handed back to the caller as a message.
	        return f"Error reading file {file_path}: {exc}"


	# ============================================================
	# Agent
	# ============================================================

	# Instruction preamble that BasicAgent.__call__ places in front of every
	# question. It names the tools to use for attached files and web lookups and
	# asks the model to reply with nothing but the bare answer text.
	SYSTEM_PROMPT = """
	You are solving a GAIA benchmark question.

	Rules:
	1. Think carefully and use tools when needed.
	2. If the question mentions an attached file, download it using the download_task_file tool.
	3. If a downloaded file is text/csv/json/html-like, inspect it with read_local_text_file.
	4. If web information is needed, use the search/browser tools.
	5. Return ONLY the final answer.
	6. Do NOT return explanations.
	7. Do NOT return the words "FINAL ANSWER".
	8. Do NOT add markdown, bullet points, or surrounding quotes unless the answer itself requires quotes.
	9. Keep the answer as short and exact as possible.
	"""

	class BasicAgent:
	def __init__(self):
	# You can change the model if needed, but this works well on HF Spaces
	# and avoids the old HfApiModel import issue.
	model_id = os.getenv("MODEL_ID", "Qwen/Qwen2.5-72B-Instruct")

	self.model = InferenceClientModel(
	model_id=model_id,
	token=os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN"),
	)

	self.agent = CodeAgent(
	tools=[
	DuckDuckGoSearchTool(),
	VisitWebpageTool(),
	download_task_file,
	read_local_text_file,
	],
	model=self.model,
	additional_authorized_imports=[
	"json",
	"re",
	"math",
	"statistics",
	"csv",
	"pandas",
	"pathlib",
	],
	max_steps=12,
	verbosity_level=1,
	)

	print(f"BasicAgent initialized with model: {model_id}")

	def clean_final_answer(self, answer: str) -> str:
	"""
	Clean the model output for exact-match scoring.
	"""
	if answer is None:
	return ""

	answer = str(answer).strip()

	# Remove common prefixes the model may add
	answer = re.sub(r"^\sFINAL ANSWER\s[:\-]?\s*", "", answer, flags=re.IGNORECASE)
	answer = re.sub(r"^\sAnswer\s[:\-]?\s*", "", answer, flags=re.IGNORECASE)
	answer = re.sub(r"^\sThe answer is\s", "", answer, flags=re.IGNORECASE)

	# Remove enclosing markdown/code fences if any
	answer = answer.strip().strip("`").strip()

	# If it returns quoted answer like "Paris", remove only outer quotes
	if len(answer) >= 2 and (
	(answer.startswith('"') and answer.endswith('"')) or
	(answer.startswith("'") and answer.endswith("'"))
	):
	answer = answer[1:-1].strip()

	return answer.strip()

	def __call__(self, question: str, task_id: str \| None = None) -> str:
	"""
	Run the agent on a question and return a clean final answer.
	"""
	prompt = f"{SYSTEM_PROMPT}\n\nTask ID: {task_id}\nQuestion:\n{question}\n"
	print(f"Running agent for task_id={task_id}")

	try:
	result = self.agent.run(prompt)
	cleaned = self.clean_final_answer(result)
	print(f"Agent raw result: {result}")
	print(f"Agent cleaned result: {cleaned}")
	return cleaned
	except Exception as e:
	print(f"Agent failed on task {task_id}: {e}")
	return f"ERROR: {e}"


	# ============================================================
	# Main runner
	# ============================================================

	def run_and_submit_all(profile: gr.OAuthProfile | None):
	    """
	    Fetch all questions, run the agent, submit answers, and display results.

	    Args:
	        profile: Profile of the logged-in user, or None when nobody is logged
	            in. Only ``profile.username`` is read.

	    Returns:
	        A ``(status_message, results)`` tuple. ``results`` is a pandas
	        DataFrame with the columns "Task ID", "Question" and
	        "Submitted Answer", or None when the run stops before any question
	        is processed (no login, agent construction failure, or question
	        fetch failure).
	    """
	    if not profile:
	        return "Please login to Hugging Face first.", None

	    username = profile.username.strip()
	    print(f"User logged in: {username}")

	    space_id = os.getenv("SPACE_ID")
	    api_url = os.getenv("SCORING_API_URL", DEFAULT_API_URL)

	    if space_id:
	        agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
	    else:
	        # Fallback so submission still works locally if needed
	        agent_code = "LOCAL_RUN_NO_SPACE_ID"
	        print("SPACE_ID not found. Using LOCAL_RUN_NO_SPACE_ID")

	    questions_url = f"{api_url}/questions"
	    submit_url = f"{api_url}/submit"

	    # 1) Build agent
	    try:
	        agent = BasicAgent()
	    except Exception as e:
	        return f"Error initializing agent: {e}", None

	    # 2) Fetch questions
	    print(f"Fetching questions from {questions_url}")
	    try:
	        response = requests.get(questions_url, timeout=60)
	        response.raise_for_status()
	        questions_data = response.json()

	        if not isinstance(questions_data, list) or not questions_data:
	            return "Questions endpoint returned empty/invalid data.", None

	        print(f"Fetched {len(questions_data)} questions.")
	    except Exception as e:
	        return f"Error fetching questions: {e}", None

	    # 3) Solve questions
	    answers_payload = []
	    results_log = []

	    for item in questions_data:
	        task_id = item.get("task_id")
	        question_text = item.get("question", "")

	        if not task_id or not question_text:
	            # Incomplete entries are shown in the results table but are not
	            # part of the submission.
	            results_log.append({
	                "Task ID": item.get("task_id", "UNKNOWN"),
	                "Question": item.get("question", ""),
	                "Submitted Answer": "SKIPPED: Missing task_id or question",
	            })
	            continue

	        try:
	            submitted_answer = agent(question_text, task_id=task_id)
	        except Exception as e:
	            submitted_answer = f"ERROR: {e}"

	        answers_payload.append({
	            "task_id": task_id,
	            "submitted_answer": str(submitted_answer).strip(),
	        })

	        results_log.append({
	            "Task ID": task_id,
	            "Question": question_text,
	            "Submitted Answer": submitted_answer,
	        })

	    if not answers_payload:
	        return "No answers were generated.", pd.DataFrame(results_log)

	    # 4) Submit
	    submission_data = {
	        "username": username,
	        "agent_code": agent_code,
	        "answers": answers_payload,
	    }

	    # Log a summary only (answer count instead of the answers themselves).
	    print("Submitting payload...")
	    print(json.dumps({
	        "username": username,
	        "agent_code": agent_code,
	        "answers_count": len(answers_payload),
	    }, indent=2))

	    try:
	        response = requests.post(submit_url, json=submission_data, timeout=180)
	        response.raise_for_status()
	        result_data = response.json()

	        final_status = (
	            f"Submission Successful!\n"
	            f"User: {result_data.get('username', username)}\n"
	            f"Overall Score: {result_data.get('score', 'N/A')}% "
	            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
	            f"Message: {result_data.get('message', 'No message received.')}"
	        )

	        return final_status, pd.DataFrame(results_log)

	    except requests.exceptions.HTTPError as e:
	        # Prefer the JSON error body; fall back to a truncated text body.
	        detail = f"HTTP {e.response.status_code}"
	        try:
	            detail_json = e.response.json()
	            detail += f" | {detail_json}"
	        except Exception:
	            detail += f" | {e.response.text[:1000]}"
	        return f"Submission failed: {detail}", pd.DataFrame(results_log)

	    except Exception as e:
	        return f"Submission failed: {e}", pd.DataFrame(results_log)


	# ============================================================
	# Gradio UI
	# ============================================================

	with gr.Blocks() as demo:
	    # Page heading followed by the usage steps.
	    gr.Markdown(value="# GAIA Unit 4 Agent Evaluation Runner")
	    gr.Markdown(
	        value="""
	Instructions
	1. Login with your Hugging Face account.
	2. Click Run Evaluation & Submit All Answers.
	3. The app will fetch questions, run the agent, and submit the answers.
	"""
	    )

	    # Controls: login first, then the button that starts the whole run.
	    gr.LoginButton()
	    run_button = gr.Button(value="Run Evaluation & Submit All Answers")

	    # Output widgets that receive the two values returned by run_and_submit_all.
	    status_output = gr.Textbox(
	        label="Run Status / Submission Result",
	        lines=6,
	        interactive=False,
	    )
	    results_table = gr.DataFrame(
	        label="Questions and Agent Answers",
	        wrap=True,
	    )

	    run_button.click(run_and_submit_all, outputs=[status_output, results_table])

	if __name__ == "__main__":
	    print("\n" + "-" * 30 + " App Starting " + "-" * 30)

	    # Report the two environment variables, if set, before launching.
	    space_host = os.getenv("SPACE_HOST")
	    print(f"SPACE_HOST: {space_host}" if space_host else "SPACE_HOST not found.")

	    space_id = os.getenv("SPACE_ID")
	    if not space_id:
	        print("SPACE_ID not found.")
	    else:
	        print(f"SPACE_ID: {space_id}")
	        print(f"Repo Tree URL: https://huggingface.co/spaces/{space_id}/tree/main")

	    print("Launching app...")
	    demo.launch(debug=True)