Final_Assignment_Template3

Sleeping

App Files Files Community

Final_Assignment_Template3 / app.py

kpbotla

Upload app.py

c27ae1c verified 6 months ago

raw

history blame contribute delete

5.61 kB


	import os
	import gradio as gr
	import requests
	import pandas as pd
	from duckduckgo_search import DDGS
	from transformers import pipeline
	from smolagents import tool

	@tool
	def web_search(query: str) -> str:
	    """
	    Search DuckDuckGo for a fact, restricted to English Wikipedia.

	    The query is suffixed with ``site:en.wikipedia.org``; among the first
	    five hits, the first Wikipedia page with a non-empty snippet is used.

	    Args:
	        query: The user's factual question.

	    Returns:
	        The snippet followed by a markdown link to its source page, or a
	        fixed "could not find" message when no suitable hit exists.
	    """
	    # Function-scope import keeps the tool self-contained.
	    from itertools import islice

	    refined = f"{query} site:en.wikipedia.org"
	    with DDGS() as ddgs:
	        # islice does not assume text() returns a sliceable list (a lazy
	        # iterator would break on [:5]); `or ()` tolerates a None result.
	        for hit in islice(ddgs.text(refined) or (), 5):
	            url = hit.get("href") or ""  # a hit without a URL is skipped, not a KeyError
	            if "wikipedia.org" not in url.lower():
	                continue
	            snippet = hit.get("body") or hit.get("content") or hit.get("snippet", "")
	            if snippet:
	                return f"{snippet}\n\nSource: [{url}]({url})"
	    return "Could not find a direct answer from Wikipedia."

	@tool
	def cite(input: str) -> str:
	    """
	    Format an answer and its source URL as a markdown citation.

	    Args:
	        input: A string of the form 'answer ||| source-url' with exactly one '|||' separator.

	    Returns:
	        The answer followed by a markdown link to the source, or a fixed
	        failure message when the input does not have exactly two parts.
	    """
	    # The separator is three literal pipe characters (no backslashes).
	    parts = input.split("|||") if isinstance(input, str) else []
	    if len(parts) != 2:
	        # Zero or several separators (or a non-string) cannot be split into
	        # one answer and one URL; report it instead of raising.
	        return "Could not format citation."
	    answer, url = (part.strip() for part in parts)
	    return f"{answer}\n\nSource: [{url}]({url})"

	@tool
	def python(code: str) -> str:
	    """
	    Evaluate a pure arithmetic expression and report the result.

	    Only numeric literals, parentheses and the operators + - * / // % ** are
	    accepted; names, calls, attribute access and every other construct are
	    rejected before anything is evaluated.

	    Args:
	        code: A math expression or calculation.

	    Returns:
	        A string beginning with "Answer" on success or "Error" on failure.
	    """
	    # Function-scope import keeps the tool self-contained.
	    import ast

	    allowed_nodes = (
	        ast.Expression, ast.BinOp, ast.UnaryOp, ast.Constant,
	        ast.Add, ast.Sub, ast.Mult, ast.Div, ast.FloorDiv, ast.Mod, ast.Pow,
	        ast.UAdd, ast.USub,
	    )
	    try:
	        # strip(): ast.parse rejects leading whitespace that eval() tolerated.
	        tree = ast.parse(code.strip(), mode="eval")
	        for node in ast.walk(tree):
	            if not isinstance(node, allowed_nodes):
	                raise ValueError(f"unsupported syntax: {type(node).__name__}")
	            if isinstance(node, ast.Constant) and not isinstance(node.value, (int, float, complex)):
	                raise ValueError("only numeric literals are allowed")
	        # SECURITY: eval() with an emptied __builtins__ is NOT a sandbox on its
	        # own (attribute chains on literals can reach arbitrary classes). The
	        # whitelist above is what makes this call safe; do not relax it.
	        # NOTE(review): very large exponents (e.g. 9**9**9) can still consume
	        # a lot of CPU and memory; add a bound if inputs are fully untrusted.
	        result = str(eval(compile(tree, "<expression>", "eval"), {"__builtins__": {}}, {}))
	        return f"Answer: {result}"
	    except Exception as e:
	        # Any parse, validation or arithmetic failure is reported as text
	        # so the caller always gets a string back.
	        return f"Error: {e}"

	@tool
	def fallback(_: str) -> str:
	    """
	    Produce a courteous reply for questions that cannot be answered.

	    Args:
	        _: Ignored by this tool.

	    Returns:
	        A fixed apology asking the user to rephrase.
	    """
	    apology = "Sorry, I couldn't confidently answer that. Could you rephrase?"
	    return apology

	class BasicAgent:
	    """
	    Rule-based router that sends a question to one of the module's tools.

	    Routing order: citation formatting when the text contains '|||',
	    arithmetic when it contains a digit and an operator character, the
	    polite fallback for questions shorter than three words, and a web
	    search for everything else.
	    """

	    def __call__(self, question: str) -> str:
	        """
	        Answer a single question.

	        Args:
	            question: The raw question text.

	        Returns:
	            The selected tool's output, or a string starting with
	            "Agent error" if routing or a tool raised.
	        """
	        try:
	            # The separator is three literal pipe characters (no backslashes).
	            if "|||" in question:
	                return cite(question)

	            has_operator = any(op in question for op in "+-*/")
	            has_digit = any(ch.isdigit() for ch in question)
	            if has_operator and has_digit:
	                # Dates, ranges and hyphenated words also contain a digit
	                # plus '-' or '/', so a calculator failure is not final:
	                # fall through to the remaining routes instead of returning
	                # the error text as the answer.
	                outcome = python(question)
	                if not outcome.startswith("Error:"):
	                    return outcome

	            if len(question.split()) < 3:
	                return fallback(question)
	            return web_search(question)
	        except Exception as e:
	            # Boundary handler: the caller expects a string for every question.
	            return f"Agent error: {e}"

	# --- Evaluation Logic ---
	# Base URL of the scoring service; run_and_submit_all appends "/questions"
	# and "/submit" to it to build the endpoints it calls.
	DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

	def run_and_submit_all(profile: gr.OAuthProfile | None):
	    """
	    Fetch the evaluation questions, answer each with BasicAgent, and submit.

	    Args:
	        profile: The logged-in Hugging Face user, or None when nobody is logged in.

	    Returns:
	        A (status message, results) pair. The results are a DataFrame of the
	        per-question answers, or None when the run stopped before any
	        question was processed.
	    """
	    if not profile:
	        return "Please Login to Hugging Face with the button.", None
	    # Guard against a missing username so .strip() cannot raise.
	    username = (profile.username or "").strip()

	    space_id = os.getenv("SPACE_ID")
	    api_url = DEFAULT_API_URL
	    questions_url = f"{api_url}/questions"
	    submit_url = f"{api_url}/submit"
	    # Link to this Space's source tree, sent along with the answers.
	    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

	    try:
	        agent = BasicAgent()
	    except Exception as e:
	        return f"Error initializing agent: {e}", None

	    try:
	        response = requests.get(questions_url, timeout=15)
	        response.raise_for_status()
	        questions_data = response.json()
	    except Exception as e:
	        # Covers network failures, HTTP error statuses and invalid JSON.
	        return f"Error fetching questions: {e}", None

	    # Anything other than a list would fail below on item.get(); report it
	    # through the status box instead of letting the handler crash.
	    if not isinstance(questions_data, list):
	        return "Error fetching questions: unexpected response format.", None

	    results_log = []
	    answers_payload = []

	    for item in questions_data:
	        if not isinstance(item, dict):
	            continue
	        task_id = item.get("task_id")
	        question_text = item.get("question")
	        if not task_id or question_text is None:
	            continue
	        try:
	            submitted_answer = agent(question_text)
	        except Exception as e:
	            # A failed question is logged for display but not submitted.
	            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
	            continue
	        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
	        results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})

	    if not answers_payload:
	        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

	    submission_data = {
	        "username": username,
	        "agent_code": agent_code,
	        "answers": answers_payload,
	    }

	    try:
	        response = requests.post(submit_url, json=submission_data, timeout=60)
	        response.raise_for_status()
	        result_data = response.json()
	        final_status = (
	            f"Submission Successful!\n"
	            f"User: {result_data.get('username')}\n"
	            f"Overall Score: {result_data.get('score', 'N/A')}% "
	            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
	            f"Message: {result_data.get('message', 'No message received.')}"
	        )
	        return final_status, pd.DataFrame(results_log)
	    except Exception as e:
	        # Boundary handler: the answers are still shown even if submission failed.
	        return f"Submission Failed: {e}", pd.DataFrame(results_log)

	# --- Gradio UI ---
	# Components are created in display order inside the Blocks context:
	# title, instructions, login button, run button, status box, results table.
	with gr.Blocks() as demo:
	    gr.Markdown("# Smart Agent Evaluation Runner")
	    gr.Markdown("""
	Instructions:
	1. Login to your HF account using the button.
	2. Click 'Run Evaluation & Submit All Answers' to test your agent.
	""")

	    gr.LoginButton()
	    run_button = gr.Button("Run Evaluation & Submit All Answers")
	    status_output = gr.Textbox(
	        label="Run Status / Submission Result",
	        lines=5,
	        interactive=False,
	    )
	    results_table = gr.DataFrame(
	        label="Questions and Agent Answers",
	        wrap=True,
	    )

	    # The handler's two return values fill the status box and the table.
	    run_button.click(
	        fn=run_and_submit_all,
	        outputs=[status_output, results_table],
	    )

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()