hs-unit4-agent

Sleeping

App Files Files Community

hs-unit4-agent / app.py

HarsimranjitSingh

Update app.py

fcac5e2 verified 9 months ago

raw

history blame contribute delete

15.1 kB

	import os
	import gradio as gr
	import requests
	import pandas as pd
	import logging

	# Configure logging for more detailed output.
	# Root logger: INFO level, each record rendered as "<timestamp> - <level> - <message>".
	logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
	# Module-level logger used by the agent, the evaluation runner and the startup code below.
	logger = logging.getLogger(__name__)

	# --- LangChain Imports ---
	from langchain_community.tools import DuckDuckGoSearchRun
	from langchain.agents import AgentExecutor, create_react_agent
	from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
	from langchain_core.prompts import PromptTemplate
	from langchain_core.tools import Tool
	from langchain_core.messages import HumanMessage

	# --- Constants ---
	# Base URL of the scoring service; the runner appends "/questions" and "/submit" to it.
	DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

	# --- Basic Agent Definition ---
	class BasicAgent:
	    """ReAct-style LangChain agent backed by a Hugging Face hosted chat model.

	    Construction never raises: every failure is logged and leaves ``llm``
	    and/or ``agent_executor`` set to ``None`` so callers can check them.

	    Attributes set by ``__init__``:
	        llm: ``ChatHuggingFace`` instance, or ``None`` if setup or the smoke test failed.
	        tools: list holding the single ``web_search`` DuckDuckGo tool.
	        agent_prompt: ``PromptTemplate`` in ReAct format.
	        agent: result of ``create_react_agent`` (only set when ``llm`` is available).
	        agent_executor: ``AgentExecutor``, or ``None`` when the agent could not be built.
	    """

	    # Hub repository id of the model; kept in one place so logs and config cannot drift.
	    MODEL_REPO_ID = "mistralai/Mistral-7B-Instruct-v0.2"

	    def __init__(self):
	        """Build the LLM, tools, prompt and executor, logging (not raising) on failure."""
	        logger.info("Initializing LangChain-based Agent...")

	        self.llm = self._build_llm(os.getenv("HUGGINGFACEHUB_API_TOKEN"))

	        # --- Define Tools ---
	        self.tools = [
	            DuckDuckGoSearchRun(
	                name="web_search",
	                description="A useful tool for searching the internet for information, especially for current events or factual queries. Use this when you need to find specific data or verify facts."
	            )
	        ]

	        # --- Create Agent Prompt ---
	        # The template text is passed to the model verbatim; do not re-indent it.
	        self.agent_prompt = PromptTemplate.from_template(
	            """Answer the following question as concisely and directly as possible.
	If the question requires factual information, use your tools to find it.
	If the question requires a numerical answer, provide only the number.
	If the question requires a specific string, provide only that string.
	Do not include any conversational filler, explanations, or extra text unless explicitly asked.

	You have access to the following tools:
	{tools}

	Use the following format:

	Question: the input question you must answer
	Thought: you should always think about what to do
	Action: the action to take, should be one of [{tool_names}]
	Action Input: the input to the action
	Observation: the result of the action
	... (this Thought/Action/Action Input/Observation can repeat N times)
	Thought: I now know the final answer
	Final Answer: the final answer to the original input question. Provide only the final answer without any additional text or explanations.

	Question: {input}
	Thought:{agent_scratchpad}
	"""
	        )

	        # --- Initialize the Agent Executor ---
	        self.agent_executor = self._build_executor()

	    def _build_llm(self, hf_token):
	        """Create and smoke-test the chat model; return it, or ``None`` on any failure."""
	        if not hf_token:
	            logger.error("HUGGINGFACEHUB_API_TOKEN not found in environment variables.")
	            logger.error("Please add it as a secret in your Hugging Face Space settings.")
	            return None

	        try:
	            # Deliberately log only the model id: no part of the token goes to the logs.
	            logger.info(f"Attempting to initialize ChatHuggingFace with repo_id: {self.MODEL_REPO_ID}.")
	            llm = ChatHuggingFace(
	                llm=HuggingFaceEndpoint(
	                    repo_id=self.MODEL_REPO_ID,
	                    temperature=0.1,
	                    huggingfacehub_api_token=hf_token,
	                    task="text-generation",  # Mistral-Instruct often works well with this task
	                    max_new_tokens=512,
	                )
	            )
	            logger.info(f"ChatHuggingFace LLM instance created successfully for repo_id: {self.MODEL_REPO_ID}")

	            self._smoke_test_llm(llm)
	            logger.info("ChatHuggingFace LLM fully initialized and tested.")
	            return llm
	        except Exception as e:
	            # Broad catch on purpose: a broken LLM must degrade to "agent not configured".
	            logger.exception(f"CRITICAL ERROR during LLM (ChatHuggingFace/HuggingFaceEndpoint) initialization: {e}")
	            return None

	    @staticmethod
	    def _smoke_test_llm(llm):
	        """Run one tiny inference; raise if the call fails or returns no content."""
	        logger.info("Performing a small test inference with the initialized ChatHuggingFace LLM.")
	        try:
	            reply = llm.invoke([HumanMessage(content="What is the capital of France?")])
	            if not (reply and reply.content):
	                logger.error("LLM test inference returned an empty or invalid response.")
	                raise ValueError("LLM test inference failed to return content.")
	            logger.info(f"LLM test inference successful. Response snippet: '{reply.content[:50]}...'")
	        except Exception as test_e:
	            logger.error(f"LLM POST-INITIALIZATION INFERENCE TEST FAILED: {test_e}")
	            raise

	    def _build_executor(self):
	        """Wire ``llm``, ``tools`` and ``agent_prompt`` into an executor, or return ``None``."""
	        if not self.llm:
	            logger.warning("Agent Executor not initialized because LLM failed to initialize.")
	            return None

	        try:
	            self.agent = create_react_agent(self.llm, self.tools, self.agent_prompt)
	            executor = AgentExecutor(
	                agent=self.agent,
	                tools=self.tools,
	                verbose=True,
	                handle_parsing_errors=True,
	                max_iterations=7,
	            )
	            logger.info("LangChain Agent Executor initialized successfully.")
	            return executor
	        except Exception as e:
	            logger.exception(f"ERROR creating agent or agent executor: {e}")
	            return None

	    def __call__(self, question: str) -> str:
	        """Answer ``question`` with the agent.

	        Returns the stripped ``output`` field of the executor result. Never
	        raises: returns a string starting with ``AGENT_INIT_ERROR`` when the
	        executor is missing, or ``AGENT_EXECUTION_ERROR`` when the run fails.
	        """
	        logger.info(f"\n--- Agent received question: {question[:150]}...")

	        if not self.agent_executor:
	            logger.error("Agent not configured. Returning placeholder answer.")
	            return "AGENT_INIT_ERROR: Agent not configured due to prior LLM/agent setup failure. Check Space logs."

	        try:
	            response = self.agent_executor.invoke({"input": question})
	            final_answer = str(response.get('output', "No answer generated by agent.")).strip()
	            logger.info(f"--- Agent returning answer: '{final_answer[:150]}'")
	            return final_answer
	        except Exception as e:
	            logger.exception(f"ERROR during agent execution for question: {question[:50]}... Error: {e}")
	            return f"AGENT_EXECUTION_ERROR: {e}"

	def run_and_submit_all(profile: gr.OAuthProfile | None):
	    """Fetch all questions, answer them with ``BasicAgent`` and submit the answers.

	    Args:
	        profile: OAuth profile supplied by the Gradio login button, or ``None``
	            when the user is not logged in.

	    Returns:
	        A ``(status_message, results)`` tuple. ``results`` is a
	        ``pandas.DataFrame`` of per-question answers once the agent has run,
	        and ``None`` when the run stops earlier (no login, agent setup
	        failure, or question fetch failure).
	    """
	    space_id = os.getenv("SPACE_ID")
	    if not profile:
	        logger.warning("User not logged in.")
	        return "Please Login to Hugging Face with the button.", None

	    username = f"{profile.username}"
	    logger.info(f"User logged in: {username}")

	    api_url = DEFAULT_API_URL
	    questions_url = f"{api_url}/questions"
	    submit_url = f"{api_url}/submit"

	    # 1. Instantiate Agent
	    try:
	        agent = BasicAgent()
	        if not agent.llm or not agent.agent_executor:
	            return "Agent could not be initialized. HUGGINGFACEHUB_API_TOKEN might be missing or invalid, or there's an issue with the LLM/Agent setup. Check Space logs for details.", None
	    except Exception as e:
	        logger.exception(f"Critical error during agent instantiation: {e}")
	        return f"Error initializing agent: {e}", None

	    # Link to this Space's code, sent along with the answers.
	    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
	    logger.info(agent_code)

	    # 2. Fetch Questions
	    logger.info(f"Fetching questions from: {questions_url}")
	    try:
	        response = requests.get(questions_url, timeout=30)
	        response.raise_for_status()
	        questions_data = response.json()
	        if not questions_data:
	            logger.warning("Fetched questions list is empty.")
	            return "Fetched questions list is empty or invalid format.", None
	        logger.info(f"Fetched {len(questions_data)} questions.")
	    except requests.exceptions.JSONDecodeError as e:
	        # Must come before RequestException: JSONDecodeError is one of its
	        # subclasses, so the broader handler would otherwise shadow this one.
	        logger.error(f"Error decoding JSON response from questions endpoint: {e}")
	        logger.error(f"Response text: {response.text[:500]}")
	        return f"Error decoding server response for questions: {e}", None
	    except requests.exceptions.RequestException as e:
	        logger.error(f"Error fetching questions: {e}")
	        return f"Error fetching questions: {e}", None
	    except Exception as e:
	        logger.exception(f"An unexpected error occurred fetching questions: {e}")
	        return f"An unexpected error occurred fetching questions: {e}", None

	    # 3. Run your Agent
	    results_log = []
	    answers_payload = []
	    logger.info(f"Running agent on {len(questions_data)} questions...")
	    for i, item in enumerate(questions_data):
	        task_id = item.get("task_id")
	        question_text = item.get("question")
	        logger.info(f"\nProcessing question {i+1}/{len(questions_data)}: Task ID {task_id}")

	        if not task_id or question_text is None:
	            logger.warning(f"Skipping item with missing task_id or question: {item}")
	            continue
	        try:
	            submitted_answer = agent(question_text)
	            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
	            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
	        except Exception as e:
	            # A failed question is shown in the table but not submitted.
	            logger.exception(f"Error running agent on task {task_id}: {e}")
	            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})

	    # Built once: every remaining exit path returns this same table.
	    results_df = pd.DataFrame(results_log)

	    if not answers_payload:
	        logger.warning("Agent did not produce any answers to submit.")
	        return "Agent did not produce any answers to submit.", results_df

	    # 4. Prepare Submission
	    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
	    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
	    logger.info(status_update)

	    # 5. Submit
	    logger.info(f"Submitting {len(answers_payload)} answers to: {submit_url}")
	    try:
	        response = requests.post(submit_url, json=submission_data, timeout=120)
	        response.raise_for_status()
	        result_data = response.json()
	        final_status = (
	            f"Submission Successful!\n"
	            f"User: {result_data.get('username')}\n"
	            f"Overall Score: {result_data.get('score', 'N/A')}% "
	            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
	            f"Message: {result_data.get('message', 'No message received.')}"
	        )
	        logger.info("Submission successful.")
	        return final_status, results_df
	    except requests.exceptions.HTTPError as e:
	        error_detail = f"Server responded with status {e.response.status_code}."
	        try:
	            error_json = e.response.json()
	            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
	        except requests.exceptions.JSONDecodeError:
	            # Error body was not JSON; fall back to a raw-text excerpt.
	            error_detail += f" Response: {e.response.text[:500]}"
	        status_message = f"Submission Failed: {error_detail}"
	        logger.error(status_message)
	    except requests.exceptions.Timeout:
	        status_message = "Submission Failed: The request timed out."
	        logger.error(status_message)
	    except requests.exceptions.RequestException as e:
	        status_message = f"Submission Failed: Network error - {e}"
	        logger.error(status_message)
	    except Exception as e:
	        status_message = f"An unexpected error occurred during submission: {e}"
	        logger.exception(status_message)
	    return status_message, results_df

	# --- Build Gradio Interface using Blocks ---
	with gr.Blocks() as demo:
	    # Page heading, then the usage notes shown above the controls.
	    gr.Markdown(value="# Advanced Agent Evaluation Runner for GAIA Benchmark")
	    gr.Markdown(
	        value="""
	Instructions:
	1. Clone this space to your account.
	2. Add `HUGGINGFACEHUB_API_TOKEN` as a Space Secret under the "Settings" tab of your duplicated Space. This is crucial for the agent to use the Hugging Face Inference API.
	3. Modify the `BasicAgent` class in `app.py` to refine its logic, add more tools (like a calculator or file reader), and optimize its prompting to improve GAIA scores.
	4. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
	5. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see your score.
	---
	Disclaimers:
	The submission process can take time as the agent processes multiple questions. This setup provides a foundational framework; continuous improvement in agent logic and tool integration is key to higher GAIA scores. Consider strategies like answer caching or asynchronous processing for larger benchmarks.
	"""
	    )

	    # Controls: login, the run trigger, and the two result widgets.
	    gr.LoginButton()
	    run_button = gr.Button(value="Run Evaluation & Submit All Answers")
	    status_output = gr.Textbox(
	        label="Run Status / Submission Result",
	        lines=5,
	        interactive=False,
	    )
	    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

	    # The handler's (status, table) return pair fills these two widgets in order.
	    result_widgets = [status_output, results_table]
	    run_button.click(fn=run_and_submit_all, outputs=result_widgets)

	if __name__ == "__main__":
	    # Script entry point: log the Space environment, then launch the Gradio app.
	    logger.info("\n" + "-"*30 + " App Starting " + "-"*30)
	    space_host_startup = os.getenv("SPACE_HOST")
	    space_id_startup = os.getenv("SPACE_ID")
	    if space_host_startup:
	        logger.info(f"✅ SPACE_HOST found: {space_host_startup}")
	        # NOTE(review): if SPACE_HOST already ends in ".hf.space" this URL doubles the suffix — confirm.
	        logger.info(f" Runtime URL should be: https://{space_host_startup}.hf.space")
	    else:
	        logger.info("ℹ️ SPACE_HOST environment variable not found (running locally?).")
	    if space_id_startup:
	        logger.info(f"✅ SPACE_ID found: {space_id_startup}")
	        logger.info(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
	        logger.info(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
	    else:
	        logger.info("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
	    # Closing rule matches the banner width: 30 + len(" App Starting ") + 30.
	    logger.info("-"*(60 + len(" App Starting ")) + "\n")
	    logger.info("Launching Gradio Interface for Basic Agent Evaluation...")
	    demo.launch(debug=True, share=False)