import os
import gradio as gr
import requests
import inspect
import pandas as pd
from dotenv import load_dotenv
from typing import TypedDict, Annotated, Sequence, List, Dict, Any, Optional
import operator
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_community.tools.wikipedia.tool import WikipediaQueryRun
from langchain_community.utilities.wikipedia import WikipediaAPIWrapper
from langchain_community.tools.arxiv.tool import ArxivQueryRun
from langchain_community.utilities.arxiv import ArxivAPIWrapper
from langgraph.graph import StateGraph, END
from langchain_core.messages import BaseMessage, ToolMessage, HumanMessage, AIMessage, SystemMessage
from langchain_openai import ChatOpenAI
# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# --- Environment Setup ---
load_dotenv()
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")  # Tavily search requires its own API key

if not OPENROUTER_API_KEY:
    print("Warning: OPENROUTER_API_KEY not found in .env file. The LLM will not function.")
# --- Tool Setup ---
tools = []
if TAVILY_API_KEY:
    # TavilySearchAPIWrapper reads TAVILY_API_KEY from the environment
    # (populated above by load_dotenv), so the key need not be passed explicitly.
    tavily_tool = TavilySearchResults(max_results=3)
    tools.append(tavily_tool)
else:
    print("Warning: TAVILY_API_KEY not found in .env file. TavilySearchResults tool will not be available.")

wikipedia_tool = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(top_k_results=10, doc_content_chars_max=2000))
tools.append(wikipedia_tool)

arxiv_tool = ArxivQueryRun(api_wrapper=ArxivAPIWrapper(top_k_results=10, doc_content_chars_max=2000))
tools.append(arxiv_tool)
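# Optional smoke test for the keyless tools (a minimal sketch; the exact result
# format depends on the installed langchain_community version):
#     print(str(wikipedia_tool.invoke({"query": "LangGraph"}))[:200])
#     print(str(arxiv_tool.invoke({"query": "retrieval augmented generation"}))[:200])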
# --- LangGraph Agent Definition ---
class AgentState(TypedDict):
    messages: Annotated[Sequence[BaseMessage], operator.add]
    next_action: Optional[str]  # Reserved for routing decisions; currently unused (routing happens in _should_call_tools)
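# Note: the operator.add reducer concatenates message updates across nodes,
# e.g. {"messages": [a]} merged with {"messages": [b]} yields [a, b], so each
# node only needs to return the messages it adds.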
class LangGraphAgent:
    def __init__(self, llm_choice: str = "qwen"):
        print(f"LangGraphAgent initializing with {llm_choice}...")
        if not OPENROUTER_API_KEY:
            raise ValueError("OPENROUTER_API_KEY is not set. Cannot initialize LLM.")
        self.llm_choice = llm_choice
        self.supports_tool_calling = False  # Default to False
        if llm_choice == "llama":
            self.llm = ChatOpenAI(
                model="meta-llama/llama-3.1-8b-instruct:free",
                api_key=OPENROUTER_API_KEY,
                base_url="https://openrouter.ai/api/v1",
                temperature=0.1,
            )
            # Llama 3.1 8B on OpenRouter may not support tool calling via the OpenAI SDK binding method.
            self.supports_tool_calling = False
            print("Initialized Llama 3.1 8B Instruct (tool calling assumed NOT supported).")
        elif llm_choice == "qwen":
            self.llm = ChatOpenAI(
                model="qwen/qwen-2-7b-instruct:free",
                api_key=OPENROUTER_API_KEY,
                base_url="https://openrouter.ai/api/v1",
                temperature=0.1,
            )
            # Qwen models on OpenRouter may not support tool calling via the OpenAI SDK binding method.
            self.supports_tool_calling = False
            print("Initialized Qwen-2 7B Instruct (tool calling assumed NOT supported).")
        else:
            raise ValueError(f"Unsupported LLM choice: {llm_choice}. Choose 'llama' or 'qwen'.")
        self.tools_map = {tool.name: tool for tool in tools}
        self.graph = self._build_graph()
        print("LangGraphAgent initialized.")
    def _build_graph(self):
        workflow = StateGraph(AgentState)
        workflow.add_node("llm", self._call_llm)
        workflow.add_node("tools", self._tool_node)
        workflow.set_entry_point("llm")
        workflow.add_conditional_edges(
            "llm",
            self._should_call_tools,
            {
                "continue": "tools",
                "end": END,
            },
        )
        workflow.add_edge("tools", "llm")
        return workflow.compile()
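    # Resulting topology (sketch):
    #
    #   entry -> llm --"continue" (tool calls present)--> tools -> llm (loop)
    #                \--"end" (no tool calls, or tool calling unsupported)--> END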
    def _should_call_tools(self, state: AgentState) -> str:
        print("LLM deciding next step...")
        if not self.supports_tool_calling:
            print("Tool calling not supported by the current LLM. Ending interaction.")
            return "end"
        last_message = state["messages"][-1]
        if hasattr(last_message, "tool_calls") and last_message.tool_calls:
            print(f"LLM decided to call tools: {last_message.tool_calls}")
            return "continue"
        print("LLM decided to end.")
        return "end"
    def _call_llm(self, state: AgentState) -> Dict[str, Any]:
        print(f"Calling LLM ({self.llm_choice})...")
        if self.supports_tool_calling:
            print("Binding tools to LLM for function calling.")
            llm_with_tools = self.llm.bind_tools(tools)
            response = llm_with_tools.invoke(state["messages"])
        else:
            print("Invoking LLM without binding tools.")
            response = self.llm.invoke(state["messages"])
        print(f"LLM response: {response.content[:100]}...")
        return {"messages": [response]}
    def _tool_node(self, state: AgentState) -> Dict[str, Any]:
        print("Executing tools...")
        tool_messages = []
        last_message = state["messages"][-1]
        if not hasattr(last_message, "tool_calls") or not last_message.tool_calls:
            print("No tool calls found in the last message.")
            # This case should be handled by the conditional edge, but as a fallback:
            return {"messages": [AIMessage(content="No tools to call, proceeding.")]}
        for tool_call in last_message.tool_calls:
            tool_name = tool_call["name"]
            tool_args = tool_call["args"]
            print(f"Calling tool: {tool_name} with args: {tool_args}")
            if tool_name in self.tools_map:
                try:
                    tool_result = self.tools_map[tool_name].invoke(tool_args)
                    print(f"Tool {tool_name} result (first 100 chars): {str(tool_result)[:100]}...")
                    # ToolMessage (rather than the legacy FunctionMessage) carries the
                    # tool_call_id that OpenAI-compatible APIs require in tool responses.
                    tool_messages.append(ToolMessage(content=str(tool_result), name=tool_name, tool_call_id=tool_call["id"]))
                except Exception as e:
                    print(f"Error executing tool {tool_name}: {e}")
                    tool_messages.append(ToolMessage(content=f"Error executing tool {tool_name}: {e}", name=tool_name, tool_call_id=tool_call["id"]))
            else:
                print(f"Tool {tool_name} not found.")
                tool_messages.append(ToolMessage(content=f"Tool {tool_name} not found.", name=tool_name, tool_call_id=tool_call["id"]))
        return {"messages": tool_messages}
    def __call__(self, question: str) -> str:
        print(f"Agent received question (first 100 chars): {question[:100]}...")
        system_prompt = (
            "You are an AI assistant designed to answer questions concisely. "
            "Your goal is to provide only the direct answer to the question, without any additional explanations, conversation, or prefixes like 'FINAL ANSWER:'. "
            "For example, if the question is 'What is the capital of France?', you should respond with 'Paris'. "
            "If the question asks for a list, provide it comma-separated, e.g., 'apple, banana, cherry'. "
            "If the question asks for a number, provide only the number, e.g., '42'."
        )
        initial_state = {"messages": [SystemMessage(content=system_prompt), HumanMessage(content=question)]}
        final_graph_state = None
        try:
            # The recursion limit bounds the llm <-> tools loop.
            for event in self.graph.stream(initial_state, {"recursion_limit": 100}):
                if END in event:
                    final_graph_state = event[END]
                    break
                for key in event:
                    if key != END:
                        final_graph_state = event[key]
            if final_graph_state and final_graph_state["messages"]:
                for msg in reversed(final_graph_state["messages"]):
                    if isinstance(msg, AIMessage) and not msg.tool_calls and msg.content:  # Ensure content exists
                        answer = msg.content.strip()
                        if not answer:  # Skip empty answers after initial stripping
                            continue
                        # Remove common prefixes that LLMs might add despite instructions
                        prefixes_to_remove = [
                            "FINAL ANSWER:", "The answer is", "Here is the answer:",
                            "The final answer is", "Answer:", "Solution:",
                            "The direct answer is", "Here's the concise answer:",
                            "Here you go:", "Certainly, the answer is",
                        ]
                        for prefix in prefixes_to_remove:
                            # Case-insensitive prefix removal
                            if answer.lower().startswith(prefix.lower()):
                                answer = answer[len(prefix):].strip()
                        # Strip one pair of matching surrounding quotes
                        if len(answer) >= 2 and answer[0] == answer[-1] and answer[0] in ("\"", "'"):
                            temp_answer = answer[1:-1]
                            # Avoid stripping if the remainder is itself quoted,
                            # i.e. the quotes are part of the answer
                            if not (temp_answer.startswith(("\"", "'")) and temp_answer.endswith(("\"", "'"))):
                                answer = temp_answer
                        if not answer:  # Check again in case stripping emptied the answer
                            continue
                        print(f"Agent returning answer: {answer}")
                        return answer
                # Fallback: return the last raw AI message if the loop above found nothing
                print("No suitable AI message with valid content found after processing. Attempting to return last raw AI message if available.")
                last_ai_msg_content = next((m.content.strip() for m in reversed(final_graph_state["messages"]) if isinstance(m, AIMessage) and m.content and not m.tool_calls), None)
                if last_ai_msg_content:
                    print(f"Agent returning last raw AI message as fallback: {last_ai_msg_content}")
                    return last_ai_msg_content
                print("No suitable AI message found for final answer, even as fallback.")
                return "Error: Agent could not extract a valid answer."
            else:
                print("Error: Agent did not reach a final state or no messages found.")
                return "Error: Agent did not produce a conclusive answer."
        except Exception as e:
            print(f"Error during agent execution: {e}")
            import traceback
            traceback.print_exc()
            return f"Error during agent execution: {e}"
# --- Main Evaluation Logic (Modified from starter) ---
def run_and_submit_all(profile: gr.OAuthProfile | None, llm_model_choice: str):
    """
    Fetches all questions, runs the LangGraphAgent on them, submits all answers,
    and displays the results.
    """
    space_id = os.getenv("SPACE_ID")
    if profile:
        username = f"{profile.username}"
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None
    if not OPENROUTER_API_KEY:
        return "Error: OPENROUTER_API_KEY not found. Please set it in your .env file.", None
    # The TAVILY_API_KEY check is handled by the tool initialization itself, with a warning.
    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    print(f"Attempting to initialize agent with LLM: {llm_model_choice}")
    try:
        agent = LangGraphAgent(llm_choice=llm_model_choice)
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local_run_no_space_id"
    print(f"Agent code link: {agent_code}")

    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=20)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except requests.exceptions.JSONDecodeError as e:
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            print(f"\n--- Processing Task ID: {task_id} ---")
            submitted_answer = agent(question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as e:
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        results_df = pd.DataFrame(results_log)
        return final_status, results_df
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
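# Submission payload shape, inferred from the function above (the scoring
# endpoint's exact schema is an assumption, not documented here):
#     {
#         "username": "hf_username",
#         "agent_code": "https://huggingface.co/spaces/<space_id>/tree/main",
#         "answers": [{"task_id": "...", "submitted_answer": "..."}],
#     }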
# --- Gradio Interface ---
with gr.Blocks() as demo:
    gr.Markdown("# LangGraph GAIA Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**
        1. **Clone this space** if you haven't already.
        2. **Create a `.env` file** in the root of your space with your API keys:
           ```
           OPENROUTER_API_KEY="your_openrouter_api_key"
           TAVILY_API_KEY="your_tavily_api_key"  # Optional, but the TavilySearch tool won't work without it
           ```
        3. Log in to your Hugging Face account using the button below. Your HF username is used for submission.
        4. **Select the LLM model** you want the agent to use.
        5. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
        ---
        **Disclaimers:**
        - Ensure your Hugging Face Space is public so the `agent_code` link is verifiable.
        - Submitting all answers can take some time, as the agent processes each question sequentially.
        - Tool/function calling is currently disabled for both models (`supports_tool_calling = False`), so the agent answers from the LLM alone; tasks that require tools may not be answered optimally.
        """
    )
    gr.LoginButton()
    llm_choice_dropdown = gr.Dropdown(
        choices=["llama", "qwen"],
        value="llama",  # Default choice; note that tool calling is disabled for both models
        label="Select LLM Model",
        info="Choose the Large Language Model for the agent."
    )
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    run_button.click(
        fn=run_and_submit_all,
        # Gradio injects the gr.OAuthProfile argument automatically via gr.LoginButton;
        # only the dropdown needs to be listed as an input.
        inputs=[llm_choice_dropdown],
        outputs=[status_output, results_table]
    )
| if __name__ == "__main__": | |
| print("\n" + "-"*30 + " App Starting " + "-"*30) | |
| space_host_startup = os.getenv("SPACE_HOST") | |
| space_id_startup = os.getenv("SPACE_ID") | |
| if space_host_startup: | |
| print(f"✅ SPACE_HOST found: {space_host_startup}") | |
| print(f" Runtime URL should be: https://{space_host_startup}.hf.space") | |
| else: | |
| print("ℹ️ SPACE_HOST environment variable not found (running locally?).") | |
| if space_id_startup: | |
| print(f"✅ SPACE_ID found: {space_id_startup}") | |
| print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}") | |
| print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main") | |
| else: | |
| print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.") | |
| print("-"*(60 + len(" App Starting ")) + "\n") | |
| print("Launching Gradio Interface for LangGraph GAIA Agent Evaluation...") | |
| demo.launch(debug=True, share=False) |