Final_Assignment_Template

Runtime error

App Files Files Community

Final_Assignment_Template / agent.py

gokul-pv

prompt update and cleanup

7fbe1a5 5 months ago

raw

history blame contribute delete

13.4 kB

	import math
	import re
	import requests
	import pandas as pd
	import base64

	from markdownify import markdownify
	from requests.exceptions import RequestException
	from typing import TypedDict, Annotated, Optional, Any

	from langchain_core.messages import HumanMessage, SystemMessage, AnyMessage
	from langchain_core.tools import tool
	from langchain_ollama import ChatOllama
	from langgraph.graph.message import add_messages
	from langgraph.graph import START, StateGraph
	from langgraph.prebuilt import ToolNode, tools_condition
	from langchain_core.runnables.config import RunnableConfig
	from langchain_google_genai import ChatGoogleGenerativeAI
	from langchain_groq import ChatGroq
	from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint

	# Built-in LangChain tools
	from langchain_community.tools import (
	WikipediaQueryRun,
	DuckDuckGoSearchRun,
	ArxivQueryRun,
	ShellTool,
	)
	from langchain_community.utilities import (
	WikipediaAPIWrapper,
	DuckDuckGoSearchAPIWrapper,
	ArxivAPIWrapper,
	)
	from langchain_experimental.tools import PythonREPLTool


	# Vision-capable chat model used by extract_text_multimodal (module-level global).
	# Google Gemini is the active configuration. To use a local Ollama vision
	# model instead, replace the assignment below with:
	# vision_llm = ChatOllama(
	#     model="qwen2-vl:7b",
	#     base_url="http://localhost:11434"
	# )
	vision_llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash")

	# ============== CUSTOM TOOLS (not available in LangChain) ==============

	@tool
	def reverse_text(text: str) -> str:
	    """Reverse the given text character by character."""
	    # NOTE(review): the docstring is presumably what @tool exposes to the
	    # model as the tool description, so its wording is kept verbatim.
	    return "".join(reversed(text))


	@tool
	def reverse_words(text: str) -> str:
	    """Reverse the order of words in the given text."""
	    # Words are whitespace-delimited; runs of whitespace collapse to a
	    # single space in the result because split() discards them.
	    words = text.split()
	    words.reverse()
	    return " ".join(words)


	@tool
	def calculator(expression: str) -> str:
	    """Perform mathematical calculations safely. Supports basic arithmetic operations."""
	    # Returns the result as a string, or a string starting with "Error:" /
	    # "Calculation error:" when the expression is rejected or fails.
	    #
	    # Whitelist: digits, + - * /, decimal point, parentheses and spaces.
	    # No letters or underscores can get through, so no names are reachable.
	    allowed_chars = frozenset("0123456789+-*/.() ")
	    try:
	        if not expression.strip():
	            # Previously this surfaced as an opaque SyntaxError message.
	            return "Error: Empty expression"
	        if not all(c in allowed_chars for c in expression):
	            return "Error: Invalid characters in expression"
	        # SECURITY NOTE(review): eval() on model-supplied text. The character
	        # whitelist plus the empty globals/builtins below block name access,
	        # but "**" still passes the filter, so a power tower such as
	        # 9**9**9**9 can consume unbounded CPU/memory. Consider replacing
	        # this with an ast-based evaluator that bounds exponents.
	        result = eval(expression, {"__builtins__": {}}, {})
	        return str(result)
	    except Exception as e:
	        return f"Calculation error: {str(e)}"


	@tool
	def advanced_math(operation: str, num1: float, num2: Optional[float] = None) -> str:
	    """
	    Perform advanced math operations like sqrt, log, sin, cos, tan, power.
	    """
	    # Single-argument operations map directly onto math-module functions;
	    # num2 is ignored for them. "power" is the only two-argument operation.
	    unary_ops = {
	        "sqrt": math.sqrt,
	        "log": math.log,
	        "sin": math.sin,
	        "cos": math.cos,
	        "tan": math.tan,
	    }
	    try:
	        handler = unary_ops.get(operation)
	        if handler is not None:
	            return str(handler(num1))
	        if operation != "power":
	            return f"Unknown operation: {operation}"
	        if num2 is None:
	            return "power operation requires two numbers"
	        return str(math.pow(num1, num2))
	    except Exception as e:
	        # Domain errors (e.g. sqrt of a negative number) are reported in-band.
	        return f"Math error: {str(e)}"


	@tool
	def extract_text_multimodal(img_path: str) -> str:
	    """Extract text from image using multimodal LLM vision capabilities."""
	    # Reads the image at img_path, sends it base64-encoded to the module-level
	    # vision_llm together with an extraction prompt, and returns the model's
	    # text. Every failure is returned as an error string rather than raised.
	    import mimetypes

	    try:
	        if 'vision_llm' not in globals():
	            return "Error: Vision LLM not configured. Please uncomment and configure vision_llm."

	        with open(img_path, "rb") as image_file:
	            image_base64 = base64.b64encode(image_file.read()).decode("utf-8")

	        # Label the data URL with the file's real image type instead of always
	        # claiming PNG; fall back to PNG when the extension is unknown.
	        mime_type, _ = mimetypes.guess_type(img_path)
	        if mime_type is None or not mime_type.startswith("image/"):
	            mime_type = "image/png"

	        message = [
	            HumanMessage(
	                content=[
	                    {
	                        "type": "text",
	                        "text": "Extract all the text from this image. Return only the extracted text, no explanations."
	                    },
	                    {
	                        "type": "image_url",
	                        "image_url": {"url": f"data:{mime_type};base64,{image_base64}"}
	                    }
	                ]
	            )
	        ]

	        response = vision_llm.invoke(message)
	        content = response.content
	        if isinstance(content, str):
	            return content.strip()

	        # Message content may also be a list of strings / content-part dicts;
	        # keep only the textual parts instead of failing on .strip().
	        text_parts = []
	        for part in content:
	            if isinstance(part, str):
	                text_parts.append(part)
	            elif isinstance(part, dict) and part.get("type") == "text":
	                text_parts.append(str(part.get("text", "")))
	        return "".join(text_parts).strip()
	    except Exception as e:
	        return f"Multimodal text extraction error: {str(e)}"


	@tool
	def read_excel_file(file_path: str, sheet_name: Optional[str] = None) -> pd.DataFrame:
	    """Read Excel file and return a pandas DataFrame."""
	    # Forward sheet_name only when one was given; otherwise let pandas pick
	    # its default sheet.
	    read_kwargs = {"sheet_name": sheet_name} if sheet_name else {}
	    try:
	        return pd.read_excel(file_path, **read_kwargs)
	    except Exception as e:
	        # Failures are reported in-band as a one-cell DataFrame with an
	        # "error" column, so the return type stays a DataFrame.
	        return pd.DataFrame({"error": [f"Excel reading error: {str(e)}"]})


	@tool
	def visit_webpage(url: str) -> str:
	    """
	    Visits a webpage at the given URL and returns its content as a markdown string.
	    Use this to browse and extract readable content from webpages.
	    """
	    # Upper bound on returned characters (excluding the truncation marker).
	    MAX_LEN = 40000
	    try:
	        response = requests.get(url, timeout=20)
	        response.raise_for_status()

	        # HTML -> markdown, then squeeze runs of 3+ newlines down to one blank line.
	        page_md = re.sub(r"\n{3,}", "\n\n", markdownify(response.text).strip())
	        if len(page_md) <= MAX_LEN:
	            return page_md

	        # Too long: keep the first and last halves and mark the gap.
	        half = MAX_LEN // 2
	        head, tail = page_md[:half], page_md[-half:]
	        return "".join(
	            (head, f"\n\n...[Content truncated to {MAX_LEN} chars]...\n\n", tail)
	        )
	    except requests.exceptions.Timeout:
	        return "Timeout while trying to access the webpage."
	    except RequestException as e:
	        return f"Request error: {str(e)}"
	    except Exception as e:
	        return f"Unexpected error: {str(e)}"


	def build_tool():
	    """
	    Create the agent's toolbox: LangChain built-in tools followed by the
	    custom tools defined in this module, returned as a single list.
	    """
	    # NOTE(review): ShellTool and PythonREPLTool let the model run arbitrary
	    # shell commands / Python code on the host - only use in a sandbox.
	    return [
	        # Built-in LangChain tools
	        WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(doc_content_chars_max=2000)),
	        DuckDuckGoSearchRun(api_wrapper=DuckDuckGoSearchAPIWrapper(max_results=15)),
	        ArxivQueryRun(api_wrapper=ArxivAPIWrapper()),
	        ShellTool(),
	        PythonREPLTool(),
	        # Custom tools for specialized tasks
	        reverse_text,
	        reverse_words,
	        calculator,
	        advanced_math,
	        extract_text_multimodal,
	        read_excel_file,
	        visit_webpage,
	    ]


	class AgentState(TypedDict):
	"""State carried through the agent graph: an optional input file plus the message history."""
	# File reference for the current question (or None); the assistant node
	# reads it and reports it in the system prompt as the currently loaded file.
	input_file: Optional[str]
	# Conversation messages. Annotated with langgraph's add_messages, which is
	# presumably the reducer that merges node outputs into this list - confirm
	# against the langgraph documentation.
	messages: Annotated[list[AnyMessage], add_messages]


	def _format_tools_description(all_tools: list[Any]) -> str:
	    """Render one "- name: summary" bullet per tool for the system prompt."""
	    if not all_tools:
	        return "No tools are available."

	    bullets = []
	    for candidate in all_tools:
	        # Use the name the tool is registered under; fall back for plain
	        # callables or other objects that bind_tools may accept.
	        name = (
	            getattr(candidate, "name", None)
	            or getattr(candidate, "__name__", None)
	            or repr(candidate)
	        )
	        description = getattr(candidate, "description", None)
	        if description is None and callable(candidate):
	            description = getattr(candidate, "__doc__", None)
	        # Keep only the first non-empty line so each bullet stays on one line.
	        summary = next(
	            (line.strip() for line in str(description or "").splitlines() if line.strip()),
	            "",
	        )
	        bullets.append(f"- {name}: {summary}" if summary else f"- {name}")
	    return "Available tools for the tasks:\n" + "\n".join(bullets)


	def build_langgraph(provider: str, model: Optional[str] = None, temperature: float = 0.1, all_tools: Optional[list[Any]] = None) -> StateGraph:
	    """Builds and returns the LangGraph agent with the given provider.

	    Args:
	        provider: One of "google", "groq", "huggingface" or "ollama".
	        model: Model identifier; when omitted a per-provider default is used.
	        temperature: Sampling temperature passed to the chat model.
	        all_tools: Tools bound to the model and served by the tool node.
	            Defaults to no tools.

	    Returns:
	        The result of ``builder.compile()`` for an assistant <-> tools loop.
	        NOTE(review): the annotation says ``StateGraph`` but the compiled
	        graph object is what is returned - confirm and adjust the annotation.

	    Raises:
	        ValueError: If ``provider`` is not one of the supported names.
	    """

	    if all_tools is None:
	        all_tools = []

	    # Select model and provider
	    if provider == "google":
	        llm = ChatGoogleGenerativeAI(model=model or "gemini-2.5-flash", temperature=temperature)
	    elif provider == "groq":
	        llm = ChatGroq(model=model or "qwen/qwen3-32b", temperature=temperature)
	    elif provider == "huggingface":
	        llm = ChatHuggingFace(
	            llm=HuggingFaceEndpoint(
	                repo_id=model or "meta-llama/Llama-3.1-8B-Instruct",
	                temperature=temperature
	            )
	        )
	    elif provider == "ollama":
	        llm = ChatOllama(model=model or "qwen3:4b", base_url="http://localhost:11434", temperature=temperature)
	    else:
	        raise ValueError("Unsupported provider. Choose from 'google', 'groq', 'huggingface', or 'ollama'.")

	    llm_with_tools = llm.bind_tools(all_tools)

	    # Describe exactly the tools that were bound, under their registered
	    # names. A hand-written list can advertise names the model cannot call
	    # (or tools that were never passed in), which leads to failed tool calls.
	    tools_description = _format_tools_description(all_tools)

	    # The prompt is constant apart from the file line, so build the fixed
	    # parts once instead of on every assistant step.
	    prompt_head = (
	        "You are an intelligent AI agent designed to solve complex problems using the tools provided.\n\n"
	        "=== Available Tools ===\n"
	        f"{tools_description}\n\n"
	        "=== Optional Files ===\n"
	    )
	    prompt_tail = (
	        "=== Problem-Solving Process ===\n"
	        "Follow these steps carefully when answering a question:\n"
	        "1. Break the problem into smaller, manageable parts.\n"
	        "2. Choose the most suitable tool for each part.\n"
	        "3. Use multiple tools in sequence if needed.\n"
	        "4. Verify your results and explain your reasoning clearly.\n\n"
	        "Be precise and clear at every step. After your reasoning, provide ONLY the final answer.\n\n"
	        "=== Final Answer Format Rules ===\n"
	        "- For numbers: Use only digits (no commas or units) unless units are explicitly requested.\n"
	        "- For strings: Do not use articles (a, an, the) or abbreviations. Spell out all digits.\n"
	        "- For lists: Use commas to separate items. Apply the above number/string rules to each item.\n"
	        "- If the answer is unknown: Respond exactly with \"do not know\"\n\n"
	        "Example Question 1:\n\n"
	        "If Eliud Kipchoge could maintain his marathon pace indefinitely, how many thousand hours would it take him to run from Earth to the Moon at its closest approach? Use the minimum perigee distance from Wikipedia and round to the nearest 1000 hours. Do not use commas.\n\n"
	        "Example Answer 1:\n"
	        "17\n\n"
	        "Example Reasoning Steps 1:\n"
	        "1. Found Eliud Kipchoge's marathon pace: 4 minutes 37 seconds per mile.\n"
	        "2. Converted pace into hours per mile.\n"
	        "3. Found Moon's closest distance: 225623 miles.\n"
	        "4. Multiplied pace by distance to get total hours and rounded to nearest 1000.\n\n"
	        "Example Question 2:\n\n"
	        "Who are the pitchers with the number before and after Taishō Tamai's number as of July 2023? Give them to me in the form Pitcher Before, Pitcher After, use their last names only, in Roman characters.\n\n"
	        "Example Answer 2:\n"
	        "Yoshida, Uehara\n\n"
	        "Example Reasoning Steps 2:\n"
	        "1. Looked up Taishō Tamai on Wikipedia.\n"
	        "2. Found the pitcher with number 18 is Kōsei Yoshida.\n"
	        "3. Found the pitcher with number 20 is Kenta Uehara.\n\n"
	        "Now answer the following questions:\n"
	    )

	    def assistant(state: AgentState):
	        """Run one model step: system prompt + history in, new message out."""
	        # .get() so a state created without "input_file" does not raise KeyError.
	        file = state.get("input_file")
	        sys_msg = SystemMessage(
	            content=f"{prompt_head}Currently loaded file: {file}\n\n{prompt_tail}"
	        )
	        return {
	            "messages": [llm_with_tools.invoke([sys_msg] + state["messages"])],
	            "input_file": file
	        }

	    # Build the graph: assistant -> (tools -> assistant)* until the model
	    # stops requesting tool calls.
	    builder = StateGraph(AgentState)
	    builder.add_node("assistant", assistant)
	    builder.add_node("tools", ToolNode(all_tools))
	    builder.add_edge(START, "assistant")
	    builder.add_conditional_edges("assistant", tools_condition)
	    builder.add_edge("tools", "assistant")
	    return builder.compile()


	if __name__ == "__main__":
	    # Demo driver: builds the Groq-backed agent with the full toolbox and
	    # runs two sample questions, printing every message of each run.
	    divider = "\n" + "=" * 60 + "\n"

	    all_tools = build_tool()
	    react_graph = build_langgraph("groq", all_tools=all_tools)

	    print("🚀 GAIA Dataset Agent with LangChain Built-in Tools!")
	    print(divider)

	    # (banner, question, recursion limit or None for the default)
	    demo_runs = (
	        (
	            # Example: Multi-step problem solving
	            "Testing calculation capabilities...",
	            "Calculate the square root of 169, then multiply by 15",
	            None,
	        ),
	        (
	            # Example: Knowledge retrieval
	            "📚 Testing Wikipedia search...",
	            "Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?",
	            10,
	        ),
	    )

	    for banner, question, recursion_limit in demo_runs:
	        print(banner)
	        payload = {"messages": [HumanMessage(content=question)], "input_file": None}
	        if recursion_limit is None:
	            result = react_graph.invoke(payload)
	        else:
	            config = RunnableConfig(recursion_limit=recursion_limit)
	            result = react_graph.invoke(payload, config)

	        for m in result['messages']:
	            m.pretty_print()

	        print(divider)