# gokul-pv's picture
# prompt update and cleanup
# 7fbe1a5
import math
import re
import requests
import pandas as pd
import base64
from markdownify import markdownify
from requests.exceptions import RequestException
from typing import TypedDict, Annotated, Optional, Any
from langchain_core.messages import HumanMessage, SystemMessage, AnyMessage
from langchain_core.tools import tool
from langchain_ollama import ChatOllama
from langgraph.graph.message import add_messages
from langgraph.graph import START, StateGraph
from langgraph.prebuilt import ToolNode, tools_condition
from langchain_core.runnables.config import RunnableConfig
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_groq import ChatGroq
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
# Built-in LangChain tools
from langchain_community.tools import (
WikipediaQueryRun,
DuckDuckGoSearchRun,
ArxivQueryRun,
ShellTool,
)
from langchain_community.utilities import (
WikipediaAPIWrapper,
DuckDuckGoSearchAPIWrapper,
ArxivAPIWrapper,
)
from langchain_experimental.tools import PythonREPLTool
# Vision-capable chat model used by extract_text_multimodal. It is created at
# import time, so importing this module constructs the Gemini client.
# To use a local Ollama vision model instead, replace the assignment below with:
# vision_llm = ChatOllama(
#     model="qwen2-vl:7b",
#     base_url="http://localhost:11434"
# )
vision_llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash")
# ============== CUSTOM TOOLS (not available in LangChain) ==============
@tool
def reverse_text(text: str) -> str:
    """Reverse the given text character by character."""
    # The docstring is what the @tool decorator exposes to the model as the
    # tool description, so it is kept verbatim.
    return "".join(reversed(text))
@tool
def reverse_words(text: str) -> str:
    """Reverse the order of words in the given text."""
    # split() without a separator drops leading/trailing whitespace and
    # collapses runs of whitespace, so the output is single-space separated.
    words = text.split()
    words.reverse()
    return " ".join(words)
@tool
def calculator(expression: str) -> str:
    """Perform mathematical calculations safely. Supports basic arithmetic operations.

    The expression may contain numbers, parentheses and the operators
    +, -, *, /, // and **.

    Args:
        expression: Arithmetic expression, e.g. "(2 + 3) * 4 / 5".

    Returns:
        The result as a string. If the expression contains characters outside
        the allowed set, a message starting with "Error:" is returned; any
        other failure yields a message starting with "Calculation error:".
    """
    # Imported locally so the function does not rely on new file-level imports.
    import ast
    import operator

    allowed_chars = set("0123456789+-*/.() ")
    if not all(c in allowed_chars for c in expression):
        return "Error: Invalid characters in expression"

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}
    # Upper bound on the bit size of an integer power result. Without it an
    # input such as "9**9**9**9" would keep the process busy indefinitely.
    max_power_bits = 100_000

    def evaluate(node):
        """Recursively evaluate an AST made only of numbers and arithmetic."""
        if isinstance(node, ast.Constant) and type(node.value) in (int, float):
            return node.value
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            left = evaluate(node.left)
            right = evaluate(node.right)
            if (
                isinstance(node.op, ast.Pow)
                and isinstance(left, int)
                and isinstance(right, int)
                and right > 0
                and left.bit_length() * right > max_power_bits
            ):
                raise ValueError("exponent too large")
            return binary_ops[type(node.op)](left, right)
        # Anything else (calls, empty tuples, ...) is not arithmetic.
        raise ValueError("unsupported expression")

    try:
        # The expression is parsed, never executed: only the node types handled
        # in evaluate() can contribute to the result. strip() is needed because
        # compile() (unlike eval()) rejects leading whitespace.
        tree = ast.parse(expression.strip(), mode="eval")
        return str(evaluate(tree.body))
    except Exception as e:
        return f"Calculation error: {str(e)}"
@tool
def advanced_math(operation: str, num1: float, num2: Optional[float] = None) -> str:
    """
    Perform advanced math operations like sqrt, log, sin, cos, tan, power.
    """
    # Operations that take only num1; each name matches the math-module
    # function that implements it, so it can be looked up directly.
    single_argument_ops = ("sqrt", "log", "sin", "cos", "tan")
    try:
        if operation in single_argument_ops:
            return str(getattr(math, operation)(num1))
        if operation != "power":
            return f"Unknown operation: {operation}"
        if num2 is None:
            return "power operation requires two numbers"
        return str(math.pow(num1, num2))
    except Exception as e:
        # Domain/overflow errors from math are reported in-band as text.
        return f"Math error: {str(e)}"
@tool
def extract_text_multimodal(img_path: str) -> str:
    """Extract text from image using multimodal LLM vision capabilities.

    Args:
        img_path: Path to a local image file.

    Returns:
        The text reported by the vision model with surrounding whitespace
        removed, or an error message string if the model is not configured
        or anything else fails.
    """
    # Imported locally so the function does not rely on new file-level imports.
    import mimetypes

    try:
        if 'vision_llm' not in globals():
            return "Error: Vision LLM not configured. Please uncomment and configure vision_llm."

        with open(img_path, "rb") as image_file:
            image_base64 = base64.b64encode(image_file.read()).decode("utf-8")

        # Declare the real image type in the data URL instead of always
        # claiming PNG; fall back to the previous default when the extension
        # is unknown or not an image type.
        mime_type, _ = mimetypes.guess_type(img_path)
        if not mime_type or not mime_type.startswith("image/"):
            mime_type = "image/png"

        message = [
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": "Extract all the text from this image. Return only the extracted text, no explanations."
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:{mime_type};base64,{image_base64}"}
                    }
                ]
            )
        ]
        response = vision_llm.invoke(message)

        # Message content may be a plain string or a list of parts (strings or
        # dicts carrying a "text" entry); flatten the latter before stripping.
        content = response.content
        if isinstance(content, list):
            content = "".join(
                part if isinstance(part, str)
                else str(part.get("text", "")) if isinstance(part, dict)
                else ""
                for part in content
            )
        return content.strip()
    except Exception as e:
        return f"Multimodal text extraction error: {str(e)}"
@tool
def read_excel_file(file_path: str, sheet_name: Optional[str] = None) -> pd.DataFrame:
    """Read Excel file and return a pandas DataFrame."""
    # A missing or empty sheet name selects the first sheet (index 0), which is
    # also what pd.read_excel uses when sheet_name is not passed.
    target_sheet = sheet_name if sheet_name else 0
    try:
        return pd.read_excel(file_path, sheet_name=target_sheet)
    except Exception as exc:
        # Failures are reported in-band: a one-row frame whose single "error"
        # column carries the message (not a plain string).
        failure = f"Excel reading error: {str(exc)}"
        return pd.DataFrame({"error": [failure]})
@tool
def visit_webpage(url: str) -> str:
    """
    Visits a webpage at the given URL and returns its content as a markdown string.
    Use this to browse and extract readable content from webpages.
    """
    # Pages longer than this keep only their first and last MAX_LEN // 2
    # characters, joined by a truncation marker.
    MAX_LEN = 40000
    try:
        response = requests.get(url, timeout=20)
        response.raise_for_status()

        # Convert the HTML to markdown, then squeeze runs of 3+ newlines.
        page_markdown = re.sub(r"\n{3,}", "\n\n", markdownify(response.text).strip())
        if len(page_markdown) <= MAX_LEN:
            return page_markdown

        half = MAX_LEN // 2
        head, tail = page_markdown[:half], page_markdown[-half:]
        return f"{head}\n\n...[Content truncated to {MAX_LEN} chars]...\n\n{tail}"
    except requests.exceptions.Timeout:
        # Checked before RequestException, of which Timeout is a subclass.
        return "Timeout while trying to access the webpage."
    except RequestException as e:
        return f"Request error: {str(e)}"
    except Exception as e:
        return f"Unexpected error: {str(e)}"
def build_tool():
    """
    Create the agent's toolbox: LangChain built-ins first, then this module's custom tools.
    """
    # Built-in tools are instantiated here, in list order, with their wrappers
    # configured inline.
    builtin_tools = [
        WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(doc_content_chars_max=2000)),
        DuckDuckGoSearchRun(api_wrapper=DuckDuckGoSearchAPIWrapper(max_results=15)),
        ArxivQueryRun(api_wrapper=ArxivAPIWrapper()),
        ShellTool(),
        PythonREPLTool(),
    ]
    # Custom tools defined in this module for specialized tasks.
    custom_tools = [
        reverse_text,
        reverse_words,
        calculator,
        advanced_math,
        extract_text_multimodal,
        read_excel_file,
        visit_webpage,
    ]
    return builtin_tools + custom_tools
class AgentState(TypedDict):
    """State dictionary passed between the nodes of the agent graph."""

    # Path of a file associated with the task, or None; the assistant node
    # interpolates it into the system prompt.
    input_file: Optional[str]
    # Conversation so far. add_messages is the reducer LangGraph applies when a
    # node returns messages (see the LangGraph docs for its merge semantics).
    messages: Annotated[list[AnyMessage], add_messages]
def build_langgraph(provider: str, model: Optional[str] = None, temperature: float = 0.1, all_tools: Optional[list[Any]] = None) -> StateGraph:
    """Builds and returns the LangGraph agent with the given provider.

    Args:
        provider: One of "google", "groq", "huggingface" or "ollama".
        model: Model identifier; when omitted a per-provider default is used.
        temperature: Sampling temperature passed to the chat model.
        all_tools: Tools bound to the model and served by the tool node;
            defaults to an empty list.

    Returns:
        The result of ``builder.compile()``.
        NOTE(review): the annotation says ``StateGraph`` but the compiled
        graph is what is returned -- confirm and adjust the annotation.

    Raises:
        ValueError: If ``provider`` is not one of the supported names.
    """
    if all_tools is None:
        all_tools = []

    # Select model and provider
    if provider == "google":
        llm = ChatGoogleGenerativeAI(model=model or "gemini-2.5-flash", temperature=temperature)
    elif provider == "groq":
        llm = ChatGroq(model=model or "qwen/qwen3-32b", temperature=temperature)
    elif provider == "huggingface":
        llm = ChatHuggingFace(
            llm=HuggingFaceEndpoint(
                repo_id=model or "meta-llama/Llama-3.1-8B-Instruct",
                temperature=temperature
            )
        )
    elif provider == "ollama":
        llm = ChatOllama(model=model or "qwen3:4b", base_url="http://localhost:11434", temperature=temperature)
    else:
        raise ValueError("Unsupported provider. Choose from 'google', 'groq', 'huggingface', or 'ollama'.")

    llm_with_tools = llm.bind_tools(all_tools)

    # The tool overview never changes between steps, so it is built once here
    # rather than on every assistant invocation.
    tools_description = (
        "\n"
        "Available tools for the tasks:\n"
        "WEB & SEARCH:\n"
        "- duckduckgo_search: Search the web for information\n"
        "- wikipedia_tool: Search Wikipedia for knowledge\n"
        "- visit_webpage: Visit a webpage and extract readable markdown content\n"
        "- arxiv_tool: Search arXiv for research papers\n"
        "CALCULATIONS:\n"
        "- calculator: Basic arithmetic operations (+, -, *, /, etc.)\n"
        "- advanced_math: Advanced math functions (sqrt, log, trig)\n"
        "- python_repl: Execute Python code for complex computations\n"
        "TEXT PROCESSING:\n"
        "- reverse_text: Reverse text character by character\n"
        "- reverse_words: Reverse word order in text\n"
        "IMAGE PROCESSING:\n"
        "- extract_text_multimodal: Extract text using AI vision\n"
        "DATA ANALYSIS:\n"
        "- read_excel_file: Read and preview Excel files\n"
        "SYSTEM:\n"
        "- shell_tool: Execute shell commands (use carefully)\n"
    )

    def assistant(state: AgentState):
        """Graph node: prepend the system prompt and ask the tool-bound model for the next message."""
        # .get() so that a caller who invokes the graph with only "messages"
        # does not hit a KeyError; the prompt then shows "None" as the file.
        file = state.get("input_file")
        sys_msg = SystemMessage(
            content=(
                "You are an intelligent AI agent designed to solve complex problems using the tools provided.\n\n"
                "=== Available Tools ===\n"
                f"{tools_description}\n\n"
                "=== Optional Files ===\n"
                f"Currently loaded file: {file}\n\n"
                "=== Problem-Solving Process ===\n"
                "Follow these steps carefully when answering a question:\n"
                "1. Break the problem into smaller, manageable parts.\n"
                "2. Choose the most suitable tool for each part.\n"
                "3. Use multiple tools in sequence if needed.\n"
                "4. Verify your results and explain your reasoning clearly.\n\n"
                "Be precise and clear at every step. After your reasoning, provide ONLY the final answer.\n\n"
                "=== Final Answer Format Rules ===\n"
                "- For numbers: Use only digits (no commas or units) unless units are explicitly requested.\n"
                "- For strings: Do not use articles (a, an, the) or abbreviations. Spell out all digits.\n"
                "- For lists: Use commas to separate items. Apply the above number/string rules to each item.\n"
                "- If the answer is unknown: Respond exactly with \"do not know\"\n\n"
                "Example Question 1:\n\n"
                "If Eliud Kipchoge could maintain his marathon pace indefinitely, how many thousand hours would it take him to run from Earth to the Moon at its closest approach? Use the minimum perigee distance from Wikipedia and round to the nearest 1000 hours. Do not use commas.\n\n"
                "**Example Answer 1:**\n"
                "17\n\n"
                "**Example Reasoning Steps 1:**\n"
                "1. Found Eliud Kipchoge's marathon pace: 4 minutes 37 seconds per mile.\n"
                "2. Converted pace into hours per mile.\n"
                "3. Found Moon's closest distance: 225623 miles.\n"
                "4. Multiplied pace by distance to get total hours and rounded to nearest 1000.\n\n"
                "Example Question 2:\n\n"
                "Who are the pitchers with the number before and after Taishō Tamai's number as of July 2023? Give them to me in the form Pitcher Before, Pitcher After, use their last names only, in Roman characters.\n\n"
                "**Example Answer 2:**\n"
                "Yoshida, Uehara\n\n"
                "**Example Reasoning Steps 2:**\n"
                "1. Looked up Taishō Tamai on Wikipedia.\n"
                "2. Found the pitcher with number 18 is Kōsei Yoshida.\n"
                "3. Found the pitcher with number 20 is Kenta Uehara.\n\n"
                "Now answer the following questions:\n"
            )
        )
        return {
            "messages": [llm_with_tools.invoke([sys_msg] + state["messages"])],
            "input_file": file
        }

    # Build the graph: START -> assistant; tools_condition decides what follows
    # the assistant node, and the tools node always returns to the assistant.
    builder = StateGraph(AgentState)
    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode(all_tools))
    builder.add_edge(START, "assistant")
    builder.add_conditional_edges("assistant", tools_condition)
    builder.add_edge("tools", "assistant")
    return builder.compile()
if __name__ == "__main__":
    # Demo run: build the agent on the groq provider and send it two sample
    # questions, printing the full message trace of each.
    separator = "\n" + "="*60 + "\n"

    toolbox = build_tool()
    react_graph = build_langgraph("groq", all_tools=toolbox)
    print("🚀 GAIA Dataset Agent with LangChain Built-in Tools!")
    print(separator)

    # (banner, question, run config); only the second demo sets a config,
    # which limits recursion to 10.
    demos = [
        (
            "Testing calculation capabilities...",
            "Calculate the square root of 169, then multiply by 15",
            None,
        ),
        (
            "📚 Testing Wikipedia search...",
            "Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?",
            RunnableConfig(recursion_limit=10),
        ),
    ]
    for banner, question, run_config in demos:
        print(banner)
        initial_state = {"messages": [HumanMessage(content=question)], "input_file": None}
        if run_config is None:
            outcome = react_graph.invoke(initial_state)
        else:
            outcome = react_graph.invoke(initial_state, run_config)
        for message in outcome["messages"]:
            message.pretty_print()
        print(separator)