Spaces:
Runtime error
Runtime error
File size: 13,401 Bytes
2f7413a 7fbe1a5 2f7413a 7fbe1a5 2f7413a 7fbe1a5 2f7413a 7fbe1a5 2f7413a 7fbe1a5 2f7413a 7fbe1a5 2f7413a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 |
import math
import re
import requests
import pandas as pd
import base64
from markdownify import markdownify
from requests.exceptions import RequestException
from typing import TypedDict, Annotated, Optional, Any
from langchain_core.messages import HumanMessage, SystemMessage, AnyMessage
from langchain_core.tools import tool
from langchain_ollama import ChatOllama
from langgraph.graph.message import add_messages
from langgraph.graph import START, StateGraph
from langgraph.prebuilt import ToolNode, tools_condition
from langchain_core.runnables.config import RunnableConfig
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_groq import ChatGroq
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
# Built-in LangChain tools
from langchain_community.tools import (
WikipediaQueryRun,
DuckDuckGoSearchRun,
ArxivQueryRun,
ShellTool,
)
from langchain_community.utilities import (
WikipediaAPIWrapper,
DuckDuckGoSearchAPIWrapper,
ArxivAPIWrapper,
)
from langchain_experimental.tools import PythonREPLTool
# Initialize vision_llm at module level (commented out by default)
# Uncomment and configure as needed
# vision_llm = ChatOllama(
#     model="qwen2-vl:7b",
#     base_url="http://localhost:11434"
# )
# Vision-capable chat model used by the extract_text_multimodal tool below.
# NOTE(review): instantiating this at import time requires Google API credentials
# to be configured in the environment — confirm before deploying.
vision_llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash")
# ============== CUSTOM TOOLS (not available in LangChain) ==============
@tool
def reverse_text(text: str) -> str:
    """Reverse the given text character by character."""
    # reversed() yields characters back-to-front; join reassembles them.
    return "".join(reversed(text))
@tool
def reverse_words(text: str) -> str:
    """Reverse the order of words in the given text."""
    # split() with no argument collapses runs of whitespace, so the output
    # is always single-space separated regardless of the input spacing.
    words = text.split()
    words.reverse()
    return " ".join(words)
@tool
def calculator(expression: str) -> str:
    """Perform mathematical calculations safely. Supports basic arithmetic operations.

    Args:
        expression: An arithmetic expression using digits, + - * / // ** and
            parentheses (e.g. "2 * (3 + 4)").

    Returns:
        The result as a string, or an error message string on invalid input.
    """
    # Character whitelisting alone does not make eval() safe in principle, so
    # the expression is parsed with ast and only arithmetic nodes are evaluated.
    import ast
    import operator

    # AST operator node -> implementation; anything outside this table is rejected.
    _ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,  # "//" passes the char filter, keep supporting it
        ast.Pow: operator.pow,            # "**" passes the char filter, keep supporting it
        ast.USub: operator.neg,
        ast.UAdd: operator.pos,
    }

    def _eval(node):
        # Recursively evaluate only numeric literals and arithmetic operators.
        if isinstance(node, ast.Expression):
            return _eval(node.body)
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in _ops:
            return _ops[type(node.op)](_eval(node.left), _eval(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in _ops:
            return _ops[type(node.op)](_eval(node.operand))
        raise ValueError("unsupported expression")

    try:
        # Keep the original fast pre-filter (and its error message) for clearly
        # invalid characters before parsing.
        allowed_chars = set('0123456789+-*/.() ')
        if all(c in allowed_chars for c in expression):
            result = _eval(ast.parse(expression, mode="eval"))
            return str(result)
        else:
            return "Error: Invalid characters in expression"
    except Exception as e:
        return f"Calculation error: {str(e)}"
@tool
def advanced_math(operation: str, num1: float, num2: Optional[float] = None) -> str:
    """
    Perform advanced math operations like sqrt, log, sin, cos, tan, power.
    """
    # Dispatch table for the single-argument operations.
    unary_ops = {
        "sqrt": math.sqrt,
        "log": math.log,
        "sin": math.sin,
        "cos": math.cos,
        "tan": math.tan,
    }
    try:
        fn = unary_ops.get(operation)
        if fn is not None:
            return str(fn(num1))
        if operation == "power":
            # power is the only binary operation; it requires a second operand.
            if num2 is None:
                return "power operation requires two numbers"
            return str(math.pow(num1, num2))
        return f"Unknown operation: {operation}"
    except Exception as e:
        return f"Math error: {str(e)}"
@tool
def extract_text_multimodal(img_path: str) -> str:
    """Extract text from image using multimodal LLM vision capabilities."""
    try:
        # Bail out early if the module-level vision model was never configured.
        if 'vision_llm' not in globals():
            return "Error: Vision LLM not configured. Please uncomment and configure vision_llm."
        # Read the image and encode it as base64 for the data-URL payload.
        with open(img_path, "rb") as image_file:
            encoded = base64.b64encode(image_file.read()).decode("utf-8")
        # Build a single multimodal message: instruction text + inline image.
        parts = [
            {
                "type": "text",
                "text": "Extract all the text from this image. Return only the extracted text, no explanations."
            },
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/png;base64,{encoded}"}
            },
        ]
        response = vision_llm.invoke([HumanMessage(content=parts)])
        return response.content.strip()
    except Exception as e:
        return f"Multimodal text extraction error: {str(e)}"
@tool
def read_excel_file(file_path: str, sheet_name: Optional[str] = None) -> pd.DataFrame:
    """Read Excel file and return a pandas DataFrame."""
    try:
        # Only forward sheet_name when given; otherwise pandas reads the first sheet.
        kwargs = {"sheet_name": sheet_name} if sheet_name else {}
        return pd.read_excel(file_path, **kwargs)
    except Exception as e:
        # On failure, return a one-row DataFrame carrying the error message
        # so callers always receive a DataFrame.
        return pd.DataFrame({"error": [f"Excel reading error: {str(e)}"]})
@tool
def visit_webpage(url: str) -> str:
    """
    Visits a webpage at the given URL and returns its content as a markdown string.
    Use this to browse and extract readable content from webpages.
    """
    try:
        resp = requests.get(url, timeout=20)
        resp.raise_for_status()
        # Convert HTML to markdown and collapse 3+ consecutive newlines to two.
        content = re.sub(r"\n{3,}", "\n\n", markdownify(resp.text).strip())
        MAX_LEN = 40000
        if len(content) <= MAX_LEN:
            return content
        # Keep the head and tail halves and mark the removed middle section.
        half = MAX_LEN // 2
        return (
            content[:half]
            + f"\n\n...[Content truncated to {MAX_LEN} chars]...\n\n"
            + content[-half:]
        )
    except requests.exceptions.Timeout:
        return "Timeout while trying to access the webpage."
    except RequestException as e:
        return f"Request error: {str(e)}"
    except Exception as e:
        return f"Unexpected error: {str(e)}"
def build_tool():
    """
    Initialize and return a list of built-in and custom LangChain tools.
    """
    # Built-in LangChain tools, configured with bounded output sizes.
    builtin_tools = [
        WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(doc_content_chars_max=2000)),
        DuckDuckGoSearchRun(api_wrapper=DuckDuckGoSearchAPIWrapper(max_results=15)),
        ArxivQueryRun(api_wrapper=ArxivAPIWrapper()),
        ShellTool(),
        PythonREPLTool(),
    ]
    # Custom tools defined in this module for specialized tasks.
    custom_tools = [
        reverse_text,
        reverse_words,
        calculator,
        advanced_math,
        extract_text_multimodal,
        read_excel_file,
        visit_webpage,
    ]
    return builtin_tools + custom_tools
class AgentState(TypedDict):
    """LangGraph agent state: an optional input file path plus the chat history."""
    input_file: Optional[str]  # path to a user-supplied file, or None when absent
    # add_messages is the reducer: node returns are appended to, not replacing, history
    messages: Annotated[list[AnyMessage], add_messages]
def build_langgraph(provider: str, model: Optional[str] = None, temperature: float = 0.1, all_tools: Optional[list[Any]] = None) -> StateGraph:
    """Builds and returns the LangGraph agent with the given provider.

    Args:
        provider: One of 'google', 'groq', 'huggingface', or 'ollama'.
        model: Optional model name; a provider-specific default is used when None.
        temperature: Sampling temperature forwarded to the chat model.
        all_tools: Tools bound to the LLM and served by the graph's tool node;
            defaults to an empty list.

    Returns:
        A compiled LangGraph implementing a ReAct-style assistant/tools loop.

    Raises:
        ValueError: If `provider` is not one of the supported names.
    """
    if all_tools is None:
        all_tools = []
    # Select model and provider
    if provider == "google":
        llm = ChatGoogleGenerativeAI(model=model or "gemini-2.5-flash", temperature=temperature)
    elif provider == "groq":
        llm = ChatGroq(model=model or "qwen/qwen3-32b", temperature=temperature)
    elif provider == "huggingface":
        llm = ChatHuggingFace(
            llm=HuggingFaceEndpoint(
                repo_id=model or "meta-llama/Llama-3.1-8B-Instruct",
                temperature=temperature
            )
        )
    elif provider == "ollama":
        llm = ChatOllama(model=model or "qwen3:4b", base_url="http://localhost:11434", temperature=temperature)
    else:
        raise ValueError("Unsupported provider. Choose from 'google', 'groq', 'huggingface', or 'ollama'.")
    # Expose the tool schemas to the model so it can emit tool calls.
    llm_with_tools = llm.bind_tools(all_tools)
    def assistant(state: AgentState):
        # Human-readable tool catalogue embedded verbatim in the system prompt.
        tools_description = """
Available tools for the tasks:
WEB & SEARCH:
- duckduckgo_search: Search the web for information
- wikipedia_tool: Search Wikipedia for knowledge
- visit_webpage: Visit a webpage and extract readable markdown content
- arxiv_tool: Search arXiv for research papers
CALCULATIONS:
- calculator: Basic arithmetic operations (+, -, *, /, etc.)
- advanced_math: Advanced math functions (sqrt, log, trig)
- python_repl: Execute Python code for complex computations
TEXT PROCESSING:
- reverse_text: Reverse text character by character
- reverse_words: Reverse word order in text
IMAGE PROCESSING:
- extract_text_multimodal: Extract text using AI vision
DATA ANALYSIS:
- read_excel_file: Read and preview Excel files
SYSTEM:
- shell_tool: Execute shell commands (use carefully)
"""
        file = state["input_file"]
        # System prompt: tool list, currently loaded file, solving process,
        # GAIA-style answer-format rules, and two worked few-shot examples.
        sys_msg = SystemMessage(
            content=(
                "You are an intelligent AI agent designed to solve complex problems using the tools provided.\n\n"
                "=== Available Tools ===\n"
                f"{tools_description}\n\n"
                "=== Optional Files ===\n"
                f"Currently loaded file: {file}\n\n"
                "=== Problem-Solving Process ===\n"
                "Follow these steps carefully when answering a question:\n"
                "1. Break the problem into smaller, manageable parts.\n"
                "2. Choose the most suitable tool for each part.\n"
                "3. Use multiple tools in sequence if needed.\n"
                "4. Verify your results and explain your reasoning clearly.\n\n"
                "Be precise and clear at every step. After your reasoning, provide ONLY the final answer.\n\n"
                "=== Final Answer Format Rules ===\n"
                "- For numbers: Use only digits (no commas or units) unless units are explicitly requested.\n"
                "- For strings: Do not use articles (a, an, the) or abbreviations. Spell out all digits.\n"
                "- For lists: Use commas to separate items. Apply the above number/string rules to each item.\n"
                "- If the answer is unknown: Respond exactly with \"do not know\"\n\n"
                "Example Question 1:\n\n"
                "If Eliud Kipchoge could maintain his marathon pace indefinitely, how many thousand hours would it take him to run from Earth to the Moon at its closest approach? Use the minimum perigee distance from Wikipedia and round to the nearest 1000 hours. Do not use commas.\n\n"
                "**Example Answer 1:**\n"
                "17\n\n"
                "**Example Reasoning Steps 1:**\n"
                "1. Found Eliud Kipchoge's marathon pace: 4 minutes 37 seconds per mile.\n"
                "2. Converted pace into hours per mile.\n"
                "3. Found Moon's closest distance: 225623 miles.\n"
                "4. Multiplied pace by distance to get total hours and rounded to nearest 1000.\n\n"
                "Example Question 2:\n\n"
                "Who are the pitchers with the number before and after Taishō Tamai's number as of July 2023? Give them to me in the form Pitcher Before, Pitcher After, use their last names only, in Roman characters.\n\n"
                "**Example Answer 2:**\n"
                "Yoshida, Uehara\n\n"
                "**Example Reasoning Steps 2:**\n"
                "1. Looked up Taishō Tamai on Wikipedia.\n"
                "2. Found the pitcher with number 18 is Kōsei Yoshida.\n"
                "3. Found the pitcher with number 20 is Kenta Uehara.\n\n"
                "Now answer the following questions:\n"
            )
        )
        # Return only the new AI message; the add_messages reducer appends it.
        return {
            "messages": [llm_with_tools.invoke([sys_msg] + state["messages"])],
            "input_file": state["input_file"]
        }
    # Build the graph
    builder = StateGraph(AgentState)
    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode(all_tools))
    builder.add_edge(START, "assistant")
    # tools_condition routes to "tools" when the last message has tool calls, else END.
    builder.add_conditional_edges("assistant", tools_condition)
    builder.add_edge("tools", "assistant")
    return builder.compile()
if __name__ == "__main__":
    # Assemble the tool set and wire it into a Groq-backed agent graph.
    all_tools = build_tool()
    react_graph = build_langgraph("groq", all_tools=all_tools)
    print("🚀 GAIA Dataset Agent with LangChain Built-in Tools!")
    print("\n" + "=" * 60 + "\n")

    # Demo 1: multi-step arithmetic exercised through the math tools.
    print("Testing calculation capabilities...")
    question = HumanMessage(content="Calculate the square root of 169, then multiply by 15")
    outcome = react_graph.invoke({"messages": [question], "input_file": None})
    for msg in outcome['messages']:
        msg.pretty_print()
    print("\n" + "=" * 60 + "\n")

    # Demo 2: knowledge retrieval via search tools, with a recursion cap.
    print("📚 Testing Wikipedia search...")
    question = HumanMessage(content="Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?")
    config = RunnableConfig(recursion_limit=10)
    outcome = react_graph.invoke({"messages": [question], "input_file": None}, config)
    for msg in outcome['messages']:
        msg.pretty_print()
    print("\n" + "=" * 60 + "\n")