File size: 13,401 Bytes
2f7413a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7fbe1a5
2f7413a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7fbe1a5
2f7413a
 
 
 
 
 
 
 
 
7fbe1a5
 
2f7413a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7fbe1a5
 
2f7413a
7fbe1a5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2f7413a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7fbe1a5
 
 
 
2f7413a
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
import math
import re
import requests
import pandas as pd
import base64

from markdownify import markdownify
from requests.exceptions import RequestException
from typing import TypedDict, Annotated, Optional, Any

from langchain_core.messages import HumanMessage, SystemMessage, AnyMessage
from langchain_core.tools import tool
from langchain_ollama import ChatOllama
from langgraph.graph.message import add_messages
from langgraph.graph import START, StateGraph
from langgraph.prebuilt import ToolNode, tools_condition
from langchain_core.runnables.config import RunnableConfig
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_groq import ChatGroq
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint

# Built-in LangChain tools
from langchain_community.tools import (
    WikipediaQueryRun,
    DuckDuckGoSearchRun,
    ArxivQueryRun,
    ShellTool,
)
from langchain_community.utilities import (
    WikipediaAPIWrapper,
    DuckDuckGoSearchAPIWrapper,
    ArxivAPIWrapper,
)
from langchain_experimental.tools import PythonREPLTool


# Module-level vision model used by the `extract_text_multimodal` tool below.
# The active default is the Google "gemini-2.5-flash" chat model. A local
# Ollama alternative is kept commented out for reference; to use it, uncomment
# it and remove the ChatGoogleGenerativeAI assignment.
# NOTE(review): presumably the Gemini client needs API credentials in the
# environment when this module is imported -- confirm before importing offline.
# vision_llm = ChatOllama(
#     model="qwen2-vl:7b",
#     base_url="http://localhost:11434"
# )
vision_llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash")

# ============== CUSTOM TOOLS (not available in LangChain) ==============

@tool
def reverse_text(text: str) -> str:
    """Reverse the given text character by character."""
    # The docstring is left untouched on purpose: the @tool decorator on this
    # function exposes it to the model as the tool description.
    # Walk the string back-to-front and stitch the characters together again.
    return "".join(reversed(text))


@tool
def reverse_words(text: str) -> str:
    """Reverse the order of words in the given text."""
    # The docstring is left untouched on purpose: the @tool decorator on this
    # function exposes it to the model as the tool description.
    # split() with no argument collapses any run of whitespace, so the result
    # is always single-space separated regardless of the input spacing.
    words = text.split()
    words.reverse()
    return " ".join(words)


@tool
def calculator(expression: str) -> str:
    """Perform mathematical calculations safely. Supports basic arithmetic operations."""
    # The docstring is left untouched on purpose: the @tool decorator on this
    # function exposes it to the model as the tool description.
    #
    # Contract: always returns a string -- the stringified result on success,
    # or a message starting with "Error:" / "Calculation error:" on failure.
    try:
        # Whitelist: digits, the four arithmetic operator characters, decimal
        # point, parentheses and spaces. No letters or underscores means no
        # names, attributes or calls to anything callable can be spelled.
        allowed_chars = frozenset('0123456789+-*/.() ')
        if not set(expression) <= allowed_chars:
            return "Error: Invalid characters in expression"

        # An empty or whitespace-only expression would otherwise surface as an
        # opaque SyntaxError message from eval().
        if not expression.strip():
            return "Error: Empty expression"

        # SECURITY: eval() is being run on model-supplied text. The whitelist
        # above is the primary guard; the empty globals/builtins mapping is
        # defence in depth so nothing from this module is reachable even if
        # the whitelist is ever widened.
        # NOTE(review): "**" is spellable with the whitelist, so an input such
        # as "9**9**9**9" can still consume unbounded CPU/memory. Consider an
        # AST-based evaluator with an exponent bound if that matters here.
        return str(eval(expression, {"__builtins__": {}}, {}))
    except Exception as e:
        return f"Calculation error: {str(e)}"


@tool
def advanced_math(operation: str, num1: float, num2: Optional[float] = None) -> str:
    """
    Perform advanced math operations like sqrt, log, sin, cos, tan, power.
    """
    # The docstring is left untouched on purpose: the @tool decorator on this
    # function exposes it to the model as the tool description.
    #
    # Single-argument operations map straight onto their math-module function;
    # num2 is ignored for all of them ("log" is math.log with one argument).
    single_arg_ops = {
        "sqrt": math.sqrt,
        "log": math.log,
        "sin": math.sin,
        "cos": math.cos,
        "tan": math.tan,
    }
    try:
        func = single_arg_ops.get(operation)
        if func is not None:
            return str(func(num1))

        # "power" is the only two-argument operation.
        if operation != "power":
            return f"Unknown operation: {operation}"
        if num2 is None:
            return "power operation requires two numbers"
        return str(math.pow(num1, num2))
    except Exception as e:
        # Domain/overflow errors are reported in-band rather than raised.
        return f"Math error: {str(e)}"


@tool
def extract_text_multimodal(img_path: str) -> str:
    """Extract text from image using multimodal LLM vision capabilities."""
    # The docstring is left untouched on purpose: the @tool decorator on this
    # function exposes it to the model as the tool description.
    #
    # Contract: reads the file at img_path, sends it base64-encoded to the
    # module-level `vision_llm`, and returns the stripped text of the reply.
    # Every failure is reported in-band as an error string; nothing is raised.
    import mimetypes  # local import keeps this block self-contained

    try:
        if 'vision_llm' not in globals():
            return "Error: Vision LLM not configured. Please uncomment and configure vision_llm."

        with open(img_path, "rb") as image_file:
            image_base64 = base64.b64encode(image_file.read()).decode("utf-8")

        # Label the payload with the real image type instead of always
        # claiming PNG; fall back to PNG when the extension is unknown or
        # does not map to an image type.
        mime_type, _ = mimetypes.guess_type(img_path)
        if not mime_type or not mime_type.startswith("image/"):
            mime_type = "image/png"

        message = [
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": "Extract all the text from this image. Return only the extracted text, no explanations."
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:{mime_type};base64,{image_base64}"}
                    }
                ]
            )
        ]

        response = vision_llm.invoke(message)

        # Message content may be a plain string or a list of parts (strings
        # or dicts carrying a "text" entry); flatten the latter so .strip()
        # does not fail on a list.
        content = response.content
        if isinstance(content, list):
            pieces = []
            for part in content:
                if isinstance(part, str):
                    pieces.append(part)
                elif isinstance(part, dict):
                    pieces.append(str(part.get("text", "")))
            content = "".join(pieces)
        return content.strip()
    except Exception as e:
        return f"Multimodal text extraction error: {str(e)}"


@tool
def read_excel_file(file_path: str, sheet_name: Optional[str] = None) -> pd.DataFrame:
    """Read Excel file and return a pandas DataFrame."""
    # The docstring is left untouched on purpose: the @tool decorator on this
    # function exposes it to the model as the tool description.
    #
    # A missing/empty sheet_name selects sheet index 0, which is pandas' own
    # default (the first sheet). None must not be forwarded as-is: pandas
    # treats sheet_name=None as "load every sheet" and returns a dict.
    target_sheet = sheet_name if sheet_name else 0
    try:
        return pd.read_excel(file_path, sheet_name=target_sheet)
    except Exception as e:
        # Failures are reported in-band: a one-row frame whose only column,
        # "error", holds the message (the return type stays a DataFrame).
        return pd.DataFrame({"error": [f"Excel reading error: {str(e)}"]})


@tool
def visit_webpage(url: str) -> str:
    """
    Visits a webpage at the given URL and returns its content as a markdown string.
    Use this to browse and extract readable content from webpages.
    """
    # The docstring is left untouched on purpose: the @tool decorator on this
    # function exposes it to the model as the tool description.
    #
    # Pages longer than `limit` characters are cut down to their first and
    # last halves with a marker in between, so both the head and the tail of
    # the document stay visible.
    limit = 40000
    try:
        page = requests.get(url, timeout=20)
        page.raise_for_status()

        # Convert HTML to markdown, then squeeze runs of 3+ newlines to 2.
        text = re.sub(r"\n{3,}", "\n\n", markdownify(page.text).strip())
        if len(text) <= limit:
            return text

        half = limit // 2
        return "".join(
            (
                text[:half],
                f"\n\n...[Content truncated to {limit} chars]...\n\n",
                text[-half:],
            )
        )
    except requests.exceptions.Timeout:
        # Must precede RequestException: Timeout is handled separately first.
        return "Timeout while trying to access the webpage."
    except RequestException as e:
        return f"Request error: {str(e)}"
    except Exception as e:
        return f"Unexpected error: {str(e)}"


def build_tool():
    """
    Create the full tool set handed to the agent.

    Returns:
        A new list holding, in order, the five built-in LangChain tools
        (Wikipedia, DuckDuckGo search, arXiv, shell, Python REPL) followed by
        the seven custom tools defined in this module.
    """
    # Built-in tools are instantiated fresh on every call, in the same order
    # they appear in the returned list.
    # NOTE(review): the shell and Python REPL tools presumably execute
    # arbitrary commands/code on the host -- confirm this is acceptable
    # wherever the agent is deployed.
    builtin_tools = [
        # Wikipedia lookups, capped at 2000 characters of document content
        WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(doc_content_chars_max=2000)),
        # Web search, up to 15 results per query
        DuckDuckGoSearchRun(api_wrapper=DuckDuckGoSearchAPIWrapper(max_results=15)),
        ArxivQueryRun(api_wrapper=ArxivAPIWrapper()),
        ShellTool(),
        PythonREPLTool(),
    ]

    # Custom tools for specialized tasks, defined earlier in this module.
    custom_tools = [
        reverse_text,
        reverse_words,
        calculator,
        advanced_math,
        extract_text_multimodal,
        read_excel_file,
        visit_webpage,
    ]
    return builtin_tools + custom_tools


class AgentState(TypedDict):
    """State dictionary passed between the nodes of the agent graph."""

    # Path (or other identifier) of a file associated with the current task,
    # or None. The assistant node interpolates it into the system prompt as
    # "Currently loaded file: ...".
    input_file: Optional[str]
    # Conversation history. The `add_messages` annotation is the reducer
    # LangGraph applies when a node returns new messages for this key.
    messages: Annotated[list[AnyMessage], add_messages]


def build_langgraph(provider: str, model: Optional[str] = None, temperature: float = 0.1, all_tools: Optional[list[Any]] = None) -> StateGraph:
    """Builds and returns the LangGraph agent with the given provider.

    The graph has two nodes: "assistant" (the tool-bound chat model, prefixed
    with a fixed system prompt) and "tools" (a ToolNode over ``all_tools``).
    START leads to "assistant"; ``tools_condition`` decides where to go after
    each assistant turn; "tools" always returns to "assistant".

    Args:
        provider: One of "google", "groq", "huggingface" or "ollama".
        model: Model identifier for the provider. When omitted, a per-provider
            default is used ("gemini-2.5-flash", "qwen/qwen3-32b",
            "meta-llama/Llama-3.1-8B-Instruct", "qwen3:4b" respectively).
        temperature: Sampling temperature forwarded to the chat model.
        all_tools: Tools to bind to the model and expose through the ToolNode.
            ``None`` is treated as an empty list.

    Returns:
        The result of ``builder.compile()``.
        NOTE(review): the annotation says ``StateGraph`` but the value is the
        compiled graph object -- confirm and tighten the annotation.

    Raises:
        ValueError: If ``provider`` is not one of the supported names.
    """

    if all_tools is None:
        all_tools = []

    # Select model and provider
    if provider == "google":
        llm = ChatGoogleGenerativeAI(model=model or "gemini-2.5-flash", temperature=temperature)
    elif provider == "groq":
        llm = ChatGroq(model=model or "qwen/qwen3-32b", temperature=temperature)
    elif provider == "huggingface":
        llm = ChatHuggingFace(
            llm=HuggingFaceEndpoint(
                repo_id=model or "meta-llama/Llama-3.1-8B-Instruct",
                temperature=temperature
            )
        )
    elif provider == "ollama":
        # The Ollama endpoint is fixed to a local server on the default port.
        llm = ChatOllama(model=model or "qwen3:4b", base_url="http://localhost:11434", temperature=temperature)
    else:
        raise ValueError("Unsupported provider. Choose from 'google', 'groq', 'huggingface', or 'ollama'.")

    llm_with_tools = llm.bind_tools(all_tools)

    def assistant(state: AgentState):
        """Run one model turn: system prompt + history -> one new message."""
        # NOTE(review): the names listed here (wikipedia_tool, arxiv_tool,
        # shell_tool, ...) mirror the local variable names used in
        # build_tool(), and the list is static regardless of `all_tools`.
        # They may not match the names the tools are registered under --
        # verify against each tool's `.name`.
        tools_description = """
            Available tools for the tasks:

            WEB & SEARCH:
            - duckduckgo_search: Search the web for information
            - wikipedia_tool: Search Wikipedia for knowledge
            - visit_webpage: Visit a webpage and extract readable markdown content
            - arxiv_tool: Search arXiv for research papers

            CALCULATIONS:
            - calculator: Basic arithmetic operations (+, -, *, /, etc.)
            - advanced_math: Advanced math functions (sqrt, log, trig)
            - python_repl: Execute Python code for complex computations

            TEXT PROCESSING:
            - reverse_text: Reverse text character by character
            - reverse_words: Reverse word order in text

            IMAGE PROCESSING:
            - extract_text_multimodal: Extract text using AI vision

            DATA ANALYSIS:
            - read_excel_file: Read and preview Excel files

            SYSTEM:
            - shell_tool: Execute shell commands (use carefully)
            """

        # Use .get(): callers that invoke the graph with only "messages"
        # would otherwise hit a KeyError here instead of getting an answer.
        file = state.get("input_file")
        sys_msg = SystemMessage(
            content=(
                "You are an intelligent AI agent designed to solve complex problems using the tools provided.\n\n"
                "=== Available Tools ===\n"
                f"{tools_description}\n\n"
                "=== Optional Files ===\n"
                f"Currently loaded file: {file}\n\n"
                "=== Problem-Solving Process ===\n"
                "Follow these steps carefully when answering a question:\n"
                "1. Break the problem into smaller, manageable parts.\n"
                "2. Choose the most suitable tool for each part.\n"
                "3. Use multiple tools in sequence if needed.\n"
                "4. Verify your results and explain your reasoning clearly.\n\n"
                "Be precise and clear at every step. After your reasoning, provide ONLY the final answer.\n\n"
                "=== Final Answer Format Rules ===\n"
                "- For numbers: Use only digits (no commas or units) unless units are explicitly requested.\n"
                "- For strings: Do not use articles (a, an, the) or abbreviations. Spell out all digits.\n"
                "- For lists: Use commas to separate items. Apply the above number/string rules to each item.\n"
                "- If the answer is unknown: Respond exactly with \"do not know\"\n\n"
                "Example Question 1:\n\n"
                "If Eliud Kipchoge could maintain his marathon pace indefinitely, how many thousand hours would it take him to run from Earth to the Moon at its closest approach? Use the minimum perigee distance from Wikipedia and round to the nearest 1000 hours. Do not use commas.\n\n"
                "**Example Answer 1:**\n"
                "17\n\n"
                "**Example Reasoning Steps 1:**\n"
                "1. Found Eliud Kipchoge's marathon pace: 4 minutes 37 seconds per mile.\n"
                "2. Converted pace into hours per mile.\n"
                "3. Found Moon's closest distance: 225623 miles.\n"
                "4. Multiplied pace by distance to get total hours and rounded to nearest 1000.\n\n"
                "Example Question 2:\n\n"
                "Who are the pitchers with the number before and after Taishō Tamai's number as of July 2023? Give them to me in the form Pitcher Before, Pitcher After, use their last names only, in Roman characters.\n\n"
                "**Example Answer 2:**\n"
                "Yoshida, Uehara\n\n"
                "**Example Reasoning Steps 2:**\n"
                "1. Looked up Taishō Tamai on Wikipedia.\n"
                "2. Found the pitcher with number 18 is Kōsei Yoshida.\n"
                "3. Found the pitcher with number 20 is Kenta Uehara.\n\n"
                "Now answer the following questions:\n"
            )
        )

        # The system prompt is prepended on every turn but never stored in
        # the state; only the model's reply is appended to the history.
        return {
            "messages": [llm_with_tools.invoke([sys_msg] + state["messages"])],
            "input_file": file
        }

    # Build the graph
    builder = StateGraph(AgentState)
    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode(all_tools))
    builder.add_edge(START, "assistant")
    builder.add_conditional_edges("assistant", tools_condition)
    builder.add_edge("tools", "assistant")
    return builder.compile()


if __name__ == "__main__":
    # Demo driver: builds the agent on the "groq" provider with the full tool
    # set and runs two sample questions, printing every message of each run.

    separator = "\n" + "="*60 + "\n"
    react_graph = build_langgraph("groq", all_tools=build_tool())

    print("🚀 GAIA Dataset Agent with LangChain Built-in Tools!")
    print(separator)

    # (banner, question, run config). A config of None means the graph's
    # default settings; the second demo caps the graph at 10 recursion steps.
    demos = [
        (
            "Testing calculation capabilities...",
            "Calculate the square root of 169, then multiply by 15",
            None,
        ),
        (
            "📚 Testing Wikipedia search...",
            "Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?",
            RunnableConfig(recursion_limit=10),
        ),
    ]

    for banner, question, run_config in demos:
        print(banner)
        initial_state = {"messages": [HumanMessage(content=question)], "input_file": None}
        outcome = react_graph.invoke(initial_state, run_config)

        for message in outcome['messages']:
            message.pretty_print()

        print(separator)