File size: 13,773 Bytes
13b79a2
f0250b1
13b79a2
 
7a7bd49
 
 
f0250b1
04968b6
7a7bd49
13b79a2
 
04968b6
13b79a2
 
7a7bd49
f0250b1
 
13b79a2
 
64ae9fa
7a7bd49
04968b6
64ae9fa
 
13b79a2
f0250b1
04968b6
 
13b79a2
f0250b1
04968b6
f0250b1
 
04968b6
f0250b1
 
04968b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f0250b1
04968b6
 
 
 
f0250b1
 
13b79a2
f0250b1
 
 
13b79a2
f0250b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13b79a2
f0250b1
 
 
13b79a2
f0250b1
 
 
 
 
 
 
7a7bd49
 
 
 
f0250b1
 
 
 
 
 
 
13b79a2
f0250b1
 
 
13b79a2
f0250b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13b79a2
f0250b1
 
13b79a2
04968b6
f0250b1
13b79a2
 
 
04968b6
 
 
 
 
 
 
 
f0250b1
13b79a2
 
 
04968b6
 
 
 
 
 
 
 
 
 
 
7a7bd49
 
f0250b1
7a7bd49
f0250b1
a43ac31
7a7bd49
f0250b1
13b79a2
7a7bd49
 
13b79a2
 
 
 
f0250b1
7a7bd49
13b79a2
 
 
04968b6
13b79a2
f0250b1
13b79a2
 
f0250b1
 
 
 
13b79a2
 
f0250b1
 
7a7bd49
 
 
 
 
 
 
 
04968b6
 
 
7a7bd49
 
 
 
 
13b79a2
f0250b1
 
13b79a2
 
 
f0250b1
13b79a2
 
 
 
f0250b1
13b79a2
 
 
f0250b1
 
 
 
 
13b79a2
 
 
 
 
 
 
7a7bd49
 
 
 
 
 
 
 
 
f0250b1
 
 
13b79a2
f0250b1
 
 
04968b6
 
 
 
 
 
 
 
 
 
 
f0250b1
 
 
04968b6
 
 
 
 
 
 
 
 
 
f0250b1
 
 
04968b6
 
 
 
 
 
 
 
 
f0250b1
13b79a2
f0250b1
04968b6
 
 
 
 
 
 
 
 
 
 
f0250b1
13b79a2
f0250b1
 
 
 
 
13b79a2
 
 
 
f0250b1
 
 
 
 
 
 
 
 
 
 
13b79a2
 
f0250b1
 
04968b6
 
13b79a2
 
04968b6
 
13b79a2
04968b6
13b79a2
 
 
 
04968b6
13b79a2
 
04968b6
f0250b1
13b79a2
 
f0250b1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
import os
from typing import List, Dict, Any, Optional
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

from langchain.agents import AgentType, initialize_agent, Tool
from langchain.memory import ConversationBufferWindowMemory, ConversationSummaryBufferMemory
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from langchain_groq import ChatGroq
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_community.document_loaders import WikipediaLoader, ArxivLoader
from langchain_core.tools import tool
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Load environment variables
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
HUGGINGFACE_API_TOKEN = os.getenv('HUGGINGFACE_API_TOKEN')
GROQ_API_KEY = os.getenv('GROQ_API_KEY')
TAVILY_API_KEY = os.getenv('TAVILY_API_KEY')


@tool
def calculator_tool(expression: str) -> str:
    """Perform mathematical calculations and evaluate expressions

    Args:
        expression: A mathematical expression to evaluate (e.g., "2+2", "25*34", "sqrt(16)", "sin(0.5)")

    Returns:
        Result of the mathematical expression, or an "Error: ..." string.
    """
    import ast
    import math

    # Names the expression may reference.  Validating the parsed AST against
    # this whitelist replaces the old substring-rewriting approach, which was
    # buggy: 'pow' -> '**' produced invalid syntax like '**(2,3)', the
    # 'e' -> 'math.e' replacement mangled scientific notation such as '1e5',
    # and the hand-written character-class regex accepted arbitrary letters.
    allowed_names = {
        'sqrt': math.sqrt, 'sin': math.sin, 'cos': math.cos, 'tan': math.tan,
        'log': math.log, 'ln': math.log, 'log10': math.log10,
        'exp': math.exp, 'pow': math.pow,
        'abs': abs, 'round': round, 'min': min, 'max': max,
        'pi': math.pi, 'e': math.e,
    }
    # Only arithmetic expressions: literals, names, calls, unary/binary ops.
    allowed_nodes = (
        ast.Expression, ast.BinOp, ast.UnaryOp, ast.Call, ast.Name, ast.Load,
        ast.Constant,
        ast.Add, ast.Sub, ast.Mult, ast.Div, ast.FloorDiv, ast.Mod, ast.Pow,
        ast.USub, ast.UAdd,
    )

    try:
        # '^' is commonly written for exponentiation; Python uses '**'.
        cleaned = expression.strip().replace('^', '**')
        tree = ast.parse(cleaned, mode='eval')
        for node in ast.walk(tree):
            if not isinstance(node, allowed_nodes):
                return "Error: Invalid characters in expression. Use only numbers and basic math operations."
            if isinstance(node, ast.Name) and node.id not in allowed_names:
                return f"Error: Unknown name '{node.id}' in expression."
            if isinstance(node, ast.Constant) and not isinstance(node.value, (int, float)):
                return "Error: Only numeric constants are allowed."
        # The AST has been vetted, so eval only sees whitelisted names.
        result = eval(compile(tree, '<calculator>', 'eval'), {"__builtins__": {}}, allowed_names)
        return str(result)
    except ZeroDivisionError:
        return "Error: Cannot divide by zero"
    except SyntaxError:
        return "Error: Invalid characters in expression. Use only numbers and basic math operations."
    except Exception as e:
        return f"Error: {str(e)}"

@tool
def wikipedia_search_tool(query: str) -> str:
    """Search Wikipedia for information on any topic

    Args:
        query: The search query for Wikipedia

    Returns:
        Formatted Wikipedia search results
    """
    try:
        documents = WikipediaLoader(query=query, load_max_docs=2).load()
        sections = []
        for doc in documents:
            # Truncate each article to keep the tool output within token budget.
            sections.append(
                f'Source: {doc.metadata["source"]}\nPage: {doc.metadata.get("page", "")}\n\nContent:\n{doc.page_content[:2000]}...'
            )
        return "\n\n---\n\n".join(sections)
    except Exception as e:
        return f"Error searching Wikipedia: {str(e)}"

@tool
def web_search_tool(query: str) -> str:
    """Search the web for current information using Tavily

    Args:
        query: The search query for web search

    Returns:
        Formatted web search results
    """
    try:
        # Bail out early with a readable message rather than a raw exception.
        if not TAVILY_API_KEY:
            return "Error: TAVILY_API_KEY not found in environment variables"

        hits = TavilySearchResults(max_results=3, api_key=TAVILY_API_KEY).invoke(query)
        blocks = [
            f'Source: {hit.get("url", "")}\n\nContent:\n{hit.get("content", "")}'
            for hit in hits
        ]
        return "\n\n---\n\n".join(blocks)
    except Exception as e:
        return f"Error searching web: {str(e)}"

@tool
def arxiv_search_tool(query: str) -> str:
    """Search ArXiv for academic papers and research

    Args:
        query: The search query for ArXiv

    Returns:
        Formatted ArXiv search results
    """
    try:
        papers = ArxivLoader(query=query, load_max_docs=3).load()
        entries = []
        for paper in papers:
            # Keep only the first 1500 characters of each paper body.
            entries.append(
                f'Source: {paper.metadata["source"]}\nTitle: {paper.metadata.get("Title", "")}\n\nContent:\n{paper.page_content[:1500]}...'
            )
        return "\n\n---\n\n".join(entries)
    except Exception as e:
        return f"Error searching ArXiv: {str(e)}"

class LangChainAgent:
    """Multi-purpose LangChain agent with various capabilities.

    Wraps a tool-using ReAct agent (calculator, Wikipedia, Tavily web
    search, ArXiv) around a configurable chat LLM.  Conversation history
    is kept in a summarizing buffer, and a direct LLM call is used as a
    fallback whenever the agent cannot be created or raises at runtime.
    """
    
    def __init__(self, provider: str = "groq"):
        """Initialize the LangChain agent with specified LLM provider.

        Args:
            provider: Backend to use — "groq" (default), "google", or
                "huggingface".

        Raises:
            ValueError: Propagated from _get_llm when the provider is
                unknown or its API key is missing from the environment.
        """
        self.provider = provider
        self.llm = self._get_llm(provider)
        self.tools = self._initialize_tools()
        # Use ConversationSummaryBufferMemory for better long-term memory management:
        # older turns get compressed into an LLM-written summary instead of dropped.
        self.memory = ConversationSummaryBufferMemory(
            llm=self.llm,
            memory_key="chat_history",
            return_messages=True,
            max_token_limit=2000,  # Limit memory to prevent token overflow
            moving_summary_buffer="The human and AI are having a conversation about various topics."
        )
        # _create_agent returns None on failure; __call__ checks for that.
        self.agent = self._create_agent()
        
    def _get_llm(self, provider: str):
        """Return a chat-model instance for the requested provider.

        Args:
            provider: "groq", "google", or "huggingface".

        Returns:
            A LangChain chat model ready for .invoke().

        Raises:
            ValueError: If the provider is unsupported or the matching
                API key environment variable is unset.
        """
        if provider == "groq":
            if not GROQ_API_KEY:
                raise ValueError("GROQ_API_KEY not found in environment variables")
            return ChatGroq(
                model="llama-3.3-70b-versatile",  # Latest Llama model available on Groq
                temperature=0.1,
                max_tokens=8192,  # Increased token limit for better responses
                api_key=GROQ_API_KEY,
                streaming=False
            )
        elif provider == "google":
            if not GOOGLE_API_KEY:
                raise ValueError("GOOGLE_API_KEY not found in environment variables")
            return ChatGoogleGenerativeAI(
                model="gemini-1.5-flash", 
                temperature=0,
                max_tokens=2048,
                google_api_key=GOOGLE_API_KEY
            )
        elif provider == "huggingface":
            if not HUGGINGFACE_API_TOKEN:
                raise ValueError("HUGGINGFACE_API_TOKEN not found in environment variables")
            # NOTE(review): DialoGPT-medium is a small conversational model;
            # verify it is still served by the HF inference endpoint.
            return ChatHuggingFace(
                llm=HuggingFaceEndpoint(
                    repo_id="microsoft/DialoGPT-medium",
                    temperature=0,
                    max_length=2048,
                    huggingfacehub_api_token=HUGGINGFACE_API_TOKEN
                ),
            )
        else:
            raise ValueError("Invalid provider. Choose 'groq', 'google' or 'huggingface'.")
    
    def _initialize_tools(self) -> List[Tool]:
        """Return the module-level @tool functions the agent may call."""
        return [
            calculator_tool,
            wikipedia_search_tool,
            web_search_tool,
            arxiv_search_tool,
        ]
    
    def _create_agent(self):
        """Create the LangChain agent with tools.

        Returns:
            The initialized agent executor, or None if creation failed
            (callers must handle the None case).
        """
        # NOTE(review): ZERO_SHOT_REACT_DESCRIPTION does not use chat
        # history; a conversational agent type may be intended given that
        # memory is passed in — confirm against LangChain docs.
        try:
            return initialize_agent(
                tools=self.tools,
                llm=self.llm,
                agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
                memory=self.memory,
                verbose=True,
                handle_parsing_errors=True,
                max_iterations=4,
                early_stopping_method="generate",
                return_intermediate_steps=False
            )
        except Exception as e:
            print(f"Error creating agent: {e}")
            # Return a simple agent without tools as fallback
            return None
    
    def _determine_approach(self, question: str) -> str:
        """Classify a question by keyword heuristics.

        Checks run in order, so a question matching several lists gets the
        first category ('research' appears in both the research and academic
        lists and will route to 'research').

        Returns:
            One of 'calculation', 'research', 'academic', or 'general'.
        """
        question_lower = question.lower()
        
        # Check for mathematical operations
        # NOTE(review): '-' and '+' are substring matches, so any hyphenated
        # question is classified as 'calculation' — confirm this is intended.
        math_keywords = ['calculate', 'compute', 'add', 'subtract', 'multiply', 'divide', 'math', 'equation', '+', '-', '*', '/', '%']
        if any(keyword in question_lower for keyword in math_keywords):
            return 'calculation'
        
        # Check for research-related queries
        research_keywords = ['search', 'find', 'research', 'information', 'what is', 'who is', 'when', 'where', 'how', 'why']
        if any(keyword in question_lower for keyword in research_keywords):
            return 'research'
        
        # Check for academic/scientific queries
        academic_keywords = ['paper', 'study', 'research', 'academic', 'scientific', 'arxiv', 'journal']
        if any(keyword in question_lower for keyword in academic_keywords):
            return 'academic'
        
        return 'general'
    
    def __call__(self, question: str) -> str:
        """Process a question and return an answer.

        Flow: if the agent failed to initialize, answer with a direct LLM
        call; otherwise classify the question, wrap it in an
        approach-specific prompt, and run it through the agent.  Any agent
        failure falls back to a direct LLM call on the raw question.
        """
        try:
            print(f"Processing question: {question[:100]}...")
            
            # If agent initialization failed, use direct LLM
            if self.agent is None:
                print("Agent not available, using direct LLM response")
                try:
                    response = self.llm.invoke([HumanMessage(content=question)])
                    return response.content
                except Exception as llm_error:
                    return f"Error: Unable to process question. {str(llm_error)}"
            
            # Determine the best approach for this question
            approach = self._determine_approach(question)
            print(f"Selected approach: {approach}")
            
            # Create a comprehensive prompt based on the approach
            if approach == 'calculation':
                enhanced_question = f"""
You are a mathematical assistant. Solve this problem step by step:

{question}

IMPORTANT: Use the calculator_tool for ALL mathematical calculations, even simple ones.
Examples:
- For "25 * 34", use: calculator_tool("25 * 34")
- For "sqrt(16)", use: calculator_tool("sqrt(16)")
- For "2 + 2", use: calculator_tool("2 + 2")

Always show your work and use the tools provided.
                """
            elif approach == 'research':
                enhanced_question = f"""
You are a research assistant. Provide comprehensive information about:

{question}

IMPORTANT: Use the appropriate search tools to gather information:
- wikipedia_search_tool("your search query") for general knowledge
- web_search_tool("your search query") for current information
- arxiv_search_tool("your search query") for academic papers

Always cite your sources and provide detailed explanations.
                """
            elif approach == 'academic':
                enhanced_question = f"""
You are an academic research assistant. Find scholarly information about:

{question}

IMPORTANT: Use research tools to find information:
- arxiv_search_tool("your search query") for academic papers
- wikipedia_search_tool("your search query") for background information

Provide citations and summarize key findings.
                """
            else:
                enhanced_question = f"""
You are a helpful assistant. Answer this question comprehensively:

{question}

IMPORTANT: Use the appropriate tools as needed:
- calculator_tool("expression") for mathematical calculations
- wikipedia_search_tool("query") for general information
- web_search_tool("query") for current information
- arxiv_search_tool("query") for academic research

Always use tools when they can help provide better answers.
                """
            
            # Use the agent to process the question
            result = self.agent.run(enhanced_question)
            
            print(f"Generated answer: {str(result)[:200]}...")
            return str(result)
            
        except Exception as e:
            error_msg = f"Error processing question: {str(e)}"
            print(error_msg)
            # Provide a fallback response
            try:
                # Try a simple LLM response without tools
                fallback_result = self.llm.invoke([HumanMessage(content=question)])
                return fallback_result.content
            except Exception as fallback_error:
                # Reports the original agent error; fallback_error is discarded.
                return f"Error: Unable to process question. {str(e)}"

    def reset_memory(self):
        """Reset the conversation memory (clears both buffer and summary)."""
        self.memory.clear()

# Smoke-test entry point: exercises the agent against one question per category.
def test_langchain_agent():
    """Test the LangChain agent with sample questions."""
    print("Testing LangChain Agent with Groq Llama...")
    agent = LangChainAgent(provider="groq")

    sample_questions = (
        "What is 25 * 34 + 100?",
        "Who was Albert Einstein and what were his major contributions?",
        "Search for recent developments in artificial intelligence",
        "What is quantum computing?",
    )

    for q in sample_questions:
        print(f"\nQuestion: {q}")
        print("-" * 50)
        print(f"Answer: {agent(q)}")
        print("=" * 80)
        # Start every question from a clean conversation state.
        agent.reset_memory()

if __name__ == "__main__":
    test_langchain_agent()