File size: 6,095 Bytes
5de5d19
 
2f65b93
 
 
5de5d19
 
 
2f65b93
 
5de5d19
 
 
 
 
 
2f65b93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5de5d19
2f65b93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5de5d19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2f65b93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5de5d19
2f65b93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
import os
from dotenv import load_dotenv
from langgraph.graph import START, StateGraph, MessagesState
from langgraph.prebuilt import tools_condition
from langgraph.prebuilt import ToolNode
from langchain_groq import ChatGroq
from langchain_community.document_loaders import WikipediaLoader
from langchain_community.document_loaders import ArxivLoader
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_core.tools import tool
import requests
from bs4 import BeautifulSoup
import urllib.parse

load_dotenv()

@tool
def wiki_search(query: str) -> str:
    """Search Wikipedia for information.
    
    Args:
        query: The search query."""
    # NOTE: LangChain's `@tool` uses the docstring above as the tool
    # description given to the model, so its wording is part of runtime
    # behaviour and is intentionally left unchanged.
    try:
        # Load at most two matching Wikipedia articles.
        pages = WikipediaLoader(query=query, load_max_docs=2).load()

        # Render each article as a <Document> element tagged with its
        # source/page metadata, followed by the full article text.
        rendered = []
        for page in pages:
            meta = page.metadata
            rendered.append(
                f'<Document source="{meta["source"]}" page="{meta.get("page", "")}"/>\n{page.page_content}\n</Document>'
            )

        # NOTE(review): the success path returns a dict although the
        # annotation (and the error path) say str -- confirm what callers expect.
        return {"wiki_results": "\n\n---\n\n".join(rendered)}
    except Exception as exc:
        # Best effort: report any failure as text instead of raising.
        return f"Error searching Wikipedia: {exc!s}"

@tool
def web_search(query: str) -> str:
    """Search the web using DuckDuckGo.
    
    Args:
        query: The search query."""
    # NOTE: LangChain's `@tool` uses the docstring above as the tool
    # description given to the model, so its wording is intentionally
    # left unchanged.

    # Without an explicit timeout `requests` waits indefinitely on a stalled
    # connection, which would hang the whole agent run.
    timeout_seconds = 10
    # Only the first few hits are returned to keep the tool output short.
    max_results = 3

    try:
        encoded_query = urllib.parse.quote(query)
        url = f"https://html.duckduckgo.com/html/?q={encoded_query}"

        # A browser-like User-Agent is sent with the request.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

        response = requests.get(url, headers=headers, timeout=timeout_seconds)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        results = []
        for result in soup.find_all('div', class_='result__body'):
            title = result.find('h2', class_='result__title')
            snippet = result.find('a', class_='result__snippet')

            # Skip entries that lack either a title or a snippet.
            if title and snippet:
                results.append(f"Title: {title.get_text()}\nSnippet: {snippet.get_text()}")

            if len(results) >= max_results:
                break

        # NOTE(review): the success path returns a dict although the
        # annotation (and the error path) say str -- confirm what callers expect.
        return {"web_results": "\n\n".join(results) if results else "No results found"}

    except Exception as e:
        # Best effort: network, HTTP, timeout and parsing failures are all
        # reported as text instead of being raised.
        return f"Error searching web: {str(e)}"

@tool
def arxiv_search(query: str) -> str:
    """Search Arxiv for scientific papers.
    
    Args:
        query: The search query."""
    # NOTE: LangChain's `@tool` uses the docstring above as the tool
    # description given to the model, so its wording is part of runtime
    # behaviour and is intentionally left unchanged.
    try:
        # Load at most two matching papers.
        papers = ArxivLoader(query=query, load_max_docs=2).load()

        # Render each paper as a <Document> element tagged with its
        # source/page metadata; only the first 1000 characters of the text
        # are kept.
        rendered = []
        for paper in papers:
            meta = paper.metadata
            rendered.append(
                f'<Document source="{meta["source"]}" page="{meta.get("page", "")}"/>\n{paper.page_content[:1000]}\n</Document>'
            )

        # NOTE(review): the success path returns a dict although the
        # annotation (and the error path) say str -- confirm what callers expect.
        return {"arxiv_results": "\n\n---\n\n".join(rendered)}
    except Exception as exc:
        # Best effort: report any failure as text instead of raising.
        return f"Error searching Arxiv: {exc!s}"

# System prompt: instruction text for the model. It covers four areas --
# answer formatting, answer guidelines, error handling and special cases --
# and asks for bare answers with no explanatory text or markers.
# The text is runtime data; edit it only when the agent's behaviour should change.
system_prompt = """You are a highly accurate question-answering assistant. Your task is to provide precise, direct answers to questions.

Key Rules:
1. Answer Format:
   - For numbers: Provide only the number without units, commas, or formatting
   - For text: Use minimal words, no articles or abbreviations
   - For lists: Use comma-separated values without additional formatting
   - For dates: Use YYYY-MM-DD format unless specified otherwise
   - For names: Use full names without titles or honorifics
   - For country codes: Use official IOC codes (3 letters)
   - For chess moves: Use standard algebraic notation
   - For currency: Use numbers only, no symbols

2. Answer Guidelines:
   - Be extremely precise and direct
   - Do not include any explanatory text
   - Do not use phrases like "FINAL ANSWER" or any markers
   - Do not include units unless explicitly requested
   - Do not use abbreviations unless they are standard (e.g., DNA, RNA)
   - For multiple choice: Provide only the letter or number of the correct answer
   - For reversed text: Provide the answer in normal text
   - For file-based questions: Focus on the specific information requested

3. Error Handling:
   - If uncertain, provide the most likely answer based on available information
   - If completely unsure, provide a reasonable default rather than an error message
   - For file processing errors, indicate the specific issue

4. Special Cases:
   - For mathematical questions: Provide the exact numerical result
   - For historical dates: Use the most widely accepted date
   - For scientific terms: Use the standard scientific notation
   - For geographical locations: Use official names without abbreviations
   - For audio/video questions: Focus on the specific detail requested"""

# Prompt text wrapped as a chat message object.
sys_msg = SystemMessage(content=system_prompt)

# The tools the agent may call; build_graph() both binds this list to the
# LLM and hands it to the ToolNode that executes the calls.
tools = [wiki_search, web_search, arxiv_search]

def build_graph():
    """Build and compile the tool-calling agent graph.

    The graph has two nodes: ``assistant`` (the Groq chat model with the
    module-level ``tools`` bound to it) and ``tools`` (a ``ToolNode`` that
    executes the requested tool calls). Execution starts at ``assistant``;
    ``tools_condition`` decides after each assistant turn whether to go to
    ``tools``, and every tool result is routed back to ``assistant``.

    Returns:
        The compiled graph, ready for ``.invoke({"messages": [...]})``.
    """
    # Initialize the Groq-hosted chat model.
    llm = ChatGroq(
        model="meta-llama/llama-4-maverick-17b-128e-instruct",
        temperature=0.1
    )

    # Let the model emit calls to the search tools.
    llm_with_tools = llm.bind_tools(tools)

    def assistant(state: MessagesState):
        """Run the model on the conversation and return its reply message."""
        conversation = list(state["messages"])
        # The system prompt was previously built but never sent to the model.
        # Prepend it at invoke time (not stored in state, so it is never
        # duplicated across turns), unless the caller already supplied a
        # leading system message of their own.
        if not (conversation and isinstance(conversation[0], SystemMessage)):
            conversation = [sys_msg] + conversation
        return {"messages": [llm_with_tools.invoke(conversation)]}

    # Wire up the nodes and edges.
    builder = StateGraph(MessagesState)
    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode(tools))
    builder.add_edge(START, "assistant")
    builder.add_conditional_edges(
        "assistant",
        tools_condition,
    )
    builder.add_edge("tools", "assistant")

    # Compile graph
    return builder.compile()

# Manual smoke test: run one sample question through the agent and print
# every message of the resulting conversation.
if __name__ == "__main__":
    question = "When was a picture of St. Thomas Aquinas first added to the Wikipedia page on the Principle of double effect?"
    agent = build_graph()
    # The graph state is a dict with a "messages" list; start it with the question.
    final_state = agent.invoke({"messages": [HumanMessage(content=question)]})
    for message in final_state["messages"]:
        message.pretty_print()