# Page residue from the file viewer this source was copied from:
# Spaces: Sleeping; File size: 6,095 Bytes; 5de5d19 2f65b93
import os
from dotenv import load_dotenv
from langgraph.graph import START, StateGraph, MessagesState
from langgraph.prebuilt import tools_condition
from langgraph.prebuilt import ToolNode
from langchain_groq import ChatGroq
from langchain_community.document_loaders import WikipediaLoader
from langchain_community.document_loaders import ArxivLoader
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_core.tools import tool
import requests
from bs4 import BeautifulSoup
import urllib.parse
load_dotenv()
@tool
def wiki_search(query: str) -> str:
    """Search Wikipedia for information.

    Args:
        query: The search query.

    Returns:
        Up to two matching articles as text, each wrapped in a Document tag
        and separated by "---", or a short message if nothing was found or
        the search failed.
    """
    # NOTE: with @tool the docstring above is also the tool description given
    # to the model, so keep it short and free of implementation detail.
    try:
        search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
        if not search_docs:
            return "No results found"
        # Return a plain string on every path so the result matches the
        # declared `-> str` and the error branch below (the previous version
        # returned a dict on success only).
        return "\n\n---\n\n".join(
            f'<Document source="{doc.metadata.get("source", "")}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
            for doc in search_docs
        )
    except Exception as e:
        # Best-effort tool: report the failure to the model instead of
        # aborting the whole graph run.
        return f"Error searching Wikipedia: {str(e)}"
@tool
def web_search(query: str) -> str:
    """Search the web using DuckDuckGo.

    Args:
        query: The search query.

    Returns:
        Up to three results as "Title: ...\\nSnippet: ..." entries separated
        by blank lines, "No results found" when nothing matched, or an error
        message if the request failed.
    """
    # NOTE: with @tool the docstring above is also the tool description given
    # to the model, so keep it short and free of implementation detail.
    max_results = 3
    try:
        encoded_query = urllib.parse.quote(query)
        url = f"https://html.duckduckgo.com/html/?q={encoded_query}"
        # A browser-like User-Agent is sent with the request.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        # requests has no default timeout; without one a stalled connection
        # would block the agent indefinitely. A timeout raises, which the
        # handler below turns into an error string.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        results = []
        for result in soup.find_all('div', class_='result__body'):
            title = result.find('h2', class_='result__title')
            snippet = result.find('a', class_='result__snippet')
            if not (title and snippet):
                # Skip entries that lack either a title or a snippet.
                continue
            results.append(f"Title: {title.get_text()}\nSnippet: {snippet.get_text()}")
            if len(results) >= max_results:
                break
        # Return a plain string on every path so the result matches the
        # declared `-> str` and the error branch below (the previous version
        # returned a dict on success only).
        return "\n\n".join(results) if results else "No results found"
    except Exception as e:
        # Best-effort tool: report the failure to the model instead of
        # aborting the whole graph run.
        return f"Error searching web: {str(e)}"
@tool
def arxiv_search(query: str) -> str:
    """Search Arxiv for scientific papers.

    Args:
        query: The search query.

    Returns:
        Up to two matching papers as text (first 1000 characters of each),
        each wrapped in a Document tag and separated by "---", or a short
        message if nothing was found or the search failed.
    """
    # NOTE: with @tool the docstring above is also the tool description given
    # to the model, so keep it short and free of implementation detail.
    try:
        search_docs = ArxivLoader(query=query, load_max_docs=2).load()
        if not search_docs:
            return "No results found"
        formatted = []
        for doc in search_docs:
            # NOTE(review): ArxivLoader's default metadata appears to expose
            # Published/Title/Authors/Summary and no "source" key, so a hard
            # metadata["source"] lookup would raise KeyError and turn every
            # call into an error. Fall back to the title; confirm against the
            # installed langchain_community version.
            source = doc.metadata.get("source") or doc.metadata.get("Title", "")
            page = doc.metadata.get("page", "")
            # Only the first 1000 characters of each paper are returned.
            formatted.append(
                f'<Document source="{source}" page="{page}"/>\n{doc.page_content[:1000]}\n</Document>'
            )
        # Return a plain string on every path so the result matches the
        # declared `-> str` and the error branch below (the previous version
        # returned a dict on success only).
        return "\n\n---\n\n".join(formatted)
    except Exception as e:
        # Best-effort tool: report the failure to the model instead of
        # aborting the whole graph run.
        return f"Error searching Arxiv: {str(e)}"
# System prompt: answer-formatting rules for the assistant. The text asks for
# bare answers (no units, no explanations, no "FINAL ANSWER" marker) and gives
# per-type formatting rules. This string is runtime data; edit it only when a
# change in model behavior is intended.
system_prompt = """You are a highly accurate question-answering assistant. Your task is to provide precise, direct answers to questions.
Key Rules:
1. Answer Format:
- For numbers: Provide only the number without units, commas, or formatting
- For text: Use minimal words, no articles or abbreviations
- For lists: Use comma-separated values without additional formatting
- For dates: Use YYYY-MM-DD format unless specified otherwise
- For names: Use full names without titles or honorifics
- For country codes: Use official IOC codes (3 letters)
- For chess moves: Use standard algebraic notation
- For currency: Use numbers only, no symbols
2. Answer Guidelines:
- Be extremely precise and direct
- Do not include any explanatory text
- Do not use phrases like "FINAL ANSWER" or any markers
- Do not include units unless explicitly requested
- Do not use abbreviations unless they are standard (e.g., DNA, RNA)
- For multiple choice: Provide only the letter or number of the correct answer
- For reversed text: Provide the answer in normal text
- For file-based questions: Focus on the specific information requested
3. Error Handling:
- If uncertain, provide the most likely answer based on available information
- If completely unsure, provide a reasonable default rather than an error message
- For file processing errors, indicate the specific issue
4. Special Cases:
- For mathematical questions: Provide the exact numerical result
- For historical dates: Use the most widely accepted date
- For scientific terms: Use the standard scientific notation
- For geographical locations: Use official names without abbreviations
- For audio/video questions: Focus on the specific detail requested"""
# System message: the prompt above wrapped as a SystemMessage so it can be
# placed in a message list.
sys_msg = SystemMessage(content=system_prompt)
# Tools list: the three search tools defined in this file. build_graph() binds
# this list to the LLM and also passes it to the graph's ToolNode.
tools = [
wiki_search,
web_search,
arxiv_search,
]
def build_graph():
    """Build and compile the tool-calling agent graph.

    The graph has two nodes: "assistant", which calls the Groq-hosted LLM with
    the module-level `tools` bound, and "tools", a ToolNode over the same
    list. Execution starts at "assistant"; `tools_condition` decides after
    each assistant turn whether to go to "tools", and "tools" always returns
    to "assistant".

    Returns:
        The compiled graph, invoked with a state of the form
        ``{"messages": [...]}``.
    """
    # Initialize Groq LLM
    llm = ChatGroq(
        model="meta-llama/llama-4-maverick-17b-128e-instruct",
        temperature=0.1,
    )
    # Bind tools to LLM so it can emit tool calls for them
    llm_with_tools = llm.bind_tools(tools)

    # Node
    def assistant(state: MessagesState):
        """Run one LLM turn over the conversation and return its reply."""
        messages = state["messages"]
        # The system prompt was defined but never sent to the model, so its
        # answer-format rules had no effect. Prepend it here unless the
        # conversation already carries a system message (e.g. one supplied by
        # the caller), to avoid sending two.
        if not any(isinstance(m, SystemMessage) for m in messages):
            messages = [sys_msg, *messages]
        return {"messages": [llm_with_tools.invoke(messages)]}

    # Build graph
    builder = StateGraph(MessagesState)
    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode(tools))
    builder.add_edge(START, "assistant")
    builder.add_conditional_edges(
        "assistant",
        tools_condition,
    )
    # Tool results are fed back to the assistant for the next turn.
    builder.add_edge("tools", "assistant")
    # Compile graph
    return builder.compile()
# Test: run one sample question through the graph and print the transcript.
if __name__ == "__main__":
    question = "When was a picture of St. Thomas Aquinas first added to the Wikipedia page on the Principle of double effect?"
    # Build the graph
    graph = build_graph()
    # Run the graph; the result is the final state, whose "messages" entry is
    # read below. It gets its own name rather than rebinding the input list.
    result = graph.invoke({"messages": [HumanMessage(content=question)]})
    for message in result["messages"]:
        message.pretty_print()