# Spaces: Tingusto / Final-Assignment-Agent
# Status: Sleeping
# File size: 6,095 Bytes

import os
from dotenv import load_dotenv
from langgraph.graph import START, StateGraph, MessagesState
from langgraph.prebuilt import tools_condition
from langgraph.prebuilt import ToolNode
from langchain_groq import ChatGroq
from langchain_community.document_loaders import WikipediaLoader
from langchain_community.document_loaders import ArxivLoader
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_core.tools import tool
import requests
from bs4 import BeautifulSoup
import urllib.parse

# Load variables from a local .env file into the process environment at import
# time, before any client is constructed.
# NOTE(review): presumably this is where the Groq API key comes from — confirm.
load_dotenv()

# NOTE: the docstring below is used by @tool as the tool description shown to
# the model, so it is deliberately left unchanged; maintainer notes live in
# comments instead.
@tool
def wiki_search(query: str) -> str:
    """Search Wikipedia for information.
    
    Args:
        query: The search query."""
    # Always returns a plain string, as the signature declares:
    #   - up to 2 pages formatted as <Document ...> blocks joined by "---",
    #   - "No results found" when the loader yields nothing,
    #   - "Error searching Wikipedia: ..." when the lookup fails.
    # Errors are reported as text rather than raised so the agent loop can
    # continue and let the model react to the failure.
    try:
        search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    except Exception as e:
        return f"Error searching Wikipedia: {str(e)}"

    if not search_docs:
        return "No results found"

    # .get() keeps a document with incomplete metadata from discarding the
    # whole result set.
    return "\n\n---\n\n".join(
        f'<Document source="{doc.metadata.get("source", "")}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
        for doc in search_docs
    )

# NOTE: the docstring below is used by @tool as the tool description shown to
# the model, so it is deliberately left unchanged; maintainer notes live in
# comments instead.
@tool
def web_search(query: str) -> str:
    """Search the web using DuckDuckGo.
    
    Args:
        query: The search query."""
    # Always returns a plain string, as the signature declares:
    #   - up to `max_results` "Title/Snippet" entries separated by blank lines,
    #   - "No results found" when nothing could be parsed from the page,
    #   - "Error searching web: ..." when the request or parsing fails.
    max_results = 3
    timeout_seconds = 10  # without a timeout a stalled connection blocks the agent forever

    try:
        encoded_query = urllib.parse.quote(query)
        url = f"https://html.duckduckgo.com/html/?q={encoded_query}"

        # Browser-like User-Agent, kept from the original request.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

        response = requests.get(url, headers=headers, timeout=timeout_seconds)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        results = []
        for result in soup.find_all('div', class_='result__body'):
            title = result.find('h2', class_='result__title')
            snippet = result.find('a', class_='result__snippet')

            # Skip entries that lack either part.
            if title and snippet:
                # Strip the surrounding whitespace/newlines left by the HTML layout.
                results.append(
                    f"Title: {title.get_text().strip()}\nSnippet: {snippet.get_text().strip()}"
                )

            if len(results) >= max_results:
                break

        return "\n\n".join(results) if results else "No results found"

    except Exception as e:
        # Reported as text rather than raised so the agent loop can continue.
        return f"Error searching web: {str(e)}"

# NOTE: the docstring below is used by @tool as the tool description shown to
# the model, so it is deliberately left unchanged; maintainer notes live in
# comments instead.
@tool
def arxiv_search(query: str) -> str:
    """Search Arxiv for scientific papers.
    
    Args:
        query: The search query."""
    # Always returns a plain string, as the signature declares:
    #   - up to 2 papers (first 1000 characters each) as <Document ...> blocks,
    #   - "No results found" when the loader yields nothing,
    #   - "Error searching Arxiv: ..." when the lookup fails.
    try:
        search_docs = ArxivLoader(query=query, load_max_docs=2).load()
    except Exception as e:
        # Reported as text rather than raised so the agent loop can continue.
        return f"Error searching Arxiv: {str(e)}"

    if not search_docs:
        return "No results found"

    def _source(metadata: dict) -> str:
        # ArxivLoader documents normally carry "Title"/"Published"/... and no
        # "source" key; indexing metadata["source"] directly raised KeyError and
        # turned every successful search into an error message.
        return (
            metadata.get("source")
            or metadata.get("entry_id")
            or metadata.get("Title", "")
        )

    return "\n\n---\n\n".join(
        f'<Document source="{_source(doc.metadata)}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
        for doc in search_docs
    )

# System prompt text: tells the model to reply with only the bare answer and
# lists the formatting rules (numbers, dates, names, lists, ...) for that answer.
# This string is runtime text sent to the model — edit wording with care.
system_prompt = """You are a highly accurate question-answering assistant. Your task is to provide precise, direct answers to questions.

Key Rules:
1. Answer Format:
   - For numbers: Provide only the number without units, commas, or formatting
   - For text: Use minimal words, no articles or abbreviations
   - For lists: Use comma-separated values without additional formatting
   - For dates: Use YYYY-MM-DD format unless specified otherwise
   - For names: Use full names without titles or honorifics
   - For country codes: Use official IOC codes (3 letters)
   - For chess moves: Use standard algebraic notation
   - For currency: Use numbers only, no symbols

2. Answer Guidelines:
   - Be extremely precise and direct
   - Do not include any explanatory text
   - Do not use phrases like "FINAL ANSWER" or any markers
   - Do not include units unless explicitly requested
   - Do not use abbreviations unless they are standard (e.g., DNA, RNA)
   - For multiple choice: Provide only the letter or number of the correct answer
   - For reversed text: Provide the answer in normal text
   - For file-based questions: Focus on the specific information requested

3. Error Handling:
   - If uncertain, provide the most likely answer based on available information
   - If completely unsure, provide a reasonable default rather than an error message
   - For file processing errors, indicate the specific issue

4. Special Cases:
   - For mathematical questions: Provide the exact numerical result
   - For historical dates: Use the most widely accepted date
   - For scientific terms: Use the standard scientific notation
   - For geographical locations: Use official names without abbreviations
   - For audio/video questions: Focus on the specific detail requested"""

# System message: the prompt text wrapped in a SystemMessage, built once at
# import time so it can be placed at the head of a conversation.
sys_msg = SystemMessage(content=system_prompt)

# Search tools made available to the agent; build_graph() binds this list to
# the LLM and hands the same list to the graph's tool node.
tools = [wiki_search, web_search, arxiv_search]

def build_graph():
    """Build and compile the tool-calling agent graph.

    The graph has two nodes:

    * ``assistant`` - calls the Groq chat model (with the module-level
      ``tools`` bound) on the conversation so far.
    * ``tools`` - a ``ToolNode`` that runs whatever tool calls the model made.

    Edges: START -> assistant; assistant -> (``tools_condition`` decides
    whether to go to ``tools`` or finish); tools -> assistant.

    Returns:
        The compiled graph; invoke it with ``{"messages": [...]}``.
    """
    # Initialize Groq LLM
    # NOTE(review): credentials are presumably read from the environment
    # populated by load_dotenv() — confirm.
    llm = ChatGroq(
        model="meta-llama/llama-4-maverick-17b-128e-instruct",
        temperature=0.1
    )

    # Bind tools to LLM so it can emit calls to them
    llm_with_tools = llm.bind_tools(tools)

    # Node
    def assistant(state: MessagesState):
        """Run the model on the conversation, with the system prompt first."""
        messages = list(state["messages"])
        # The module-level system prompt was previously defined but never sent,
        # so the answer-format rules had no effect. Prepend it on every call
        # unless the caller already supplied their own system message. It is
        # not written back to the state, so it is never duplicated there.
        if not (messages and isinstance(messages[0], SystemMessage)):
            messages = [sys_msg] + messages
        return {"messages": [llm_with_tools.invoke(messages)]}

    # Build graph
    builder = StateGraph(MessagesState)
    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode(tools))
    builder.add_edge(START, "assistant")
    builder.add_conditional_edges(
        "assistant",
        tools_condition,
    )
    # After tools run, hand their output back to the model.
    builder.add_edge("tools", "assistant")

    # Compile graph
    return builder.compile()

# Manual smoke test: ask one sample question and print the full message trace.
if __name__ == "__main__":
    question = "When was a picture of St. Thomas Aquinas first added to the Wikipedia page on the Principle of double effect?"
    graph = build_graph()
    # Start the conversation with the question as the only human message.
    final_state = graph.invoke({"messages": [HumanMessage(content=question)]})
    # Show every message from the run in order.
    for message in final_state["messages"]:
        message.pretty_print()