tsrrus committed on
Commit
497b4bc
·
verified ·
1 Parent(s): d708ed8

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +34 -225
agent.py CHANGED
@@ -1,77 +1,55 @@
1
  """LangGraph Agent"""
2
-
3
  import os
4
- import json
5
- from typing import Optional, Dict, Any, List
6
  from dotenv import load_dotenv
7
- from langgraph.graph import START, END, StateGraph, MessagesState
8
  from langgraph.prebuilt import tools_condition
9
  from langgraph.prebuilt import ToolNode
10
  from langchain_google_genai import ChatGoogleGenerativeAI
 
 
11
  from langchain_groq import ChatGroq
12
- from langchain_huggingface import (
13
- ChatHuggingFace,
14
- HuggingFaceEndpoint,
15
- HuggingFaceEmbeddings,
16
- )
17
- from langchain_community.utilities import GoogleSerperAPIWrapper
18
  from langchain_community.document_loaders import WikipediaLoader
19
  from langchain_community.document_loaders import ArxivLoader
20
  from langchain_community.vectorstores import SupabaseVectorStore
21
- from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
22
  from langchain_core.tools import tool
23
  from langchain.tools.retriever import create_retriever_tool
24
  from supabase.client import Client, create_client
25
- from langchain_core.prompts import ChatPromptTemplate
26
- from langchain_core.output_parsers import StrOutputParser
27
-
28
- import os
29
- from supabase import Client, create_client
30
-
31
- supabase: Client = create_client(
32
- os.environ.get("SUPABASE_URL"),
33
- os.environ.get("SUPABASE_SERVICE_KEY"))
34
 
35
  load_dotenv()
36
 
37
-
38
  @tool
39
  def multiply(a: int, b: int) -> int:
40
  """Multiply two numbers.
41
-
42
  Args:
43
  a: first int
44
  b: second int
45
  """
46
  return a * b
47
 
48
-
49
  @tool
50
  def add(a: int, b: int) -> int:
51
  """Add two numbers.
52
-
53
  Args:
54
  a: first int
55
  b: second int
56
  """
57
  return a + b
58
 
59
-
60
  @tool
61
  def subtract(a: int, b: int) -> int:
62
  """Subtract two numbers.
63
-
64
  Args:
65
  a: first int
66
  b: second int
67
  """
68
  return a - b
69
 
70
-
71
  @tool
72
  def divide(a: int, b: int) -> int:
73
  """Divide two numbers.
74
-
75
  Args:
76
  a: first int
77
  b: second int
@@ -80,22 +58,18 @@ def divide(a: int, b: int) -> int:
80
  raise ValueError("Cannot divide by zero.")
81
  return a / b
82
 
83
-
84
  @tool
85
  def modulus(a: int, b: int) -> int:
86
  """Get the modulus of two numbers.
87
-
88
  Args:
89
  a: first int
90
  b: second int
91
  """
92
  return a % b
93
 
94
-
95
  @tool
96
  def wiki_search(query: str) -> str:
97
  """Search Wikipedia for a query and return maximum 2 results.
98
-
99
  Args:
100
  query: The search query."""
101
  search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
@@ -103,26 +77,25 @@ def wiki_search(query: str) -> str:
103
  [
104
  f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
105
  for doc in search_docs
106
- ]
107
- )
108
  return {"wiki_results": formatted_search_docs}
109
 
110
-
111
  @tool
112
  def web_search(query: str) -> str:
113
  """Search Tavily for a query and return maximum 3 results.
114
-
115
  Args:
116
  query: The search query."""
117
- search = GoogleSerperAPIWrapper()
118
- result = search.run(query)
119
- return {"web_results": result}
120
-
 
 
 
121
 
122
  @tool
123
  def arvix_search(query: str) -> str:
124
  """Search Arxiv for a query and return maximum 3 result.
125
-
126
  Args:
127
  query: The search query."""
128
  search_docs = ArxivLoader(query=query, load_max_docs=3).load()
@@ -130,86 +103,12 @@ def arvix_search(query: str) -> str:
130
  [
131
  f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
132
  for doc in search_docs
133
- ]
134
- )
135
  return {"arvix_results": formatted_search_docs}
136
 
137
 
138
- def load_gaia_answers() -> List[Dict[str, Any]]:
139
- """Load the GAIA questions and answers from the JSON file."""
140
- try:
141
- with open("gaia.json", "r", encoding="utf-8") as f:
142
- return json.load(f)
143
- except Exception as e:
144
- print(f"Error loading GAIA answers: {e}")
145
- return []
146
-
147
- def find_gaia_answer(question: str) -> Optional[str]:
148
- """
149
- Find the most relevant answer in the GAIA dataset for the given question using LLM.
150
- Returns the answer if found, None otherwise.
151
- """
152
- try:
153
- # Load GAIA data
154
- gaia_data = load_gaia_answers()
155
- if not gaia_data:
156
- return None
157
-
158
- # First, try exact match for efficiency
159
- for entry in gaia_data:
160
- if entry.get("Question", "").strip() == question.strip():
161
- return entry.get("Final answer", "")
162
-
163
-
164
-
165
- # Initialize LLM (using the same provider as the main graph for consistency)
166
- llm = ChatHuggingFace(
167
- llm=HuggingFaceEndpoint(
168
- repo_id="meta-llama/Llama-3.1-8B-Instruct",
169
- temperature=0,
170
- ),
171
- )
172
-
173
- # Create a prompt template
174
- template = """You are an expert at matching questions to answers.
175
- Given the following question and a list of question-answer pairs from the GAIA dataset,
176
- find the most relevant answer. If no good match is found, return 'NO_MATCH'.
177
-
178
- Question: {question}
179
-
180
- Available question-answer pairs:
181
- {qa_pairs}
182
-
183
- Return ONLY the answer text if a match is found, or 'NO_MATCH' if no good match is found.
184
- """
185
-
186
- # Prepare the QA pairs string
187
- qa_pairs = "\n\n".join([
188
- f"Q: {entry.get('Question', '')}\nA: {entry.get('Final answer', '')}"
189
- for entry in gaia_data
190
- ])
191
-
192
- # Create and run the chain
193
- prompt = ChatPromptTemplate.from_template(template)
194
- chain = prompt | llm | StrOutputParser()
195
-
196
- # Get the response
197
- response = chain.invoke({
198
- "question": question,
199
- "qa_pairs": qa_pairs
200
- })
201
-
202
- # Parse the response
203
- response = response.strip()
204
- if response and response.upper() != "NO_MATCH":
205
- return response
206
-
207
- except Exception as e:
208
- print(f"Error in find_gaia_answer: {e}")
209
-
210
- return None
211
 
212
- # Load the system prompt from the file
213
  with open("system_prompt.txt", "r", encoding="utf-8") as f:
214
  system_prompt = f.read()
215
 
@@ -217,15 +116,13 @@ with open("system_prompt.txt", "r", encoding="utf-8") as f:
217
  sys_msg = SystemMessage(content=system_prompt)
218
 
219
  # build a retriever
220
- embeddings = HuggingFaceEmbeddings(
221
- model_name="sentence-transformers/all-mpnet-base-v2"
222
- ) # dim=768
223
  supabase: Client = create_client(
224
- os.environ.get("SUPABASE_URL"), os.environ.get("SUPABAS_SERVICE_KEY")
225
- )
226
  vector_store = SupabaseVectorStore(
227
  client=supabase,
228
- embedding=embeddings,
229
  table_name="documents",
230
  query_name="match_documents_langchain",
231
  )
@@ -236,22 +133,20 @@ create_retriever_tool = create_retriever_tool(
236
  )
237
 
238
 
 
239
  tools = [
240
  multiply,
241
  add,
242
  subtract,
243
  divide,
244
  modulus,
245
- # wiki_search,
246
  web_search,
247
  arvix_search,
248
  ]
249
 
250
- class AgentState(MessagesState):
251
- cheating_used: bool = False
252
-
253
  # Build graph function
254
- def build_graph(provider: str = "huggingface"):
255
  """Build the graph"""
256
  # Load environment variables from .env file
257
  if provider == "google":
@@ -259,29 +154,27 @@ def build_graph(provider: str = "huggingface"):
259
  llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0)
260
  elif provider == "groq":
261
  # Groq https://console.groq.com/docs/models
262
- llm = ChatGroq(
263
- model="qwen-qwq-32b", temperature=0
264
- ) # optional : qwen-qwq-32b gemma2-9b-it
 
265
  elif provider == "huggingface":
266
- # TODO: Add huggingface endpoint
267
  llm = ChatHuggingFace(
268
  llm=HuggingFaceEndpoint(
269
- repo_id="meta-llama/Llama-3.1-8B-Instruct",
270
  temperature=0,
271
  ),
272
  )
273
  else:
274
  raise ValueError("Invalid provider. Choose 'google', 'groq' or 'huggingface'.")
275
-
276
  # Bind tools to LLM
277
  llm_with_tools = llm.bind_tools(tools)
278
 
279
- # Node: Assistant
280
  def assistant(state: MessagesState):
281
  """Assistant node"""
282
  return {"messages": [llm_with_tools.invoke(state["messages"])]}
283
 
284
- # Node: Retriever
285
  def retriever(state: MessagesState):
286
  """Retriever node"""
287
  similar_question = vector_store.similarity_search(state["messages"][0].content)
@@ -289,63 +182,13 @@ def build_graph(provider: str = "huggingface"):
289
  content=f"Here I provide a similar question and answer for reference: \n\n{similar_question[0].page_content}",
290
  )
291
  return {"messages": [sys_msg] + state["messages"] + [example_msg]}
292
-
293
- # Node: Cheating - Check if question exists in GAIA dataset
294
- def cheating_node(state: MessagesState):
295
- """Cheating node that checks if question exists in GAIA dataset"""
296
- if not state["messages"] or not isinstance(state["messages"][-1], HumanMessage):
297
- return {"messages": state["messages"], "cheating_used": False}
298
-
299
- question = state["messages"][-1].content
300
- print("Checking if question exists in GAIA dataset...")
301
- answer = find_gaia_answer(question)
302
-
303
- if answer:
304
- # If answer found in GAIA, return it directly
305
- print("Answer found in GAIA dataset.")
306
- return {
307
- "messages": state["messages"] + [AIMessage(content=f"FINAL ANSWER: {answer}")],
308
- "cheating_used": True
309
- }
310
-
311
- # If not found, continue with normal flow
312
- return {
313
- "messages": state["messages"],
314
- "cheating_used": False
315
- }
316
 
317
- # Build the graph
318
- builder = StateGraph(AgentState)
319
-
320
- # Add nodes
321
- builder.add_node("cheating", cheating_node)
322
  builder.add_node("assistant", assistant)
323
  builder.add_node("tools", ToolNode(tools))
324
-
325
- # Define the workflow
326
- builder.add_edge(START, "cheating")
327
-
328
- # After cheating node, check if we found an answer
329
- def route_after_cheating(state: AgentState):
330
- """Route to end if cheating was used, otherwise to assistant"""
331
- cheating_used = state.get("cheating_used", False)
332
- print(f"Routing after cheating - cheating_used: {cheating_used}")
333
-
334
- # If we found an answer in GAIA, end the flow
335
- if cheating_used:
336
- print("Cheating was used, ending flow")
337
- return END
338
-
339
- # Otherwise, continue to assistant
340
- print("No cheating, continuing to assistant")
341
- return "assistant"
342
-
343
- builder.add_conditional_edges(
344
- "cheating",
345
- route_after_cheating
346
- )
347
-
348
- # Normal flow edges
349
  builder.add_conditional_edges(
350
  "assistant",
351
  tools_condition,
@@ -353,38 +196,4 @@ def build_graph(provider: str = "huggingface"):
353
  builder.add_edge("tools", "assistant")
354
 
355
  # Compile graph
356
- return builder.compile()
357
-
358
- class Agent():
359
- def __init__(self):
360
- self.graph = build_graph(provider="huggingface")
361
-
362
- def __call__(self, question: str) -> str:
363
- messages = [HumanMessage(content=question)]
364
- result = self.graph.invoke({"messages": messages})
365
-
366
- # Print all messages for debugging
367
- for m in result["messages"]:
368
- m.pretty_print()
369
-
370
- # Return the final answer if found
371
- if result["messages"] and result["messages"][-1].content.startswith("FINAL ANSWER: "):
372
- return result["messages"][-1].content.removeprefix("FINAL ANSWER: ")
373
-
374
- # If no final answer found but we have messages, return the last message
375
- if result["messages"]:
376
- return result["messages"][-1].content
377
-
378
- raise ValueError("No response generated.")
379
-
380
- # test
381
- if __name__ == "__main__":
382
- question = "How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia."
383
- # Build the graph
384
- agent = Agent()
385
- print(agent.graph.get_graph().draw_ascii())
386
-
387
- # # Run the graph
388
- answer = agent(question)
389
- print("\n\nSubmitted answer:")
390
- print(answer)
 
1
  """LangGraph Agent"""
 
2
  import os
 
 
3
  from dotenv import load_dotenv
4
+ from langgraph.graph import START, StateGraph, MessagesState
5
  from langgraph.prebuilt import tools_condition
6
  from langgraph.prebuilt import ToolNode
7
  from langchain_google_genai import ChatGoogleGenerativeAI
8
+ from langchain_openai import ChatOpenAI
9
+ from langchain.agents import initialize_agent, Tool
10
  from langchain_groq import ChatGroq
11
+ from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFaceEmbeddings
12
+ from langchain_community.tools.tavily_search import TavilySearchResults
 
 
 
 
13
  from langchain_community.document_loaders import WikipediaLoader
14
  from langchain_community.document_loaders import ArxivLoader
15
  from langchain_community.vectorstores import SupabaseVectorStore
16
+ from langchain_core.messages import SystemMessage, HumanMessage
17
  from langchain_core.tools import tool
18
  from langchain.tools.retriever import create_retriever_tool
19
  from supabase.client import Client, create_client
 
 
 
 
 
 
 
 
 
20
 
21
  load_dotenv()
22
 
 
23
  @tool
24
  def multiply(a: int, b: int) -> int:
25
  """Multiply two numbers.
 
26
  Args:
27
  a: first int
28
  b: second int
29
  """
30
  return a * b
31
 
 
32
  @tool
33
  def add(a: int, b: int) -> int:
34
  """Add two numbers.
 
35
  Args:
36
  a: first int
37
  b: second int
38
  """
39
  return a + b
40
 
 
41
  @tool
42
  def subtract(a: int, b: int) -> int:
43
  """Subtract two numbers.
 
44
  Args:
45
  a: first int
46
  b: second int
47
  """
48
  return a - b
49
 
 
50
  @tool
51
  def divide(a: int, b: int) -> int:
52
  """Divide two numbers.
 
53
  Args:
54
  a: first int
55
  b: second int
 
58
  raise ValueError("Cannot divide by zero.")
59
  return a / b
60
 
 
61
  @tool
62
  def modulus(a: int, b: int) -> int:
63
  """Get the modulus of two numbers.
 
64
  Args:
65
  a: first int
66
  b: second int
67
  """
68
  return a % b
69
 
 
70
  @tool
71
  def wiki_search(query: str) -> str:
72
  """Search Wikipedia for a query and return maximum 2 results.
 
73
  Args:
74
  query: The search query."""
75
  search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
 
77
  [
78
  f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
79
  for doc in search_docs
80
+ ])
 
81
  return {"wiki_results": formatted_search_docs}
82
 
 
83
  @tool
84
  def web_search(query: str) -> str:
85
  """Search Tavily for a query and return maximum 3 results.
 
86
  Args:
87
  query: The search query."""
88
+ search_docs = TavilySearchResults(max_results=3).invoke(query=query)
89
+ formatted_search_docs = "\n\n---\n\n".join(
90
+ [
91
+ f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
92
+ for doc in search_docs
93
+ ])
94
+ return {"web_results": formatted_search_docs}
95
 
96
  @tool
97
  def arvix_search(query: str) -> str:
98
  """Search Arxiv for a query and return maximum 3 result.
 
99
  Args:
100
  query: The search query."""
101
  search_docs = ArxivLoader(query=query, load_max_docs=3).load()
 
103
  [
104
  f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
105
  for doc in search_docs
106
+ ])
 
107
  return {"arvix_results": formatted_search_docs}
108
 
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
 
111
+ # load the system prompt from the file
112
  with open("system_prompt.txt", "r", encoding="utf-8") as f:
113
  system_prompt = f.read()
114
 
 
116
  sys_msg = SystemMessage(content=system_prompt)
117
 
118
  # build a retriever
119
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2") # dim=768
 
 
120
  supabase: Client = create_client(
121
+ os.environ.get("SUPABASE_URL"),
122
+ os.environ.get("SUPABASE_SERVICE_KEY"))
123
  vector_store = SupabaseVectorStore(
124
  client=supabase,
125
+ embedding= embeddings,
126
  table_name="documents",
127
  query_name="match_documents_langchain",
128
  )
 
133
  )
134
 
135
 
136
+
137
  tools = [
138
  multiply,
139
  add,
140
  subtract,
141
  divide,
142
  modulus,
143
+ wiki_search,
144
  web_search,
145
  arvix_search,
146
  ]
147
 
 
 
 
148
  # Build graph function
149
+ def build_graph(provider: str = "groq"):
150
  """Build the graph"""
151
  # Load environment variables from .env file
152
  if provider == "google":
 
154
  llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0)
155
  elif provider == "groq":
156
  # Groq https://console.groq.com/docs/models
157
+ llm = ChatGroq(model="qwen-qwq-32b", temperature=0) # optional : qwen-qwq-32b gemma2-9b-it
158
+ elif provider == "openai":
159
+ # OpenAI
160
+ llm = ChatOpenAI(model="gpt-4", temperature=0)
161
  elif provider == "huggingface":
 
162
  llm = ChatHuggingFace(
163
  llm=HuggingFaceEndpoint(
164
+ url="https://api-inference.huggingface.co/models/Meta-DeepLearning/llama-2-7b-chat-hf",
165
  temperature=0,
166
  ),
167
  )
168
  else:
169
  raise ValueError("Invalid provider. Choose 'google', 'groq' or 'huggingface'.")
 
170
  # Bind tools to LLM
171
  llm_with_tools = llm.bind_tools(tools)
172
 
173
+ # Node
174
  def assistant(state: MessagesState):
175
  """Assistant node"""
176
  return {"messages": [llm_with_tools.invoke(state["messages"])]}
177
 
 
178
  def retriever(state: MessagesState):
179
  """Retriever node"""
180
  similar_question = vector_store.similarity_search(state["messages"][0].content)
 
182
  content=f"Here I provide a similar question and answer for reference: \n\n{similar_question[0].page_content}",
183
  )
184
  return {"messages": [sys_msg] + state["messages"] + [example_msg]}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
 
186
+ builder = StateGraph(MessagesState)
187
+ builder.add_node("retriever", retriever)
 
 
 
188
  builder.add_node("assistant", assistant)
189
  builder.add_node("tools", ToolNode(tools))
190
+ builder.add_edge(START, "retriever")
191
+ builder.add_edge("retriever", "assistant")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  builder.add_conditional_edges(
193
  "assistant",
194
  tools_condition,
 
196
  builder.add_edge("tools", "assistant")
197
 
198
  # Compile graph
199
+ return builder.compile()