CheeYung commited on
Commit
7d88664
·
1 Parent(s): 065bc2a

Update basic agent framework

Browse files
Files changed (6) hide show
  1. agent.py +100 -3
  2. app.py +11 -4
  3. prompt.txt +49 -0
  4. requirements.txt +14 -1
  5. sample.ipynb +43 -20
  6. supabase.sql +34 -26
agent.py CHANGED
@@ -1,13 +1,56 @@
1
  import os
 
2
  from typing import TypedDict, Annotated
 
3
  from langgraph.graph import MessagesState, START, StateGraph
4
  from langgraph.graph.message import add_messages
5
  from langgraph.prebuilt import tools_condition, ToolNode
6
  from langchain_core.messages import HumanMessage, SystemMessage, AnyMessage
7
  from langchain_core.tools import tool
 
 
8
  from langchain_community.tools.tavily_search import TavilySearchResults
 
 
 
9
  from langchain_google_genai import ChatGoogleGenerativeAI
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  @tool
12
  def add(a: int, b: int) -> int:
13
  """Add two numbers.
@@ -65,6 +108,49 @@ def modulus(a: int, b: int) -> int:
65
  """
66
  return a % b
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  # list of tools
69
  tools = [
70
  add,
@@ -72,7 +158,10 @@ tools = [
72
  multiply,
73
  power,
74
  divide,
75
- modulus
 
 
 
76
  ]
77
 
78
  # Generate the AgentState and Agent graph
@@ -90,25 +179,33 @@ def build_graph():
90
  return { "messages": [llm_with_tools.invoke(state['messages'])] }
91
 
92
  def retriever(state: AgentState):
93
- return None
 
 
 
 
94
 
95
  builder = StateGraph(AgentState)
96
 
97
  # Define nodes: these do the work
98
  builder.add_node("assistant", assistant)
 
99
  builder.add_node("tools", ToolNode(tools))
 
100
  builder.add_conditional_edges(
101
  "assistant",
102
  tools_condition
103
  )
104
  builder.add_edge("tools", "assistant")
 
105
 
106
  # Compile graph
107
  return builder.compile()
108
 
109
  # Test
110
  if __name__ == "__main__":
111
- question = "When was a picture of St. Thomas Aquinas first added to the Wikipedia page on the Principle of double effect?"
 
112
  graph = build_graph()
113
  messages = [HumanMessage(content=question)]
114
  messages = graph.invoke({ "messages": messages })
 
1
  import os
2
+ from dotenv import load_dotenv
3
  from typing import TypedDict, Annotated
4
+
5
  from langgraph.graph import MessagesState, START, StateGraph
6
  from langgraph.graph.message import add_messages
7
  from langgraph.prebuilt import tools_condition, ToolNode
8
  from langchain_core.messages import HumanMessage, SystemMessage, AnyMessage
9
  from langchain_core.tools import tool
10
+ from langchain.tools.retriever import create_retriever_tool
11
+
12
  from langchain_community.tools.tavily_search import TavilySearchResults
13
+ from langchain_community.document_loaders import WikipediaLoader
14
+ from langchain_community.document_loaders import ArxivLoader
15
+
16
  from langchain_google_genai import ChatGoogleGenerativeAI
17
 
18
+ from langchain_huggingface import HuggingFaceEmbeddings
19
+
20
+ from langchain_community.vectorstores import SupabaseVectorStore
21
+ from langchain.schema.document import Document
22
+ from supabase import create_client, Client
23
+
24
+ load_dotenv()
25
+
26
+ __embeddings = HuggingFaceEmbeddings(
27
+ model_name="sentence-transformers/all-mpnet-base-v2",
28
+ model_kwargs= { 'device': 'cpu' })
29
+
30
+ # connect to supabase
31
+ url: str = os.environ.get("SUPABASE_URL")
32
+ key: str = os.environ.get("SUPABASE_SECRET_KEY")
33
+ __supabase: Client = create_client(url, key)
34
+
35
+ # build retriever
36
+ vector_store = SupabaseVectorStore(
37
+ client=__supabase,
38
+ embedding=__embeddings,
39
+ table_name="documents",
40
+ query_name="match_documents",
41
+ )
42
+ question_retrieval_tool = create_retriever_tool(
43
+ vector_store.as_retriever(),
44
+ name="Question retriever",
45
+ description="Find similar questions in the vector database for the given question."
46
+ )
47
+
48
+ # load prompt message from txt file and convert to System Message
49
+ with open('prompt.txt', 'r', encoding='utf-8') as f:
50
+ sys_prompt = f.read()
51
+
52
+ __sys_msg = SystemMessage(content=sys_prompt)
53
+
54
  @tool
55
  def add(a: int, b: int) -> int:
56
  """Add two numbers.
 
108
  """
109
  return a % b
110
 
111
@tool
def wiki_search(query: str) -> str:
    """Search Wikipedia for a query and return a maximum of 2 results.

    Args:
        query: The search query.

    Returns:
        The matching pages concatenated as pseudo-XML <Document> blocks,
        separated by "---" dividers, so the LLM can attribute content to
        its source page.
    """
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    # Close the envelope with </Document> (the original emitted a second
    # opening tag) and return a plain string to honor the -> str contract
    # declared in the signature.
    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n\t{doc.page_content}\n</Document>'
        for doc in search_docs
    )
    return formatted_search_docs
124
+
125
@tool
def web_search(query: str) -> str:
    """Search Tavily for a query and return a maximum of 3 results.

    Args:
        query: The search query.

    Returns:
        The matching results concatenated as pseudo-XML <Document> blocks,
        separated by "---" dividers.
    """
    # TavilySearchResults.invoke takes the query string as its single
    # positional input (invoke(query=query) is a TypeError) and returns a
    # list of plain dicts with "url" and "content" keys — not Document
    # objects, so doc.metadata[...] would have raised AttributeError.
    search_results = TavilySearchResults(max_results=3).invoke(query)
    formatted_search_results = "\n\n---\n\n".join(
        f'<Document source="{res["url"]}"/>\n\t{res["content"]}\n</Document>'
        for res in search_results
    )
    return formatted_search_results
138
+
139
@tool
def arxiv_search(query: str) -> str:
    """Search Arxiv for a query and return a maximum of 3 results.

    Args:
        query: The search query.

    Returns:
        The matching papers (first 1000 chars each) concatenated as
        pseudo-XML <Document> blocks, separated by "---" dividers.
    """
    search_docs = ArxivLoader(query=query, load_max_docs=3).load()
    # NOTE(review): ArxivLoader metadata typically exposes keys like
    # "Published"/"Title" rather than "source" — use .get so a missing key
    # degrades to an empty attribute instead of raising; confirm against
    # the installed langchain-community version.
    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document source="{doc.metadata.get("source", "")}" page="{doc.metadata.get("page", "")}"/>\n\t{doc.page_content[:1000]}\n</Document>'
        for doc in search_docs
    )
    return formatted_search_docs
152
+
153
+
154
  # list of tools
155
  tools = [
156
  add,
 
158
  multiply,
159
  power,
160
  divide,
161
+ modulus,
162
+ wiki_search,
163
+ web_search,
164
+ arxiv_search
165
  ]
166
 
167
  # Generate the AgentState and Agent graph
 
179
  return { "messages": [llm_with_tools.invoke(state['messages'])] }
180
 
181
  def retriever(state: AgentState):
182
+ similar_question = vector_store.similarity_search(state['messages'][0].content)
183
+ example_msg = HumanMessage(
184
+ content=f"Here I provide a similar question and answer for reference: \n\n{similar_question[0].page_content}",
185
+ )
186
+ return { "messages": [__sys_msg] + state['messages'] + [example_msg] }
187
 
188
  builder = StateGraph(AgentState)
189
 
190
  # Define nodes: these do the work
191
  builder.add_node("assistant", assistant)
192
+ builder.add_node("retriever", retriever)
193
  builder.add_node("tools", ToolNode(tools))
194
+ builder.add_edge(START, "retriever")
195
  builder.add_conditional_edges(
196
  "assistant",
197
  tools_condition
198
  )
199
  builder.add_edge("tools", "assistant")
200
+ builder.add_edge("retriever", "assistant")
201
 
202
  # Compile graph
203
  return builder.compile()
204
 
205
  # Test
206
  if __name__ == "__main__":
207
+ # question = "When was a picture of St. Thomas Aquinas first added to the Wikipedia page on the Principle of double effect?"
208
+ question = "Data feed 100ms happened once. If 50second "
209
  graph = build_graph()
210
  messages = [HumanMessage(content=question)]
211
  messages = graph.invoke({ "messages": messages })
app.py CHANGED
@@ -3,6 +3,8 @@ import gradio as gr
3
  import requests
4
  import inspect
5
  import pandas as pd
 
 
6
 
7
  # (Keep Constants as is)
8
  # --- Constants ---
@@ -12,12 +14,17 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
12
  # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
13
  class BasicAgent:
14
  def __init__(self):
 
15
  print("BasicAgent initialized.")
16
  def __call__(self, question: str) -> str:
17
- print(f"Agent received question (first 50 chars): {question[:50]}...")
18
- fixed_answer = "This is a default answer."
19
- print(f"Agent returning fixed answer: {fixed_answer}")
20
- return fixed_answer
 
 
 
 
21
 
22
  def run_and_submit_all( profile: gr.OAuthProfile | None):
23
  """
 
3
  import requests
4
  import inspect
5
  import pandas as pd
6
+ from langchain_core.messages import HumanMessage
7
+ from agent import build_graph
8
 
9
  # (Keep Constants as is)
10
  # --- Constants ---
 
14
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
15
  class BasicAgent:
16
  def __init__(self):
17
+ self.graph = build_graph()
18
  print("BasicAgent initialized.")
19
  def __call__(self, question: str) -> str:
20
+ messages = [HumanMessage(content=question)]
21
+ messages = self.graph.invoke({ "messages": messages })
22
+ answer = messages['messages'][-1].content
23
+
24
+ # print(f"Agent received question (first 50 chars): {question[:50]}...")
25
+ # fixed_answer = "This is a default answer."
26
+ # print(f"Agent returning fixed answer: {fixed_answer}")
27
+ return answer[14:]
28
 
29
  def run_and_submit_all( profile: gr.OAuthProfile | None):
30
  """
prompt.txt ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ You are a helpful agent responsible for answering questions using a set of tools provided.
2
+ If the tool(s) not available, you can try to search and find the solution or information online.
3
+ You can also use your own knowledge to answer the question.
4
+ ==========================
5
+ Here are a few examples showing you how to answer the question step by step.
6
+
7
+ Question 1: In terms of geographical distance between capital cities, which 2 countries are the furthest from each other within the ASEAN bloc according to wikipedia? Answer using a comma separated list, ordering the countries by alphabetical order.
8
+ Steps:
9
+ 1. Search the web for "ASEAN bloc".
10
+ 2. Click the Wikipedia result for the ASEAN Free Trade Area.
11
+ 3. Scroll down to find the list of member states.
12
+ 4. Click into the Wikipedia pages for each member state, and note its capital.
13
+ 5. Search the web for the distance between the first two capitals. The results give travel distance, not geographic distance, which might affect the answer.
14
+ 6. Thinking it might be faster to judge the distance by looking at a map, search the web for "ASEAN bloc" and click into the images tab.
15
+ 7. View a map of the member countries. Since they're clustered together in an arrangement that's not very linear, it's difficult to judge distances by eye.
16
+ 8. Return to the Wikipedia page for each country. Click the GPS coordinates for each capital to get the coordinates in decimal notation.
17
+ 9. Place all these coordinates into a spreadsheet.
18
+ 10. Write formulas to calculate the distance between each capital.
19
+ 11. Write formula to get the largest distance value in the spreadsheet.
20
+ 12. Note which two capitals that value corresponds to: Jakarta and Naypyidaw.
21
+ 13. Return to the Wikipedia pages to see which countries those respective capitals belong to: Indonesia, Myanmar.
22
+ Tools:
23
+ 1. Search engine
24
+ 2. Web browser
25
+ 3. Microsoft Excel / Google Sheets
26
+ Final Answer: Indonesia, Myanmar
27
+
28
+ Question 2: Review the chess position provided in the image. It is black's turn. Provide the correct next move for black which guarantees a win. Please provide your response in algebraic notation.
29
+ Steps:
30
+ Step 1: Evaluate the position of the pieces in the chess position
31
+ Step 2: Report the best move available for black: "Rd5"
32
+ Tools:
33
+ 1. Image recognition tools
34
+ Final Answer: Rd5
35
+
36
+ Question 3: Solve the equation x^2 + 5x = -6
37
+ Steps:
38
+ Step 1: Moving all terms to left-hand side until the right-hand side become zero.
39
+ Step 2: Identify the highest power of polynomial in left-hand side. In this case the highest power is 2, this equation is a quadratic equation.
40
+ Step 3: Identify the coefficients of each term in this quadratic equation.
41
+ Step 3: Write quadratic formula and calculate the possible solutions.
42
+ Tools:
43
+ 1. Search engine
44
+ 2. Web browser
45
+ 3. Calculator
46
+ Final Answer: x=-2, x=-3
47
+
48
+ ==========================
49
+ Now, please answer the following question step by step.
requirements.txt CHANGED
@@ -1,4 +1,17 @@
1
  gradio
2
  requests
3
  langchain
4
- langchain-google-genai
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  gradio
2
  requests
3
  langchain
4
+ langchain-community
5
+ langchain-core
6
+ langchain-google-genai
7
+ langchain-huggingface
8
+ langchain-tavily
9
+ langchain-chroma
10
+ langgraph
11
+ huggingface_hub
12
+ supabase
13
+ arxiv
14
+ pymupdf
15
+ wikipedia
16
+ pgvector
17
+ python-dotenv
sample.ipynb CHANGED
@@ -230,7 +230,7 @@
230
  },
231
  {
232
  "cell_type": "code",
233
- "execution_count": 6,
234
  "id": "42263deb",
235
  "metadata": {},
236
  "outputs": [],
@@ -246,14 +246,14 @@
246
  " docs.append(doc)\n",
247
  "\n",
248
  "# insert the documents to the vector database\n",
249
- "try:\n",
250
- " response = (\n",
251
- " supabase.table('documents')\n",
252
- " .insert(docs)\n",
253
- " .execute()\n",
254
- " )\n",
255
- "except Exception as exception:\n",
256
- " print(\"Error inserting data into Supabase:\", exception)"
257
  ]
258
  },
259
  {
@@ -273,17 +273,6 @@
273
  "retriever = vector_store.as_retriever()"
274
  ]
275
  },
276
- {
277
- "cell_type": "code",
278
- "execution_count": null,
279
- "id": "ff5934c3",
280
- "metadata": {},
281
- "outputs": [],
282
- "source": [
283
- "# query = \"What did the president say about Ketanji Brown Jackson\"\n",
284
- "# matched_docs = vector_store.similarity_search(query, 2)"
285
- ]
286
- },
287
  {
288
  "cell_type": "code",
289
  "execution_count": 11,
@@ -307,6 +296,40 @@
307
  "docs = retriever.invoke(query)\n",
308
  "docs[0]"
309
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
310
  }
311
  ],
312
  "metadata": {
 
230
  },
231
  {
232
  "cell_type": "code",
233
+ "execution_count": null,
234
  "id": "42263deb",
235
  "metadata": {},
236
  "outputs": [],
 
246
  " docs.append(doc)\n",
247
  "\n",
248
  "# insert the documents to the vector database\n",
249
+ "#try:\n",
250
+ "# response = (\n",
251
+ "# supabase.table('documents')\n",
252
+ "# .insert(docs)\n",
253
+ "# .execute()\n",
254
+ "# )\n",
255
+ "#except Exception as exception:\n",
256
+ "# print(\"Error inserting data into Supabase:\", exception)"
257
  ]
258
  },
259
  {
 
273
  "retriever = vector_store.as_retriever()"
274
  ]
275
  },
 
 
 
 
 
 
 
 
 
 
 
276
  {
277
  "cell_type": "code",
278
  "execution_count": 11,
 
296
  "docs = retriever.invoke(query)\n",
297
  "docs[0]"
298
  ]
299
+ },
300
+ {
301
+ "cell_type": "markdown",
302
+ "id": "a2e6497a",
303
+ "metadata": {},
304
+ "source": [
305
+ "# Tavily Search"
306
+ ]
307
+ },
308
+ {
309
+ "cell_type": "code",
310
+ "execution_count": 12,
311
+ "id": "a9448c8c",
312
+ "metadata": {},
313
+ "outputs": [],
314
+ "source": [
315
+ "from langchain_community.tools.tavily_search import TavilySearchResults\n",
316
+ "from langchain_community.document_loaders import WikipediaLoader\n",
317
+ "from langchain_community.document_loaders import ArxivLoader"
318
+ ]
319
+ },
320
+ {
321
+ "cell_type": "code",
322
+ "execution_count": 13,
323
+ "id": "c3de569e",
324
+ "metadata": {},
325
+ "outputs": [],
326
+ "source": [
327
+ "question_retrieval_tool = create_retriever_tool(\n",
328
+ " vector_store.as_retriever(),\n",
329
+ " name=\"Question retriever\",\n",
330
+ " description=\"Find similar questions in the vector database for the given question.\"\n",
331
+ ")"
332
+ ]
333
  }
334
  ],
335
  "metadata": {
supabase.sql CHANGED
@@ -1,30 +1,38 @@
1
  -- Drop old function
2
  drop function if exists match_documents (vector(1536), int);
3
 
 
 
 
 
 
 
 
 
4
  -- Create a function to search for documents
5
- create function match_documents (
6
- query_embedding vector(1536),
7
- match_count int DEFAULT null,
8
- filter jsonb DEFAULT '{}'
9
- ) returns table (
10
- id bigint,
11
- content text,
12
- metadata jsonb,
13
- similarity float
14
- )
15
- language plpgsql
16
- as $$
17
- #variable_conflict use_column
18
- begin
19
- return query
20
- select
21
- id,
22
- content,
23
- metadata,
24
- 1 - (documents.embedding <=> query_embedding) as similarity
25
- from documents
26
- where metadata @> filter
27
- order by documents.embedding <=> query_embedding
28
- limit match_count;
29
- end;
30
- $$;
 
1
  -- Drop old function
2
  drop function if exists match_documents (vector(1536), int);
3
 
4
+ -- Create a table to store your documents
5
+ create table documents (
6
+ id bigserial primary key,
7
+ content text, -- corresponds to Document.pageContent
8
+ metadata jsonb, -- corresponds to Document.metadata
9
+ embedding vector(768) -- 768 works for Gemini embeddings, change if needed
10
+ );
11
+
12
  -- Create a function to search for documents
13
+ create function match_documents (
14
+ query_embedding vector(768),
15
+ match_count int DEFAULT null,
16
+ filter jsonb DEFAULT '{}'
17
+ ) returns table (
18
+ id bigint,
19
+ content text,
20
+ metadata jsonb,
21
+ similarity float
22
+ )
23
+ language plpgsql
24
+ as $$
25
+ #variable_conflict use_column
26
+ begin
27
+ return query
28
+ select
29
+ id,
30
+ content,
31
+ metadata,
32
+ 1 - (documents.embedding <=> query_embedding) as similarity
33
+ from documents
34
+ where metadata @> filter
35
+ order by documents.embedding <=> query_embedding
36
+ limit match_count;
37
+ end;
38
+ $$;