AIxGAIA

Sleeping

App Files Files Community

AC-Angelo93 committed on Apr 29, 2025

Commit

8d47cbc

verified ·

1 Parent(s): e4b06ec

Update agent.py

Browse files

Files changed (1) hide show

agent.py +49 -17

agent.py CHANGED Viewed

@@ -1,17 +1,34 @@
 # agent.py
 import os
-from supabase import create_client
 from sentence_transformers import SentenceTransformers
 from serpapi import GoogleSearch
 from langgraph import Graph, LLM, tool #or other graph library
 # ----Supabase setup----
 SUPABASE_URL = os.getenv("SUPABASE_URL")
 SUPABASE_KEY = os.getenv("SUPABASE_SERVICE_KEY")
 EMBED_MODEL_ID = os.getenv("HF_EMBEDDING_MODEL")
-sb_client = create_client(SUPABASE_URL, SUPABASE_KEY)
 embedder = SentenceTransformers(EMBED_MODEL_ID)
 # 1) Define tools
@@ -27,26 +44,41 @@ def calculator(expr: str) -> str:
 # @tool
 # def web_search(query:str) -> str:
 # ...
-@tool
-def retrieve_docs(query: str, k: int = 3) -> str:
-    """
-    Fetch tpo-k docs from Supabase vector store.
-    Returns the concatenated text.
-    """
     # --- embed the query
-    q_emb = embedder.encode(query).tolist()
     # --- query the embedding table
-    response = (
-        sb_client
-        .rpc("match_documents", {"query_embedding": q_emb, "match_count": k})
-        .execute()
-    )
-    rows = response.data
     # ---- concatenate the content field
-    docs = [row["content"] for row in rows]
-    return "\n\n---\n\n".join(docs)
 SERPAPI_KEY = os.getenv("SERPAPY_KEY")
 # ---- web_search tool

 # agent.py
 import os
+#from supabase import create_client
 from sentence_transformers import SentenceTransformers
 from serpapi import GoogleSearch
+import pandas as pd
+import faiss
 from langgraph import Graph, LLM, tool #or other graph library
+# ─── 1) Load & embed all documents at startup (module level, so this runs on import) ───
+# 1a) Read CSV of docs; the path is relative, so it is resolved against the current working directory
+df = pd.read_csv("documents.csv")
+DOCS = df["content"].tolist()  # only the "content" column is kept
+# 1b) Create an embedding model. NOTE(review): the import above brings in `SentenceTransformers` (plural) while this line calls `SentenceTransformer`; confirm which name the sentence-transformers package exports and align the import with it
+EMBEDDER = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
+# 1c) Compute embeddings for every document (cast to float32) and build a flat L2 FAISS index over them
+EMBS = EMBEDDER.encode(DOCS, show_progress_bar=True).astype("float32")
+INDEX = faiss.IndexFlatL2(EMBS.shape[1])  # index dimension is taken from the second axis of EMBS
+INDEX.add(EMBS)  # rows are added in DOCS order; retrieve_docs maps returned ids back via DOCS[i]
 # ----Supabase setup----
 # NOTE(review): create_client is commented out in this revision, so the two
 # Supabase values below are not consumed by any code visible here; confirm
 # nothing else reads them before removing.
 SUPABASE_URL = os.getenv("SUPABASE_URL")
 SUPABASE_KEY = os.getenv("SUPABASE_SERVICE_KEY")
 EMBED_MODEL_ID = os.getenv("HF_EMBEDDING_MODEL")
+#sb_client = create_client(SUPABASE_URL, SUPABASE_KEY)
 # NOTE(review): the FAISS-based retrieve_docs embeds queries with EMBEDDER, and
 # the only visible use of this lowercase `embedder` is commented out; this
 # looks like a leftover second model load. Confirm it is still needed.
 embedder = SentenceTransformers(EMBED_MODEL_ID)
 # 1) Define tools
 # @tool
 # def web_search(query:str) -> str:
 # ...
+#@tool
+#def retrieve_docs(query: str, k: int = 3) -> str:
+    #"""
+    #Fetch top-k docs from Supabase vector store.
+    #Returns the concatenated text.
+    #"""
     # --- embed the query
+    #q_emb = embedder.encode(query).tolist()
     # --- query the embedding table
+    #response = (
+   #     sb_client
+   #     .rpc("match_documents", {"query_embedding": q_emb, "match_count": k})
+   #     .execute()
+   # )
+   # rows = response.data
     # ---- concatenate the content field
+   # docs = [row["content"] for row in rows]
+   # return "\n\n---\n\n".join(docs)
+@tool
+def retrieve_docs(query: str, k: int = 3) -> str:
+    """
+    k-NN search over our in-memory FAISS index.
+    Returns the top-k documents concatenated.
+
+    Args:
+        query: Text to embed with EMBEDDER and look up in INDEX.
+        k: Maximum number of documents to return. Values larger than the
+            number of loaded documents are capped to that number.
+
+    Returns:
+        The matching documents, nearest first, separated by a line
+        containing only "---". An empty string is returned when k is
+        less than 1 or when no documents are loaded.
+    """
+    # Never ask FAISS for more neighbours than there are documents, and
+    # bail out early for k < 1 or an empty corpus (a search with k == 0
+    # is not meaningful).
+    k = min(k, len(DOCS))
+    if k < 1:
+        return ""
+    # 1) Embed the query (FAISS expects a 2-D float32 array)
+    q_emb = EMBEDDER.encode([query]).astype("float32")
+    # 2) Search FAISS; distances are not needed here
+    _, ids = INDEX.search(q_emb, k)
+    # 3) Gather and return the texts. FAISS pads missing results with -1,
+    #    and DOCS[-1] would silently return the last document, so negative
+    #    ids are dropped.
+    hits = [DOCS[i] for i in ids[0] if i >= 0]
+    return "\n\n---\n\n".join(hits)
 # NOTE(review): the environment variable read here is spelled "SERPAPY_KEY"
 # (with a Y), unlike the constant name; confirm it matches the secret name
 # configured for the deployment.
 SERPAPI_KEY = os.getenv("SERPAPY_KEY")
 # ---- web_search tool