itsskofficial's picture
added util files
7018286
raw
history blame
2.87 kB
# test_retrieval.py
import os
from dotenv import load_dotenv
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import SupabaseVectorStore
from supabase.client import Client, create_client
def test_retrieval():
    """Interactively run similarity searches against the Supabase vector store.

    Loads environment variables from a ``.env`` file, connects to Supabase
    using ``SUPABASE_URL`` and ``SUPABASE_SERVICE_KEY``, builds a
    ``SupabaseVectorStore`` over the ``documents`` table (query function
    ``match_documents``) with the ``sentence-transformers/all-mpnet-base-v2``
    embedding model, and then repeatedly prompts for a question and prints
    the top 3 matches with their relevance scores.

    The loop ends when the user types ``exit`` or ``quit`` (case-insensitive,
    surrounding whitespace ignored), or when input ends (EOF / Ctrl-C at the
    prompt). Errors raised while searching are printed and the loop continues.

    Returns:
        None. All results and errors are reported on standard output.
    """
    # Load environment variables from .env file
    load_dotenv()

    # --- 1. Connect to the Database ---
    print("Connecting to Supabase...")
    supabase_url = os.environ.get("SUPABASE_URL")
    supabase_key = os.environ.get("SUPABASE_SERVICE_KEY")

    if not supabase_url or not supabase_key:
        print("Error: SUPABASE_URL and SUPABASE_SERVICE_KEY must be set in your .env file.")
        return

    try:
        supabase: Client = create_client(supabase_url, supabase_key)
    except Exception as e:
        print(f"Error connecting to Supabase: {e}")
        return
    else:
        print("Successfully connected to Supabase.")

    # --- 2. Initialize Embeddings and Vector Store ---
    print("Initializing embeddings model...")
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

    vector_store = SupabaseVectorStore(
        client=supabase,
        embedding=embeddings,
        table_name="documents",
        query_name="match_documents",
    )
    print("Vector store initialized.")

    # --- 3. Start the Interactive Test Loop ---
    print("\nEnter a question to test the similarity search.")
    print("Type 'exit' or 'quit' to stop the script.\n")

    while True:
        # Read the prompt outside the broad search handler: if stdin is
        # closed, input() raises EOFError on every call, and swallowing it
        # with `except Exception` would spin this loop forever.
        try:
            query = input("Question: ").strip()
        except (EOFError, KeyboardInterrupt):
            print("\nExiting...")
            break

        if query.lower() in ("exit", "quit"):
            print("Exiting...")
            break

        # Skip empty / whitespace-only input instead of embedding it.
        if not query:
            continue

        try:
            # --- 4. Perform the Similarity Search ---
            print("\nSearching for similar documents...")
            # We ask for the top 3 matches (k=3) to get more context
            similar_docs = vector_store.similarity_search_with_relevance_scores(query, k=3)

            # --- 5. Display the Results ---
            if not similar_docs:
                print("\n--- No similar documents found in the database. ---")
                print("This might mean your database is empty. Please run the data upload cell in test.ipynb.\n")
            else:
                print(f"\n--- Found {len(similar_docs)} similar document(s) ---")
                for i, (doc, score) in enumerate(similar_docs):
                    print(f"\n--- Result {i+1} (Similarity Score: {score:.4f}) ---")
                    print(doc.page_content)
                print("\n-------------------------------------\n")
        except Exception as e:
            # Top-level boundary of an interactive tool: report the failure
            # and keep the session alive so the user can try another query.
            print(f"An error occurred: {e}")
# Script entry point: start the interactive retrieval session only when this
# file is executed directly, not when it is imported as a module.
if __name__ == "__main__":
    test_retrieval()