|
|
|
|
|
import os |
|
|
from dotenv import load_dotenv |
|
|
from langchain_huggingface import HuggingFaceEmbeddings |
|
|
from langchain_community.vectorstores import SupabaseVectorStore |
|
|
from supabase.client import Client, create_client |
|
|
|
|
|
def test_retrieval():
    """Interactively run similarity searches against the Supabase vector store.

    Loads environment variables with ``load_dotenv()``, reads ``SUPABASE_URL``
    and ``SUPABASE_SERVICE_KEY`` from the environment, creates a Supabase
    client, and wraps the ``documents`` table / ``match_documents`` query in a
    ``SupabaseVectorStore`` backed by the
    ``sentence-transformers/all-mpnet-base-v2`` embeddings model.

    It then prompts for questions in a loop and prints up to three matching
    documents with their relevance scores. Typing ``exit`` or ``quit``
    (case-insensitive, surrounding whitespace ignored), sending end-of-file,
    or pressing Ctrl-C at the prompt ends the loop.

    Returns:
        None. The function returns early (after printing an error message)
        when the credentials are missing or the client cannot be created.
    """
    load_dotenv()

    print("Connecting to Supabase...")
    supabase_url = os.environ.get("SUPABASE_URL")
    supabase_key = os.environ.get("SUPABASE_SERVICE_KEY")

    if not supabase_url or not supabase_key:
        print("Error: SUPABASE_URL and SUPABASE_SERVICE_KEY must be set in your .env file.")
        return

    try:
        supabase: Client = create_client(supabase_url, supabase_key)
    except Exception as e:
        print(f"Error connecting to Supabase: {e}")
        return
    print("Successfully connected to Supabase.")

    print("Initializing embeddings model...")
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

    vector_store = SupabaseVectorStore(
        client=supabase,
        embedding=embeddings,
        table_name="documents",
        query_name="match_documents",
    )
    print("Vector store initialized.")

    print("\nEnter a question to test the similarity search.")
    print("Type 'exit' or 'quit' to stop the script.\n")

    while True:
        # Read the prompt outside the broad handler below: EOFError is an
        # Exception subclass, so catching it there would make the loop spin
        # forever once stdin is closed (input() raises again immediately).
        try:
            query = input("Question: ").strip()
        except (EOFError, KeyboardInterrupt):
            print("\nExiting...")
            break

        if query.lower() in ("exit", "quit"):
            print("Exiting...")
            break

        # Skip empty or whitespace-only input instead of embedding it.
        if not query:
            continue

        # Best-effort: a failed search is reported and the user can try again.
        try:
            print("\nSearching for similar documents...")
            similar_docs = vector_store.similarity_search_with_relevance_scores(query, k=3)

            if not similar_docs:
                print("\n--- No similar documents found in the database. ---")
                print("This might mean your database is empty. Please run the data upload cell in test.ipynb.\n")
            else:
                print(f"\n--- Found {len(similar_docs)} similar document(s) ---")
                for rank, (doc, score) in enumerate(similar_docs, start=1):
                    print(f"\n--- Result {rank} (Similarity Score: {score:.4f}) ---")
                    print(doc.page_content)
                print("\n-------------------------------------\n")
        except Exception as e:
            print(f"An error occurred: {e}")
|
|
|
|
|
if __name__ == "__main__":
    # Start the interactive retrieval check only when run as a script,
    # not when this module is imported.
    test_retrieval()