# File size: 2,867 Bytes
# 7018286
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# test_retrieval.py
import os
from dotenv import load_dotenv
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import SupabaseVectorStore
from supabase.client import Client, create_client

def test_retrieval():
    """
    Interactively test similarity search against the Supabase vector store.

    Loads environment variables from a .env file, connects to Supabase using
    SUPABASE_URL and SUPABASE_SERVICE_KEY, builds a SupabaseVectorStore over
    the "documents" table (query function "match_documents"), then repeatedly
    prompts for a question and prints the top 3 matches with their relevance
    scores.

    The session ends when the user types 'exit' or 'quit', presses Ctrl-C at
    the prompt, or stdin reaches end-of-file. Setup failures are reported on
    stdout and the function returns early.

    Takes no arguments and returns None; all results go to stdout.

    NOTE: despite the ``test_`` prefix this is an interactive script, not an
    automated test case -- it blocks on ``input()``.
    """
    # Load environment variables from .env file
    load_dotenv()

    # --- 1. Connect to the Database ---
    print("Connecting to Supabase...")
    supabase_url = os.environ.get("SUPABASE_URL")
    supabase_key = os.environ.get("SUPABASE_SERVICE_KEY")

    if not supabase_url or not supabase_key:
        print("Error: SUPABASE_URL and SUPABASE_SERVICE_KEY must be set in your .env file.")
        return

    try:
        supabase: Client = create_client(supabase_url, supabase_key)
        print("Successfully connected to Supabase.")
    except Exception as e:
        print(f"Error connecting to Supabase: {e}")
        return

    # --- 2. Initialize Embeddings and Vector Store ---
    print("Initializing embeddings model...")
    try:
        # Presumably the same model that produced the stored embeddings --
        # TODO confirm against the upload code.
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

        vector_store = SupabaseVectorStore(
            client=supabase,
            embedding=embeddings,
            table_name="documents",
            query_name="match_documents",
        )
    except Exception as e:
        # Report setup failures the same way as the connection step above
        # instead of aborting with a raw traceback.
        print(f"Error initializing embeddings or vector store: {e}")
        return
    print("Vector store initialized.")

    # --- 3. Start the Interactive Test Loop ---
    print("\nEnter a question to test the similarity search.")
    print("Type 'exit' or 'quit' to stop the script.\n")

    while True:
        # Read the prompt outside the broad handler below: EOFError is a
        # subclass of Exception, so catching it there would report the error
        # and re-prompt forever once stdin is closed.
        try:
            raw_query = input("Question: ")
        except (EOFError, KeyboardInterrupt):
            print("\nExiting...")
            break

        # Strip surrounding whitespace so " exit" still quits and a
        # whitespace-only line is treated as empty.
        query = raw_query.strip()
        if not query:
            continue

        if query.lower() in ("exit", "quit"):
            print("Exiting...")
            break

        try:
            # --- 4. Perform the Similarity Search ---
            print("\nSearching for similar documents...")
            # We ask for the top 3 matches (k=3) to get more context
            similar_docs = vector_store.similarity_search_with_relevance_scores(query, k=3)

            # --- 5. Display the Results ---
            if not similar_docs:
                print("\n--- No similar documents found in the database. ---")
                print("This might mean your database is empty. Please run the data upload cell in test.ipynb.\n")
            else:
                print(f"\n--- Found {len(similar_docs)} similar document(s) ---")
                for i, (doc, score) in enumerate(similar_docs, start=1):
                    print(f"\n--- Result {i} (Similarity Score: {score:.4f}) ---")
                    print(doc.page_content)
                print("\n-------------------------------------\n")

        except Exception as e:
            # Best-effort: a failed query should not end the session.
            print(f"An error occurred: {e}")

# Start the interactive retrieval session only when this file is executed
# directly as a script, not when it is imported as a module.
if __name__ == "__main__":
    test_retrieval()