File size: 1,191 Bytes
17205ab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import os
from dotenv import load_dotenv
from pymongo import MongoClient
from sentence_transformers import SentenceTransformer

# Smoke test for the knowledge-base vector index: connect to MongoDB, print
# basic collection diagnostics, embed one sample question, and print the top
# matches returned by a `$vectorSearch` aggregation.

# Pull settings from a local .env file (if present) into the environment.
load_dotenv()
# NOTE(review): MONGO_URI has no default, so it is None when unset and that
# None is handed to MongoClient as-is — confirm this is intended.
MONGO_URI = os.getenv("MONGO_URI")
DB_NAME = os.getenv("MONGO_DB", "legal_chatbot_db")
COLLECTION_NAME = os.getenv("MONGO_COLLECTION", "datasets")
EMBED_MODEL_NAME = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")

# NOTE(review): tlsAllowInvalidCertificates=True turns off TLS certificate
# validation. Kept unchanged so existing connection behaviour is preserved,
# but it should not be used outside local debugging — confirm and remove.
client = MongoClient(MONGO_URI, tls=True, tlsAllowInvalidCertificates=True)
col = client[DB_NAME][COLLECTION_NAME]
embedder = SentenceTransformer(EMBED_MODEL_NAME)

print("Docs count:", col.count_documents({}))

# find_one() returns None when nothing matches (e.g. an empty collection);
# guard it so the diagnostic does not die with AttributeError on `.keys()`.
sample_doc = col.find_one({}, {"_id": 0})
if sample_doc is None:
    print("One doc keys: <no documents found>")
else:
    print("One doc keys:", list(sample_doc.keys()))

query = "What are my rights in case of workplace harassment?"
# encode() receives a one-element batch; take the single resulting vector and
# convert it to a plain list so it can be embedded in the pipeline document.
q_vec = embedder.encode([query], normalize_embeddings=True)[0].tolist()

pipe = [
    {
        # Presumably "kb_vector_index" is a vector search index defined over
        # the "embedding" field of this collection — verify in the cluster.
        "$vectorSearch": {
            "index": "kb_vector_index",
            "path": "embedding",
            "queryVector": q_vec,
            "numCandidates": 100,
            "limit": 3,
        }
    },
    {
        # Keep only the fields of interest and attach the similarity score
        # reported by the vector search stage.
        "$project": {
            "_id": 0,
            "intent": 1,
            "question": 1,
            "answer": 1,
            "score": {"$meta": "vectorSearchScore"},
        }
    },
]
print("Query:", query)
for h in col.aggregate(pipe):
    print(h)