# Sai809701
# Added model, dataset and Dockerfile
# 17205ab
import os
from dotenv import load_dotenv
from pymongo import MongoClient
from sentence_transformers import SentenceTransformer
# Pull settings from a .env file (if one exists) into the process environment
# before any os.getenv() lookup below.
load_dotenv()

# Connection and model settings. Everything except MONGO_URI has a default.
MONGO_URI = os.getenv("MONGO_URI")
DB_NAME = os.getenv("MONGO_DB", "legal_chatbot_db")
COLLECTION_NAME = os.getenv("MONGO_COLLECTION", "datasets")
EMBED_MODEL_NAME = os.getenv(
    "EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2"
)

# Fail fast with a readable message: passing None as the host makes
# MongoClient fall back to its default host, which would only surface later
# as an opaque connection/timeout error on the first query.
if not MONGO_URI:
    raise RuntimeError(
        "MONGO_URI is not set; define it in the environment or in a .env file"
    )

# NOTE(security): tlsAllowInvalidCertificates=True turns off TLS certificate
# validation, which exposes the connection to man-in-the-middle attacks.
# Kept as-is so existing deployments keep working, but it should be removed
# (or replaced with a proper CA bundle via tlsCAFile) outside of local testing.
client = MongoClient(MONGO_URI, tls=True, tlsAllowInvalidCertificates=True)
col = client[DB_NAME][COLLECTION_NAME]

# Sentence-embedding model used to vectorize queries.
embedder = SentenceTransformer(EMBED_MODEL_NAME)
# Sanity check of the collection: total document count and the field names
# of one arbitrary document (with _id excluded by the projection).
print("Docs count:", col.count_documents({}))

# find_one() returns None when the collection has no documents, so guard
# before reading keys instead of crashing with AttributeError.
sample_doc = col.find_one({}, {"_id": 0})
if sample_doc is None:
    print("One doc keys:", "<no documents found>")
else:
    print("One doc keys:", list(sample_doc.keys()))
# Sample question used to exercise the vector index end to end.
query = "What are my rights in case of workplace harassment?"

# Encode the single query as a one-item batch, then take the first (only)
# embedding and convert it to a plain list for use in the pipeline.
query_embeddings = embedder.encode([query], normalize_embeddings=True)
q_vec = query_embeddings[0].tolist()

# Stage 1: vector search against the "embedding" field using the
# "kb_vector_index" index, returning at most 3 documents.
vector_search_stage = {
    "$vectorSearch": {
        "index": "kb_vector_index",
        "path": "embedding",
        "queryVector": q_vec,
        "numCandidates": 100,
        "limit": 3,
    }
}

# Stage 2: keep only the fields of interest and expose the search score.
projection_stage = {
    "$project": {
        "_id": 0,
        "intent": 1,
        "question": 1,
        "answer": 1,
        "score": {"$meta": "vectorSearchScore"},
    }
}

pipe = [vector_search_stage, projection_stage]

print("Query:", query)
# Print each matching document as returned by the aggregation.
for hit in col.aggregate(pipe):
    print(hit)