Spaces:
Sleeping
Sleeping
"""Smoke-test the MongoDB vector-search index used by the knowledge base.

The script connects to MongoDB with settings read from the environment
(optionally loaded from a ``.env`` file), prints the document count and the
field names of one sample document, then embeds a fixed question and prints
the top matches returned by a ``$vectorSearch`` aggregation.

Environment variables:
    MONGO_URI: connection string (required).
    MONGO_DB: database name; defaults to ``legal_chatbot_db``.
    MONGO_COLLECTION: collection name; defaults to ``datasets``.
    EMBED_MODEL: sentence-transformers model name; defaults to
        ``sentence-transformers/all-MiniLM-L6-v2``.
    MONGO_TLS_ALLOW_INVALID_CERTS: set to ``1``/``true``/``yes`` to skip TLS
        certificate validation (insecure; for local debugging only).
"""
import os

from dotenv import load_dotenv
from pymongo import MongoClient
from sentence_transformers import SentenceTransformer

load_dotenv()

MONGO_URI = os.getenv("MONGO_URI")
DB_NAME = os.getenv("MONGO_DB", "legal_chatbot_db")
COLLECTION_NAME = os.getenv("MONGO_COLLECTION", "datasets")
EMBED_MODEL_NAME = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")

# Fail fast with a readable message instead of handing None to MongoClient.
if not MONGO_URI:
    raise SystemExit("MONGO_URI is not set; define it in the environment or in a .env file.")

# Certificate validation stays ON unless explicitly disabled: accepting invalid
# certificates lets anyone on the network path impersonate the database server.
ALLOW_INVALID_CERTS = os.getenv("MONGO_TLS_ALLOW_INVALID_CERTS", "").strip().lower() in {
    "1",
    "true",
    "yes",
}

client = MongoClient(
    MONGO_URI,
    tls=True,
    tlsAllowInvalidCertificates=ALLOW_INVALID_CERTS,
)
try:
    col = client[DB_NAME][COLLECTION_NAME]
    embedder = SentenceTransformer(EMBED_MODEL_NAME)

    print("Docs count:", col.count_documents({}))

    # find_one returns None for an empty collection; report no keys rather
    # than crashing on None.keys().
    sample_doc = col.find_one({}, {"_id": 0})
    print("One doc keys:", list(sample_doc.keys()) if sample_doc is not None else [])

    query = "What are my rights in case of workplace harassment?"
    # encode() takes a batch; embed the single query and convert the vector to
    # a plain list of floats so it can be sent inside the pipeline document.
    q_vec = embedder.encode([query], normalize_embeddings=True)[0].tolist()

    pipe = [
        {
            "$vectorSearch": {
                "index": "kb_vector_index",
                "path": "embedding",
                "queryVector": q_vec,
                "numCandidates": 100,
                "limit": 3,
            }
        },
        {
            # Return only the readable fields plus the similarity score.
            "$project": {
                "_id": 0,
                "intent": 1,
                "question": 1,
                "answer": 1,
                "score": {"$meta": "vectorSearchScore"},
            }
        },
    ]

    print("Query:", query)
    for h in col.aggregate(pipe):
        print(h)
finally:
    # Release the connection pool even if the query or model load fails.
    client.close()