import os
from typing import Any, Dict, List

import numpy as np
from pymongo import MongoClient
from sentence_transformers import SentenceTransformer


class MongoKB:
    """Knowledge-base wrapper around one MongoDB collection plus an embedder.

    Queries are embedded with a SentenceTransformer model and matched against
    stored vectors through the Atlas ``$vectorSearch`` aggregation stage.
    """

    # Name of the Atlas Vector Search index; it must be created in Atlas
    # separately (``ensure_indexes`` does not create it).
    VECTOR_INDEX = "kb_vector_index"
    # Document field that ``$vectorSearch`` is pointed at.
    VECTOR_PATH = "embedding"
    # Default number of nearest-neighbour candidates examined per query.
    DEFAULT_NUM_CANDIDATES = 100

    def __init__(
        self,
        uri: str,
        db: str,
        kb_collection: str,
        embed_model_name: str,
        *,
        allow_invalid_certs: bool = True,
    ) -> None:
        """Connect to MongoDB over TLS and load the embedding model.

        Args:
            uri: MongoDB connection string.
            db: Database name.
            kb_collection: Name of the collection holding the KB documents.
            embed_model_name: Model identifier passed to SentenceTransformer.
            allow_invalid_certs: Forwarded as ``tlsAllowInvalidCertificates``.
                Defaults to True only to preserve the historical behaviour.
        """
        # NOTE(review): accepting invalid certificates disables TLS server
        # verification and exposes the connection to man-in-the-middle
        # attacks. Pass allow_invalid_certs=False in production.
        self.client = MongoClient(
            uri,
            tls=True,
            tlsAllowInvalidCertificates=allow_invalid_certs,
        )
        self.db = self.client[db]
        self.col = self.db[kb_collection]
        self.embedder = SentenceTransformer(embed_model_name)
        # Embedding dimension as reported by the loaded model.
        self.dim = self.embedder.get_sentence_embedding_dimension()

    def close(self) -> None:
        """Close the underlying MongoDB client and its connections."""
        self.client.close()

    def embed(self, texts: List[str]) -> np.ndarray:
        """Encode ``texts`` with the model, requesting normalized embeddings."""
        return self.embedder.encode(texts, normalize_embeddings=True)

    def search(
        self,
        query: str,
        k: int = 3,
        *,
        num_candidates: int = DEFAULT_NUM_CANDIDATES,
        index: str = VECTOR_INDEX,
    ) -> List[Dict[str, Any]]:
        """Return the top ``k`` KB entries for ``query`` via ``$vectorSearch``.

        Requires MongoDB Atlas Vector Search (MongoDB 7.0+ / Atlas).

        Args:
            query: Free-text query to embed and search for.
            k: Forwarded as the ``limit`` of the ``$vectorSearch`` stage.
            num_candidates: Minimum ``numCandidates`` to request; it is raised
                to ``k`` when smaller, because Atlas rejects a ``limit`` that
                exceeds ``numCandidates``.
            index: Name of the Atlas Vector Search index to use.

        Returns:
            A list of dicts with the projected ``question``, ``answer`` and
            ``intent`` fields plus ``score`` (the ``vectorSearchScore``).
        """
        query_vector = self.embed([query])[0].tolist()
        pipeline = [
            {
                "$vectorSearch": {
                    "index": index,
                    "path": self.VECTOR_PATH,
                    "queryVector": query_vector,
                    "numCandidates": max(num_candidates, k),
                    "limit": k,
                }
            },
            {
                "$project": {
                    "_id": 0,
                    "question": 1,
                    "answer": 1,
                    "intent": 1,
                    "score": {"$meta": "vectorSearchScore"},
                }
            },
        ]
        # Collection.aggregate returns a cursor that fetches every batch;
        # reading only "firstBatch" from a raw command reply would silently
        # drop anything beyond the server's first batch.
        return list(self.col.aggregate(pipeline))

    def insert_many(self, docs: List[Dict[str, Any]]) -> None:
        """Insert ``docs`` into the KB collection; an empty list is a no-op.

        NOTE(review): documents presumably need an ``embedding`` field to be
        found by ``search`` -- confirm against the ingestion code.
        """
        if not docs:
            # PyMongo raises on an empty batch; nothing to insert anyway.
            return
        self.col.insert_many(docs)

    def ensure_indexes(self) -> None:
        """Create the regular (non-vector) fallback indexes."""
        # Single-field index on "intent".
        self.col.create_index("intent")
        # Compound text index over the question and answer fields.
        self.col.create_index([("question", "text"), ("answer", "text")])