File size: 1,734 Bytes
17205ab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import os
from typing import List, Dict, Any
from pymongo import MongoClient
from sentence_transformers import SentenceTransformer
import numpy as np

class MongoKB:
    """Question/answer knowledge base stored in MongoDB with vector search.

    Wraps one MongoDB collection plus a SentenceTransformer model. Queries
    are embedded with the model and matched against the ``embedding`` field
    of stored documents through the Atlas ``$vectorSearch`` aggregation
    stage.
    """

    def __init__(
        self,
        uri: str,
        db: str,
        kb_collection: str,
        embed_model_name: str,
        *,
        tls_allow_invalid_certificates: bool = True,
    ):
        """Connect to MongoDB and load the embedding model.

        Args:
            uri: MongoDB connection string.
            db: Name of the database holding the knowledge base.
            kb_collection: Name of the collection holding the KB documents.
            embed_model_name: Model name passed to ``SentenceTransformer``.
            tls_allow_invalid_certificates: Forwarded to ``MongoClient`` as
                ``tlsAllowInvalidCertificates``. Defaults to ``True`` to keep
                the historical behaviour of this class.
        """
        # NOTE(security): accepting invalid certificates disables server
        # identity verification. The default is kept for backward
        # compatibility; pass tls_allow_invalid_certificates=False wherever
        # the server presents a valid certificate chain.
        self.client = MongoClient(
            uri,
            tls=True,
            tlsAllowInvalidCertificates=tls_allow_invalid_certificates,
        )
        self.db = self.client[db]
        self.col = self.db[kb_collection]
        self.embedder = SentenceTransformer(embed_model_name)
        self.dim = self.embedder.get_sentence_embedding_dimension()

    def embed(self, texts: List[str]) -> np.ndarray:
        """Encode *texts* with the model, requesting normalized embeddings."""
        return self.embedder.encode(texts, normalize_embeddings=True)

    def search(
        self,
        query: str,
        k: int = 3,
        *,
        index: str = "kb_vector_index",
        num_candidates: int = 100,
    ) -> List[Dict[str, Any]]:
        """Return up to *k* KB entries most similar to *query*.

        Args:
            query: Free-text query to embed and search for.
            k: Maximum number of results. Values <= 0 yield an empty list
                without embedding the query or contacting the server.
            index: Name of the Atlas vector search index to use (the index
                itself must be created in Atlas).
            num_candidates: Number of nearest-neighbour candidates the
                server considers. Raised to *k* when smaller, because
                ``$vectorSearch`` rejects a limit above ``numCandidates``.

        Returns:
            A list of dicts with the projected fields ``question``,
            ``answer``, ``intent`` and ``score`` (the vector search score).
        """
        if k <= 0:
            return []

        q_vec = self.embed([query])[0].tolist()
        # MongoDB Atlas Vector Search $vectorSearch (MongoDB 7.0+ / Atlas)
        pipeline = [
            {
                "$vectorSearch": {
                    "index": index,
                    "path": "embedding",
                    "queryVector": q_vec,
                    "numCandidates": max(num_candidates, k),
                    "limit": k,
                }
            },
            {
                "$project": {
                    "_id": 0,
                    "question": 1,
                    "answer": 1,
                    "intent": 1,
                    "score": {"$meta": "vectorSearchScore"},
                }
            },
        ]
        # Collection.aggregate returns a cursor that is iterated to the end,
        # so results are not limited to the first server batch.
        return list(self.col.aggregate(pipeline))

    def insert_many(self, docs: List[Dict[str, Any]]):
        """Insert *docs* into the KB collection as-is.

        No embedding is computed here; ``search`` matches on the
        ``embedding`` field, so documents must already carry it to be
        found. An empty *docs* is a no-op (PyMongo itself rejects an empty
        document list).
        """
        if not docs:
            return
        self.col.insert_many(docs)

    def ensure_indexes(self):
        """Create the regular (non-vector) indexes on the KB collection."""
        # normal fallback indexes: exact lookup on intent, text search on Q/A
        self.col.create_index("intent")
        self.col.create_index([("question", "text"), ("answer", "text")])