File size: 1,804 Bytes
1813edc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
"""
Embedding Manager - Singleton for BGE-M3 embeddings
Handles text → vector conversion for RAG queries and persistence
"""

from sentence_transformers import SentenceTransformer
from typing import List
import numpy as np
from backend.config import settings


class EmbeddingManager:
    """Shared singleton wrapping a SentenceTransformer embedding model.

    Every ``EmbeddingManager()`` call hands back the same object. The model
    named by ``settings.embedding_model`` is constructed by ``__init__`` only
    while the object's ``_initialized`` flag is still ``False``.
    """

    # The one shared object; filled in by the first __new__ call.
    _instance = None
    # Not referenced by the methods below, which use the ``model`` instance
    # attribute instead.
    _model = None

    def __new__(cls):
        """Return the cached instance, creating it on the first call."""
        existing = cls._instance
        if existing is not None:
            return existing

        cls._instance = super().__new__(cls)
        # Flag read by __init__ so the model is not rebuilt on later calls.
        cls._instance._initialized = False
        return cls._instance

    def __init__(self):
        """Construct the model once; later constructions are no-ops."""
        # __init__ runs on every EmbeddingManager() call, even though __new__
        # returns the same object, so bail out once setup has completed.
        if self._initialized:
            return

        self.model = SentenceTransformer(settings.embedding_model)
        self._initialized = True

    def embed(self, text: str) -> List[float]:
        """
        Encode one text into its embedding vector.

        Args:
            text: Input text to embed

        Returns:
            The model's encoding of ``text`` converted with ``.tolist()``
            (1024 floats for BGE-M3)
        """
        return self.model.encode(text, convert_to_tensor=False).tolist()

    def embed_batch(self, texts: List[str]) -> List[List[float]]:
        """
        Encode several texts with a single call to the model.

        The batch size passed to the model is ``settings.embedding_batch_size``.

        Args:
            texts: List of texts to embed

        Returns:
            The model's encodings of ``texts`` converted with ``.tolist()``
        """
        vectors = self.model.encode(
            texts,
            batch_size=settings.embedding_batch_size,
            convert_to_tensor=False,
        )
        return vectors.tolist()

    def get_dimension(self) -> int:
        """Return ``settings.vector_dimension`` (read from config, not from the model)."""
        return settings.vector_dimension


# Global singleton instance, created when this module is imported.
# Constructing it runs EmbeddingManager.__init__, which builds the
# SentenceTransformer model named by settings.embedding_model, so the
# first import of this module triggers that model load.
embedding_manager = EmbeddingManager()