File size: 10,437 Bytes
0a4529c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
# DEPENDENCIES
import os
import gc
import torch
from typing import Optional
from config.settings import get_settings
from config.logging_config import get_logger
from utils.error_handler import handle_errors
from utils.error_handler import EmbeddingError
from sentence_transformers import SentenceTransformer


# Setup Settings and Logging
# Module-level handles used throughout this file: `settings` supplies the EMBEDDING_MODEL and
# EMBEDDING_DEVICE defaults read by EmbeddingModelLoader, and `logger` is obtained from the
# project logging helper using this module's name
settings = get_settings()
logger   = get_logger(__name__)


class EmbeddingModelLoader:
    """
    Manages loading and caching of embedding models: Supports multiple models with efficient resource management

    Every successfully loaded model is cached per (model name, resolved device) pair; the most recently
    requested model is additionally tracked as the "currently loaded" model
    """
    def __init__(self):
        self.logger        = logger
        self._loaded_model = None
        self._model_name   = None
        self._device       = None
        
        # Model cache for multiple models, keyed by (model_name, device) tuples: a tuple key keeps the
        # model name and the device separate, so a name can never match another name sharing its prefix
        self._model_cache  = dict()
    

    @handle_errors(error_type = EmbeddingError, log_error = True, reraise = True)
    def load_model(self, model_name: Optional[str] = None, device: Optional[str] = None, force_reload: bool = False) -> SentenceTransformer:
        """
        Load embedding model with caching and device optimization
        
        Arguments:
        ----------
            model_name   { str }    : Name of model to load (default from settings)
            
            device       { str }    : Device to load on ('cpu', 'cuda', 'mps', 'auto')
            
            force_reload { bool }   : Force reload even if model is cached
        
        Returns:
        --------
            { SentenceTransformer } : Loaded model instance
        
        Raises:
        -------
            EmbeddingError          : If model loading fails
        """
        model_name = model_name or settings.EMBEDDING_MODEL
        device     = self._resolve_device(device)
        
        # Check cache first (the key uses the resolved device, so 'auto' and its resolved value share an entry)
        cache_key  = (model_name, device)
        
        if ((not force_reload) and (cache_key in self._model_cache)):
            self.logger.debug(f"Using cached model: {model_name}_{device}")
            
            self._loaded_model = self._model_cache[cache_key]
            self._model_name   = model_name
            self._device       = device
            
            return self._loaded_model
        
        try:
            self.logger.info(f"Loading embedding model: {model_name} on device: {device}")
            
            # Load model with optimized settings
            model                        = SentenceTransformer(model_name,
                                                               device       = device,
                                                               cache_folder = os.path.expanduser("~/.cache/sentence_transformers"),
                                                              )
            
            # Model-specific optimizations
            model                        = self._optimize_model(model  = model, 
                                                                device = device,
                                                               )
            
            # Cache the model (a forced reload overwrites any previous entry for the same key)
            self._model_cache[cache_key] = model
            self._loaded_model           = model
            self._model_name             = model_name
            self._device                 = device
            
            # Log model info
            self._log_model_info(model  = model, 
                                 device = device,
                                )
            
            self.logger.info(f"Successfully loaded model: {model_name}")
            
            return model
            
        except Exception as e:
            self.logger.error(f"Failed to load model {model_name}: {repr(e)}")
            
            # Chain the original exception so the root cause stays attached to the EmbeddingError
            raise EmbeddingError(f"Model loading failed: {repr(e)}") from e
    

    def _resolve_device(self, device: Optional[str] = None) -> str:
        """
        Resolve the best available device
        
        Resolution order: an explicit non-'auto' request, then a non-'auto' settings.EMBEDDING_DEVICE,
        then automatic detection (CUDA, then MPS, then CPU)
        
        Arguments:
        ----------
            device { str } : Requested device
        
        Returns:
        --------
               { str }     : Actual device to use
        """
        if (device and (device != "auto")):
            return device
        
        # Auto device selection
        if (settings.EMBEDDING_DEVICE != "auto"):
            return settings.EMBEDDING_DEVICE
        
        # Automatic detection
        if torch.cuda.is_available():
            return "cuda"
        
        # Older torch builds have no `torch.backends.mps`; treat a missing backend as unavailable
        mps_backend = getattr(torch.backends, "mps", None)
        
        if ((mps_backend is not None) and mps_backend.is_available()):
            return "mps"
        
        return "cpu"
    

    def _optimize_model(self, model: SentenceTransformer, device: str) -> SentenceTransformer:
        """
        Apply optimizations to the model
        
        Arguments:
        ----------
            model  { SentenceTransformer } : Model to optimize

            device { str }                 : Device model is on
        
        Returns:
        --------
            { SentenceTransformer }        : Optimized model
        """
        # Enable eval mode for inference
        model.eval()
        
        # GPU optimizations: match indexed devices such as 'cuda:0' as well as plain 'cuda'
        if str(device).startswith("cuda"):
            # Use half precision for GPU if supported; failure is non-fatal and only logged
            try:
                model = model.half()
                self.logger.debug("Enabled half precision for GPU")
            
            except Exception as e:
                self.logger.warning(f"Could not enable half precision: {repr(e)}")
        
        # Disable gradient computation
        for param in model.parameters():
            param.requires_grad = False
        
        return model
    

    def _log_model_info(self, model: SentenceTransformer, device: str) -> None:
        """
        Log detailed model information
        
        Best-effort: any failure while collecting the information is logged at debug level and swallowed
        
        Arguments:
        ----------
            model  { SentenceTransformer } : Model to log info for

            device { str }                 : Device model is on
        """
        try:
            # Get model architecture info
            if hasattr(model, '_modules'):
                modules = list(model._modules.keys())
            
            else:
                modules = ["unknown"]
            
            # Get embedding dimension
            if hasattr(model, 'get_sentence_embedding_dimension'):
                dimension = model.get_sentence_embedding_dimension()
            
            else:
                dimension = "unknown"
            
            # Count parameters
            total_params = sum(p.numel() for p in model.parameters())
            
            self.logger.info(f"Model Info: {len(modules)} modules, dimension={dimension}, parameters={total_params:,}, device={device}")
                           
        except Exception as e:
            self.logger.debug(f"Could not get detailed model info: {repr(e)}")
    

    def get_loaded_model(self) -> Optional[SentenceTransformer]:
        """
        Get currently loaded model
        
        Returns:
        --------
            { SentenceTransformer } : Currently loaded model or None
        """
        return self._loaded_model
    

    def get_model_info(self) -> dict:
        """
        Get information about loaded model
        
        Returns:
        --------
            { dict }    : {"loaded": False} when nothing is loaded; otherwise "loaded", "model_name", "device"
                          and "cache_size", plus "embedding_dimension" / "model_class" when they can be read
        """
        if self._loaded_model is None:
            return {"loaded": False}
        
        info = {"loaded"       : True,
                "model_name"   : self._model_name,
                "device"       : self._device,
                "cache_size"   : len(self._model_cache),
               }
        
        try:
            if hasattr(self._loaded_model, 'get_sentence_embedding_dimension'):
                info["embedding_dimension"] = self._loaded_model.get_sentence_embedding_dimension()
            
            info["model_class"] = type(self._loaded_model).__name__
            
        except Exception as e:
            self.logger.warning(f"Could not get detailed model info: {e}")
        
        return info
    

    def clear_cache(self, model_name: Optional[str] = None) -> None:
        """
        Clear model cache
        
        Only cache entries are dropped; the reference to the currently loaded model is left untouched
        (use unload_model to release it)
        
        Arguments:
        ----------
            model_name { str } : Specific model to clear (None = all)
        """
        if model_name:
            # Clear specific model from all devices: exact name match, so clearing "foo" never evicts "foo-v2"
            keys_to_remove = [key for key in self._model_cache if (key[0] == model_name)]
            
            for key in keys_to_remove:
                del self._model_cache[key]
            
            self.logger.info(f"Cleared cache for model: {model_name}")
        
        else:
            # Clear all cache
            cache_size = len(self._model_cache)
            self._model_cache.clear()
            
            self.logger.info(f"Cleared all model cache ({cache_size} models)")
    

    def unload_model(self) -> None:
        """
        Unload current model and free memory
        
        Does nothing when no model is currently loaded
        """
        # Explicit None check: the model is a container type, so plain truthiness is not a presence test
        if self._loaded_model is None:
            return
        
        model_name = self._model_name
        
        # Clear from cache
        if self._model_name and self._device:
            self._model_cache.pop((self._model_name, self._device), None)
        
        # Clear references
        self._loaded_model = None
        self._model_name   = None
        self._device       = None
        
        # Force garbage collection            
        gc.collect()
        
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        
        self.logger.info(f"Unloaded model: {model_name}")


# Module-level loader shared by all callers; stays None until first requested
_model_loader = None


def get_model_loader() -> EmbeddingModelLoader:
    """
    Return the shared module-level model loader, creating it on first use
    
    Returns:
    --------
        { EmbeddingModelLoader } : The single loader instance held by this module
    """
    global _model_loader

    # Fast path: the loader was already created by an earlier call
    if _model_loader is not None:
        return _model_loader

    _model_loader = EmbeddingModelLoader()
    
    return _model_loader


def load_embedding_model(model_name: Optional[str] = None, device: Optional[str] = None) -> SentenceTransformer:
    """
    Shortcut that loads an embedding model through the shared loader
    
    Arguments:
    ----------
        model_name { str } : Model name (forwarded unchanged to the loader)
        
        device     { str } : Device (forwarded unchanged to the loader)
    
    Returns:
    --------
        { SentenceTransformer } : Model returned by the shared loader's load_model
    """
    # Fetch the module-level loader and delegate in a single step
    return get_model_loader().load_model(model_name, device)