"""
AI Research Paper Helper - Model Manager
Lightweight version: Only handles local embeddings.
LLM inference goes through API (Groq/OpenRouter).
"""
import asyncio
import logging
from typing import Optional
from sentence_transformers import SentenceTransformer
from config import settings
logger = logging.getLogger(__name__)
class ModelManager:
    """Singleton manager for ML models with lazy loading and caching.

    Only loads the lightweight embedding model locally.
    All LLM inference is delegated to external APIs.
    """

    _instance: Optional['ModelManager'] = None
    # Guards first-time model loading. asyncio.Lock binds lazily to the
    # running loop on first acquire (Python 3.10+), so creating it at class
    # definition time is safe.
    _lock = asyncio.Lock()

    def __init__(self):
        # Embeddings run fine on CPU; no GPU required for the ~80MB model.
        self.device = "cpu"
        logger.info(f"ModelManager initialized (embeddings only, device: {self.device})")
        # Lazily-loaded SentenceTransformer instance; None until first use.
        self._embedding_model: Optional[SentenceTransformer] = None

    @classmethod
    def get_instance(cls) -> 'ModelManager':
        """Get the process-wide singleton instance, creating it on first call."""
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    @property
    def embedding_model(self) -> Optional[SentenceTransformer]:
        """The currently loaded embedding model, or None if not loaded yet."""
        return self._embedding_model

    async def load_embedding_model(self) -> SentenceTransformer:
        """Load the embedding model asynchronously (idempotent).

        Uses double-checked locking so concurrent callers trigger at most
        one load; the blocking download/construction runs in the default
        thread pool so the event loop is never blocked.

        Returns:
            The loaded SentenceTransformer instance.
        """
        # Fast path: already loaded, no lock needed.
        if self._embedding_model is not None:
            return self._embedding_model
        async with self._lock:
            # Re-check under the lock: another coroutine may have finished
            # loading while we were waiting to acquire it.
            if self._embedding_model is not None:
                return self._embedding_model
            logger.info(f"Loading embedding model: {settings.embedding_model}")
            # get_running_loop() is the correct call inside a coroutine;
            # get_event_loop() is deprecated there since 3.10 and its
            # implicit-loop fallback was removed in 3.12.
            loop = asyncio.get_running_loop()
            self._embedding_model = await loop.run_in_executor(
                None,
                lambda: SentenceTransformer(
                    settings.embedding_model,
                    cache_folder=str(settings.model_cache_dir),
                    device=self.device,
                ),
            )
            logger.info("Embedding model loaded successfully")
        return self._embedding_model

    async def get_embeddings(self, texts: list[str]) -> list[list[float]]:
        """Generate embeddings for a list of texts.

        Args:
            texts: Input strings to embed. An empty list short-circuits
                without touching (or loading) the model.

        Returns:
            One embedding vector (list of floats) per input text.
        """
        if not texts:
            # Avoid loading the model and a thread-pool round-trip for nothing.
            return []
        model = await self.load_embedding_model()
        loop = asyncio.get_running_loop()
        # encode() is CPU-bound; run it off the event loop.
        embeddings = await loop.run_in_executor(
            None,
            lambda: model.encode(texts, convert_to_tensor=False, show_progress_bar=False),
        )
        return embeddings.tolist()

    async def cleanup(self):
        """Release the loaded model so it can be garbage-collected."""
        logger.info("Cleaning up model resources...")
        # Rebinding to None drops our only reference; the lazy loader will
        # recreate the model if it is needed again later.
        if self._embedding_model is not None:
            self._embedding_model = None
        logger.info("Model cleanup complete")