# rag_template/src/embeddings.py
# Author: Guilherme Favaron
# Commit 1b447de: Add hybrid search, reranking, multiple LLMs, and UI improvements
"""
Gerenciamento de modelos de embeddings com cache
"""
from typing import List, Optional
import numpy as np
from sentence_transformers import SentenceTransformer
from .config import EMBEDDING_MODEL_ID
from .cache import EmbeddingCache
class EmbeddingManager:
    """Embedding manager with an optional in-memory cache.

    The underlying SentenceTransformer model is loaded lazily on first use,
    so constructing this object is cheap.
    """

    def __init__(self, model_id: str = EMBEDDING_MODEL_ID, use_cache: bool = True):
        self.model_id = model_id
        # Model is loaded lazily by load_model() on first use.
        self.model: Optional[SentenceTransformer] = None
        self.use_cache = use_cache
        self.cache = EmbeddingCache(max_size=1000, ttl_seconds=3600) if use_cache else None

    def load_model(self) -> SentenceTransformer:
        """Load and return the embedding model (lazy loading)."""
        if self.model is None:
            self.model = SentenceTransformer(self.model_id)
        return self.model

    def encode(
        self,
        texts: List[str],
        normalize: bool = True,
        show_progress: bool = False
    ) -> np.ndarray:
        """
        Generate embeddings for a list of texts, using the cache when enabled.

        Args:
            texts: Texts to embed.
            normalize: If True, normalize embeddings (recommended for cosine similarity).
            show_progress: If True, display a progress bar while encoding.

        Returns:
            Numpy array with one embedding row per input text.
        """
        if not self.use_cache or self.cache is None:
            # No cache: encode the whole batch directly.
            model = self.load_model()
            return model.encode(
                texts,
                normalize_embeddings=normalize,
                show_progress_bar=show_progress
            )

        # With cache: collect hits, and group the positions of each missing
        # text so duplicates are encoded only once per batch.
        embeddings_list: List[Optional[np.ndarray]] = []
        missing_positions: Dict[str, List[int]] = {}
        for i, text in enumerate(texts):
            cached_embedding = self.cache.get(text, self.model_id)
            embeddings_list.append(cached_embedding)
            if cached_embedding is None:
                missing_positions.setdefault(text, []).append(i)

        # Encode the unique uncached texts in a single batch.
        if missing_positions:
            model = self.load_model()
            unique_texts = list(missing_positions)
            new_embeddings = model.encode(
                unique_texts,
                normalize_embeddings=normalize,
                show_progress_bar=show_progress
            )
            # Store each new embedding in the cache and fill every position
            # where that text occurred.
            for text, embedding in zip(unique_texts, new_embeddings):
                self.cache.set(text, self.model_id, embedding)
                for i in missing_positions[text]:
                    embeddings_list[i] = embedding

        return np.array(embeddings_list)

    def encode_single(self, text: str, normalize: bool = True) -> List[float]:
        """
        Generate the embedding for a single text.

        Args:
            text: Text to embed.
            normalize: If True, normalize the embedding.

        Returns:
            List of floats representing the embedding.
        """
        embeddings = self.encode([text], normalize=normalize)
        return embeddings[0].astype(np.float32).tolist()

    def get_dimension(self) -> int:
        """Return the embedding dimension of the loaded model."""
        model = self.load_model()
        return model.get_sentence_embedding_dimension()

    def get_cache_stats(self) -> dict:
        """
        Return cache statistics.

        Returns:
            Dictionary with cache metrics, plus a "cache_enabled" flag.
        """
        if not self.use_cache or self.cache is None:
            return {"cache_enabled": False}
        # Build a new dict instead of mutating the one returned by the cache,
        # in case get_stats() hands back a reference to internal state.
        stats = dict(self.cache.get_stats())
        stats["cache_enabled"] = True
        return stats

    def clear_cache(self) -> None:
        """Clear the embedding cache (no-op when caching is disabled)."""
        if self.cache is not None:
            self.cache.clear()