# embedding_generator.py
import numpy as np
from tqdm import tqdm
from sentence_transformers import SentenceTransformer
from utils import setup_logger
from config import Config
# Module-level logger, created once at import time by the setup_logger helper
# imported from utils; used below to report batches that fail to encode.
logger = setup_logger('embedding_generator')
def generate_embeddings(documents):
    """Encode the ``'content'`` entries of *documents* into embedding vectors.

    The texts are encoded in slices of ``Config.BATCH_SIZE`` items using the
    ``SentenceTransformer`` model named by ``Config.EMBEDDING_MODEL``.

    Args:
        documents: Container indexable by ``'content'``; the value must
            support ``len()``, slicing, and ``.tolist()`` on each slice
            (presumably a pandas DataFrame -- confirm against callers).

    Returns:
        ``np.array`` built from the embeddings of every batch that encoded
        successfully, in input order. Encoding is best-effort: a batch that
        raises is logged and skipped, so the result can have fewer rows than
        the input. When that happens an error-level summary is logged. If
        nothing was encoded (including empty input) the result is an empty
        1-D array.
    """
    embeddings = []

    # Nothing to encode, so skip loading the model. An explicit len() check is
    # used because the truth value of pandas/NumPy containers is ambiguous.
    if len(documents) == 0:
        return np.array(embeddings)

    model = SentenceTransformer(Config.EMBEDDING_MODEL)
    batch_size = Config.BATCH_SIZE

    # Look the column up once, and size the loop by the column itself: for a
    # mapping of columns, len(documents) would count keys rather than items.
    texts = documents['content']
    total = len(texts)

    skipped = 0
    for start in tqdm(range(0, total, batch_size), desc="Generating embeddings"):
        batch = texts[start:start + batch_size].tolist()
        try:
            batch_embeddings = model.encode(batch, show_progress_bar=False)
        except Exception as e:
            # Deliberately broad: one bad batch must not abort the whole run.
            skipped += len(batch)
            logger.error(
                f"Error encoding batch (items {start}-{start + len(batch) - 1}): {e}"
            )
        else:
            embeddings.extend(batch_embeddings)

    if skipped:
        # Make the misalignment visible instead of returning a short array
        # with no indication that rows are missing.
        logger.error(
            f"Skipped {skipped} of {total} documents; returned embeddings "
            "do not align one-to-one with the input"
        )

    return np.array(embeddings)