File size: 753 Bytes
e820a8a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# embedding_generator.py
import numpy as np
from tqdm import tqdm
from sentence_transformers import SentenceTransformer
from utils import setup_logger
from config import Config

logger = setup_logger('embedding_generator')

def generate_embeddings(documents):
    """Encode ``documents['content']`` in batches and return the embeddings.

    A fresh ``SentenceTransformer`` is loaded from ``Config.EMBEDDING_MODEL``
    on every call, and the content is encoded ``Config.BATCH_SIZE`` items at
    a time.

    Encoding is best-effort: a batch that raises is logged and does not abort
    the run. Rows belonging to a failed batch are filled with NaN so that row
    ``i`` of the result still corresponds to document ``i``; dropping them
    would silently shift every later embedding onto the wrong document.

    Args:
        documents: Object supporting ``len()`` and ``documents['content']``,
            where the latter can be sliced and has ``.tolist()`` --
            presumably a pandas DataFrame with a text column; confirm
            against the caller.

    Returns:
        A NumPy array with one row per document. If ``documents`` is empty,
        or if every batch failed (so the embedding width is unknown), an
        empty array as produced by ``np.array([])`` is returned.
    """
    model = SentenceTransformer(Config.EMBEDDING_MODEL)
    batch_size = Config.BATCH_SIZE

    # One entry per batch: the encoded array, or None if encoding failed.
    blocks = []
    # Number of documents in each batch, needed to size NaN placeholders.
    sizes = []
    for start in tqdm(range(0, len(documents), batch_size), desc="Generating embeddings"):
        batch = documents['content'][start:start + batch_size].tolist()
        sizes.append(len(batch))
        try:
            blocks.append(np.asarray(model.encode(batch, show_progress_bar=False)))
        except Exception as e:
            logger.error(f"Error encoding batch: {e}")
            blocks.append(None)

    encoded = [block for block in blocks if block is not None]
    if not encoded:
        # Nothing was encoded (no documents, or every batch failed).
        return np.array([])

    failed_rows = sum(size for size, block in zip(sizes, blocks) if block is None)
    if failed_rows:
        # Borrow the per-row shape and dtype from a batch that succeeded.
        reference = encoded[0]
        row_shape = reference.shape[1:]
        if np.issubdtype(reference.dtype, np.floating):
            pad_dtype = reference.dtype
        else:
            # NaN is not representable in integer dtypes.
            pad_dtype = np.float32
        blocks = [
            np.full((size, *row_shape), np.nan, dtype=pad_dtype) if block is None else block
            for size, block in zip(sizes, blocks)
        ]
        logger.error(
            f"{failed_rows} of {sum(sizes)} documents could not be encoded; "
            "their embedding rows are filled with NaN"
        )

    return np.concatenate(blocks)