# ----------------------------
# Hugging Face cache bootstrap
# ----------------------------
# The cache directory is created and exported through environment variables
# BEFORE sentence_transformers is imported, so the library is loaded with the
# redirected cache location already in place.
import os
from typing import List

CACHE_DIR = "/home/user/huggingface"
os.makedirs(CACHE_DIR, exist_ok=True)

os.environ["HF_HOME"] = CACHE_DIR
os.environ["TRANSFORMERS_CACHE"] = CACHE_DIR
os.environ["HF_DATASETS_CACHE"] = CACHE_DIR
os.environ["HF_MODULES_CACHE"] = CACHE_DIR

# ----------------------------
# Imports AFTER cache bootstrap
# ----------------------------
# Deliberately not at the top of the file: it must follow the env setup above.
from sentence_transformers import SentenceTransformer  # noqa: E402

print("✅ embeddings.py loaded from:", __file__)

# ----------------------------
# Load embedding model once
# ----------------------------
# Instantiated at import time and shared by every call in this process.
_model = SentenceTransformer(
    "sentence-transformers/all-MiniLM-L6-v2",
    cache_folder=CACHE_DIR,
)


# ----------------------------
# Function: generate embeddings
# ----------------------------
def generate_embeddings(chunks: List[str]) -> List[List[float]]:
    """Generate embeddings for a list of text chunks.

    Args:
        chunks: Text chunks to embed.

    Returns:
        One embedding vector per chunk, as plain Python lists (converted
        from the NumPy array returned by the model) so the result can be
        handed to FAISS helpers or serialized to JSON. An empty list or
        tuple yields ``[]``.
    """
    # Nothing to embed: skip the model call entirely. Only empty list/tuple
    # inputs take this path, so every other input behaves exactly as before.
    if isinstance(chunks, (list, tuple)) and not chunks:
        return []

    vectors = _model.encode(chunks, convert_to_numpy=True)  # numpy array
    return vectors.tolist()  # convert to lists for FAISS / JSON