File size: 1,227 Bytes
83f6641 4953cc0 83f6641 4953cc0 aa8b614 83f6641 6e9d1d0 4953cc0 6e9d1d0 d5c19a2 8ee2155 83f6641 4953cc0 83f6641 6e9d1d0 83f6641 6e9d1d0 83f6641 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
# ----------------------------
# Hugging Face cache bootstrap
# ----------------------------
import os
# Route every Hugging Face cache location to a single writable directory.
# These assignments are placed ahead of the sentence_transformers import
# further down so the values are already in the environment when that
# library is loaded.
CACHE_DIR = "/tmp/hf_cache"
os.makedirs(CACHE_DIR, exist_ok=True)  # no error if the directory already exists
os.environ["HF_HOME"] = CACHE_DIR
os.environ["TRANSFORMERS_CACHE"] = CACHE_DIR
os.environ["HF_DATASETS_CACHE"] = CACHE_DIR
os.environ["HF_MODULES_CACHE"] = CACHE_DIR
# The leading check mark is written as the escape \u2705: the literal emoji
# had been mis-decoded into a stray character plus a line break, which split
# this string literal over two lines and made the module unparseable.
print(f"\u2705 Using Hugging Face cache at {CACHE_DIR}")
# ----------------------------
# Imports AFTER cache bootstrap
# ----------------------------
from sentence_transformers import SentenceTransformer
# ----------------------------
# Load embedding model once
# ----------------------------
# Built once at import time and shared by every generate_embeddings() call,
# so the model is not re-instantiated per request. Model files are cached
# under CACHE_DIR.
_model = SentenceTransformer(
    model_name_or_path="sentence-transformers/all-MiniLM-L6-v2",
    cache_folder=CACHE_DIR,
)
# ----------------------------
# Function: generate embeddings
# ----------------------------
def generate_embeddings(chunks: list) -> list:
    """
    Generate embeddings for a list of text chunks.

    The chunks are passed to the module-level model's ``encode`` method and
    the resulting NumPy array is converted to plain Python lists.

    Args:
        chunks (list): List of text chunks.

    Returns:
        list: List of embedding vectors (plain Python lists). An empty
        input yields an empty list without invoking the model.
    """
    # Nothing to encode: skip the model call entirely. len() is used rather
    # than truthiness so array-like inputs do not raise on an ambiguous bool.
    if len(chunks) == 0:
        return []

    embeddings = _model.encode(chunks, convert_to_numpy=True)  # numpy array
    return embeddings.tolist()  # convert to lists for FAISS / JSON
|