File size: 1,227 Bytes
83f6641
4953cc0
83f6641
4953cc0
 
aa8b614
83f6641
6e9d1d0
 
 
 
4953cc0
6e9d1d0
d5c19a2
8ee2155
83f6641
4953cc0
83f6641
 
 
 
 
 
 
 
 
 
6e9d1d0
83f6641
 
 
6e9d1d0
83f6641
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# ----------------------------
# Hugging Face cache bootstrap
# ----------------------------
import os

# Directory used for every Hugging Face cache configured below; created
# up front so it already exists when something first writes to it.
CACHE_DIR = "/tmp/hf_cache"
os.makedirs(CACHE_DIR, exist_ok=True)

# Point all cache-related environment variables at the same directory.
# This file performs its Hugging Face imports only after this section,
# presumably so the libraries pick these values up when first imported.
os.environ.update(
    dict.fromkeys(
        (
            "HF_HOME",
            "TRANSFORMERS_CACHE",
            "HF_DATASETS_CACHE",
            "HF_MODULES_CACHE",
        ),
        CACHE_DIR,
    )
)

# Status line emitted once at import time (check-mark emoji restored from
# its mis-encoded form).
print(f"✅ Using Hugging Face cache at {CACHE_DIR}")

# ----------------------------
# Imports AFTER cache bootstrap
# ----------------------------
from sentence_transformers import SentenceTransformer

# ----------------------------
# Load embedding model once
# ----------------------------
# Shared encoder instance, constructed a single time when this module is
# imported and reused by generate_embeddings(); CACHE_DIR is handed to the
# library as its cache folder.
_model = SentenceTransformer(
    model_name_or_path="sentence-transformers/all-MiniLM-L6-v2",
    cache_folder=CACHE_DIR,
)

# ----------------------------
# Function: generate embeddings
# ----------------------------
def generate_embeddings(chunks: list) -> list:
    """
    Encode text chunks with the module-level embedding model.

    Args:
        chunks (list): Text chunks to embed.

    Returns:
        list: The model's NumPy output converted with ``tolist()`` into
        plain Python lists of embedding vectors.
    """
    # Request a NumPy array from the encoder, then flatten it into built-in
    # lists so the result can be handed to FAISS or serialized as JSON.
    return _model.encode(chunks, convert_to_numpy=True).tolist()