Shubham170793 committed on
Commit
6e9d1d0
·
verified ·
1 Parent(s): 70d0b6e

Create embeddings.py

Browse files
Files changed (1) hide show
  1. src/embeddings.py +28 -0
src/embeddings.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Module setup: configure the Hugging Face cache location, then load the
# sentence-embedding model once at import time into the module-level `_model`.
import os
import shutil

# Always use a writable cache directory
CACHE_DIR = "/tmp/huggingface"
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
# Local directory where a saved copy of the model is kept between runs.
MODEL_PATH = os.path.join(CACHE_DIR, MODEL_NAME)

# These variables must be set BEFORE sentence_transformers is imported:
# huggingface_hub / transformers / datasets resolve their cache paths from
# the environment when they are first imported, so assigning them after the
# import does not redirect the already-computed cache locations.
os.environ["HF_HOME"] = CACHE_DIR
os.environ["TRANSFORMERS_CACHE"] = CACHE_DIR
os.environ["HF_DATASETS_CACHE"] = CACHE_DIR

# Deliberately imported after the environment is configured (see above).
from sentence_transformers import SentenceTransformer  # noqa: E402

print("✅ embeddings.py loaded from:", __file__)

# If model not already cached → download once into /tmp
if not os.path.exists(MODEL_PATH):
    print(f"⬇️ Downloading model {MODEL_NAME} to {MODEL_PATH}")
    _model = SentenceTransformer(MODEL_NAME, cache_folder=CACHE_DIR)
    # Force save a copy into MODEL_PATH so later imports take the local branch
    _model.save(MODEL_PATH)
else:
    print(f"✅ Loading model from local path {MODEL_PATH}")
    _model = SentenceTransformer(MODEL_PATH)
25
+
26
def generate_embeddings(chunks: list) -> list:
    """Encode *chunks* with the module-level model and return plain lists.

    Args:
        chunks: Input passed unchanged to ``_model.encode``.

    Returns:
        The encoder's NumPy output converted with ``tolist()``.
    """
    # Request a NumPy array so the result can be converted in one call.
    return _model.encode(chunks, convert_to_numpy=True).tolist()