Shubham170793 committed on
Commit
83f6641
·
verified ·
1 Parent(s): 0c81fa1

Update src/embeddings.py

Browse files
Files changed (1) hide show
  1. src/embeddings.py +30 -18
src/embeddings.py CHANGED
@@ -1,28 +1,40 @@
1
  import os
2
- import shutil
3
- from sentence_transformers import SentenceTransformer
4
-
5
- print("✅ embeddings.py loaded from:", __file__)
6
 
7
- # Always use a writable cache directory
 
 
8
  CACHE_DIR = "/tmp/huggingface"
9
- MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
10
- MODEL_PATH = os.path.join(CACHE_DIR, MODEL_NAME)
11
 
12
  os.environ["HF_HOME"] = CACHE_DIR
13
  os.environ["TRANSFORMERS_CACHE"] = CACHE_DIR
14
  os.environ["HF_DATASETS_CACHE"] = CACHE_DIR
15
 
16
- # If model not already cached → download once into /tmp
17
- if not os.path.exists(MODEL_PATH):
18
- print(f"⬇️ Downloading model {MODEL_NAME} to {MODEL_PATH}")
19
- _model = SentenceTransformer(MODEL_NAME, cache_folder=CACHE_DIR)
20
- # Force save a copy into MODEL_PATH
21
- _model.save(MODEL_PATH)
22
- else:
23
- print(f"✅ Loading model from local path {MODEL_PATH}")
24
- _model = SentenceTransformer(MODEL_PATH)
 
 
 
 
 
25
 
 
 
 
26
  def generate_embeddings(chunks: list) -> list:
27
- embeddings = _model.encode(chunks, convert_to_numpy=True)
28
- return embeddings.tolist()
 
 
 
 
 
 
 
 
1
  import os
 
 
 
 
2
 
3
+ # ----------------------------
4
+ # Force Hugging Face to use /tmp for cache
5
+ # ----------------------------
6
  CACHE_DIR = "/tmp/huggingface"
7
+ os.makedirs(CACHE_DIR, exist_ok=True)
 
8
 
9
  os.environ["HF_HOME"] = CACHE_DIR
10
  os.environ["TRANSFORMERS_CACHE"] = CACHE_DIR
11
  os.environ["HF_DATASETS_CACHE"] = CACHE_DIR
12
 
13
+ # ----------------------------
14
+ # Imports AFTER env vars
15
+ # ----------------------------
16
+ from sentence_transformers import SentenceTransformer
17
+
18
+ print("✅ embeddings.py loaded from:", __file__)
19
+
20
+ # ----------------------------
21
+ # Load embedding model once
22
+ # ----------------------------
23
+ _model = SentenceTransformer(
24
+ "sentence-transformers/all-MiniLM-L6-v2",
25
+ cache_folder=CACHE_DIR
26
+ )
27
 
28
+ # ----------------------------
29
+ # Function: generate embeddings
30
+ # ----------------------------
31
  def generate_embeddings(chunks: list) -> list:
32
+ """
33
+ 📌 Generate embeddings for a list of text chunks.
34
+ Args:
35
+ chunks (list): List of text chunks.
36
+ Returns:
37
+ list: List of embedding vectors (plain Python lists).
38
+ """
39
+ embeddings = _model.encode(chunks, convert_to_numpy=True) # numpy array
40
+ return embeddings.tolist() # convert to lists for FAISS / JSON