NimrodDev committed on
Commit
bea62da
·
1 Parent(s): 33d913e

force cache into writable /code/.cache

Browse files
Files changed (1) hide show
  1. rag.py +15 -4
rag.py CHANGED
@@ -117,17 +117,28 @@ def get_texts() -> List[str]:
117
  @lru_cache(maxsize=1)
118
  def get_vectorstore() -> FAISS:
119
  texts = get_texts()
120
- if not texts: # no data → return empty FAISS
 
 
 
121
  embeddings = HuggingFaceEmbeddings(model_name=EMBED_MODEL)
122
- return FAISS.from_texts([""], embeddings) # dummy, retriever will be empty
123
 
124
  splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=50)
125
  docs = splitter.create_documents(texts, metadatas=[{"source": DATASET}] * len(texts))
 
 
 
 
126
  embeddings = HuggingFaceEmbeddings(model_name=EMBED_MODEL)
127
  return FAISS.from_documents(docs, embeddings)
128
 
129
- # ------------------------------------------------------------------
130
- # LLM
 
 
 
 
131
  # ------------------------------------------------------------------
132
  @lru_cache(maxsize=1)
133
  def get_llm():
 
117
  @lru_cache(maxsize=1)
118
  def get_vectorstore() -> FAISS:
119
  texts = get_texts()
120
+ if not texts: # no data → empty FAISS
121
+ import os
122
+ os.environ["HF_HOME"] = "/code/.cache"
123
+ os.environ["TRANSFORMERS_CACHE"] = "/code/.cache"
124
  embeddings = HuggingFaceEmbeddings(model_name=EMBED_MODEL)
125
+ return FAISS.from_texts([""], embeddings) # dummy
126
 
127
  splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=50)
128
  docs = splitter.create_documents(texts, metadatas=[{"source": DATASET}] * len(texts))
129
+
130
+ import os
131
+ os.environ["HF_HOME"] = "/code/.cache"
132
+ os.environ["TRANSFORMERS_CACHE"] = "/code/.cache"
133
  embeddings = HuggingFaceEmbeddings(model_name=EMBED_MODEL)
134
  return FAISS.from_documents(docs, embeddings)
135
 
136
+
137
+
138
+
139
+
140
+
141
+ # ------------------------------------------------------------------# LLM
142
  # ------------------------------------------------------------------
143
  @lru_cache(maxsize=1)
144
  def get_llm():