force cache into writable /code/.cache
Browse files
rag.py
CHANGED
|
@@ -117,17 +117,28 @@ def get_texts() -> List[str]:
|
|
| 117 |
@lru_cache(maxsize=1)
|
| 118 |
def get_vectorstore() -> FAISS:
|
| 119 |
texts = get_texts()
|
| 120 |
-
if not texts: #
|
|
|
|
|
|
|
|
|
|
| 121 |
embeddings = HuggingFaceEmbeddings(model_name=EMBED_MODEL)
|
| 122 |
-
return FAISS.from_texts([""], embeddings) # dummy
|
| 123 |
|
| 124 |
splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=50)
|
| 125 |
docs = splitter.create_documents(texts, metadatas=[{"source": DATASET}] * len(texts))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
embeddings = HuggingFaceEmbeddings(model_name=EMBED_MODEL)
|
| 127 |
return FAISS.from_documents(docs, embeddings)
|
| 128 |
|
| 129 |
-
|
| 130 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
# ------------------------------------------------------------------
|
| 132 |
@lru_cache(maxsize=1)
|
| 133 |
def get_llm():
|
|
|
|
| 117 |
@lru_cache(maxsize=1)
def get_vectorstore() -> FAISS:
    """Build (and memoize, via lru_cache) the FAISS vector store.

    Returns:
        A FAISS index over the chunks of ``get_texts()``. When no texts
        are available, returns a dummy index over a single empty string
        so callers always receive a usable store.
    """
    import os

    # Force all Hugging Face downloads (datasets AND embedding model)
    # into a writable cache dir — do this ONCE, before any HF call, so
    # get_texts() benefits too. Presumably the default cache path is
    # read-only in this container (Spaces) — confirm.
    # NOTE: TRANSFORMERS_CACHE is deprecated in newer transformers
    # releases; HF_HOME is the supported knob. Both are set for
    # compatibility with older versions.
    os.environ["HF_HOME"] = "/code/.cache"
    os.environ["TRANSFORMERS_CACHE"] = "/code/.cache"

    texts = get_texts()
    embeddings = HuggingFaceEmbeddings(model_name=EMBED_MODEL)

    if not texts:  # no data → empty FAISS
        return FAISS.from_texts([""], embeddings)  # dummy

    splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=50)
    docs = splitter.create_documents(texts, metadatas=[{"source": DATASET}] * len(texts))
    return FAISS.from_documents(docs, embeddings)
|
| 135 |
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
# ------------------------------------------------------------------# LLM
|
| 142 |
# ------------------------------------------------------------------
|
| 143 |
@lru_cache(maxsize=1)
|
| 144 |
def get_llm():
|