Force loading the embedding model from the local st_model folder (no internet, no cache)
Browse files
rag.py
CHANGED
|
@@ -104,15 +104,18 @@ def get_texts() -> List[str]:
|
|
| 104 |
return []
|
| 105 |
|
| 106 |
## ------------------------------------------------------------------
|
| 107 |
-
# ------# ------------------------------------------------------------------
|
| 108 |
-
# ------------------------------------------------------------------
|
| 109 |
@lru_cache(maxsize=1)
|
| 110 |
def get_vectorstore() -> FAISS:
|
| 111 |
texts = get_texts()
|
| 112 |
|
| 113 |
-
# --- FINAL: load
|
|
|
|
|
|
|
|
|
|
| 114 |
from sentence_transformers import SentenceTransformer
|
| 115 |
-
model = SentenceTransformer(
|
|
|
|
| 116 |
from langchain.embeddings import SentenceTransformerEmbeddings
|
| 117 |
embeddings = SentenceTransformerEmbeddings(model=model)
|
| 118 |
# ------------------------------------------------------------------------
|
|
@@ -123,7 +126,6 @@ def get_vectorstore() -> FAISS:
|
|
| 123 |
splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=50)
|
| 124 |
docs = splitter.create_documents(texts, metadatas=[{"source": DATASET}] * len(texts))
|
| 125 |
return FAISS.from_documents(docs, embeddings)
|
| 126 |
-
|
| 127 |
# ------------------------------------------------------------------# LLM
|
| 128 |
# ------------------------------------------------------------------
|
| 129 |
@lru_cache(maxsize=1)
|
|
|
|
| 104 |
return []
|
| 105 |
|
| 106 |
## ------------------------------------------------------------------
|
| 107 |
+
# ------------------------------------------------------------------
|
|
|
|
| 108 |
@lru_cache(maxsize=1)
|
| 109 |
def get_vectorstore() -> FAISS:
|
| 110 |
texts = get_texts()
|
| 111 |
|
| 112 |
+
# --- FINAL: load model from repo folder (no internet, no cache) --------
|
| 113 |
+
import os
|
| 114 |
+
local_model_path = os.path.join(os.path.dirname(__file__), "st_model")
|
| 115 |
+
|
| 116 |
from sentence_transformers import SentenceTransformer
|
| 117 |
+
model = SentenceTransformer(local_model_path, device="cpu", cache_folder=None)
|
| 118 |
+
|
| 119 |
from langchain.embeddings import SentenceTransformerEmbeddings
|
| 120 |
embeddings = SentenceTransformerEmbeddings(model=model)
|
| 121 |
# ------------------------------------------------------------------------
|
|
|
|
| 126 |
splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=50)
|
| 127 |
docs = splitter.create_documents(texts, metadatas=[{"source": DATASET}] * len(texts))
|
| 128 |
return FAISS.from_documents(docs, embeddings)
|
|
|
|
| 129 |
# ------------------------------------------------------------------# LLM
|
| 130 |
# ------------------------------------------------------------------
|
| 131 |
@lru_cache(maxsize=1)
|