Spaces:

NimrodDev
/

RAG_SPACE

Build error

NimrodDev committed on Nov 2, 2025

Commit

f688f90

1 Parent(s): ea6472f

load local MiniLM folder (no optimum, no internet)

Files changed (1) hide show

rag.py CHANGED Viewed

@@ -103,27 +103,23 @@ def get_texts() -> List[str]:
         print(f"⚠ Dataset fetch failed: {e} – using empty corpus")
         return []
 # ------------------------------------------------------------------
 @lru_cache(maxsize=1)
 def get_vectorstore() -> FAISS:
     texts = get_texts()
-    # --- FINAL: use optimum ONNX MiniLM (already on disk) -----------------
     import os
-    from optimum.pipelines import pipeline
-    from langchain.embeddings.base import Embeddings
-    class OptimumMiniLM(Embeddings):
-        def __init__(self):
-            self.pipe = pipeline("feature-extraction",
-                                 model="optimum/all-MiniLM-L6-v2",
-                                 device="cpu")
-        def embed_documents(self, texts):
-            return [self.pipe(t)[0][0] for t in texts]
-        def embed_query(self, text):
-            return self.embed_documents([text])[0]
-    embeddings = OptimumMiniLM()
     # ------------------------------------------------------------------------
     if not texts:                       # no data → empty FAISS

         print(f"⚠ Dataset fetch failed: {e} – using empty corpus")
         return []
+# ------------------------------------------------------------------
 # ------------------------------------------------------------------
 @lru_cache(maxsize=1)
 def get_vectorstore() -> FAISS:
     texts = get_texts()
+    # --- FINAL: load local MiniLM (no internet, no cache) -----------------
     import os
+    local_model_path = os.path.abspath(
+        os.path.join(os.path.dirname(__file__), "st_model")
+    )
+    from sentence_transformers import SentenceTransformer
+    model = SentenceTransformer(local_model_path, device="cpu", cache_folder=None)
+    from langchain.embeddings import SentenceTransformerEmbeddings
+    embeddings = SentenceTransformerEmbeddings(model=model)
     # ------------------------------------------------------------------------
     if not texts:                       # no data → empty FAISS