NimrodDev committed on
Commit
932be48
·
1 Parent(s): 622b23f

migrate to HF Inference Providers router (no disk, no cache)

Browse files
Files changed (1) hide show
  1. rag.py +11 -15
rag.py CHANGED
@@ -103,26 +103,21 @@ def get_texts() -> List[str]:
103
  print(f"⚠ Dataset fetch failed: {e} – using empty corpus")
104
  return []
105
 
 
 
106
  # ------------------------------------------------------------------
107
  @lru_cache(maxsize=1)
108
  def get_vectorstore() -> FAISS:
109
  texts = get_texts()
110
 
111
- # --- FINAL: optimum ONNX MiniLM (already on disk) ---------------------
112
- from optimum.pipelines import pipeline
113
- from langchain.embeddings.base import Embeddings
114
-
115
- class OptimumMiniLM(Embeddings):
116
- def __init__(self):
117
- self.pipe = pipeline("feature-extraction",
118
- model="optimum/all-MiniLM-L6-v2",
119
- device="cpu")
120
- def embed_documents(self, texts):
121
- return [self.pipe(t)[0][0] for t in texts]
122
- def embed_query(self, text):
123
- return self.embed_documents([text])[0]
124
-
125
- embeddings = OptimumMiniLM()
126
  # ------------------------------------------------------------------------
127
 
128
  if not texts: # no data → empty FAISS
@@ -131,6 +126,7 @@ def get_vectorstore() -> FAISS:
131
  splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=50)
132
  docs = splitter.create_documents(texts, metadatas=[{"source": DATASET}] * len(texts))
133
  return FAISS.from_documents(docs, embeddings)
 
134
  # ------------------------------------------------------------------# LLM
135
  # ------------------------------------------------------------------
136
  @lru_cache(maxsize=1)
 
103
  print(f"⚠ Dataset fetch failed: {e} – using empty corpus")
104
  return []
105
 
106
+ # ------------------------------------------------------------------
107
+ # ------------------------------------------------------------------
108
  # ------------------------------------------------------------------
109
  @lru_cache(maxsize=1)
110
  def get_vectorstore() -> FAISS:
111
  texts = get_texts()
112
 
113
+ # --- FINAL: use HF Inference Providers router (no disk) ---------------
114
+ from langchain_huggingface import HuggingFaceEndpointEmbeddings
115
+ embeddings = HuggingFaceEndpointEmbeddings(
116
+ model="sentence-transformers/all-MiniLM-L6-v2",
117
+ task="feature-extraction",
118
+ huggingfacehub_api_token=HF_TOKEN,
119
+ api_url="https://router.huggingface.co/hf-inference" # new serverless
120
+ )
 
 
 
 
 
 
 
121
  # ------------------------------------------------------------------------
122
 
123
  if not texts: # no data → empty FAISS
 
126
  splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=50)
127
  docs = splitter.create_documents(texts, metadatas=[{"source": DATASET}] * len(texts))
128
  return FAISS.from_documents(docs, embeddings)
129
+
130
  # ------------------------------------------------------------------# LLM
131
  # ------------------------------------------------------------------
132
  @lru_cache(maxsize=1)