NimrodDev committed on
Commit
932be48
·
1 Parent(s): 622b23f

migrate to HF Inference Providers router (no disk, no cache)

Browse files
Files changed (1) hide show
  1. rag.py +11 -15
rag.py CHANGED
@@ -103,26 +103,21 @@ def get_texts() -> List[str]:
103
  print(f"⚠ Dataset fetch failed: {e} – using empty corpus")
104
  return []
105
 
 
 
106
  # ------------------------------------------------------------------
107
  @lru_cache(maxsize=1)
108
  def get_vectorstore() -> FAISS:
109
  texts = get_texts()
110
 
111
- # --- FINAL: optimum ONNX MiniLM (already on disk) ---------------------
112
- from optimum.pipelines import pipeline
113
- from langchain.embeddings.base import Embeddings
114
-
115
- class OptimumMiniLM(Embeddings):
116
- def __init__(self):
117
- self.pipe = pipeline("feature-extraction",
118
- model="optimum/all-MiniLM-L6-v2",
119
- device="cpu")
120
- def embed_documents(self, texts):
121
- return [self.pipe(t)[0][0] for t in texts]
122
- def embed_query(self, text):
123
- return self.embed_documents([text])[0]
124
-
125
- embeddings = OptimumMiniLM()
126
  # ------------------------------------------------------------------------
127
 
128
  if not texts: # no data → empty FAISS
@@ -131,6 +126,7 @@ def get_vectorstore() -> FAISS:
131
  splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=50)
132
  docs = splitter.create_documents(texts, metadatas=[{"source": DATASET}] * len(texts))
133
  return FAISS.from_documents(docs, embeddings)
 
134
  # ------------------------------------------------------------------# LLM
135
  # ------------------------------------------------------------------
136
  @lru_cache(maxsize=1)
 
103
  print(f"⚠ Dataset fetch failed: {e} – using empty corpus")
104
  return []
105
 
106
+ # ------------------------------------------------------------------
107
+ # ------------------------------------------------------------------
108
  # ------------------------------------------------------------------
109
  @lru_cache(maxsize=1)
110
  def get_vectorstore() -> FAISS:
111
  texts = get_texts()
112
 
113
+ # --- FINAL: use HF Inference Providers router (no disk) ---------------
114
+ from langchain_huggingface import HuggingFaceEndpointEmbeddings
115
+ embeddings = HuggingFaceEndpointEmbeddings(
116
+ model="sentence-transformers/all-MiniLM-L6-v2",
117
+ task="feature-extraction",
118
+ huggingfacehub_api_token=HF_TOKEN,
119
+ api_url="https://router.huggingface.co/hf-inference" # new serverless
120
+ )
 
 
 
 
 
 
 
121
  # ------------------------------------------------------------------------
122
 
123
  if not texts: # no data → empty FAISS
 
126
  splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=50)
127
  docs = splitter.create_documents(texts, metadatas=[{"source": DATASET}] * len(texts))
128
  return FAISS.from_documents(docs, embeddings)
129
+
130
  # ------------------------------------------------------------------# LLM
131
  # ------------------------------------------------------------------
132
  @lru_cache(maxsize=1)