Migrate embeddings to the HF Inference Providers router (no disk, no cache)
Browse files
rag.py
CHANGED
|
@@ -103,26 +103,21 @@ def get_texts() -> List[str]:
|
|
| 103 |
print(f"⚠ Dataset fetch failed: {e} – using empty corpus")
|
| 104 |
return []
|
| 105 |
|
|
|
|
|
|
|
| 106 |
# ------------------------------------------------------------------
|
| 107 |
@lru_cache(maxsize=1)
|
| 108 |
def get_vectorstore() -> FAISS:
|
| 109 |
texts = get_texts()
|
| 110 |
|
| 111 |
-
# --- FINAL:
|
| 112 |
-
from
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
device="cpu")
|
| 120 |
-
def embed_documents(self, texts):
|
| 121 |
-
return [self.pipe(t)[0][0] for t in texts]
|
| 122 |
-
def embed_query(self, text):
|
| 123 |
-
return self.embed_documents([text])[0]
|
| 124 |
-
|
| 125 |
-
embeddings = OptimumMiniLM()
|
| 126 |
# ------------------------------------------------------------------------
|
| 127 |
|
| 128 |
if not texts: # no data → empty FAISS
|
|
@@ -131,6 +126,7 @@ def get_vectorstore() -> FAISS:
|
|
| 131 |
splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=50)
|
| 132 |
docs = splitter.create_documents(texts, metadatas=[{"source": DATASET}] * len(texts))
|
| 133 |
return FAISS.from_documents(docs, embeddings)
|
|
|
|
| 134 |
# ------------------------------------------------------------------# LLM
|
| 135 |
# ------------------------------------------------------------------
|
| 136 |
@lru_cache(maxsize=1)
|
|
|
|
| 103 |
print(f"⚠ Dataset fetch failed: {e} – using empty corpus")
|
| 104 |
return []
|
| 105 |
|
| 106 |
+
# ------------------------------------------------------------------
|
| 107 |
+
# ------------------------------------------------------------------
|
| 108 |
# ------------------------------------------------------------------
|
| 109 |
@lru_cache(maxsize=1)
|
| 110 |
def get_vectorstore() -> FAISS:
|
| 111 |
texts = get_texts()
|
| 112 |
|
| 113 |
+
# --- FINAL: use HF Inference Providers router (no disk) ---------------
|
| 114 |
+
from langchain_huggingface import HuggingFaceEndpointEmbeddings
|
| 115 |
+
embeddings = HuggingFaceEndpointEmbeddings(
|
| 116 |
+
model="sentence-transformers/all-MiniLM-L6-v2",
|
| 117 |
+
task="feature-extraction",
|
| 118 |
+
huggingfacehub_api_token=HF_TOKEN,
|
| 119 |
+
api_url="https://router.huggingface.co/hf-inference" # new serverless
|
| 120 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
# ------------------------------------------------------------------------
|
| 122 |
|
| 123 |
if not texts: # no data → empty FAISS
|
|
|
|
| 126 |
splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=50)
|
| 127 |
docs = splitter.create_documents(texts, metadatas=[{"source": DATASET}] * len(texts))
|
| 128 |
return FAISS.from_documents(docs, embeddings)
|
| 129 |
+
|
| 130 |
# ------------------------------------------------------------------# LLM
|
| 131 |
# ------------------------------------------------------------------
|
| 132 |
@lru_cache(maxsize=1)
|