use optimum ONNX MiniLM (no disk, no internet)
Browse files
rag.py
CHANGED
|
@@ -108,22 +108,21 @@ def get_texts() -> List[str]:
|
|
| 108 |
def get_vectorstore() -> FAISS:
|
| 109 |
texts = get_texts()
|
| 110 |
|
| 111 |
-
# --- FINAL:
|
| 112 |
-
import
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
)
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
embeddings = SentenceTransformerEmbeddings(model=model)
|
| 127 |
# ------------------------------------------------------------------------
|
| 128 |
|
| 129 |
if not texts: # no data → empty FAISS
|
|
|
|
| 108 |
def get_vectorstore() -> FAISS:
|
| 109 |
texts = get_texts()
|
| 110 |
|
| 111 |
+
# --- FINAL: optimum ONNX MiniLM (already on disk) ---------------------
|
| 112 |
+
from optimum.pipelines import pipeline
|
| 113 |
+
from langchain.embeddings.base import Embeddings
|
| 114 |
+
|
| 115 |
+
class OptimumMiniLM(Embeddings):
    """LangChain ``Embeddings`` adapter over an Optimum ONNX
    feature-extraction pipeline for ``optimum/all-MiniLM-L6-v2``
    (CPU-only, no network access at embed time).

    NOTE(review): assumes ``pipeline`` (from ``optimum.pipelines``) and
    ``Embeddings`` (from ``langchain.embeddings.base``) are already in
    scope where this class is defined — confirm against the enclosing
    function's imports.
    """

    def __init__(self):
        # Build the ONNX pipeline once. "feature-extraction" returns one
        # vector PER TOKEN, not a pooled sentence vector, so pooling is
        # done in _pool() below.
        self.pipe = pipeline(
            "feature-extraction",
            model="optimum/all-MiniLM-L6-v2",
            device="cpu",
        )

    @staticmethod
    def _pool(token_vectors):
        """Mean-pool token vectors and L2-normalize the result.

        This matches the documented sentence-transformers recipe for
        all-MiniLM-L6-v2 (mean pooling followed by normalization).
        The previous code used ``[0][0]`` — only the FIRST token's
        embedding — which does not represent the whole sentence.
        """
        n = len(token_vectors)
        pooled = [sum(col) / n for col in zip(*token_vectors)]
        # Guard against the (degenerate) all-zero vector: `or 1.0` avoids
        # a ZeroDivisionError while leaving the zero vector unchanged.
        norm = sum(x * x for x in pooled) ** 0.5 or 1.0
        return [x / norm for x in pooled]

    def embed_documents(self, texts):
        # Pipeline output for a single string is [1][tokens][dim]
        # (the original code's [0][0] indexing implies this shape),
        # so [0] yields the per-token vectors to pool.
        return [self._pool(self.pipe(t)[0]) for t in texts]

    def embed_query(self, text):
        # A query is embedded exactly like a single document.
        return self.embed_documents([text])[0]
|
| 124 |
+
|
| 125 |
+
embeddings = OptimumMiniLM()
|
|
|
|
| 126 |
# ------------------------------------------------------------------------
|
| 127 |
|
| 128 |
if not texts: # no data → empty FAISS
|