Spaces:
Sleeping
Sleeping
João Lima
committed on
Commit
·
4598ede
1
Parent(s):
0db224f
fixing imports
Browse files- rag/retriever.py +38 -4
rag/retriever.py
CHANGED
|
@@ -1,8 +1,42 @@
|
|
| 1 |
-
from
|
| 2 |
-
|
|
|
|
| 3 |
from config import EMBEDDING_MODEL
|
| 4 |
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
def build_vectorstore(documents):
|
| 8 |
-
|
|
|
|
|
|
| 1 |
+
from sentence_transformers import SentenceTransformer
|
| 2 |
+
import faiss
|
| 3 |
+
import numpy as np
|
| 4 |
from config import EMBEDDING_MODEL
|
| 5 |
|
| 6 |
+
|
| 7 |
+
class SimpleVectorStore:
    """Minimal in-memory vector store backed by a FAISS flat L2 index.

    Args:
        embeddings: Object exposing ``encode(list_of_texts)`` that returns one
            vector per input text (a 2-D array-like).
        documents: Sequence of objects carrying their text in a
            ``page_content`` attribute. Search results are items of this
            sequence.
    """

    def __init__(self, embeddings, documents):
        self.embeddings = embeddings
        self.documents = documents
        # Stays None when there are no documents to index.
        self.index = None
        self._build_index()

    def _build_index(self):
        """Embed every document and load the vectors into a FAISS index."""
        texts = [doc.page_content for doc in self.documents]
        if not texts:
            # Nothing to embed; an empty batch may not even have a second
            # axis to read the vector dimension from.
            return

        # Convert before reading the shape so list-like encoder output works
        # too; FAISS expects a C-contiguous float32 matrix.
        vectors = np.ascontiguousarray(
            self.embeddings.encode(texts), dtype=np.float32
        )
        dimension = vectors.shape[1]
        self.index = faiss.IndexFlatL2(dimension)
        self.index.add(vectors)

    def similarity_search(self, query, k=3):
        """Return up to ``k`` documents closest to ``query``.

        Args:
            query: Text to embed and search for.
            k: Maximum number of documents to return.

        Returns:
            A list of items from ``self.documents`` in the order the index
            reports them. The list is shorter than ``k`` when fewer documents
            are available, and empty when the store is empty or ``k < 1``.
        """
        if self.index is None:
            return []

        # Never ask for more neighbours than there are documents.
        k = min(k, len(self.documents))
        if k < 1:
            return []

        query_vector = np.ascontiguousarray(
            self.embeddings.encode([query]), dtype=np.float32
        )
        _distances, indices = self.index.search(query_vector, k)

        # FAISS pads missing neighbours with -1. Used as a Python index, -1
        # would silently return the last document, so drop those slots.
        return [self.documents[i] for i in indices[0] if i >= 0]
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
# Load the embeddings model once, at import time
|
| 37 |
+
_embeddings_model = SentenceTransformer(EMBEDDING_MODEL)
|
| 38 |
+
|
| 39 |
|
| 40 |
def build_vectorstore(documents):
    """Build a vector store from the given documents.

    Wraps ``documents`` in a ``SimpleVectorStore`` that uses the module-level
    embeddings model.
    """
    store = SimpleVectorStore(_embeddings_model, documents)
    return store
|