João Lima committed on
Commit
4598ede
·
1 Parent(s): 0db224f

fixing imports

Browse files
Files changed (1) hide show
  1. rag/retriever.py +38 -4
rag/retriever.py CHANGED
@@ -1,8 +1,42 @@
1
- from langchain_huggingface import HuggingFaceEmbeddings
2
- from langchain_community.vectorstores import FAISS
 
3
  from config import EMBEDDING_MODEL
4
 
5
- _embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  def build_vectorstore(documents):
8
- return FAISS.from_documents(documents, _embeddings)
 
 
1
+ from sentence_transformers import SentenceTransformer
2
+ import faiss
3
+ import numpy as np
4
  from config import EMBEDDING_MODEL
5
 
6
+
7
class SimpleVectorStore:
    """Minimal in-memory vector store backed by a flat L2 FAISS index.

    Every document is embedded once when the store is constructed; queries
    are embedded on demand and matched by exact (brute-force) L2 distance.
    """

    def __init__(self, embeddings, documents):
        """Embed ``documents`` and index them.

        Args:
            embeddings: Object exposing ``encode(list_of_texts)`` that returns
                one vector per text (this file passes a ``SentenceTransformer``).
            documents: Iterable of objects with a ``page_content`` attribute.
        """
        self.embeddings = embeddings
        # Snapshot the input so that row i of the index always maps to
        # self.documents[i], even if the caller passed a generator or later
        # mutates its own list.
        self.documents = list(documents)
        # Stays None when there is nothing to index.
        self.index = None
        self._build_index()

    def _build_index(self):
        """Encode every document's text and load the vectors into FAISS."""
        if not self.documents:
            # The vector dimension cannot be derived from an empty batch;
            # leave the index unset and let searches return no results.
            return

        texts = [doc.page_content for doc in self.documents]
        # Coerce to a contiguous float32 matrix *before* reading its shape,
        # so encoders that return lists or other dtypes also work.
        vectors = np.ascontiguousarray(
            self.embeddings.encode(texts), dtype="float32"
        )

        self.index = faiss.IndexFlatL2(vectors.shape[1])
        self.index.add(vectors)

    def similarity_search(self, query, k=3):
        """Return up to ``k`` documents closest to ``query``.

        Args:
            query: Text to embed and search for.
            k: Maximum number of documents to return.

        Returns:
            A list of the stored documents, in the order reported by the
            index. It may hold fewer than ``k`` items, and is empty when the
            store has no documents or ``k`` is less than 1.
        """
        if self.index is None or k < 1:
            return []

        query_vector = np.ascontiguousarray(
            self.embeddings.encode([query]), dtype="float32"
        )
        _distances, indices = self.index.search(query_vector, k)

        # FAISS pads the label row with -1 when fewer than k vectors are
        # available; indexing the list with -1 would wrongly return the
        # last document, so those slots are dropped.
        return [self.documents[i] for i in indices[0] if i >= 0]
34
+
35
+
36
+ # Load the embedding model once at module import; build_vectorstore passes this shared instance to every store
37
+ _embeddings_model = SentenceTransformer(EMBEDDING_MODEL)
38
+
39
 
40
  def build_vectorstore(documents):
41
+ """Constrói vectorstore a partir de documentos"""
42
+ return SimpleVectorStore(_embeddings_model, documents)