|
|
|
|
|
import yaml |
|
|
from pathlib import Path |
|
|
from langchain_community.embeddings import HuggingFaceEmbeddings |
|
|
from langchain_community.vectorstores import FAISS |
|
|
|
|
|
class QueryEngine:
    """Semantic search over a FAISS index persisted on disk.

    On construction the engine reads a YAML configuration file, builds a
    HuggingFace embedding model on CPU and loads the FAISS vector store
    from ``faiss_path``. Queries are then served by
    :meth:`search_by_embedding`.
    """

    def __init__(
        self,
        config_path: str = 'config.yaml',
        faiss_path: str = '/home/user/app/faiss_index',
    ) -> None:
        """Load the configuration, the embedding model and the FAISS index.

        Args:
            config_path: YAML file to read settings from. Only the
                ``embedding_model`` key is consulted; when absent,
                ``sentence-transformers/all-MiniLM-L6-v2`` is used.
            faiss_path: Location of the persisted FAISS index.

        Raises:
            FileNotFoundError: If ``faiss_path`` does not exist, or if
                ``config_path`` cannot be opened.
            TypeError: If the YAML document is not a mapping.
        """
        # Check the index first: it is a cheap test, and it avoids
        # instantiating the embedding model when the engine cannot start.
        if not Path(faiss_path).exists():
            raise FileNotFoundError(f"FAISS index não encontrado em {faiss_path}")

        with open(config_path, encoding='utf-8') as f:
            # safe_load returns None for an empty document; fall back to an
            # empty mapping so the defaults below still apply.
            cfg = yaml.safe_load(f) or {}
        if not isinstance(cfg, dict):
            raise TypeError(
                f"{config_path} must contain a YAML mapping, "
                f"got {type(cfg).__name__}"
            )

        self.embeddings = HuggingFaceEmbeddings(
            model_name=cfg.get(
                'embedding_model',
                'sentence-transformers/all-MiniLM-L6-v2',
            ),
            model_kwargs={'device': 'cpu'},
        )

        # SECURITY NOTE(review): allow_dangerous_deserialization=True lets the
        # loader unpickle data stored with the index. Only point faiss_path at
        # index files produced by a trusted process.
        self.vectorstore = FAISS.load_local(
            str(faiss_path),
            self.embeddings,
            allow_dangerous_deserialization=True,
        )

    def search_by_embedding(self, query: str, top_k: int = 10) -> dict:
        """Return the documents most similar to ``query``.

        Args:
            query: Text forwarded to the vector store's
                ``similarity_search_with_score``.
            top_k: Number of results requested from the vector store
                (passed through as ``k``).

        Returns:
            A dict with the keys ``'query'`` (the input text), ``'total'``
            (number of hits returned) and ``'results'``: one dict per hit
            holding ``'id'`` (``doc.metadata['id']`` or ``None`` when the
            metadata has no such key), ``'ementa'`` (the document's
            ``page_content``) and ``'score'`` (the store's score as a
            plain ``float``).
        """
        # NOTE(review): the meaning of the score (distance vs. similarity)
        # depends on how the index was built; confirm before ranking on it.
        results = self.vectorstore.similarity_search_with_score(query, k=top_k)

        return {
            'query': query,
            'total': len(results),
            'results': [
                {
                    'id': doc.metadata.get('id'),
                    'ementa': doc.page_content,
                    # Cast so the value is a built-in float regardless of
                    # the numeric type the store returns.
                    'score': float(score),
                }
                for doc, score in results
            ],
        }
|
|
|