beeROOT_instancia_7 / query_engine.py
Carlex22's picture
Upload 12 files
83d664c verified
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Query Engine para beeRoot
Gerencia buscas vetoriais e por keyword
"""
import os
from typing import List, Dict, Optional
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
import logging
logger = logging.getLogger(__name__)
class QueryEngine:
    """Thin wrapper around a LangChain FAISS vector store.

    On construction the engine tries to load a FAISS index from disk.
    Loading is best-effort: if the path is missing or loading fails, the
    problem is logged and ``vectorstore`` stays ``None``; the search
    methods then raise ``RuntimeError`` and ``get_status`` reports
    ``faiss_loaded = False``.
    """

    def __init__(self, faiss_path: str = "/home/user/app/faiss_index"):
        """Store the index location and attempt to load it immediately.

        Args:
            faiss_path: Location of the saved FAISS index that is handed
                to ``FAISS.load_local``.
        """
        self.faiss_path = faiss_path
        # Both stay None until _load() succeeds.
        self.vectorstore = None
        self.embeddings = None
        self._load()

    def _load(self):
        """Load the embedding model and the FAISS index (best-effort).

        Never raises: a missing path is logged as a warning, any other
        failure is logged with its traceback, and in both cases
        ``self.vectorstore`` is left as ``None``.
        """
        if not os.path.exists(self.faiss_path):
            logger.warning(f"FAISS path não existe: {self.faiss_path}")
            return

        try:
            # The same embedding model must be used for querying as was
            # used to build the index, hence it is created here.
            self.embeddings = HuggingFaceEmbeddings(
                model_name="sentence-transformers/all-MiniLM-L6-v2",
                model_kwargs={'device': 'cpu'}
            )
            # SECURITY: allow_dangerous_deserialization=True lets
            # load_local unpickle the stored docstore. That executes
            # arbitrary code if the index files are attacker-controlled,
            # so faiss_path must only ever point at trusted data.
            self.vectorstore = FAISS.load_local(
                self.faiss_path,
                self.embeddings,
                allow_dangerous_deserialization=True
            )
        except Exception as e:
            # Deliberate catch-all: the engine must still be constructible
            # so callers can query get_status(). Keep the traceback.
            logger.exception(f"❌ Erro ao carregar FAISS: {e}")
            self.vectorstore = None
        else:
            logger.info(f"✅ FAISS carregado: {self.faiss_path}")

    def search_by_embedding(self, query: str, top_k: int = 10) -> List[Dict]:
        """Run a vector similarity search.

        Args:
            query: Free-text query; it is passed to the vector store,
                which embeds it.
            top_k: Maximum number of hits to return. Values below 1
                yield an empty list without touching the index.

        Returns:
            One dict per hit with the keys ``id`` (taken from the
            document metadata, ``"unknown"`` when absent), ``ementa``
            (the document text), ``score`` (the float the store
            reported; NOTE(review): for FAISS this is presumably a
            distance where lower means closer - confirm against the
            index configuration) and ``metadata``.

        Raises:
            RuntimeError: If the FAISS index was not loaded.
        """
        # Explicit None check: do not depend on the truthiness of a
        # third-party object to decide whether the index is loaded.
        if self.vectorstore is None:
            raise RuntimeError("FAISS não carregado")

        if top_k < 1:
            return []

        hits = self.vectorstore.similarity_search_with_score(query, k=top_k)
        return [
            {
                "id": doc.metadata.get("id", "unknown"),
                "ementa": doc.page_content,
                "score": float(score),
                "metadata": doc.metadata,
            }
            for doc, score in hits
        ]

    def search_by_keyword(self, query: str, top_k: int = 10) -> List[Dict]:
        """Keyword search (fallback).

        Currently a placeholder that delegates to
        ``search_by_embedding``; a real textual search is still to be
        implemented. Arguments, return value and the ``RuntimeError``
        raised when the index is not loaded are those of
        ``search_by_embedding``.
        """
        return self.search_by_embedding(query, top_k)

    def get_status(self) -> Dict:
        """Report whether the index is loaded, its path and its size.

        Returns:
            Dict with ``faiss_loaded`` (bool), ``faiss_path`` (str) and
            ``num_documents`` (int, 0 when nothing is loaded).
        """
        return {
            "faiss_loaded": self.vectorstore is not None,
            "faiss_path": self.faiss_path,
            "num_documents": self._count_documents(),
        }

    def _count_documents(self) -> int:
        """Return the number of stored documents without ever raising."""
        store = self.vectorstore
        if store is None:
            return 0
        # Primary source: the in-memory docstore's backing dict. It is a
        # private attribute, so tolerate its absence instead of crashing
        # a status call.
        entries = getattr(getattr(store, "docstore", None), "_dict", None)
        if entries is not None:
            return len(entries)
        # Fallback: the vector count exposed by the index object, if any.
        return int(getattr(getattr(store, "index", None), "ntotal", 0))