#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Query engine for beeRoot.

Handles vector (embedding) searches and keyword searches over a FAISS index.
"""

import logging
import os
from typing import List, Dict, Optional

from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings

logger = logging.getLogger(__name__)

# Defaults preserved from the original hard-coded values.
DEFAULT_FAISS_PATH = "/home/user/app/faiss_index"
DEFAULT_EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
DEFAULT_DEVICE = "cpu"


class QueryEngine:
    """Thin wrapper around a locally persisted FAISS vectorstore.

    The index is loaded eagerly in ``__init__``. Loading is best-effort: if
    the path does not exist or loading fails, the engine is still constructed
    but ``vectorstore`` stays ``None`` and the search methods raise
    ``RuntimeError``. Use ``get_status()`` to check whether the index loaded.
    """

    def __init__(
        self,
        faiss_path: str = DEFAULT_FAISS_PATH,
        *,
        model_name: str = DEFAULT_EMBEDDING_MODEL,
        device: str = DEFAULT_DEVICE,
    ):
        """Create the engine and try to load the index.

        Args:
            faiss_path: Directory containing the saved FAISS index.
            model_name: HuggingFace embedding model used to embed queries.
                Presumably must match the model used to build the index --
                verify against the indexing code.
            device: Device passed to the embedding model (e.g. ``"cpu"``).
        """
        self.faiss_path = faiss_path
        self.model_name = model_name
        self.device = device
        self.vectorstore: Optional[FAISS] = None
        self.embeddings: Optional[HuggingFaceEmbeddings] = None
        self._load()

    def _load(self) -> None:
        """Load the embedding model and the FAISS index (best-effort).

        Never raises: a missing path is logged as a warning, and any other
        failure is logged with its traceback. In both cases ``vectorstore``
        is left as ``None``.
        """
        if not os.path.exists(self.faiss_path):
            logger.warning("FAISS path não existe: %s", self.faiss_path)
            return

        try:
            embeddings = HuggingFaceEmbeddings(
                model_name=self.model_name,
                model_kwargs={'device': self.device},
            )
            # SECURITY: allow_dangerous_deserialization=True makes load_local
            # unpickle the stored docstore. Only point faiss_path at index
            # files produced by a trusted source.
            vectorstore = FAISS.load_local(
                self.faiss_path,
                embeddings,
                allow_dangerous_deserialization=True,
            )
        except Exception as e:
            # Deliberate catch-all: a broken index must not prevent the
            # application from starting. Keep the state consistent (both
            # attributes unset) and record the traceback for diagnosis.
            logger.exception("❌ Erro ao carregar FAISS: %s", e)
            self.embeddings = None
            self.vectorstore = None
        else:
            self.embeddings = embeddings
            self.vectorstore = vectorstore
            logger.info("✅ FAISS carregado: %s", self.faiss_path)

    def _require_vectorstore(self) -> FAISS:
        """Return the loaded vectorstore or raise ``RuntimeError``."""
        if self.vectorstore is None:
            raise RuntimeError("FAISS não carregado")
        return self.vectorstore

    def search_by_embedding(self, query: str, top_k: int = 10) -> List[Dict]:
        """Run a vector similarity search.

        Args:
            query: Free-text query; it is embedded by the vectorstore.
            top_k: Maximum number of results. Non-positive values yield an
                empty list instead of being forwarded to FAISS.

        Returns:
            One dict per hit with keys ``id`` (``metadata["id"]`` or
            ``"unknown"``), ``ementa`` (the document text), ``score`` (the
            value returned by the vectorstore, as ``float``) and
            ``metadata``.
            NOTE(review): for FAISS the score is typically a distance
            (lower = closer) -- confirm against the index configuration.

        Raises:
            RuntimeError: If the FAISS index is not loaded.
        """
        vectorstore = self._require_vectorstore()
        if top_k <= 0:
            return []

        docs_with_scores = vectorstore.similarity_search_with_score(
            query,
            k=top_k,
        )
        return [
            {
                "id": doc.metadata.get("id", "unknown"),
                "ementa": doc.page_content,
                "score": float(score),
                "metadata": doc.metadata,
            }
            for doc, score in docs_with_scores
        ]

    def search_by_keyword(self, query: str, top_k: int = 10) -> List[Dict]:
        """Keyword search (fallback).

        For simplicity this currently delegates to the embedding search; a
        real textual search should be implemented for production use.

        Raises:
            RuntimeError: If the FAISS index is not loaded.
        """
        self._require_vectorstore()
        return self.search_by_embedding(query, top_k)

    def _count_documents(self) -> int:
        """Return the number of indexed documents, or 0 when not loaded."""
        if self.vectorstore is None:
            return 0
        # Prefer the docstore's backing dict (the original behaviour), but do
        # not crash if the docstore implementation lacks that private field.
        docstore = getattr(self.vectorstore, "docstore", None)
        stored = getattr(docstore, "_dict", None)
        if stored is not None:
            return len(stored)
        # Fallback: number of vectors held by the underlying FAISS index.
        index = getattr(self.vectorstore, "index", None)
        return int(getattr(index, "ntotal", 0))

    def get_status(self) -> Dict:
        """Report engine status.

        Returns:
            Dict with ``faiss_loaded`` (bool), ``faiss_path`` (str) and
            ``num_documents`` (int, 0 when the index is not loaded).
        """
        return {
            "faiss_loaded": self.vectorstore is not None,
            "faiss_path": self.faiss_path,
            "num_documents": self._count_documents(),
        }