Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| # -*- coding: utf-8 -*- | |
| """ | |
| Query Engine para beeRoot | |
| Gerencia buscas vetoriais e por keyword | |
| """ | |
| import os | |
| from typing import List, Dict, Optional | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_community.embeddings import HuggingFaceEmbeddings | |
| import logging | |
| logger = logging.getLogger(__name__) | |
class QueryEngine:
    """Vector search engine backed by a FAISS index stored on disk.

    On construction the engine tries to load the index found at
    ``faiss_path`` together with the embedding model used to encode queries.
    A load failure is logged instead of raised: the instance stays usable,
    ``get_status()`` reports ``faiss_loaded`` as ``False`` and the search
    methods raise ``RuntimeError``.
    """

    def __init__(self, faiss_path: str = "/home/user/app/faiss_index"):
        """Remember the index location and attempt to load it immediately.

        Args:
            faiss_path: Location of the saved FAISS index on disk.
        """
        self.faiss_path = faiss_path
        self.vectorstore = None
        self.embeddings = None
        self._load()

    def _load(self) -> None:
        """Load the embedding model and the FAISS index from ``faiss_path``.

        Never raises. If the path does not exist a warning is logged and the
        current state is left untouched. Any other failure is logged with its
        traceback and both ``vectorstore`` and ``embeddings`` are cleared so
        the engine is never left half-initialised.
        """
        try:
            if not os.path.exists(self.faiss_path):
                logger.warning(f"FAISS path não existe: {self.faiss_path}")
                return

            # Build into locals first so the attributes are only updated
            # once both the model and the index loaded successfully.
            embeddings = HuggingFaceEmbeddings(
                model_name="sentence-transformers/all-MiniLM-L6-v2",
                model_kwargs={'device': 'cpu'},
            )

            # SECURITY NOTE(review): allow_dangerous_deserialization=True
            # opts in to pickle-based loading, which can execute arbitrary
            # code. Only point faiss_path at index files this application
            # produced itself or that come from a trusted source.
            vectorstore = FAISS.load_local(
                self.faiss_path,
                embeddings,
                allow_dangerous_deserialization=True,
            )
        except Exception as e:
            # Deliberate best-effort boundary: a broken index must not crash
            # the application. exc_info keeps the traceback in the logs.
            logger.error(f"❌ Erro ao carregar FAISS: {e}", exc_info=True)
            self.vectorstore = None
            self.embeddings = None
        else:
            self.embeddings = embeddings
            self.vectorstore = vectorstore
            logger.info(f"✅ FAISS carregado: {self.faiss_path}")

    def search_by_embedding(self, query: str, top_k: int = 10) -> List[Dict]:
        """Run a vector similarity search for ``query``.

        Args:
            query: Free-text query to embed and search for.
            top_k: Maximum number of results to return.

        Returns:
            A list of dicts with the keys ``id`` (taken from the document
            metadata, ``"unknown"`` when absent), ``ementa`` (the document
            text), ``score`` (the value reported by the vector store, as a
            float) and ``metadata``. The list is empty when ``query`` is
            blank or ``top_k`` is not positive.

        Raises:
            RuntimeError: If the FAISS index is not loaded.
        """
        if self.vectorstore is None:
            raise RuntimeError("FAISS não carregado")

        # Nothing meaningful to search for: skip the index call entirely.
        if top_k <= 0 or not query or not query.strip():
            return []

        hits = self.vectorstore.similarity_search_with_score(query, k=top_k)

        # NOTE(review): the score is passed through unchanged; for LangChain's
        # FAISS wrapper it is presumably a distance (lower = closer) - confirm
        # against the index's distance strategy before ranking on it.
        return [
            {
                "id": doc.metadata.get("id", "unknown"),
                "ementa": doc.page_content,
                "score": float(score),
                "metadata": doc.metadata,
            }
            for doc, score in hits
        ]

    def search_by_keyword(self, query: str, top_k: int = 10) -> List[Dict]:
        """Keyword search fallback.

        Currently a placeholder that delegates to ``search_by_embedding``;
        a real textual search is still to be implemented.

        Args:
            query: Free-text query.
            top_k: Maximum number of results to return.

        Returns:
            Same structure as ``search_by_embedding``.

        Raises:
            RuntimeError: If the FAISS index is not loaded.
        """
        return self.search_by_embedding(query, top_k)

    def get_status(self) -> Dict:
        """Return a snapshot of the engine state.

        Returns:
            A dict with ``faiss_loaded`` (bool), ``faiss_path`` (str) and
            ``num_documents`` (int, ``0`` when no index is loaded).
        """
        return {
            "faiss_loaded": self.vectorstore is not None,
            "faiss_path": self.faiss_path,
            "num_documents": self._count_documents(),
        }

    def _count_documents(self) -> int:
        """Count the documents in the loaded index without ever raising."""
        store = self.vectorstore
        if store is None:
            return 0

        # `_dict` is a private attribute of the docstore, so it may be
        # missing for other docstore implementations; do not crash a status
        # call because of it.
        entries = getattr(getattr(store, "docstore", None), "_dict", None)
        if entries is not None:
            return len(entries)

        # Fall back to the vector count reported by the index, if any.
        index = getattr(store, "index", None)
        return int(getattr(index, "ntotal", 0))