Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| Query Engine - Busca semântica usando FAISS | |
| SUBSTITUA por sua implementação real | |
| """ | |
| import faiss | |
| import json | |
| import numpy as np | |
| from pathlib import Path | |
| from sentence_transformers import SentenceTransformer | |
class QueryEngine:
    """Semantic search over a FAISS index using sentence-transformer embeddings.

    Instances expose three attributes:

    * ``model``    -- the ``SentenceTransformer`` used to embed queries.
    * ``index``    -- the FAISS index read from ``<faiss_index_path>/index.faiss``.
    * ``metadata`` -- list with one parsed JSON value per non-blank line of
      the JSONL file; entries are looked up by the ids FAISS returns.
    """

    def __init__(
        self,
        faiss_index_path="/home/user/app/faiss_index",
        jsonl_path="/tmp/work/all_filtered.jsonl",
        model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
    ):
        """Load the embedding model, the FAISS index and the document metadata.

        Args:
            faiss_index_path: Directory that contains ``index.faiss``.
            jsonl_path: JSONL file with one document per line. If the file
                does not exist, ``metadata`` stays empty and no error is
                raised.
            model_name: Name passed to ``SentenceTransformer``.

        Raises:
            FileNotFoundError: If ``index.faiss`` is missing from
                ``faiss_index_path``.
        """
        print("🔧 Carregando Query Engine...")

        # Check for the index before loading the model so a missing index
        # fails immediately instead of after the model load.
        index_file = Path(faiss_index_path) / "index.faiss"
        if not index_file.exists():
            raise FileNotFoundError(f"Índice não encontrado: {index_file}")

        # Embedding model used to encode queries.
        self.model = SentenceTransformer(model_name)
        print(f" ✅ Modelo carregado: {model_name}")

        # FAISS index.
        self.index = faiss.read_index(str(index_file))
        print(f" ✅ FAISS index carregado: {self.index.ntotal} vetores")

        # Document metadata: one JSON value per non-blank line.
        # NOTE(review): entries are addressed by FAISS vector id, so this
        # presumably relies on the JSONL order matching the order in which
        # vectors were added to the index -- confirm against the index builder.
        self.metadata = []
        jsonl_file = Path(jsonl_path)
        if jsonl_file.exists():
            with open(jsonl_file, 'r', encoding='utf-8') as f:
                self.metadata = [json.loads(line) for line in f if line.strip()]
        # Always report the count so an absent metadata file shows up as
        # "0 documentos" rather than passing silently.
        print(f" ✅ {len(self.metadata)} documentos carregados")

    def search_by_embedding(self, query: str, top_k: int = 10):
        """Return the documents whose embeddings are closest to ``query``.

        Args:
            query: Text to embed and search for.
            top_k: Maximum number of neighbours requested from FAISS. Values
                less than or equal to zero yield an empty list.

        Returns:
            A list of shallow copies of the matching metadata entries, in the
            order FAISS returned them, each extended with:

            * ``score`` -- ``1 / (1 + distance)`` as a Python float.
            * ``rank``  -- 1-based position of the hit in the FAISS result
              (positions skipped for invalid ids leave gaps).
        """
        if top_k <= 0:
            return []

        # Embed the query and shape it as a (1, dim) float32 matrix for FAISS.
        query_embedding = self.model.encode([query])[0]
        query_embedding = np.array([query_embedding], dtype=np.float32)

        distances, indices = self.index.search(query_embedding, top_k)

        n_docs = len(self.metadata)
        results = []
        for rank, (dist, idx) in enumerate(zip(distances[0], indices[0]), start=1):
            # FAISS pads missing neighbours with id -1; without the lower
            # bound, metadata[-1] would return the last document as a hit.
            if not 0 <= idx < n_docs:
                continue
            result = self.metadata[idx].copy()
            # NOTE(review): this mapping assumes a non-negative distance
            # (e.g. L2); the index metric is not visible here -- confirm.
            result['score'] = float(1 / (1 + dist))
            result['rank'] = rank
            results.append(result)
        return results