#!/usr/bin/env python3
"""
Query Engine - semantic search using FAISS.

REPLACE with your real implementation.
"""

import json
from pathlib import Path

import faiss
import numpy as np
from sentence_transformers import SentenceTransformer


class QueryEngine:
    """Semantic search over a FAISS index backed by JSONL metadata.

    On construction the engine loads a SentenceTransformer model, reads the
    FAISS index stored at ``<faiss_index_path>/index.faiss`` and loads one
    metadata record per non-blank line of the JSONL file.

    NOTE(review): the code looks up metadata by FAISS label, so it presumably
    expects record N of the JSONL file to describe vector N of the index --
    confirm against whatever builds the index.
    """

    def __init__(
        self,
        faiss_index_path="/home/user/app/faiss_index",
        jsonl_path="/tmp/work/all_filtered.jsonl",
        model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
    ):
        """Load the embedding model, the FAISS index and the metadata.

        Args:
            faiss_index_path: Directory that contains ``index.faiss``.
            jsonl_path: JSONL file with one metadata object per line. If the
                file does not exist, the metadata list is left empty and no
                error is raised.
            model_name: Model identifier handed to ``SentenceTransformer``.

        Raises:
            FileNotFoundError: If ``index.faiss`` is missing from
                ``faiss_index_path``.
        """
        print("🔧 Carregando Query Engine...")

        # Load the embedding model.
        self.model = SentenceTransformer(model_name)
        print(f" ✅ Modelo carregado: {model_name}")

        # Load the FAISS index; fail early with a clear message if it is absent.
        index_file = Path(faiss_index_path) / "index.faiss"
        if not index_file.exists():
            raise FileNotFoundError(f"Índice não encontrado: {index_file}")

        self.index = faiss.read_index(str(index_file))
        print(f" ✅ FAISS index carregado: {self.index.ntotal} vetores")

        # Load metadata, skipping blank lines. A missing file is tolerated:
        # self.metadata then stays empty and every search yields no results.
        self.metadata = []
        jsonl_file = Path(jsonl_path)
        if jsonl_file.exists():
            with open(jsonl_file, 'r', encoding='utf-8') as f:
                for line in f:
                    if line.strip():
                        self.metadata.append(json.loads(line))
            print(f" ✅ {len(self.metadata)} documentos carregados")

    def search_by_embedding(self, query: str, top_k: int = 10):
        """Search by embedding similarity.

        Args:
            query: Text to embed and look up in the index.
            top_k: Maximum number of neighbours to request from FAISS.

        Returns:
            A list of dicts, each a shallow copy of a metadata record with two
            extra keys: ``score`` (``1 / (1 + distance)``) and ``rank``
            (1-based position of the hit in the FAISS result list). Hits whose
            label has no matching metadata record are skipped, so ranks may
            have gaps. A non-positive ``top_k`` yields an empty list.
        """
        # Nothing to fetch; avoid handing a non-positive k to FAISS.
        if top_k <= 0:
            return []

        # Embed the query and shape it as a (1, dim) float32 batch for FAISS.
        query_embedding = self.model.encode([query])[0]
        query_embedding = np.array([query_embedding], dtype=np.float32)

        # Query the FAISS index.
        distances, indices = self.index.search(query_embedding, top_k)

        results = []
        for i, (dist, idx) in enumerate(zip(distances[0], indices[0])):
            idx = int(idx)
            # FAISS pads missing neighbours with label -1. Without the lower
            # bound, -1 would index the LAST metadata record and return a
            # bogus hit.
            if 0 <= idx < len(self.metadata):
                result = self.metadata[idx].copy()
                # Convert distance into a score.
                # NOTE(review): assumes non-negative distances (e.g. L2); the
                # index metric is not visible here -- confirm.
                result['score'] = float(1 / (1 + dist))
                result['rank'] = i + 1
                results.append(result)

        return results