File size: 2,716 Bytes
83d664c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Query Engine para beeRoot
Gerencia buscas vetoriais e por keyword
"""

import os
from typing import List, Dict, Optional
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
import logging

logger = logging.getLogger(__name__)

class QueryEngine:
    """Query engine for beeRoot: vector and keyword search over a FAISS index.

    The index and embedding model are loaded eagerly at construction time.
    If loading fails, the engine stays usable but unloaded
    (``self.vectorstore is None``) and the search methods raise RuntimeError.
    """

    def __init__(self, faiss_path: str = "/home/user/app/faiss_index"):
        """Initialize the engine and attempt to load the persisted index.

        Args:
            faiss_path: Directory containing the FAISS index saved by
                LangChain's ``FAISS.save_local``.
        """
        self.faiss_path = faiss_path
        # Populated by _load(); both stay None on any load failure.
        self.vectorstore = None
        self.embeddings = None
        self._load()

    def _load(self):
        """Load the embedding model and the FAISS index from disk.

        Never raises: any failure is logged and leaves ``self.vectorstore``
        as None so callers can check ``get_status()``.
        """
        try:
            if not os.path.exists(self.faiss_path):
                logger.warning(f"FAISS path não existe: {self.faiss_path}")
                return

            # Embedding model, pinned to CPU so no GPU is required.
            self.embeddings = HuggingFaceEmbeddings(
                model_name="sentence-transformers/all-MiniLM-L6-v2",
                model_kwargs={'device': 'cpu'}
            )

            # allow_dangerous_deserialization: the index file is pickled by
            # LangChain; acceptable only because we produced it ourselves.
            self.vectorstore = FAISS.load_local(
                self.faiss_path,
                self.embeddings,
                allow_dangerous_deserialization=True
            )

            logger.info(f"✅ FAISS carregado: {self.faiss_path}")

        except Exception as e:
            logger.error(f"❌ Erro ao carregar FAISS: {e}")
            self.vectorstore = None

    def search_by_embedding(self, query: str, top_k: int = 10) -> List[Dict]:
        """Vector similarity search.

        Args:
            query: Free-text query to embed and match.
            top_k: Maximum number of results to return.

        Returns:
            List of dicts with keys ``id``, ``ementa`` (document text),
            ``score`` (float distance/similarity from FAISS) and ``metadata``.

        Raises:
            RuntimeError: If the FAISS index is not loaded.
        """
        # `is None` rather than truthiness: a loaded-but-empty vectorstore
        # must not be mistaken for "not loaded".
        if self.vectorstore is None:
            raise RuntimeError("FAISS não carregado")

        docs_with_scores = self.vectorstore.similarity_search_with_score(
            query, k=top_k
        )

        return [
            {
                "id": doc.metadata.get("id", "unknown"),
                "ementa": doc.page_content,
                "score": float(score),
                "metadata": doc.metadata,
            }
            for doc, score in docs_with_scores
        ]

    def search_by_keyword(self, query: str, top_k: int = 10) -> List[Dict]:
        """Keyword search (fallback).

        Currently delegates to the vector search for simplicity.
        TODO: implement real textual search in production.

        Raises:
            RuntimeError: If the FAISS index is not loaded.
        """
        if self.vectorstore is None:
            raise RuntimeError("FAISS não carregado")

        return self.search_by_embedding(query, top_k)

    def get_status(self) -> Dict:
        """Report engine status.

        Returns:
            Dict with ``faiss_loaded`` (bool), ``faiss_path`` (str) and
            ``num_documents`` (int; 0 when the index is not loaded).
        """
        num_documents = 0
        if self.vectorstore is not None:
            # Prefer the public FAISS vector count (index.ntotal) over the
            # private docstore internals; keep the old private-attribute
            # lookup as a fallback for older wrapper versions.
            index = getattr(self.vectorstore, "index", None)
            ntotal = getattr(index, "ntotal", None)
            if ntotal is not None:
                num_documents = ntotal
            else:
                num_documents = len(self.vectorstore.docstore._dict)

        return {
            "faiss_loaded": self.vectorstore is not None,
            "faiss_path": self.faiss_path,
            "num_documents": num_documents,
        }