Spaces:

ICA-PUC
/

beta-NORM

Sleeping

File size: 22,512 Bytes

6f54a86

import html
import os
import sys

# Put the parent of this file's directory on sys.path before importing
# project-local modules such as ``utils``.
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if ROOT not in sys.path:
    sys.path.insert(0, ROOT)

import json
import re

import requests
import streamlit as st

from utils import base_utils as bu

# Settings are read once at import time. The query endpoint comes from the
# "ui" section and falls back to a localhost URL when it is not configured.
CONFIG = bu.load_config("configs/config.json")
_ui_settings = CONFIG.get("ui", {})
API_URL = _ui_settings.get("api_url", "http://127.0.0.1:8000/query")


def chamar_api(pergunta: str, mode: str, top_k: int, temperatura: float | None = None):
    """Send a question to the query API and return its answer and fragments.

    Args:
        pergunta: Question text, sent as the ``question`` field.
        mode: Sent unchanged as the ``mode`` field.
        top_k: Sent unchanged as the ``top_k`` field.
        temperatura: Optional value sent as ``temperature``; the field is
            omitted from the request when this is ``None``.

    Returns:
        A tuple ``(answer, retrieved)``. ``retrieved`` is an empty list when
        the response body has no such field.

    Raises:
        requests.HTTPError: If the API replies with an error status.
        KeyError: If the response body has no ``answer`` field.
    """
    corpo = {"question": pergunta, "top_k": top_k, "mode": mode}
    if temperatura is not None:
        corpo["temperature"] = temperatura

    resposta_http = requests.post(API_URL, json=corpo, timeout=60)
    resposta_http.raise_for_status()

    conteudo = resposta_http.json()
    resposta = conteudo["answer"]
    fragmentos = conteudo.get("retrieved", [])
    return resposta, fragmentos


def formatar_referencias(fragmentos):
    """Build a numbered reference line keyed by each fragment's ``citation_id``.

    Args:
        fragmentos: Iterable of dicts. ``citation_id`` and ``document_title``
            are read from each one; fragments whose ``citation_id`` is missing
            or ``None`` are ignored.

    Returns:
        A string such as ``"[1] Title A | [2] Title B"`` ordered by citation
        id, or ``""`` when no fragment carries a citation id. When several
        fragments share an id, the last one wins.
    """
    refs_por_id = {}
    for fragmento in fragmentos:
        cit_id = fragmento.get("citation_id")
        if cit_id is None:
            continue

        # Drop inline citation markers such as "[3]" embedded in the title.
        titulo = re.sub(r"\[\d+\]", "", fragmento.get("document_title") or "").strip()
        titulo = titulo.replace("_", " ").replace("-", " ")

        # A title made only of markers, separators or whitespace would
        # otherwise yield a blank entry like "[1] ", so use the generic label.
        if not titulo.strip():
            titulo = "Documento"

        refs_por_id[cit_id] = titulo

    return " | ".join(
        f"[{cit_id}] {refs_por_id[cit_id]}" for cit_id in sorted(refs_por_id)
    )


def listar_documentos_unicos(fragmentos):
    """Return the sorted, de-duplicated ``document_id`` values of *fragmentos*.

    Fragments whose ``document_id`` is missing or ``None`` are skipped instead
    of raising ``KeyError``, matching how the callers in this file read the
    same field with ``.get("document_id")``.

    Args:
        fragmentos: Iterable of dicts that may carry a ``document_id`` key.

    Returns:
        A sorted list of the distinct document ids.
    """
    ids = {fragmento.get("document_id") for fragmento in fragmentos}
    ids.discard(None)
    return sorted(ids)


def obter_lista_documentos():
    """Fetch the documents indexed by the backend.

    Returns:
        The ``documents`` field of the JSON response (``[]`` when the field is
        absent), or ``None`` when the request fails for any reason. Failures
        are reported in the UI through ``st.error``.
    """
    documentos = None
    try:
        # The listing endpoint is derived from the query URL; adjust this if
        # the API exposes it under a different path.
        endpoint = API_URL.replace("/query", "/list_documents")
        resposta = requests.get(endpoint, timeout=30)
        resposta.raise_for_status()
        documentos = resposta.json().get("documents", [])
    except Exception as e:
        st.error(f"Erro ao obter lista de documentos: {e}")
    return documentos


# Page setup: wide layout, sidebar collapsed on first load.
st.set_page_config(
    layout="wide",
    initial_sidebar_state="collapsed",
    page_title="Chatbot NORM - Sistema de Consulta",
    page_icon="🤖",
)

# Custom stylesheet for the page. It is kept in a named constant so the
# injection call below stays short; the markup is passed to st.markdown as raw
# HTML.
_ESTILO_CSS = """
    <style>
    /* Estilo geral */
    .main {
        padding: 2rem;
    }
    
    /* Título principal */
    .title-container {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        padding: 2rem;
        border-radius: 15px;
        margin-bottom: 2rem;
        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
    }
    
    .title-text {
        color: white;
        font-size: 2.5rem;
        font-weight: bold;
        margin: 0;
        text-align: center;
    }
    
    .subtitle-text {
        color: rgba(255, 255, 255, 0.9);
        font-size: 1.1rem;
        margin-top: 0.5rem;
        text-align: center;
    }
    
    /* Cards de conteúdo */
    .content-card {
        background: white;
        padding: 1.5rem;
        border-radius: 10px;
        border: 1px solid #e0e0e0;
        margin-bottom: 1rem;
        box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);
    }
    
    /* Botões personalizados */
    .stButton > button {
        width: 100%;
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        border: none;
        padding: 0.75rem 1.5rem;
        border-radius: 8px;
        font-weight: bold;
        transition: all 0.3s ease;
    }
    
    .stButton > button:hover {
        transform: translateY(-2px);
        box-shadow: 0 4px 12px rgba(102, 126, 234, 0.4);
    }
    
    /* Tabs personalizadas */
    .stTabs [data-baseweb="tab-list"] {
        gap: 2rem;
        background-color: #f8f9fa;
        padding: 1rem;
        border-radius: 10px;
    }
    
    .stTabs [data-baseweb="tab"] {
        padding: 1rem 2rem;
        background-color: white;
        border-radius: 8px;
        font-weight: 600;
    }
    
    .stTabs [aria-selected="true"] {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
    }
    
    /* Expander personalizado */
    .streamlit-expanderHeader {
        background-color: #f8f9fa;
        border-radius: 8px;
        font-weight: 600;
    }
    
    /* Cards de referência */
    .reference-card {
        background: #f8f9fa;
        padding: 1rem;
        border-left: 4px solid #667eea;
        border-radius: 5px;
        margin: 0.5rem 0;
    }
    
    /* Badges */
    .badge {
        display: inline-block;
        padding: 0.25rem 0.75rem;
        border-radius: 12px;
        font-size: 0.85rem;
        font-weight: 600;
        margin: 0.25rem;
    }
    
    .badge-primary {
        background-color: #667eea;
        color: white;
    }
    
    .badge-success {
        background-color: #4ade80;
        color: white;
    }
    
    /* Animação de loading */
    @keyframes pulse {
        0%, 100% { opacity: 1; }
        50% { opacity: 0.5; }
    }
    
    .loading {
        animation: pulse 1.5s ease-in-out infinite;
    }
    </style>
"""
st.markdown(_ESTILO_CSS, unsafe_allow_html=True)

# Header banner, rendered as raw HTML so it can use the title-container styles.
_CABECALHO_HTML = """
    <div class="title-container">
        <h1 class="title-text">🤖 Chatbot NORM - Sistema Inteligente de Consulta</h1>
        <p class="subtitle-text">Sistema Inteligente de Consulta e Resumo de Documentos</p>
    </div>
"""
st.markdown(_CABECALHO_HTML, unsafe_allow_html=True)

# Main tabs: document summaries first, interactive chat second.
_ROTULOS_ABAS = ["📄 Resumos de Documentos", "💬 Chatbot Interativo"]
aba_resumos, aba_chat = st.tabs(_ROTULOS_ABAS)

# ========================
# SUMMARIES TAB
# ========================
# Lets the user type (or pick from the document list) a summary request, sends
# it to the API in "summary" mode and shows the answer with its sources.
with aba_resumos:

    col1, col2 = st.columns([2, 1])

    with col1:
        st.markdown("### 📋 Gerador de Resumos")
        st.markdown("""
            <div class="content-card">
                <p style='color: #666; margin-bottom: 1rem;'>
                    Esta ferramenta gera resumos inteligentes a partir dos documentos indexados.
                    O sistema utiliza RAG (Retrieval-Augmented Generation) para buscar os trechos
                    mais relevantes e construir uma resposta contextualizada.
                </p>
            </div>
        """, unsafe_allow_html=True)

        # Summary prompt kept in session state so that clicking a document in
        # the list below can pre-fill it.
        if 'pergunta_doc' not in st.session_state:
            st.session_state.pergunta_doc = "Faça um resumo claro sobre o tema principal dos documentos."

        pergunta_doc = st.text_area(
            "📝 Digite sua pergunta ou solicite um resumo",
            value=st.session_state.pergunta_doc,
            height=100,
            help="Descreva o tipo de resumo que você precisa ou faça uma pergunta específica"
        )
        # Keep the stored prompt in sync with manual edits.
        st.session_state.pergunta_doc = pergunta_doc

    with col2:
        st.markdown("### ⚙️ Configurações")
        top_k_resumo = st.slider(
            "Número de trechos para análise",
            min_value=3,
            max_value=15,
            value=8,
            help="Mais trechos = resumo mais completo, mas pode levar mais tempo"
        )

        temperatura_resumo = st.slider(
            "Temperatura do modelo",
            min_value=0.0,
            max_value=1.0,
            value=0.5,
            step=0.05,
            help="Valores baixos (0.0–0.3) deixam as respostas mais determinísticas; valores altos (0.7–1.0) geram respostas mais criativas e variadas."
        )

        # Static markup with no placeholders, so a plain string is enough.
        st.markdown("""
            <div style='background: #f0f7ff; padding: 1rem; border-radius: 8px; margin-top: 1rem;'>
                <p style='margin: 0; color: #1e40af; font-size: 0.9rem;'>
                    <strong>ℹ️ Dica (trechos):</strong> Use valores menores (3–5) para resumos mais diretos
                    e valores maiores (10–15) para análises mais abrangentes.
                </p>
                <p style='margin: 0.5rem 0 0 0; color: #1e40af; font-size: 0.9rem;'>
                    <strong>🔥 Dica (temperatura):</strong> Para respostas mais consistentes, mantenha a temperatura entre 0.0 e 0.3.
                    Se quiser explorar diferentes formulações ou respostas mais criativas, aumente para 0.7–1.0.
                </p>
            </div>
        """, unsafe_allow_html=True)

    st.markdown("<br>", unsafe_allow_html=True)

    # Session defaults for the document-list toggle and its pagination.
    if 'mostrar_documentos' not in st.session_state:
        st.session_state.mostrar_documentos = False
    if 'docs_page' not in st.session_state:
        st.session_state.docs_page = 0
    if 'docs_page_size' not in st.session_state:
        st.session_state.docs_page_size = 20

    # The middle column is only a spacer between the two buttons.
    col_btn1, _, col_btn3 = st.columns([1, 1, 1])
    with col_btn1:
        gerar_resumo = st.button("🚀 Gerar Resumo", use_container_width=True)
    with col_btn3:
        if st.button("📚 Listar Documentos", use_container_width=True):
            st.session_state.mostrar_documentos = not st.session_state.mostrar_documentos

    # Hide the document list *before* it is drawn; resetting the flag after the
    # list has rendered would still show it alongside the new summary.
    if gerar_resumo:
        st.session_state.mostrar_documentos = False

    # Show or hide the document list according to the toggle state.
    if st.session_state.mostrar_documentos:
        with st.spinner("🔍 Buscando documentos..."):
            documentos = obter_lista_documentos()
            if documentos:
                total_docs = len(documentos)
                page_size = st.session_state.docs_page_size
                total_pages = max((total_docs - 1) // page_size + 1, 1)

                # Clamp the stored page index in case the list shrank.
                if st.session_state.docs_page >= total_pages:
                    st.session_state.docs_page = total_pages - 1
                if st.session_state.docs_page < 0:
                    st.session_state.docs_page = 0

                current_page = st.session_state.docs_page
                start_idx = current_page * page_size
                end_idx = min(start_idx + page_size, total_docs)
                page_docs = documentos[start_idx:end_idx]

                st.success(f"✅ Total de documentos indexados: **{total_docs}**")
                st.markdown(
                    f"Mostrando documentos {start_idx + 1}–{end_idx} de {total_docs} "
                )
                st.markdown("---")

                # Two-column grid of documents; clicking one pre-fills the
                # summary prompt with a request about that document.
                cols_per_row = 2
                for i in range(0, len(page_docs), cols_per_row):
                    cols = st.columns(cols_per_row)
                    for j, doc_info in enumerate(page_docs[i:i + cols_per_row]):
                        with cols[j]:
                            global_idx = start_idx + i + j

                            # Accept either a plain string or an {id, title} object.
                            if isinstance(doc_info, str):
                                doc_id = doc_info
                                doc_title = doc_info
                            else:
                                doc_id = doc_info.get("id") or ""
                                doc_title = doc_info.get("title") or doc_id

                            doc_title_norm = doc_title.replace("_", " ").replace("-", " ")

                            if doc_title_norm.isupper():
                                doc_title_norm = doc_title_norm.title()

                            display_name = (
                                doc_title_norm if len(doc_title_norm) <= 60 else doc_title_norm[:57] + "..."
                            )

                            if st.button(
                                f"#{global_idx+1} {display_name}",
                                key=f"doc_btn_{global_idx}",
                                use_container_width=True,
                            ):
                                st.session_state.pergunta_doc = (
                                    f"Faça um resumo claro do documento {doc_title_norm}."
                                )
                                # The text area above was already drawn with the
                                # previous prompt in this run; rerun so the new
                                # prompt shows up immediately instead of one
                                # interaction late.
                                st.rerun()

                # Pagination controls.
                col_prev, col_page_info, col_next = st.columns([1, 2, 1])
                with col_prev:
                    if st.button("⬅️ Anterior", disabled=current_page == 0):
                        st.session_state.docs_page = max(current_page - 1, 0)
                        st.rerun()
                with col_page_info:
                    st.markdown(
                        f"<div style='text-align:center; color:#555;'>Página <strong>{current_page+1}</strong> de <strong>{total_pages}</strong></div>",
                        unsafe_allow_html=True,
                    )
                with col_next:
                    if st.button(
                        "Próxima ➡️", disabled=current_page >= total_pages - 1
                    ):
                        st.session_state.docs_page = min(current_page + 1, total_pages - 1)
                        st.rerun()
            elif documentos is not None:
                # An empty list; a failed fetch returns None and has already
                # been reported by obter_lista_documentos.
                st.info("ℹ️ Nenhum documento encontrado no sistema.")

    if gerar_resumo:
        if not pergunta_doc.strip():
            st.warning("⚠️ Por favor, digite uma pergunta ou solicitação de resumo.")
        else:
            with st.spinner("🔍 Analisando documentos e gerando resumo..."):
                try:
                    resposta, fragmentos = chamar_api(pergunta=pergunta_doc, mode="summary", top_k=top_k_resumo, temperatura=temperatura_resumo)

                    # The answer comes from an external service and is placed
                    # inside raw HTML, so escape it to keep stray markup in the
                    # text from being interpreted by the browser.
                    resposta_html = html.escape(str(resposta), quote=False)

                    # Show the summary.
                    st.markdown("### ✨ Resumo Gerado")
                    st.markdown(f"""
                        <div class="content-card">
                            <p style='font-size: 1.05rem; line-height: 1.8; color: #333;'>
                                {resposta_html}
                            </p>
                        </div>
                    """, unsafe_allow_html=True)

                    # Download button for the (unescaped) summary as plain text.
                    st.download_button(
                        label="💾 Baixar resumo em .txt",
                        data=resposta,
                        file_name="resumo_chatbot_norm.txt",
                        mime="text/plain",
                    )

                    # Statistics and sources.
                    if fragmentos:
                        docs_unicos = listar_documentos_unicos(fragmentos)

                        # Map document_id -> readable title (falls back to the id).
                        titulos_por_doc = {}
                        for m in fragmentos:
                            doc_id = m.get("document_id")
                            if not doc_id:
                                continue
                            titulo = m.get("document_title") or doc_id
                            titulos_por_doc[doc_id] = titulo

                        col_stat1, col_stat2, col_stat3 = st.columns(3)
                        with col_stat1:
                            st.metric("📚 Documentos Consultados", len(docs_unicos))
                        with col_stat2:
                            st.metric("📄 Trechos Analisados", len(fragmentos))
                        with col_stat3:
                            st.metric("✅ Status", "Completo")

                        # Documents used (titles instead of raw ids), escaped
                        # because they are embedded in raw HTML.
                        st.markdown("### 📚 Documentos Consultados")
                        for doc_id in docs_unicos:
                            titulo = titulos_por_doc.get(doc_id, doc_id)
                            titulo_html = html.escape(str(titulo), quote=False)
                            st.markdown(
                                f"<span class='badge badge-primary'>📄 {titulo_html}</span>",
                                unsafe_allow_html=True,
                            )

                        # Numbered references.
                        st.markdown("### 🔗 Referências Utilizadas")
                        st.info(f"**Citações:** {formatar_referencias(fragmentos)}")
                    else:
                        st.warning("Nenhum trecho foi recuperado pela busca. Tente reformular sua pergunta.")

                except Exception as e:
                    st.error(f"Erro ao gerar resumo: {e}")
                    st.info("Verifique se a API está rodando e acessível.")

# ========================
# CHAT TAB
# ========================
# Sends a single question to the API in "chatbot" mode and shows the answer
# together with the documents and citations it was based on.
with aba_chat:
    col1, col2 = st.columns([2, 1])

    with col1:
        st.markdown("### 💬 Chat com o Assistente")
        st.markdown("""
            <div class="content-card">
                <p style='color: #666; margin-bottom: 1rem;'>
                    Faça perguntas sobre os documentos indexados. O chatbot responde
                    utilizando <strong>apenas</strong> o conteúdo da base de dados e fornece
                    referências precisas para cada resposta.
                </p>
            </div>
        """, unsafe_allow_html=True)

        pergunta = st.text_input(
            "❓ Digite sua pergunta",
            placeholder="Ex: Quais são os principais conceitos de química orgânica?",
            help="Faça perguntas específicas para obter melhores respostas"
        )

    with col2:
        st.markdown("### ⚙️ Configurações")
        top_k_chat = st.slider(
            "Número de trechos para consulta",
            min_value=1,
            max_value=10,
            value=4,
            help="Quantidade de trechos que o chatbot utilizará para responder"
        )

        temperatura_chat = st.slider(
            "Temperatura do modelo",
            min_value=0.0,
            max_value=1.0,
            value=0.5,
            step=0.05,
            help="Valores baixos (0.0–0.3) deixam as respostas mais objetivas; valores altos (0.7–1.0) deixam o chatbot mais criativo e variado."
        )

        st.markdown("""
            <div style='background: #f0fdf4; padding: 1rem; border-radius: 8px; margin-top: 1rem;'>
                <p style='margin: 0; color: #15803d; font-size: 0.9rem;'>
                    <strong>✨ Sugestão:</strong> Para perguntas objetivas, use 2-4 trechos.
                    Para questões complexas, aumente para 6-10 trechos.
                </p>
            </div>
        """, unsafe_allow_html=True)

    st.markdown("<br>", unsafe_allow_html=True)

    # Only the wider middle column is used; the outer ones are spacers.
    _, col_btn2, _ = st.columns([1, 2, 1])
    with col_btn2:
        enviar = st.button("📤 Enviar Pergunta", use_container_width=True)

    if enviar and not pergunta.strip():
        st.warning("Por favor, digite uma pergunta antes de enviar.")
    elif enviar:
        with st.spinner("Processando sua pergunta... 🤔 "):
            try:
                resposta, fragmentos = chamar_api(pergunta=pergunta, mode="chatbot", top_k=top_k_chat, temperatura=temperatura_chat)

                # The answer comes from an external service and is placed
                # inside raw HTML, so escape it to keep stray markup in the
                # text from being interpreted by the browser.
                resposta_html = html.escape(str(resposta), quote=False)

                # Show the answer.
                st.markdown("### 💡 Resposta do Chatbot")
                st.markdown(f"""
                    <div class="content-card">
                        <p style='font-size: 1.05rem; line-height: 1.6; color: #333;'>
                            {resposta_html}
                        </p>
                    </div>
                """, unsafe_allow_html=True)

                # Details about the sources behind the answer.
                if fragmentos:
                    docs_unicos = listar_documentos_unicos(fragmentos)

                    # Map document_id -> readable title (falls back to the id).
                    titulos_por_doc = {}
                    for m in fragmentos:
                        doc_id = m.get("document_id")
                        if not doc_id:
                            continue
                        titulo = m.get("document_title") or doc_id
                        titulos_por_doc[doc_id] = titulo

                    col_stat1, col_stat2 = st.columns(2)
                    with col_stat1:
                        st.metric("📚 Fontes Consultadas", len(docs_unicos))
                    with col_stat2:
                        st.metric("📄 Trechos Utilizados", len(fragmentos))

                    # Source documents (human-readable titles when available),
                    # escaped because they are embedded in raw HTML.
                    st.markdown("### 📚 Documentos Fonte")
                    for doc_id in docs_unicos:
                        titulo = titulos_por_doc.get(doc_id, doc_id)
                        titulo_html = html.escape(str(titulo), quote=False)
                        st.markdown(
                            f"<span class='badge badge-primary'>📄 {titulo_html}</span>",
                            unsafe_allow_html=True,
                        )

                    # Numbered references.
                    st.markdown("### 🔗 Referências Citadas")
                    st.success(f"**Citações completas:** {formatar_referencias(fragmentos)}")
                else:
                    # Shown only when the search returned no fragments.
                    st.warning("Nenhum trecho relevante encontrado na base de dados.")
                    st.info("Tente reformular sua pergunta ou usar termos diferentes.💡 ")

            except Exception as e:
                st.error(f"Erro ao processar pergunta: {e}")
                st.info("Verifique se a API está rodando corretamente.")

# Footer: vertical spacing followed by the credits block, both as raw HTML.
_RODAPE_HTML = """
    <div style='text-align: center; color: #888; padding: 2rem; border-top: 1px solid #e0e0e0;'>
        <p style='margin: 0;'>🤖 <strong>Chatbot NORM</strong> - Sistema Inteligente de Consulta</p>
        <p style='margin: 0.5rem 0 0 0; font-size: 0.9rem;'>
            Laboratório de Inteligência Computacional Aplicada ICA da PUC-RIO
        </p>
    </div>
"""
st.markdown("<br><br>", unsafe_allow_html=True)
st.markdown(_RODAPE_HTML, unsafe_allow_html=True)