Spaces:

ICA-PUC
/

beta-NORM

Sleeping

beta-NORM / app /app_front.py

GitHub Actions

Snapshot from GitHub master for HF Space

6f54a86 23 days ago

22.5 kB

	import html
	import json
	import os
	import re
	import sys

	# Make the project root importable; this must run before the local `utils` import below.
	ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
	if ROOT not in sys.path:
	    sys.path.insert(0, ROOT)

	import requests
	import streamlit as st

	from utils import base_utils as bu

	# Read the configuration once at import time; the query endpoint falls back
	# to the localhost default when the "ui" section or its "api_url" key is absent.
	CONFIG = bu.load_config("configs/config.json")
	_UI_SETTINGS = CONFIG.get("ui", {})
	API_URL = _UI_SETTINGS.get("api_url", "http://127.0.0.1:8000/query")


	def chamar_api(pergunta: str, mode: str, top_k: int, temperatura: float | None = None):
	    """Send a question to the query API and return the answer with its sources.

	    Args:
	        pergunta: Question text, sent as the ``question`` field.
	        mode: Sent as the ``mode`` field (this file uses ``"summary"`` and
	            ``"chatbot"``).
	        top_k: Sent as the ``top_k`` field.
	        temperatura: Sent as the ``temperature`` field; left out of the
	            payload entirely when ``None``.

	    Returns:
	        A ``(answer, retrieved)`` tuple taken from the JSON response.
	        ``retrieved`` is ``[]`` when the response has no such key.

	    Raises:
	        requests.HTTPError: If the API answers with an error status.
	        KeyError: If the JSON response has no ``"answer"`` key.
	    """
	    payload = {"question": pergunta, "top_k": top_k, "mode": mode}
	    if temperatura is not None:
	        payload["temperature"] = temperatura

	    resp = requests.post(API_URL, json=payload, timeout=60)
	    resp.raise_for_status()
	    data = resp.json()
	    return data["answer"], data.get("retrieved", [])


	def formatar_referencias(fragmentos):
	    """Format numbered references keyed by ``citation_id`` ([1], [2], ...).

	    Args:
	        fragmentos: Iterable of dicts. Each may carry ``citation_id`` and
	            ``document_title``; entries without a ``citation_id`` are skipped.

	    Returns:
	        A single string such as ``"[1] Title A | [2] Title B"``, ordered by
	        citation id, or ``""`` when nothing is citable. When several
	        fragments share a citation id, the last one wins.
	    """
	    # Map citation_id -> display title.
	    refs_por_id = {}
	    for m in fragmentos:
	        cit_id = m.get("citation_id")
	        if cit_id is None:
	            continue
	        # Drop any "[n]" marker embedded in the title so it is not shown twice.
	        titulo = re.sub(r"\[\d+\]", "", m.get("document_title") or "").strip()
	        if not titulo:
	            # Missing title, or a title that was nothing but a marker.
	            titulo = "Documento"
	        refs_por_id[cit_id] = titulo.replace("_", " ").replace("-", " ")

	    return " | ".join(
	        f"[{cit_id}] {refs_por_id[cit_id]}" for cit_id in sorted(refs_por_id)
	    )


	def listar_documentos_unicos(fragmentos):
	    """Return the sorted, de-duplicated document ids found in *fragmentos*.

	    Fragments with a missing or empty ``document_id`` are skipped instead of
	    raising ``KeyError``, matching how the callers in this file build their
	    id -> title maps.

	    Args:
	        fragmentos: Iterable of dicts that may carry a ``document_id`` key.

	    Returns:
	        A sorted list of the distinct document ids.
	    """
	    return sorted({m["document_id"] for m in fragmentos if m.get("document_id")})


	def obter_lista_documentos():
	    """Fetch the list of documents indexed by the backend.

	    The listing URL is derived from ``API_URL`` by swapping its last
	    ``/query`` for ``/list_documents``; adjust this if your API exposes the
	    listing elsewhere. When ``API_URL`` contains no ``/query`` it is used
	    unchanged.

	    Returns:
	        The value of the ``"documents"`` key of the JSON response (``[]``
	        when the key is absent), or ``None`` when the request fails. On
	        failure the error is also shown in the UI through ``st.error``.
	    """
	    # Replace only the final "/query": str.replace would also rewrite an
	    # earlier occurrence, e.g. a host name that starts with "query".
	    base, sep, tail = API_URL.rpartition("/query")
	    list_url = f"{base}/list_documents{tail}" if sep else API_URL

	    try:
	        resp = requests.get(list_url, timeout=30)
	        resp.raise_for_status()
	        return resp.json().get("documents", [])
	    except Exception as e:
	        # UI boundary: report any failure to the user instead of crashing the page.
	        st.error(f"Erro ao obter lista de documentos: {e}")
	        return None


	# Page configuration
	_page_settings = {
	    "page_title": "Chatbot NORM - Sistema de Consulta",
	    "page_icon": "🤖",
	    "layout": "wide",
	    "initial_sidebar_state": "collapsed",
	}
	st.set_page_config(**_page_settings)

	# Custom CSS injected as raw HTML to restyle the page (title banner, cards,
	# buttons, tabs, expander header, reference cards, badges, loading animation).
	st.markdown("""
	<style>
	/* Estilo geral */
	.main {
	padding: 2rem;
	}

	/* Título principal */
	.title-container {
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	padding: 2rem;
	border-radius: 15px;
	margin-bottom: 2rem;
	box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
	}

	.title-text {
	color: white;
	font-size: 2.5rem;
	font-weight: bold;
	margin: 0;
	text-align: center;
	}

	.subtitle-text {
	color: rgba(255, 255, 255, 0.9);
	font-size: 1.1rem;
	margin-top: 0.5rem;
	text-align: center;
	}

	/* Cards de conteúdo */
	.content-card {
	background: white;
	padding: 1.5rem;
	border-radius: 10px;
	border: 1px solid #e0e0e0;
	margin-bottom: 1rem;
	box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);
	}

	/* Botões personalizados */
	.stButton > button {
	width: 100%;
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	color: white;
	border: none;
	padding: 0.75rem 1.5rem;
	border-radius: 8px;
	font-weight: bold;
	transition: all 0.3s ease;
	}

	.stButton > button:hover {
	transform: translateY(-2px);
	box-shadow: 0 4px 12px rgba(102, 126, 234, 0.4);
	}

	/* Tabs personalizadas */
	.stTabs [data-baseweb="tab-list"] {
	gap: 2rem;
	background-color: #f8f9fa;
	padding: 1rem;
	border-radius: 10px;
	}

	.stTabs [data-baseweb="tab"] {
	padding: 1rem 2rem;
	background-color: white;
	border-radius: 8px;
	font-weight: 600;
	}

	.stTabs [aria-selected="true"] {
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	color: white;
	}

	/* Expander personalizado */
	.streamlit-expanderHeader {
	background-color: #f8f9fa;
	border-radius: 8px;
	font-weight: 600;
	}

	/* Cards de referência */
	.reference-card {
	background: #f8f9fa;
	padding: 1rem;
	border-left: 4px solid #667eea;
	border-radius: 5px;
	margin: 0.5rem 0;
	}

	/* Badges */
	.badge {
	display: inline-block;
	padding: 0.25rem 0.75rem;
	border-radius: 12px;
	font-size: 0.85rem;
	font-weight: 600;
	margin: 0.25rem;
	}

	.badge-primary {
	background-color: #667eea;
	color: white;
	}

	.badge-success {
	background-color: #4ade80;
	color: white;
	}

	/* Animação de loading */
	@keyframes pulse {
	0%, 100% { opacity: 1; }
	50% { opacity: 0.5; }
	}

	.loading {
	animation: pulse 1.5s ease-in-out infinite;
	}
	</style>
	""", unsafe_allow_html=True)

	# Page header: title banner styled by the .title-container CSS class.
	st.markdown("""
	<div class="title-container">
	<h1 class="title-text">🤖 Chatbot NORM - Sistema Inteligente de Consulta</h1>
	<p class="subtitle-text">Sistema Inteligente de Consulta e Resumo de Documentos</p>
	</div>
	""", unsafe_allow_html=True)

	# Main tabs: document summaries and the interactive chatbot.
	aba_resumos, aba_chat = st.tabs(["📄 Resumos de Documentos", "💬 Chatbot Interativo"])

	# ========================
	# SUMMARY TAB
	# ========================
	# Lets the user write (or pick, by clicking a listed document) a summary
	# request, sends it to the API in "summary" mode and renders the answer,
	# usage metrics, source documents and citations.
	# Multi-line HTML literals stay flush with the base column so the markup sent
	# to st.markdown is unchanged.
	with aba_resumos:
	    col1, col2 = st.columns([2, 1])

	    with col1:
	        st.markdown("### 📋 Gerador de Resumos")
	        st.markdown("""
	<div class="content-card">
	<p style='color: #666; margin-bottom: 1rem;'>
	Esta ferramenta gera resumos inteligentes a partir dos documentos indexados.
	O sistema utiliza RAG (Retrieval-Augmented Generation) para buscar os trechos
	mais relevantes e construir uma resposta contextualizada.
	</p>
	</div>
	""", unsafe_allow_html=True)

	        # Summary prompt kept in session state so a document click can prefill it.
	        if 'pergunta_doc' not in st.session_state:
	            st.session_state.pergunta_doc = "Faça um resumo claro sobre o tema principal dos documentos."

	        pergunta_doc = st.text_area(
	            "📝 Digite sua pergunta ou solicite um resumo",
	            value=st.session_state.pergunta_doc,
	            height=100,
	            help="Descreva o tipo de resumo que você precisa ou faça uma pergunta específica"
	        )
	        # Keep the state in sync when the user edits the text manually.
	        st.session_state.pergunta_doc = pergunta_doc

	    with col2:
	        st.markdown("### ⚙️ Configurações")
	        top_k_resumo = st.slider(
	            "Número de trechos para análise",
	            min_value=3,
	            max_value=15,
	            value=8,
	            help="Mais trechos = resumo mais completo, mas pode levar mais tempo"
	        )

	        temperatura_resumo = st.slider(
	            "Temperatura do modelo",
	            min_value=0.0,
	            max_value=1.0,
	            value=0.5,
	            step=0.05,
	            help="Valores baixos (0.0–0.3) deixam as respostas mais determinísticas; valores altos (0.7–1.0) geram respostas mais criativas e variadas."
	        )

	        # Plain literal: the text has no placeholders, so no f-string is needed.
	        st.markdown("""
	<div style='background: #f0f7ff; padding: 1rem; border-radius: 8px; margin-top: 1rem;'>
	<p style='margin: 0; color: #1e40af; font-size: 0.9rem;'>
	<strong>ℹ️ Dica (trechos):</strong> Use valores menores (3–5) para resumos mais diretos
	e valores maiores (10–15) para análises mais abrangentes.
	</p>
	<p style='margin: 0.5rem 0 0 0; color: #1e40af; font-size: 0.9rem;'>
	<strong>🔥 Dica (temperatura):</strong> Para respostas mais consistentes, mantenha a temperatura entre 0.0 e 0.3.
	Se quiser explorar diferentes formulações ou respostas mais criativas, aumente para 0.7–1.0.
	</p>
	</div>
	""", unsafe_allow_html=True)

	    st.markdown("<br>", unsafe_allow_html=True)

	    # Initialise the state for the document-list toggle and its pagination.
	    if 'mostrar_documentos' not in st.session_state:
	        st.session_state.mostrar_documentos = False
	    if 'docs_page' not in st.session_state:
	        st.session_state.docs_page = 0
	    if 'docs_page_size' not in st.session_state:
	        st.session_state.docs_page_size = 20

	    # The middle column is only a spacer.
	    col_gerar, _, col_listar = st.columns([1, 1, 1])
	    with col_gerar:
	        gerar_resumo = st.button("🚀 Gerar Resumo", use_container_width=True)
	    with col_listar:
	        if st.button("📚 Listar Documentos", use_container_width=True):
	            st.session_state.mostrar_documentos = not st.session_state.mostrar_documentos

	    # Generating a summary hides the document list. The flag is reset before
	    # the list is drawn so that it already takes effect on this run.
	    if gerar_resumo:
	        st.session_state.mostrar_documentos = False

	    # Show or hide the document list according to the toggle state.
	    if st.session_state.mostrar_documentos:
	        with st.spinner("🔍 Buscando documentos..."):
	            documentos = obter_lista_documentos()

	        if documentos:
	            total_docs = len(documentos)
	            page_size = st.session_state.docs_page_size
	            total_pages = max((total_docs - 1) // page_size + 1, 1)

	            # Clamp the stored page into the valid range.
	            st.session_state.docs_page = min(
	                max(st.session_state.docs_page, 0), total_pages - 1
	            )

	            current_page = st.session_state.docs_page
	            start_idx = current_page * page_size
	            end_idx = min(start_idx + page_size, total_docs)
	            page_docs = documentos[start_idx:end_idx]

	            st.success(f"✅ Total de documentos indexados: {total_docs}")
	            st.markdown(
	                f"Mostrando documentos {start_idx + 1}–{end_idx} de {total_docs} "
	            )
	            st.markdown("---")

	            # Document grid; clicking an entry prefills the summary prompt.
	            cols_per_row = 2
	            for row_start in range(0, len(page_docs), cols_per_row):
	                cols = st.columns(cols_per_row)
	                row_docs = page_docs[row_start:row_start + cols_per_row]
	                for offset, (col, doc_info) in enumerate(zip(cols, row_docs)):
	                    global_idx = start_idx + row_start + offset
	                    with col:
	                        # Accept either a plain string or an {id, title} object.
	                        if isinstance(doc_info, str):
	                            doc_id = doc_info
	                            doc_title = doc_info
	                        else:
	                            doc_id = doc_info.get("id") or ""
	                            doc_title = doc_info.get("title") or doc_id

	                        doc_title_norm = doc_title.replace("_", " ").replace("-", " ")
	                        if doc_title_norm.isupper():
	                            doc_title_norm = doc_title_norm.title()

	                        display_name = (
	                            doc_title_norm
	                            if len(doc_title_norm) <= 60
	                            else doc_title_norm[:57] + "..."
	                        )

	                        if st.button(
	                            f"#{global_idx+1} {display_name}",
	                            key=f"doc_btn_{global_idx}",
	                            use_container_width=True,
	                        ):
	                            st.session_state.pergunta_doc = (
	                                f"Faça um resumo claro do documento {doc_title_norm}."
	                            )
	                            # The text area above was already drawn with the old
	                            # prompt; rerun so the new prompt shows immediately,
	                            # as the pagination buttons below already do.
	                            st.rerun()

	            # Pagination controls.
	            col_prev, col_page_info, col_next = st.columns([1, 2, 1])
	            with col_prev:
	                if st.button("⬅️ Anterior", disabled=current_page == 0):
	                    st.session_state.docs_page = max(current_page - 1, 0)
	                    st.rerun()
	            with col_page_info:
	                st.markdown(
	                    f"<div style='text-align:center; color:#555;'>Página <strong>{current_page+1}</strong> de <strong>{total_pages}</strong></div>",
	                    unsafe_allow_html=True,
	                )
	            with col_next:
	                if st.button(
	                    "Próxima ➡️", disabled=current_page >= total_pages - 1
	                ):
	                    st.session_state.docs_page = min(current_page + 1, total_pages - 1)
	                    st.rerun()
	        elif documentos is not None:
	            # None means the request failed and the error was already shown.
	            st.info("ℹ️ Nenhum documento encontrado no sistema.")

	    if gerar_resumo:
	        if not pergunta_doc.strip():
	            st.warning("⚠️ Por favor, digite uma pergunta ou solicitação de resumo.")
	        else:
	            with st.spinner("🔍 Analisando documentos e gerando resumo..."):
	                try:
	                    resposta, fragmentos = chamar_api(
	                        pergunta=pergunta_doc,
	                        mode="summary",
	                        top_k=top_k_resumo,
	                        temperatura=temperatura_resumo,
	                    )

	                    # The answer comes from the API and is placed inside raw
	                    # HTML, so escape it to keep stray markup from being rendered.
	                    resposta_html = html.escape(str(resposta))

	                    # Show the summary.
	                    st.markdown("### ✨ Resumo Gerado")
	                    st.markdown(f"""
	<div class="content-card">
	<p style='font-size: 1.05rem; line-height: 1.8; color: #333;'>
	{resposta_html}
	</p>
	</div>
	""", unsafe_allow_html=True)

	                    # Download button for the raw (unescaped) summary text.
	                    st.download_button(
	                        label="💾 Baixar resumo em .txt",
	                        data=resposta,
	                        file_name="resumo_chatbot_norm.txt",
	                        mime="text/plain",
	                    )

	                    if fragmentos:
	                        docs_unicos = listar_documentos_unicos(fragmentos)

	                        # Map document_id -> readable title; fragments without an
	                        # id are skipped and the last title seen for an id wins.
	                        titulos_por_doc = {
	                            m["document_id"]: m.get("document_title") or m["document_id"]
	                            for m in fragmentos
	                            if m.get("document_id")
	                        }

	                        # Usage metrics.
	                        col_stat1, col_stat2, col_stat3 = st.columns(3)
	                        with col_stat1:
	                            st.metric("📚 Documentos Consultados", len(docs_unicos))
	                        with col_stat2:
	                            st.metric("📄 Trechos Analisados", len(fragmentos))
	                        with col_stat3:
	                            st.metric("✅ Status", "Completo")

	                        # Source documents, shown by title rather than raw id.
	                        st.markdown("### 📚 Documentos Consultados")
	                        for doc_id in docs_unicos:
	                            titulo = html.escape(str(titulos_por_doc.get(doc_id, doc_id)))
	                            st.markdown(
	                                f"<span class='badge badge-primary'>📄 {titulo}</span>",
	                                unsafe_allow_html=True,
	                            )

	                        # Citations.
	                        st.markdown("### 🔗 Referências Utilizadas")
	                        st.info(f"Citações: {formatar_referencias(fragmentos)}")
	                    else:
	                        st.warning("Nenhum trecho foi recuperado pela busca. Tente reformular sua pergunta.")

	                except Exception as e:
	                    # UI boundary: show the failure instead of crashing the page.
	                    st.error(f"Erro ao gerar resumo: {e}")
	                    st.info("Verifique se a API está rodando e acessível.")

	# ========================
	# CHAT TAB
	# ========================
	# Takes a free-form question, sends it to the API in "chatbot" mode and
	# renders the answer, usage metrics, source documents and citations.
	# Multi-line HTML literals stay flush with the base column so the markup sent
	# to st.markdown is unchanged.
	with aba_chat:
	    col1, col2 = st.columns([2, 1])

	    with col1:
	        st.markdown("### 💬 Chat com o Assistente")
	        st.markdown("""
	<div class="content-card">
	<p style='color: #666; margin-bottom: 1rem;'>
	Faça perguntas sobre os documentos indexados. O chatbot responde
	utilizando <strong>apenas</strong> o conteúdo da base de dados e fornece
	referências precisas para cada resposta.
	</p>
	</div>
	""", unsafe_allow_html=True)

	        pergunta = st.text_input(
	            "❓ Digite sua pergunta",
	            placeholder="Ex: Quais são os principais conceitos de química orgânica?",
	            help="Faça perguntas específicas para obter melhores respostas"
	        )

	    with col2:
	        st.markdown("### ⚙️ Configurações")
	        top_k_chat = st.slider(
	            "Número de trechos para consulta",
	            min_value=1,
	            max_value=10,
	            value=4,
	            help="Quantidade de trechos que o chatbot utilizará para responder"
	        )

	        temperatura_chat = st.slider(
	            "Temperatura do modelo",
	            min_value=0.0,
	            max_value=1.0,
	            value=0.5,
	            step=0.05,
	            help="Valores baixos (0.0–0.3) deixam as respostas mais objetivas; valores altos (0.7–1.0) deixam o chatbot mais criativo e variado."
	        )

	        st.markdown("""
	<div style='background: #f0fdf4; padding: 1rem; border-radius: 8px; margin-top: 1rem;'>
	<p style='margin: 0; color: #15803d; font-size: 0.9rem;'>
	<strong>✨ Sugestão:</strong> Para perguntas objetivas, use 2-4 trechos.
	Para questões complexas, aumente para 6-10 trechos.
	</p>
	</div>
	""", unsafe_allow_html=True)

	    st.markdown("<br>", unsafe_allow_html=True)

	    # The outer columns are only spacers that centre the button.
	    _, col_enviar, _ = st.columns([1, 2, 1])
	    with col_enviar:
	        enviar = st.button("📤 Enviar Pergunta", use_container_width=True)

	    if enviar:
	        if not pergunta.strip():
	            st.warning("Por favor, digite uma pergunta antes de enviar.")
	        else:
	            with st.spinner("Processando sua pergunta... 🤔 "):
	                try:
	                    resposta, fragmentos = chamar_api(
	                        pergunta=pergunta,
	                        mode="chatbot",
	                        top_k=top_k_chat,
	                        temperatura=temperatura_chat,
	                    )

	                    # The answer comes from the API and is placed inside raw
	                    # HTML, so escape it to keep stray markup from being rendered.
	                    resposta_html = html.escape(str(resposta))

	                    # Show the answer.
	                    st.markdown("### 💡 Resposta do Chatbot")
	                    st.markdown(f"""
	<div class="content-card">
	<p style='font-size: 1.05rem; line-height: 1.6; color: #333;'>
	{resposta_html}
	</p>
	</div>
	""", unsafe_allow_html=True)

	                    if fragmentos:
	                        docs_unicos = listar_documentos_unicos(fragmentos)

	                        # Map document_id -> readable title; fragments without an
	                        # id are skipped and the last title seen for an id wins.
	                        titulos_por_doc = {
	                            m["document_id"]: m.get("document_title") or m["document_id"]
	                            for m in fragmentos
	                            if m.get("document_id")
	                        }

	                        # Usage metrics.
	                        col_stat1, col_stat2 = st.columns(2)
	                        with col_stat1:
	                            st.metric("📚 Fontes Consultadas", len(docs_unicos))
	                        with col_stat2:
	                            st.metric("📄 Trechos Utilizados", len(fragmentos))

	                        # Source documents, shown by title when one is available.
	                        st.markdown("### 📚 Documentos Fonte")
	                        for doc_id in docs_unicos:
	                            titulo = html.escape(str(titulos_por_doc.get(doc_id, doc_id)))
	                            st.markdown(
	                                f"<span class='badge badge-primary'>📄 {titulo}</span>",
	                                unsafe_allow_html=True,
	                            )

	                        # Citations.
	                        st.markdown("### 🔗 Referências Citadas")
	                        st.success(f"Citações completas: {formatar_referencias(fragmentos)}")
	                    else:
	                        # Only reached when the search returned no fragments.
	                        st.warning("Nenhum trecho relevante encontrado na base de dados.")
	                        st.info("Tente reformular sua pergunta ou usar termos diferentes.💡 ")

	                except Exception as e:
	                    # UI boundary: show the failure instead of crashing the page.
	                    st.error(f"Erro ao processar pergunta: {e}")
	                    st.info("Verifique se a API está rodando corretamente.")

	# Footer: vertical spacing followed by a centred credits block.
	st.markdown("<br><br>", unsafe_allow_html=True)
	st.markdown("""
	<div style='text-align: center; color: #888; padding: 2rem; border-top: 1px solid #e0e0e0;'>
	<p style='margin: 0;'>🤖 <strong>Chatbot NORM</strong> - Sistema Inteligente de Consulta</p>
	<p style='margin: 0.5rem 0 0 0; font-size: 0.9rem;'>
	Laboratório de Inteligência Computacional Aplicada ICA da PUC-RIO
	</p>
	</div>
	""", unsafe_allow_html=True)