# app.py — Agentic RAG | Self-Critique Pipeline | Daniel Fonseca
import streamlit as st
import os
import time
# Browser-tab title/icon and overall layout for the app.
_PAGE_SETTINGS = {
    "page_title": "Agentic RAG · Daniel Fonseca",
    "page_icon": "⚡",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)
# ── CUSTOM CSS ────────────────────────────────────────────────
# Inject the app's custom styling as raw HTML.
# NOTE(review): the literal below contains only a newline, so nothing is
# actually injected here — presumably the <style> payload was lost somewhere;
# confirm against the repository copy.
st.markdown("""
""", unsafe_allow_html=True)
# ── SESSION STATE ─────────────────────────────────────────────
# Seed each session-state slot with its default the first time it is needed;
# values that already exist in the session are left untouched.
for state_name, initial_value in (
    ("messages", []),
    ("openai_key", ""),
    ("index_built", False),
    ("pipeline", None),
):
    if state_name in st.session_state:
        continue
    st.session_state[state_name] = initial_value
# ── HELPERS ───────────────────────────────────────────────────
def get_openai_key():
    """Return the OpenAI API key from the first source that provides one.

    Sources are tried in order: Streamlit secrets, the ``OPENAI_API_KEY``
    environment variable, then the key typed into the sidebar (kept in
    ``st.session_state.openai_key``). A source that is present but empty is
    skipped, so it cannot mask a usable key further down the chain.

    Returns:
        The first non-empty key found, or the session value (an empty string
        by default) when no source has one.
    """
    try:
        secret = st.secrets["OPENAI_API_KEY"] if "OPENAI_API_KEY" in st.secrets else ""
    except Exception:
        # Deliberate best effort: touching st.secrets can raise when no secrets
        # file is configured, so any failure here is treated as "no secret".
        secret = ""
    return secret or os.getenv("OPENAI_API_KEY") or st.session_state.openai_key
def get_pipeline(key: str):
    """Return the session's pipeline, building it on first use.

    Args:
        key: OpenAI API key handed to ``AgenticRAGPipeline`` when a new
            instance has to be created. Ignored if one is already cached.

    Returns:
        The pipeline object stored in ``st.session_state.pipeline``.
    """
    state = st.session_state
    if state.pipeline is not None:
        return state.pipeline

    # Imported lazily so the module is only loaded when a pipeline is needed.
    from rag_pipeline import AgenticRAGPipeline

    state.pipeline = AgenticRAGPipeline(key)
    state.index_built = True
    return state.pipeline
def score_class(s):
    """Map a critique score to its CSS class name.

    ``None`` maps to ``"score-mid"``; otherwise 8 and above is
    ``"score-high"``, 6 and above is ``"score-mid"``, anything lower is
    ``"score-low"``.
    """
    if s is None:
        return "score-mid"
    # Thresholds are checked from highest to lowest; first match wins.
    for floor, css_name in ((8, "score-high"), (6, "score-mid")):
        if s >= floor:
            return css_name
    return "score-low"
def score_emoji(s):
    """Map a critique score to the glyph shown next to it.

    ``None`` maps to ``"—"``; otherwise 8 and above is ``"✦"``, 6 and above
    is ``"◈"``, anything lower is ``"◇"``.
    """
    if s is None:
        return "—"
    # Thresholds are checked from highest to lowest; first match wins.
    for floor, glyph in ((8, "✦"), (6, "◈")):
        if s >= floor:
            return glyph
    return "◇"
# ── STEP LABELS ───────────────────────────────────────────────
# Display metadata per pipeline step: step id -> (icon, human-readable label).
# Steps not listed here fall back to a generic icon at the lookup sites.
STEP_META = dict(
    query_rewrite=("🔄", "Query Rewriting"),
    retrieval=("🔍", "Semantic Retrieval"),
    relevance_grade=("⚖️", "Relevance Grading"),
    fallback=("🔀", "Fallback Strategy"),
    generation=("✍️", "Response Generation"),
    self_critique=("🧪", "Self-Critique Judge"),
    refinement=("🔁", "Refinement Loop"),
    final=("✅", "Pipeline Complete"),
)
# ── SIDEBAR ───────────────────────────────────────────────────
with st.sidebar:
    # Branding header.
    st.markdown("""
⚡ Agentic RAG
Self-Critique Pipeline v1.0
""", unsafe_allow_html=True)
    st.divider()

    # API key entry.
    st.markdown("#### 🔑 OpenAI API Key")
    # A non-empty label keeps the widget accessible; it remains hidden through
    # label_visibility="collapsed".
    key_input = st.text_input(
        "OpenAI API Key",
        type="password",
        value=st.session_state.openai_key,
        placeholder="sk-...",
        label_visibility="collapsed",
    )
    # The script reruns on every interaction and the field keeps its value, so
    # react only when the key actually changed. Resetting unconditionally would
    # throw the cached pipeline away on each rerun and rebuild it per question.
    if key_input and key_input != st.session_state.openai_key:
        st.session_state.openai_key = key_input
        st.session_state.pipeline = None  # force a rebuild with the new key
    oai_key = get_openai_key()
    if oai_key:
        st.success("✅ Key configurada")
    else:
        st.warning("Configure a OpenAI API Key")
    st.divider()

    # Pipeline architecture overview: (icon, step name, short description).
    st.markdown("#### 🏗️ Arquitetura do Pipeline")
    steps = [
        ("🔄", "Query Rewriting", "Expande e otimiza a query"),
        ("🔍", "Semantic Retrieval", "FAISS + MiniLM embeddings"),
        ("⚖️", "Relevance Grading", "LLM avalia qualidade dos chunks"),
        ("🔀", "Fallback", "Broadens query se irrelevante"),
        ("✍️", "Generation", "GPT-4o-mini gera resposta"),
        ("🧪", "Self-Critique", "LLM-as-Judge score 0-10"),
        ("🔁", "Refinement Loop", "Refaz se score < 7 (max 2x)"),
    ]
    # NOTE(review): the template below has no placeholders, so emoji/name/desc
    # are never rendered — presumably the row markup was lost; confirm.
    for emoji, name, desc in steps:
        st.markdown(f"""
""", unsafe_allow_html=True)
    st.divider()

    # Tech-stack footer.
    st.markdown("""
sentence-transformers/all-MiniLM-L6-v2
FAISS IndexFlatIP · gpt-4o-mini
CPU-only · HF Spaces free tier
""", unsafe_allow_html=True)
    st.divider()

    # Clearing the history reruns immediately so the welcome screen reappears.
    if st.button("🗑️ Limpar conversa", use_container_width=True):
        st.session_state.messages = []
        st.rerun()
# ── MAIN ──────────────────────────────────────────────────────
# Page title, rendered as raw HTML.
# NOTE(review): this single-quoted literal runs across two physical lines,
# which is not valid Python — it looks like the surrounding markup was
# stripped. Restore the original tag or switch to a triple-quoted string.
st.markdown('Agentic RAG Pipeline
', unsafe_allow_html=True)
# Subtitle listing the pipeline stages (same multi-line-literal caveat).
st.markdown(
'Query Rewriting · Semantic Retrieval · Relevance Grading · Self-Critique · Refinement Loop
',
unsafe_allow_html=True
)
# ── WELCOME / SUGGESTIONS ─────────────────────────────────────
# Welcome screen: shown only while the conversation history is empty.
if not st.session_state.messages:
col_info, col_sug = st.columns([1, 1])
# Left column: summary of what the pipeline does.
with col_info:
st.markdown("""
O que torna este pipeline Senior?
✦ Query Rewriting — query otimizada antes do retrieval
✦ Relevance Grading — filtra chunks irrelevantes
✦ Fallback Strategy — broadening automático se retrieval falha
✦ Self-Critique — LLM avalia própria resposta (0-10)
✦ Refinement Loop — refaz até score ≥ 7 (máx 2x)
✦ Live Trace — cada step visível em tempo real
""", unsafe_allow_html=True)
# Right column: heading for the suggested questions.
with col_sug:
st.markdown("""
💡 Sugestões de perguntas
""", unsafe_allow_html=True)
# NOTE(review): indentation is lost in this view, so it is unclear whether the
# button grid below sits inside col_sug or directly under the `if` — confirm.
suggestions = [
"Qual projeto tem maior AUC e por quê?",
"Como funciona o Self-Critique no pipeline?",
"Diferença entre TGN e HetGNN",
"Explique Inductive Learning no GraphSAGE",
"O que é DOMINANT e como detecta anomalias?",
"Quais projetos usam PyTorch Geometric?",
]
cols = st.columns(2)
# Alternate the suggestion buttons between the two columns.
for i, sug in enumerate(suggestions):
with cols[i % 2]:
if st.button(sug, key=f"sug_{i}", use_container_width=True):
# Queue the question under "pending_q" and rerun; the input section pops it.
st.session_state["pending_q"] = sug
st.rerun()
# ── CHAT HISTORY ─────────────────────────────────────────────
# Replay every stored message: user turns as plain bubbles, assistant turns
# with an optional collapsible trace, the answer, and a metrics footer.
# NOTE(review): msg["content"] is interpolated into strings rendered with
# unsafe_allow_html=True and is not escaped — confirm whether that is intended.
# NOTE(review): the single-quoted f-strings below span physical lines, which
# is not valid Python; markup appears to have been stripped from them.
for msg in st.session_state.messages:
if msg["role"] == "user":
st.markdown(f'🧑 {msg["content"]}
', unsafe_allow_html=True)
else:
# Show the collapsible trace
if msg.get("trace"):
with st.expander(f"🔬 Pipeline Trace — Score: {msg.get('score','?')}/10 · {msg.get('total_ms','?')}ms · {msg.get('n_refinements',0)} refinamento(s)", expanded=False):
for event in msg["trace"]:
step = event.get("step","")
status = event.get("status","")
# Unknown steps fall back to a generic icon and the raw step id as label.
emoji, label = STEP_META.get(step, ("◈", step))
# NOTE(review): `data` and `css` are assigned but not referenced by the
# visible template on the next statement.
data = event.get("data", {})
css = "step-done" if status == "done" else "step-refine"
st.markdown(f'{emoji} {label} +{event.get("elapsed_ms")}ms
', unsafe_allow_html=True)
st.markdown(f'🤖 {msg["content"]}
', unsafe_allow_html=True)
# Final metrics
if msg.get("metrics"):
m = msg["metrics"]
# Score defaults to 0 here, whereas the expander title above defaults to '?'.
sc = msg.get("score", 0)
st.markdown(f"""
{score_emoji(sc)} Score: {sc}/10
Faithfulness: {m.get('faithfulness','?')}
Relevance: {m.get('relevance','?')}
Completeness: {m.get('completeness','?')}
⏱ {msg.get('total_ms','?')}ms
""", unsafe_allow_html=True)
# ── INPUT ─────────────────────────────────────────────────────
# Handle a new question. It comes either from the chat box or from a
# suggestion button that queued it under "pending_q"; typed input wins when
# both are present because of the `or`.
# NOTE(review): several single-quoted f-strings in this section span physical
# lines (not valid Python) — markup appears to have been stripped from them.
# NOTE(review): indentation is lost in this view, so which statements sit
# inside `with trace_ph.container()` / `try` cannot be confirmed from here.
pending = st.session_state.pop("pending_q", None)
question = st.chat_input("Pergunte sobre os projetos GNN ou o pipeline...") or pending
if question:
# No key from any source: warn and halt this script run.
if not get_openai_key():
st.warning("Configure a OpenAI API Key na sidebar.")
st.stop()
# Show the question
# NOTE(review): `question` is interpolated unescaped into a string rendered
# with unsafe_allow_html=True.
st.markdown(f'🧑 {question}
', unsafe_allow_html=True)
# Recorded before the pipeline runs, so the user turn stays in history even
# when the run fails and st.stop() is reached in the except branch.
st.session_state.messages.append({"role": "user", "content": question})
# Trace container
trace_ph = st.empty()
answer_ph = st.empty()
# Every event yielded by the pipeline, kept for the stored trace.
trace_events = []
# Payload of the final/done event; stays empty if that event never arrives.
final_data = {}
# Live trace container
with trace_ph.container():
st.markdown('', unsafe_allow_html=True)
# Placeholder per rendered trace row, keyed by step/status/elapsed time.
step_placeholders = {}
try:
pipeline = get_pipeline(get_openai_key())
# pipeline.run(question) is iterated; each event is read with .get().
for event in pipeline.run(question):
trace_events.append(event)
step = event.get("step", "")
status = event.get("status", "")
data = event.get("data", {})
ms = event.get("elapsed_ms", 0)
emoji, label = STEP_META.get(step, ("◈", step))
# The final/done event carries the result payload and ends the loop
# without being rendered as a trace row.
if step == "final" and status == "done":
final_data = data
break
# Render the step
# NOTE(review): `css` (and `color` / `vcolor` further down) are computed but
# not referenced by the visible templates.
css = "step-running" if status == "running" else (
"step-refine" if step == "refinement" else "step-done"
)
indicator = "⟳" if status == "running" else "✓"
# Optional per-step detail appended to the row; empty for other steps.
detail = ""
if step == "query_rewrite" and status == "done":
detail = f'
→ {data.get("rewritten","")[:80]}'
elif step == "retrieval" and status == "done":
chunks_html = " ".join(
f'
{c["title"][:25]} ({c["score"]})'
for c in data.get("chunks", [])[:3]
)
detail = f'
{chunks_html}'
elif step == "relevance_grade" and status == "done":
rel = data.get("relevant", True)
color = "#00d4aa" if rel else "#ff2d7b"
detail = f'
{"RELEVANT ✓" if rel else "IRRELEVANT ✗"} — {data.get("reason","")[:60]}'
elif step == "self_critique" and status == "done":
sc = data.get("score", 0)
verdict = data.get("verdict", "")
vcolor = "#00d4aa" if verdict == "APPROVE" else "#ff2d7b"
detail = f'
{sc}/10 {verdict}'
elif step == "refinement":
if status == "running":
detail = f' — tentativa {data.get("attempt")}, score anterior: {data.get("score")}'
else:
detail = f' — novo score: {data.get("new_score")} → {data.get("new_verdict")}'
# NOTE(review): the key includes status and elapsed ms, so a step's "running"
# row and its "done" row get separate placeholders rather than the latter
# replacing the former — confirm that is the intended display.
key = f"{step}_{status}_{ms}"
if key not in step_placeholders:
step_placeholders[key] = st.empty()
step_placeholders[key].markdown(
f'
'
f'{emoji} {label} '
f'{indicator} +{ms}ms'
f'{detail}'
f'
',
unsafe_allow_html=True
)
# Any failure is reported in the UI and the script run is halted.
except Exception as e:
st.error(f"Erro no pipeline: {e}")
st.stop()
st.markdown('
', unsafe_allow_html=True)
# ── Final answer ──────────────────────────────────────────
# Each field falls back to a default when the final payload lacks it.
answer = final_data.get("answer", "Não foi possível gerar resposta.")
score = final_data.get("score", 0)
metrics = final_data.get("metrics", {})
total_ms = final_data.get("total_ms", 0)
n_ref = final_data.get("n_refinements", 0)
answer_ph.markdown(f'🤖 {answer}
', unsafe_allow_html=True)
st.markdown(f"""
{score_emoji(score)} Score: {score}/10
Faithfulness: {metrics.get('faithfulness','?')}
Relevance: {metrics.get('relevance','?')}
Completeness: {metrics.get('completeness','?')}
⏱ {total_ms}ms
🔁 {n_ref} refinamento(s)
""", unsafe_allow_html=True)
# Save to history
st.session_state.messages.append({
"role": "assistant",
"content": answer,
"trace": trace_events,
"score": score,
"metrics": metrics,
"total_ms": total_ms,
"n_refinements": n_ref,
})