# app.py — Agentic RAG | Self-Critique Pipeline | Daniel Fonseca
import html
import os
import time

import streamlit as st

st.set_page_config(
    page_title="Agentic RAG · Daniel Fonseca",
    page_icon="⚡",
    layout="wide",
    initial_sidebar_state="expanded",
)

# ── CUSTOM CSS ────────────────────────────────────────────────
# NOTE(review): every template rendered with unsafe_allow_html=True is
# reproduced here exactly as it appears in the reviewed text. Presumably the
# real file carries HTML/CSS markup inside these literals — confirm before
# applying this rewrite.
st.markdown(""" """, unsafe_allow_html=True)

# ── SESSION STATE ─────────────────────────────────────────────
# Seed each key only once so values survive Streamlit reruns.
for k, v in {
    'messages': [],
    'openai_key': '',
    'index_built': False,
    'pipeline': None,
}.items():
    if k not in st.session_state:
        st.session_state[k] = v


# ── HELPERS ───────────────────────────────────────────────────
def get_openai_key():
    """Return the OpenAI API key to use, or an empty string if none is set.

    Lookup order: ``st.secrets``, then the ``OPENAI_API_KEY`` environment
    variable, then the key typed in the sidebar (kept in session state).
    """
    try:
        if 'OPENAI_API_KEY' in st.secrets:
            return st.secrets['OPENAI_API_KEY']
    except Exception:
        # Best effort: reading secrets may fail (e.g. when none are
        # configured); fall through to the other sources.
        pass
    # `or` instead of a getenv default: an environment variable that exists
    # but is empty must not mask the key entered in the sidebar.
    return os.getenv('OPENAI_API_KEY') or st.session_state.openai_key


def get_pipeline(key: str):
    """Return the session's pipeline, creating it with *key* on first use.

    The instance is cached in ``st.session_state.pipeline``; *key* is only
    used when no cached instance exists.
    """
    if st.session_state.pipeline is None:
        # Imported lazily so the module is only loaded when first needed.
        from rag_pipeline import AgenticRAGPipeline
        st.session_state.pipeline = AgenticRAGPipeline(key)
        st.session_state.index_built = True
    return st.session_state.pipeline


def score_class(s):
    """Map a score to a class name: ``score-high`` / ``score-mid`` / ``score-low``.

    ``None`` maps to ``score-mid``; 8 and above is high, 6 and above is mid.
    """
    if s is None:
        return "score-mid"
    if s >= 8:
        return "score-high"
    if s >= 6:
        return "score-mid"
    return "score-low"


def score_emoji(s):
    """Map a score to a glyph: ``✦`` (>= 8), ``◈`` (>= 6), ``◇`` otherwise.

    ``None`` maps to ``—``.
    """
    if s is None:
        return "—"
    if s >= 8:
        return "✦"
    if s >= 6:
        return "◈"
    return "◇"


def _esc(text):
    """Escape *text* for interpolation into markdown rendered as raw HTML."""
    return html.escape(str(text))


# ── STEP LABELS ───────────────────────────────────────────────
# Pipeline step id -> (emoji, display label).
STEP_META = {
    "query_rewrite": ("🔄", "Query Rewriting"),
    "retrieval": ("🔍", "Semantic Retrieval"),
    "relevance_grade": ("⚖️", "Relevance Grading"),
    "fallback": ("🔀", "Fallback Strategy"),
    "generation": ("✍️", "Response Generation"),
    "self_critique": ("🧪", "Self-Critique Judge"),
    "refinement": ("🔁", "Refinement Loop"),
    "final": ("✅", "Pipeline Complete"),
}

# ── SIDEBAR ───────────────────────────────────────────────────
with st.sidebar:
    st.markdown("""

⚡ Agentic RAG

Self-Critique Pipeline v1.0

""", unsafe_allow_html=True)
    st.divider()

    st.markdown("#### 🔑 OpenAI API Key")
    key_input = st.text_input(
        "",
        type="password",
        value=st.session_state.openai_key,
        placeholder="sk-...",
        label_visibility="collapsed",
    )
    # Reset the cached pipeline only when the key actually changes. The input
    # is seeded from session state, so it is non-empty on every rerun once a
    # key exists; resetting unconditionally would rebuild the pipeline for
    # every single question.
    if key_input and key_input != st.session_state.openai_key:
        st.session_state.openai_key = key_input
        st.session_state.pipeline = None

    oai_key = get_openai_key()
    if oai_key:
        st.success("✅ Key configurada")
    else:
        st.warning("Configure a OpenAI API Key")

    st.divider()
    st.markdown("#### 🏗️ Arquitetura do Pipeline")
    steps = [
        ("🔄", "Query Rewriting", "Expande e otimiza a query"),
        ("🔍", "Semantic Retrieval", "FAISS + MiniLM embeddings"),
        ("⚖️", "Relevance Grading", "LLM avalia qualidade dos chunks"),
        ("🔀", "Fallback", "Broadens query se irrelevante"),
        ("✍️", "Generation", "GPT-4o-mini gera resposta"),
        ("🧪", "Self-Critique", "LLM-as-Judge score 0-10"),
        ("🔁", "Refinement Loop", "Refaz se score < 7 (max 2x)"),
    ]
    for emoji, name, desc in steps:
        st.markdown(f"""

{emoji}

{name}

{desc}

""", unsafe_allow_html=True)

    st.divider()
    st.markdown("""

sentence-transformers/all-MiniLM-L6-v2
FAISS IndexFlatIP · gpt-4o-mini
CPU-only · HF Spaces free tier

""", unsafe_allow_html=True)

    st.divider()
    if st.button("🗑️ Limpar conversa", use_container_width=True):
        st.session_state.messages = []
        st.rerun()

# ── MAIN ──────────────────────────────────────────────────────
st.markdown('\n\nAgentic RAG Pipeline\n\n', unsafe_allow_html=True)
st.markdown(
    '\n\nQuery Rewriting · Semantic Retrieval · Relevance Grading · Self-Critique · Refinement Loop\n\n',
    unsafe_allow_html=True,
)

# ── WELCOME / SUGGESTIONS ─────────────────────────────────────
# Shown only while the conversation is empty.
if not st.session_state.messages:
    col_info, col_sug = st.columns([1, 1])
    with col_info:
        st.markdown("""

O que torna este pipeline Senior?

✦ Query Rewriting — query otimizada antes do retrieval
✦ Relevance Grading — filtra chunks irrelevantes
✦ Fallback Strategy — broadening automático se retrieval falha
✦ Self-Critique — LLM avalia própria resposta (0-10)
✦ Refinement Loop — refaz até score ≥ 7 (máx 2x)
✦ Live Trace — cada step visível em tempo real

""", unsafe_allow_html=True)
    with col_sug:
        st.markdown("""

💡 Sugestões de perguntas

""", unsafe_allow_html=True)
        suggestions = [
            "Qual projeto tem maior AUC e por quê?",
            "Como funciona o Self-Critique no pipeline?",
            "Diferença entre TGN e HetGNN",
            "Explique Inductive Learning no GraphSAGE",
            "O que é DOMINANT e como detecta anomalias?",
            "Quais projetos usam PyTorch Geometric?",
        ]
        cols = st.columns(2)
        for i, sug in enumerate(suggestions):
            with cols[i % 2]:
                if st.button(sug, key=f"sug_{i}", use_container_width=True):
                    # Hand the suggestion to the input section on the next run.
                    st.session_state["pending_q"] = sug
                    st.rerun()

# ── CHAT HISTORY ─────────────────────────────────────────────
for msg in st.session_state.messages:
    if msg["role"] == "user":
        # User text is untrusted and rendered as raw HTML: escape it.
        st.markdown(f'\n\n🧑 {_esc(msg["content"])}\n\n', unsafe_allow_html=True)
    else:
        # Show the collapsible trace.
        if msg.get("trace"):
            with st.expander(
                f"🔬 Pipeline Trace — Score: {msg.get('score','?')}/10 · {msg.get('total_ms','?')}ms · {msg.get('n_refinements',0)} refinamento(s)",
                expanded=False,
            ):
                for event in msg["trace"]:
                    step = event.get("step", "")
                    status = event.get("status", "")
                    emoji, label = STEP_META.get(step, ("◈", step))
                    data = event.get("data", {})
                    # NOTE(review): `css` is not referenced by the visible
                    # template — presumably consumed by markup not shown here.
                    css = "step-done" if status == "done" else "step-refine"
                    st.markdown(
                        f'\n\n{emoji} {label} +{event.get("elapsed_ms")}ms\n\n',
                        unsafe_allow_html=True,
                    )

        # Model output may echo user-supplied text: escape it as well.
        st.markdown(f'\n\n🤖 {_esc(msg["content"])}\n\n', unsafe_allow_html=True)

        # Final metrics.
        if msg.get("metrics"):
            m = msg["metrics"]
            sc = msg.get("score", 0)
            st.markdown(f"""

{score_emoji(sc)} Score: {sc}/10 Faithfulness: {m.get('faithfulness','?')} Relevance: {m.get('relevance','?')} Completeness: {m.get('completeness','?')} ⏱ {msg.get('total_ms','?')}ms

""", unsafe_allow_html=True)

# ── INPUT ─────────────────────────────────────────────────────
# A clicked suggestion (stored as "pending_q") is used when nothing was typed.
pending = st.session_state.pop("pending_q", None)
question = st.chat_input("Pergunte sobre os projetos GNN ou o pipeline...") or pending

if question:
    if not get_openai_key():
        st.warning("Configure a OpenAI API Key na sidebar.")
        st.stop()

    # Show the question (escaped: it is untrusted and rendered as raw HTML).
    st.markdown(f'\n\n🧑 {_esc(question)}\n\n', unsafe_allow_html=True)
    st.session_state.messages.append({"role": "user", "content": question})

    # Placeholders created up front so the trace renders above the answer.
    trace_ph = st.empty()
    answer_ph = st.empty()
    trace_events = []
    final_data = {}

    # Live trace container.
    with trace_ph.container():
        st.markdown('\n\n', unsafe_allow_html=True)
        step_placeholders = {}

        try:
            pipeline = get_pipeline(get_openai_key())

            for event in pipeline.run(question):
                trace_events.append(event)
                step = event.get("step", "")
                status = event.get("status", "")
                data = event.get("data", {})
                ms = event.get("elapsed_ms", 0)
                emoji, label = STEP_META.get(step, ("◈", step))

                # The completed "final" event carries the result; stop here.
                if step == "final" and status == "done":
                    final_data = data
                    break

                # Render the step.
                # NOTE(review): `css`, `color` and `vcolor` are not referenced
                # by the visible templates — presumably consumed by markup not
                # shown here; confirm.
                css = "step-running" if status == "running" else (
                    "step-refine" if step == "refinement" else "step-done"
                )
                indicator = "⟳" if status == "running" else "✓"

                detail = ""
                if step == "query_rewrite" and status == "done":
                    # Truncate first, then escape, so no entity is cut in half.
                    detail = f'\n→ {_esc(data.get("rewritten", "")[:80])}'
                elif step == "retrieval" and status == "done":
                    chunks_html = " ".join(
                        f'{_esc(c["title"][:25])} ({c["score"]})'
                        for c in data.get("chunks", [])[:3]
                    )
                    detail = f'\n{chunks_html}'
                elif step == "relevance_grade" and status == "done":
                    rel = data.get("relevant", True)
                    color = "#00d4aa" if rel else "#ff2d7b"
                    rel_text = "RELEVANT ✓" if rel else "IRRELEVANT ✗"
                    detail = f'\n{rel_text} — {_esc(data.get("reason", "")[:60])}'
                elif step == "self_critique" and status == "done":
                    sc = data.get("score", 0)
                    verdict = data.get("verdict", "")
                    vcolor = "#00d4aa" if verdict == "APPROVE" else "#ff2d7b"
                    detail = f'\n{sc}/10 {verdict}'
                elif step == "refinement":
                    if status == "running":
                        detail = f' — tentativa {data.get("attempt")}, score anterior: {data.get("score")}'
                    else:
                        detail = f' — novo score: {data.get("new_score")} → {data.get("new_verdict")}'

                # One placeholder per (step, status, elapsed) combination.
                key = f"{step}_{status}_{ms}"
                if key not in step_placeholders:
                    step_placeholders[key] = st.empty()

                step_placeholders[key].markdown(
                    f'\n\n'
                    f'{emoji} {label} '
                    f'{indicator} +{ms}ms'
                    f'{detail}'
                    f'\n\n',
                    unsafe_allow_html=True,
                )

        except Exception as e:
            # Top-level UI boundary: report the failure and end this run.
            st.error(f"Erro no pipeline: {e}")
            st.stop()

        st.markdown('\n\n', unsafe_allow_html=True)

    # ── Final answer ──────────────────────────────────────────
    answer = final_data.get("answer", "Não foi possível gerar resposta.")
    score = final_data.get("score", 0)
    metrics = final_data.get("metrics", {})
    total_ms = final_data.get("total_ms", 0)
    n_ref = final_data.get("n_refinements", 0)

    answer_ph.markdown(f'\n\n🤖 {_esc(answer)}\n\n', unsafe_allow_html=True)

    st.markdown(f"""

{score_emoji(score)} Score: {score}/10 Faithfulness: {metrics.get('faithfulness','?')} Relevance: {metrics.get('relevance','?')} Completeness: {metrics.get('completeness','?')} ⏱ {total_ms}ms 🔁 {n_ref} refinamento(s)

""", unsafe_allow_html=True)

    # Save to history (raw answer; it is escaped again when re-rendered).
    st.session_state.messages.append({
        "role": "assistant",
        "content": answer,
        "trace": trace_events,
        "score": score,
        "metrics": metrics,
        "total_ms": total_ms,
        "n_refinements": n_ref,
    })