Spaces:
Sleeping
Sleeping
| # app.py — Agentic RAG | Self-Critique Pipeline | Daniel Fonseca | |
import html
import os
import time

import streamlit as st
# Page-level Streamlit settings, gathered in one mapping and applied in a
# single call.
_PAGE_SETTINGS = {
    "page_title": "Agentic RAG · Daniel Fonseca",
    "page_icon": "⚡",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)
# ── CUSTOM CSS ────────────────────────────────────────────────
# Global stylesheet for the app: base colours and fonts, sidebar, title,
# trace/step cards, score badges, metric pills, chat bubbles, buttons,
# chat input, dividers and chunk cards. It is injected once as raw HTML.
_CUSTOM_CSS = """
<style>
@import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;700&family=Syne:wght@400;600;800&display=swap');
/* Base */
html, body, [class*="css"] {
font-family: 'Syne', sans-serif;
background: #0a0a0f;
color: #e8e8f0;
}
/* Hide streamlit chrome */
#MainMenu, footer, header {visibility: hidden;}
.block-container { padding-top: 1.5rem; padding-bottom: 2rem; }
/* Sidebar */
section[data-testid="stSidebar"] {
background: #0f0f1a;
border-right: 1px solid #1e1e3a;
}
section[data-testid="stSidebar"] * { color: #c8c8e0 !important; }
/* Title */
.rag-title {
font-family: 'Syne', sans-serif;
font-weight: 800;
font-size: 2.4rem;
background: linear-gradient(135deg, #00d4ff 0%, #7b2fff 50%, #ff2d7b 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
background-clip: text;
letter-spacing: -0.03em;
line-height: 1.1;
margin-bottom: 0.2rem;
}
.rag-subtitle {
font-family: 'JetBrains Mono', monospace;
font-size: 0.75rem;
color: #5a5a8a;
letter-spacing: 0.15em;
text-transform: uppercase;
margin-bottom: 1.5rem;
}
/* Pipeline trace container */
.trace-container {
background: #0d0d1f;
border: 1px solid #1e1e3a;
border-radius: 12px;
padding: 1.2rem 1.5rem;
margin-bottom: 0.8rem;
font-family: 'JetBrains Mono', monospace;
font-size: 0.82rem;
}
/* Step cards */
.step-running {
border-left: 3px solid #f5a623;
background: #131320;
border-radius: 8px;
padding: 0.6rem 1rem;
margin: 0.3rem 0;
animation: pulse 1.5s infinite;
}
.step-done {
border-left: 3px solid #00d4aa;
background: #0a1a14;
border-radius: 8px;
padding: 0.6rem 1rem;
margin: 0.3rem 0;
}
.step-refine {
border-left: 3px solid #ff2d7b;
background: #1a0a14;
border-radius: 8px;
padding: 0.6rem 1rem;
margin: 0.3rem 0;
}
@keyframes pulse {
0%, 100% { opacity: 1; }
50% { opacity: 0.6; }
}
/* Score badge */
.score-badge {
display: inline-block;
padding: 0.15rem 0.5rem;
border-radius: 20px;
font-family: 'JetBrains Mono', monospace;
font-weight: 700;
font-size: 0.85rem;
}
.score-high { background: #003d2a; color: #00d4aa; border: 1px solid #00d4aa; }
.score-mid { background: #3d2600; color: #f5a623; border: 1px solid #f5a623; }
.score-low { background: #3d000a; color: #ff2d7b; border: 1px solid #ff2d7b; }
/* Metric pill */
.metric-row {
display: flex;
gap: 0.6rem;
flex-wrap: wrap;
margin-top: 0.5rem;
}
.metric-pill {
background: #141428;
border: 1px solid #2a2a50;
border-radius: 6px;
padding: 0.2rem 0.6rem;
font-size: 0.75rem;
font-family: 'JetBrains Mono', monospace;
color: #9090cc;
}
/* Chat messages */
.user-msg {
background: #12121f;
border: 1px solid #2a2a50;
border-radius: 12px 12px 2px 12px;
padding: 0.8rem 1.2rem;
margin: 0.5rem 0;
font-size: 0.95rem;
}
.assistant-msg {
background: #0d1a14;
border: 1px solid #1a3028;
border-radius: 12px 12px 12px 2px;
padding: 0.8rem 1.2rem;
margin: 0.5rem 0;
font-size: 0.95rem;
line-height: 1.65;
}
/* Suggestion buttons */
.stButton button {
background: #0f0f1e !important;
border: 1px solid #2a2a50 !important;
color: #9090cc !important;
border-radius: 8px !important;
font-family: 'JetBrains Mono', monospace !important;
font-size: 0.75rem !important;
transition: all 0.2s !important;
}
.stButton button:hover {
border-color: #00d4ff !important;
color: #00d4ff !important;
background: #0a1020 !important;
}
/* Chat input */
.stChatInput textarea {
background: #0f0f1a !important;
border: 1px solid #2a2a50 !important;
border-radius: 10px !important;
color: #e8e8f0 !important;
font-family: 'JetBrains Mono', monospace !important;
font-size: 0.9rem !important;
}
/* Divider */
hr { border-color: #1e1e3a !important; }
/* Chunk card */
.chunk-card {
background: #0f0f1e;
border: 1px solid #2a2a4a;
border-radius: 8px;
padding: 0.5rem 0.8rem;
margin: 0.2rem 0;
font-size: 0.78rem;
font-family: 'JetBrains Mono', monospace;
color: #7070aa;
}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# ── SESSION STATE ─────────────────────────────────────────────
# Seed the keys the rest of the script reads from st.session_state.
# A key that is already present is left untouched.
_SESSION_DEFAULTS = (
    ('messages', []),
    ('openai_key', ''),
    ('index_built', False),
    ('pipeline', None),
)
for _name, _initial in _SESSION_DEFAULTS:
    if _name in st.session_state:
        continue
    st.session_state[_name] = _initial
| # ── HELPERS ─────────────────────────────────────────────────── | |
def get_openai_key() -> str:
    """Return the OpenAI API key from the first source holding a non-empty value.

    Sources are tried in this order:
      1. ``st.secrets['OPENAI_API_KEY']``
      2. the ``OPENAI_API_KEY`` environment variable
      3. ``st.session_state.openai_key`` (the value typed in the sidebar)

    An empty secret or an empty environment variable is skipped, so it can
    no longer mask a key the user entered in the sidebar.

    Returns:
        The key, or whatever ``st.session_state.openai_key`` holds when
        neither of the first two sources provides one.
    """
    secret = ""
    try:
        if 'OPENAI_API_KEY' in st.secrets:
            secret = st.secrets['OPENAI_API_KEY']
    except Exception:
        # Deliberate best-effort: reading st.secrets can raise when no
        # secrets are configured (exact exception type presumably depends
        # on the Streamlit version — TODO confirm). Fall through to the
        # other sources instead of failing the page.
        secret = ""
    if secret:
        return secret
    # `or` (not a getenv default) so a set-but-empty variable also falls back.
    return os.getenv('OPENAI_API_KEY') or st.session_state.openai_key
def get_pipeline(key: str):
    """Return the pipeline cached in session state, building it on first use.

    Args:
        key: API key handed to ``AgenticRAGPipeline`` when a new instance
            has to be constructed. Ignored when a pipeline is already cached.

    Returns:
        The object stored in ``st.session_state.pipeline``.
    """
    cached = st.session_state.pipeline
    if cached is not None:
        return cached
    # Imported here rather than at module top, so the import only happens
    # when a pipeline actually has to be built.
    from rag_pipeline import AgenticRAGPipeline
    st.session_state.pipeline = AgenticRAGPipeline(key)
    st.session_state.index_built = True
    return st.session_state.pipeline
def _as_score(s):
    """Coerce a score to ``float``; return ``None`` if missing or non-numeric."""
    if s is None:
        return None
    try:
        return float(s)
    except (TypeError, ValueError):
        # e.g. "?" or a dict: treat it like a missing score instead of
        # letting the comparison below raise and break page rendering.
        return None


def score_class(s):
    """Map a 0-10 score to the CSS class used for its badge.

    Args:
        s: The score; a number, a numeric string such as ``"8"``, or ``None``.

    Returns:
        ``"score-high"`` for scores >= 8, ``"score-mid"`` for scores >= 6
        and for missing/non-numeric scores, ``"score-low"`` otherwise.
    """
    value = _as_score(s)
    if value is None:
        return "score-mid"
    if value >= 8:
        return "score-high"
    if value >= 6:
        return "score-mid"
    return "score-low"


def score_emoji(s):
    """Map a 0-10 score to the glyph shown in front of its badge text.

    Args:
        s: The score; a number, a numeric string such as ``"8"``, or ``None``.

    Returns:
        ``"✦"`` for scores >= 8, ``"◈"`` for scores >= 6, ``"◇"`` below
        that, and ``"—"`` for missing/non-numeric scores.
    """
    value = _as_score(s)
    if value is None:
        return "—"
    if value >= 8:
        return "✦"
    if value >= 6:
        return "◈"
    return "◇"
# ── STEP LABELS ───────────────────────────────────────────────
# One row per pipeline step: (event step name, icon, display label).
_STEP_ROWS = (
    ("query_rewrite", "🔄", "Query Rewriting"),
    ("retrieval", "🔍", "Semantic Retrieval"),
    ("relevance_grade", "⚖️", "Relevance Grading"),
    ("fallback", "🔀", "Fallback Strategy"),
    ("generation", "✍️", "Response Generation"),
    ("self_critique", "🧪", "Self-Critique Judge"),
    ("refinement", "🔁", "Refinement Loop"),
    ("final", "✅", "Pipeline Complete"),
)
# Lookup used when rendering trace events: step name -> (icon, label).
STEP_META = {step: (icon, label) for step, icon, label in _STEP_ROWS}
# ── SIDEBAR ───────────────────────────────────────────────────
# Sidebar: branding, API-key entry, a static description of the pipeline
# stages, a stack footer, and a button that clears the conversation.
with st.sidebar:
    st.markdown("""
<div style='font-family:Syne;font-weight:800;font-size:1.3rem;
background:linear-gradient(135deg,#00d4ff,#7b2fff);
-webkit-background-clip:text;-webkit-text-fill-color:transparent;
background-clip:text;margin-bottom:0.2rem'>
⚡ Agentic RAG
</div>
<div style='font-family:JetBrains Mono;font-size:0.65rem;color:#5a5a8a;
letter-spacing:0.12em;text-transform:uppercase;margin-bottom:1rem'>
Self-Critique Pipeline v1.0
</div>
""", unsafe_allow_html=True)
    st.divider()
    st.markdown("#### 🔑 OpenAI API Key")
    # A real label hidden via label_visibility, instead of an empty label.
    key_input = st.text_input(
        "OpenAI API Key",
        type="password",
        value=st.session_state.openai_key,
        placeholder="sk-...",
        label_visibility="collapsed",
    )
    # Drop the cached pipeline only when the key actually changed. The
    # script reruns on every interaction, so resetting whenever the field
    # is non-empty would rebuild the pipeline for every question.
    if key_input and key_input != st.session_state.openai_key:
        st.session_state.openai_key = key_input
        st.session_state.pipeline = None
    oai_key = get_openai_key()
    if oai_key:
        st.success("✅ Key configurada")
    else:
        st.warning("Configure a OpenAI API Key")
    st.divider()
    st.markdown("#### 🏗️ Arquitetura do Pipeline")
    # (icon, stage name, one-line description) shown as a static legend.
    steps = [
        ("🔄", "Query Rewriting", "Expande e otimiza a query"),
        ("🔍", "Semantic Retrieval", "FAISS + MiniLM embeddings"),
        ("⚖️", "Relevance Grading", "LLM avalia qualidade dos chunks"),
        ("🔀", "Fallback", "Broadens query se irrelevante"),
        ("✍️", "Generation", "GPT-4o-mini gera resposta"),
        ("🧪", "Self-Critique", "LLM-as-Judge score 0-10"),
        ("🔁", "Refinement Loop", "Refaz se score < 7 (max 2x)"),
    ]
    for emoji, name, desc in steps:
        st.markdown(f"""
<div style='display:flex;align-items:flex-start;gap:0.5rem;margin:0.3rem 0'>
<span style='font-size:0.85rem'>{emoji}</span>
<div>
<div style='font-family:Syne;font-size:0.8rem;color:#c8c8e0;font-weight:600'>{name}</div>
<div style='font-family:JetBrains Mono;font-size:0.65rem;color:#5a5a8a'>{desc}</div>
</div>
</div>""", unsafe_allow_html=True)
    st.divider()
    st.markdown("""
<div style='font-family:JetBrains Mono;font-size:0.65rem;color:#3a3a6a;text-align:center'>
sentence-transformers/all-MiniLM-L6-v2<br>
FAISS IndexFlatIP · gpt-4o-mini<br>
CPU-only · HF Spaces free tier
</div>
""", unsafe_allow_html=True)
    st.divider()
    if st.button("🗑️ Limpar conversa", use_container_width=True):
        st.session_state.messages = []
        st.rerun()
# ── MAIN ──────────────────────────────────────────────────────
# Page heading: the title followed by the list of pipeline stages, each
# rendered as its own raw-HTML block.
_HEADER_FRAGMENTS = (
    '<div class="rag-title">Agentic RAG Pipeline</div>',
    '<div class="rag-subtitle">Query Rewriting · Semantic Retrieval · Relevance Grading · Self-Critique · Refinement Loop</div>',
)
for _fragment in _HEADER_FRAGMENTS:
    st.markdown(_fragment, unsafe_allow_html=True)
# ── WELCOME / SUGGESTIONS ─────────────────────────────────────
# Shown only while the conversation is empty: an info card on the left and
# a two-column grid of suggested questions on the right. Clicking a
# suggestion stores it under "pending_q" and reruns the script.
if not st.session_state.messages:
    info_col, suggestion_col = st.columns([1, 1])
    info_col.markdown("""
<div style='background:#0d0d1f;border:1px solid #1e1e3a;border-radius:12px;padding:1.2rem 1.5rem;margin-bottom:1rem'>
<div style='font-family:Syne;font-weight:600;font-size:1rem;color:#00d4ff;margin-bottom:0.8rem'>
O que torna este pipeline Senior?
</div>
<div style='font-family:JetBrains Mono;font-size:0.75rem;color:#7070aa;line-height:1.8'>
✦ <b style='color:#c8c8e0'>Query Rewriting</b> — query otimizada antes do retrieval<br>
✦ <b style='color:#c8c8e0'>Relevance Grading</b> — filtra chunks irrelevantes<br>
✦ <b style='color:#c8c8e0'>Fallback Strategy</b> — broadening automático se retrieval falha<br>
✦ <b style='color:#c8c8e0'>Self-Critique</b> — LLM avalia própria resposta (0-10)<br>
✦ <b style='color:#c8c8e0'>Refinement Loop</b> — refaz até score ≥ 7 (máx 2x)<br>
✦ <b style='color:#c8c8e0'>Live Trace</b> — cada step visível em tempo real
</div>
</div>
""", unsafe_allow_html=True)
    suggestion_col.markdown("""
<div style='font-family:Syne;font-weight:600;font-size:0.9rem;color:#9090cc;margin-bottom:0.6rem'>
💡 Sugestões de perguntas
</div>""", unsafe_allow_html=True)
    suggested_questions = (
        "Qual projeto tem maior AUC e por quê?",
        "Como funciona o Self-Critique no pipeline?",
        "Diferença entre TGN e HetGNN",
        "Explique Inductive Learning no GraphSAGE",
        "O que é DOMINANT e como detecta anomalias?",
        "Quais projetos usam PyTorch Geometric?",
    )
    # Buttons alternate between the two grid columns (even index -> left).
    button_grid = suggestion_col.columns(2)
    for idx, prompt in enumerate(suggested_questions):
        clicked = button_grid[idx % 2].button(
            prompt, key=f"sug_{idx}", use_container_width=True
        )
        if clicked:
            st.session_state["pending_q"] = prompt
            st.rerun()
# ── CHAT HISTORY ─────────────────────────────────────────────
# Replay every stored message. Assistant messages may carry a collapsed
# pipeline trace and a row of metric badges.
for msg in st.session_state.messages:
    if msg["role"] == "user":
        # The question is free text from the user; escape it before
        # embedding it in raw HTML.
        st.markdown(
            f'<div class="user-msg">🧑 {html.escape(msg["content"])}</div>',
            unsafe_allow_html=True,
        )
        continue
    # Collapsible trace
    if msg.get("trace"):
        with st.expander(f"🔬 Pipeline Trace — Score: {msg.get('score','?')}/10 · {msg.get('total_ms','?')}ms · {msg.get('n_refinements',0)} refinamento(s)", expanded=False):
            for event in msg["trace"]:
                step = event.get("step", "")
                emoji, label = STEP_META.get(step, ("◈", step))
                # Same colour rule as the live trace: refinement events use
                # the "refine" card, everything else the "done" card. The
                # old status-based rule painted every "running" event as a
                # refinement. Nothing is running in history, so the pulsing
                # "running" card is never used here.
                css = "step-refine" if step == "refinement" else "step-done"
                st.markdown(
                    f'<div class="{css}">{emoji} <b>{html.escape(label)}</b> '
                    f'<span style="color:#5a5a8a;font-size:0.75rem">+{event.get("elapsed_ms", 0)}ms</span></div>',
                    unsafe_allow_html=True,
                )
    # NOTE(review): the answer is model output embedded in raw HTML without
    # escaping. It is left as-is because escaping may change how answers
    # containing Markdown/code render — confirm before hardening.
    st.markdown(f'<div class="assistant-msg">🤖 {msg["content"]}</div>', unsafe_allow_html=True)
    # Final metrics
    if msg.get("metrics"):
        m = msg["metrics"]
        sc = msg.get("score", 0)
        st.markdown(f"""
<div class="metric-row">
<span class="score-badge {score_class(sc)}">{score_emoji(sc)} Score: {sc}/10</span>
<span class="metric-pill">Faithfulness: {m.get('faithfulness','?')}</span>
<span class="metric-pill">Relevance: {m.get('relevance','?')}</span>
<span class="metric-pill">Completeness: {m.get('completeness','?')}</span>
<span class="metric-pill">⏱ {msg.get('total_ms','?')}ms</span>
</div>
""", unsafe_allow_html=True)
# ── INPUT ─────────────────────────────────────────────────────
def _step_detail_html(step, status, data):
    """Build the optional detail HTML appended to a live trace card.

    Args:
        step: The event's "step" value.
        status: The event's "status" value ("running" / "done").
        data: The event's "data" mapping.

    Returns:
        An HTML fragment, or "" when this step/status pair has no detail.
        Every value read from ``data`` is HTML-escaped, because it comes
        from the pipeline and may echo user text.
    """
    if step == "refinement":
        if status == "running":
            attempt = html.escape(str(data.get("attempt")))
            previous = html.escape(str(data.get("score")))
            return f' — tentativa {attempt}, score anterior: {previous}'
        new_score = html.escape(str(data.get("new_score")))
        new_verdict = html.escape(str(data.get("new_verdict")))
        return f' — novo score: {new_score} → {new_verdict}'
    if status != "done":
        return ""
    if step == "query_rewrite":
        # Truncate first, then escape, so an entity is never cut in half.
        rewritten = html.escape(data.get("rewritten", "")[:80])
        return f'<br><span style="color:#7b2fff">→ {rewritten}</span>'
    if step == "retrieval":
        chunks_html = " ".join(
            f'<span class="metric-pill">{html.escape(c["title"][:25])} ({html.escape(str(c["score"]))})</span>'
            for c in data.get("chunks", [])[:3]
        )
        return f'<br>{chunks_html}'
    if step == "relevance_grade":
        rel = data.get("relevant", True)
        color = "#00d4aa" if rel else "#ff2d7b"
        tag = "RELEVANT ✓" if rel else "IRRELEVANT ✗"
        reason = html.escape(data.get("reason", "")[:60])
        return f'<br><span style="color:{color}">{tag}</span> — {reason}'
    if step == "self_critique":
        sc = data.get("score", 0)
        verdict = data.get("verdict", "")
        vcolor = "#00d4aa" if verdict == "APPROVE" else "#ff2d7b"
        return (
            f'<br><span class="score-badge {score_class(sc)}">{html.escape(str(sc))}/10</span> '
            f'<span style="color:{vcolor}">{html.escape(str(verdict))}</span>'
        )
    return ""


# A clicked suggestion (stored under "pending_q") is consumed once and used
# when nothing was typed in the chat box.
pending = st.session_state.pop("pending_q", None)
question = st.chat_input("Pergunte sobre os projetos GNN ou o pipeline...") or pending
if question:
    if not get_openai_key():
        st.warning("Configure a OpenAI API Key na sidebar.")
        st.stop()
    # Show the question (escaped: it is free text from the user)
    st.markdown(f'<div class="user-msg">🧑 {html.escape(question)}</div>', unsafe_allow_html=True)
    st.session_state.messages.append({"role": "user", "content": question})
    # Placeholders are created in display order: trace first, answer below.
    trace_ph = st.empty()
    answer_ph = st.empty()
    trace_events = []
    final_data = {}
    # Live trace container.
    # The former '<div class="trace-container">' / '</div>' pair was removed:
    # they were emitted by separate st.markdown calls, so the div could not
    # wrap the step cards rendered between them.
    with trace_ph.container():
        # step name -> placeholder currently showing that step's "running"
        # card, so the matching "done" event overwrites it in place. The old
        # key included status and elapsed ms, so it never matched and the
        # "running" card stayed on screen, pulsing, next to the "done" card.
        running_slots = {}
        try:
            pipeline = get_pipeline(get_openai_key())
            for event in pipeline.run(question):
                trace_events.append(event)
                step = event.get("step", "")
                status = event.get("status", "")
                data = event.get("data", {})
                ms = event.get("elapsed_ms", 0)
                if step == "final" and status == "done":
                    final_data = data
                    break
                emoji, label = STEP_META.get(step, ("◈", step))
                # Render the step
                if status == "running":
                    css = "step-running"
                    indicator = "⟳"
                    slot = running_slots[step] = st.empty()
                else:
                    css = "step-refine" if step == "refinement" else "step-done"
                    indicator = "✓"
                    slot = running_slots.pop(step, None)
                    if slot is None:
                        # No "running" card preceded this event.
                        slot = st.empty()
                detail = _step_detail_html(step, status, data)
                slot.markdown(
                    f'<div class="{css}">'
                    f'{emoji} <b>{html.escape(label)}</b> '
                    f'<span style="color:#5a5a8a;font-size:0.73rem">{indicator} +{ms}ms</span>'
                    f'{detail}'
                    f'</div>',
                    unsafe_allow_html=True
                )
        except Exception as e:
            # Top-level boundary of the request: report the failure in the
            # page and end this run.
            st.error(f"Erro no pipeline: {e}")
            st.stop()
    # ── Final answer ─────────────────────────────────────────
    answer = final_data.get("answer", "Não foi possível gerar resposta.")
    score = final_data.get("score", 0)
    metrics = final_data.get("metrics", {})
    total_ms = final_data.get("total_ms", 0)
    n_ref = final_data.get("n_refinements", 0)
    # NOTE(review): the answer is model output embedded in raw HTML without
    # escaping. It is left as-is because escaping may change how answers
    # containing Markdown/code render — confirm before hardening.
    answer_ph.markdown(f'<div class="assistant-msg">🤖 {answer}</div>', unsafe_allow_html=True)
    st.markdown(f"""
<div class="metric-row">
<span class="score-badge {score_class(score)}">{score_emoji(score)} Score: {score}/10</span>
<span class="metric-pill">Faithfulness: {metrics.get('faithfulness','?')}</span>
<span class="metric-pill">Relevance: {metrics.get('relevance','?')}</span>
<span class="metric-pill">Completeness: {metrics.get('completeness','?')}</span>
<span class="metric-pill">⏱ {total_ms}ms</span>
<span class="metric-pill">🔁 {n_ref} refinamento(s)</span>
</div>
""", unsafe_allow_html=True)
    # Save to history (raw answer plus the full event trace)
    st.session_state.messages.append({
        "role": "assistant",
        "content": answer,
        "trace": trace_events,
        "score": score,
        "metrics": metrics,
        "total_ms": total_ms,
        "n_refinements": n_ref,
    })