Spaces:
Sleeping
Sleeping
import base64
import gc
import html
import logging
import os
import tempfile
import time
from datetime import datetime

import requests
import streamlit as st
from groq import Groq

from src.agentic_rag.tools.custom_tool import DocumentSearchTool
# Page chrome: browser-tab title/icon, wide layout, sidebar open on load.
# This stays the first Streamlit call in the script.
st.set_page_config(
    page_title="NeuralDocs — Agentic RAG",
    page_icon="🧠",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Dark theme for the whole app, injected as a raw <style> block. The CSS custom
# properties declared under :root are referenced by the inline HTML snippets
# rendered elsewhere in this script (var(--accent), var(--border), ...).
_CUSTOM_CSS = """
<style>
@import url('https://fonts.googleapis.com/css2?family=Syne:wght@400;600;700;800&family=JetBrains+Mono:wght@400;500&family=Figtree:wght@300;400;500;600&display=swap');
:root {
--bg-base:#080c12;--bg-surface:#0d1520;--bg-elevated:#121d2e;
--border:#1e3048;--border-bright:#2a4460;
--accent:#00e5ff;--accent-dim:#0097a7;--accent-glow:rgba(0,229,255,0.12);
--accent2:#7c5cfc;--text-primary:#e8f4f8;--text-secondary:#7a9bb5;--text-muted:#3d5a75;
--success:#00c896;--warning:#ffb700;
--font-display:'Syne',sans-serif;--font-body:'Figtree',sans-serif;--font-mono:'JetBrains Mono',monospace;
--radius-sm:6px;--radius-md:12px;--radius-lg:18px;
}
html,body,[class*="css"]{font-family:var(--font-body);background-color:var(--bg-base)!important;color:var(--text-primary)!important;}
#MainMenu,footer,header{visibility:hidden;}.stDeployButton{display:none;}
.main .block-container{background:var(--bg-base);padding:1.5rem 2rem 3rem;max-width:1100px;}
.app-header{display:flex;align-items:center;gap:14px;padding:1.2rem 0 0.6rem;border-bottom:1px solid var(--border);margin-bottom:1.5rem;}
.app-logo{width:38px;height:38px;background:linear-gradient(135deg,var(--accent),var(--accent2));border-radius:10px;display:flex;align-items:center;justify-content:center;font-size:18px;font-weight:800;color:#000;flex-shrink:0;}
.app-title{font-family:var(--font-display);font-size:1.5rem;font-weight:800;background:linear-gradient(135deg,var(--accent),var(--accent2));-webkit-background-clip:text;-webkit-text-fill-color:transparent;background-clip:text;line-height:1;}
.app-subtitle{font-size:0.7rem;color:var(--text-muted);letter-spacing:2px;text-transform:uppercase;font-family:var(--font-mono);margin-top:3px;}
.header-badge{margin-left:auto;background:var(--accent-glow);border:1px solid var(--accent-dim);color:var(--accent);font-family:var(--font-mono);font-size:0.65rem;padding:4px 10px;border-radius:20px;letter-spacing:1px;}
[data-testid="stSidebar"]{background:var(--bg-surface)!important;border-right:1px solid var(--border)!important;}
[data-testid="stSidebar"] .block-container{background:var(--bg-surface)!important;padding:1.5rem 1.2rem;}
.sidebar-section-label{font-family:var(--font-mono);font-size:0.6rem;letter-spacing:3px;text-transform:uppercase;color:var(--text-muted);margin:1.2rem 0 0.6rem;padding-bottom:0.4rem;border-bottom:1px solid var(--border);}
.status-pill{display:inline-flex;align-items:center;gap:6px;padding:5px 12px;border-radius:20px;font-family:var(--font-mono);font-size:0.7rem;margin:6px 0;}
.status-ready{background:rgba(0,200,150,0.1);border:1px solid var(--success);color:var(--success);}
.status-waiting{background:rgba(255,183,0,0.1);border:1px solid var(--warning);color:var(--warning);}
.status-dot{width:6px;height:6px;border-radius:50%;background:currentColor;}
.status-dot.pulse{animation:pulse 1.5s infinite;}
@keyframes pulse{0%,100%{opacity:1;}50%{opacity:0.3;}}
.metrics-row{display:grid;grid-template-columns:1fr 1fr;gap:8px;margin:10px 0;}
.metric-card{background:var(--bg-elevated);border:1px solid var(--border);border-radius:var(--radius-md);padding:10px 14px;}
.metric-value{font-family:var(--font-display);font-size:1.3rem;font-weight:700;color:var(--accent);line-height:1;}
.metric-label{font-family:var(--font-mono);font-size:0.6rem;color:var(--text-muted);letter-spacing:1px;text-transform:uppercase;margin-top:3px;}
.stChatMessage{background:var(--bg-surface)!important;border:1px solid var(--border)!important;border-radius:var(--radius-lg)!important;padding:1rem 1.2rem!important;margin-bottom:10px!important;}
[data-testid="stChatMessageContent"] p{font-family:var(--font-body);font-size:0.9rem;line-height:1.7;color:var(--text-primary);}
.answer-meta{display:flex;align-items:center;gap:12px;padding:8px 0 4px;border-top:1px solid var(--border);margin-top:10px;flex-wrap:wrap;}
.confidence-bar-wrap{display:flex;align-items:center;gap:6px;}
.confidence-bar{width:60px;height:4px;background:var(--border);border-radius:2px;overflow:hidden;}
.confidence-fill{height:100%;border-radius:2px;background:linear-gradient(90deg,var(--accent2),var(--accent));}
[data-testid="stChatInput"]{background:var(--bg-elevated)!important;border:1px solid var(--border-bright)!important;border-radius:var(--radius-lg)!important;}
[data-testid="stChatInput"] textarea{color:var(--text-primary)!important;font-family:var(--font-body)!important;background:transparent!important;}
.stButton>button{background:var(--bg-elevated)!important;border:1px solid var(--border-bright)!important;color:var(--text-secondary)!important;border-radius:var(--radius-sm)!important;font-family:var(--font-mono)!important;font-size:0.72rem!important;}
.stButton>button:hover{border-color:var(--accent)!important;color:var(--accent)!important;background:var(--accent-glow)!important;}
[data-testid="stFileUploader"]{background:var(--bg-elevated)!important;border:1px dashed var(--border-bright)!important;border-radius:var(--radius-md)!important;}
.history-item{background:var(--bg-elevated);border:1px solid var(--border);border-left:3px solid var(--accent-dim);border-radius:var(--radius-sm);padding:8px 10px;margin:4px 0;}
.history-q{font-size:0.78rem;color:var(--text-secondary);white-space:nowrap;overflow:hidden;text-overflow:ellipsis;}
.history-time{font-family:var(--font-mono);font-size:0.6rem;color:var(--text-muted);margin-top:2px;}
.empty-state{display:flex;flex-direction:column;align-items:center;justify-content:center;padding:3rem 1rem;text-align:center;gap:12px;}
.empty-icon{font-size:2.5rem;opacity:0.3;}
.empty-text{font-family:var(--font-body);color:var(--text-muted);font-size:0.9rem;max-width:320px;line-height:1.6;}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# ── Session State ─────────────────────────────────────────────────────────────
# Every key the app reads from st.session_state, with its initial value.
defaults = {
    "messages": [], "pdf_tool": None, "pdf_name": None,
    "total_queries": 0, "pdf_hits": 0, "web_hits": 0,
    "query_log": [], "pending_followups": [], "pending_prompt": None,
}
for k, v in defaults.items():
    if k not in st.session_state:
        # Store a copy of list defaults: the session lists are appended to in
        # place, and sharing the object would silently mutate `defaults` too.
        st.session_state[k] = list(v) if isinstance(v, list) else v
def reset_chat():
    """Empty the conversation and drop any queued follow-up suggestions.

    Counters, the query log and the indexed document are left untouched.
    """
    state = st.session_state
    state.pending_followups = []
    state.messages = []
    gc.collect()
def full_reset():
    """Restore every key listed in ``defaults`` to its initial value."""
    for key, initial in defaults.items():
        # Hand out a fresh list each time so session state never aliases the
        # list objects held in `defaults`.
        st.session_state[key] = list(initial) if isinstance(initial, list) else initial
    gc.collect()
def display_pdf(file_bytes, file_name):
    """Render a PDF inline by embedding it as a base64 ``data:`` URI in an iframe.

    Args:
        file_bytes: Raw bytes of the PDF.
        file_name: Accepted for the caller's convenience; not used here.
    """
    encoded = base64.b64encode(file_bytes).decode("utf-8")
    iframe_markup = (
        f'<iframe src="data:application/pdf;base64,{encoded}" '
        'width="100%" height="480px" '
        'style="border:1px solid var(--border);border-radius:var(--radius-md);">'
        '</iframe>'
    )
    st.markdown(iframe_markup, unsafe_allow_html=True)
def web_search(query: str) -> str:
    """Return up to three Serper search hits as ``"title: snippet"`` lines.

    This is a best-effort helper: it returns an empty string when
    ``SERPER_API_KEY`` is not set, when the HTTP request fails or returns an
    error status, or when the response body is not usable JSON.

    Args:
        query: Search text sent as the ``q`` field.

    Returns:
        Newline-joined result lines, or ``""`` if nothing could be fetched.
    """
    api_key = os.environ.get("SERPER_API_KEY", "")
    if not api_key:
        return ""
    try:
        response = requests.post(
            "https://google.serper.dev/search",
            headers={"X-API-KEY": api_key, "Content-Type": "application/json"},
            json={"q": query, "num": 3},
            timeout=10,
        )
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError):
        # Network failure, HTTP error status, or an undecodable body. Only these
        # are swallowed, so interpreter/framework control-flow exceptions still
        # propagate (a bare `except:` would have eaten them as well).
        logging.getLogger(__name__).warning("Web search failed", exc_info=True)
        return ""

    organic = data.get("organic") if isinstance(data, dict) else None
    if not isinstance(organic, list):
        return ""
    return "\n".join(
        f"{hit.get('title', '')}: {hit.get('snippet', '')}"
        for hit in organic[:3]
        if isinstance(hit, dict)
    )
def detect_source(result_text):
    """Label text ``"web"`` if it contains any web-style phrase, else ``"pdf"``.

    Matching is a case-insensitive substring test.
    """
    lowered = result_text.lower()
    for signal in ("according to", "website", "http", "search result", "online"):
        if signal in lowered:
            return "web"
    return "pdf"
def estimate_confidence(result_text):
    """Heuristic confidence score (percent) for an answer.

    Apologetic / not-found phrasing scores 20; otherwise the score depends only
    on answer length: over 400 characters -> 88, over 150 -> 72, else 55.
    """
    lowered = result_text.lower()
    if "i'm sorry" in lowered or "couldn't find" in lowered:
        return 20

    length = len(result_text)
    # (exclusive lower bound on length, score), checked from longest down.
    for min_length, score in ((400, 88), (150, 72)):
        if length > min_length:
            return score
    return 55
def generate_followups(query, answer):
    """Build three canned follow-up questions for the last exchange.

    The first suggestion names the first word of *query* that is longer than
    four characters once surrounding punctuation is removed, or "this" when
    there is no such word.

    Args:
        query: The user's question.
        answer: The assistant's reply; currently unused.

    Returns:
        A list of three suggestion strings.
    """
    # Strip punctuation before measuring, so "backpropagation?" does not yield
    # "...elaborate on backpropagation? in more detail?".
    strip_chars = "?!.,;:\"'()[]{}"
    cleaned_words = (word.strip(strip_chars) for word in query.lower().split())
    topic = next((word for word in cleaned_words if len(word) > 4), "this")
    return [
        f"Can you elaborate on {topic} in more detail?",
        "What are the practical applications of this?",
        "What are the limitations or challenges here?",
    ]
def run_query(prompt):
    """Answer *prompt* with a direct Groq call — no CrewAI overhead, much faster.

    Context is gathered in order of preference: the indexed PDF (when one is
    loaded and its search returns a hit), then a web search. With neither, the
    model is asked to answer from its own knowledge.

    Args:
        prompt: The user's question.

    Returns:
        ``(answer_text, source)`` where ``source`` is ``"pdf"`` when document
        context was used and ``"web"`` otherwise (including when no context
        was found at all).

    Raises:
        Whatever the Groq client raises for a missing key or failed request.
    """
    client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

    # Step 1: try the PDF index.
    context = ""
    source = "web"
    if st.session_state.pdf_tool:
        try:
            pdf_result = st.session_state.pdf_tool._run(prompt)
        except Exception:
            # Best-effort: a failing document search falls through to the web.
            # `Exception` rather than a bare except so that BaseException-derived
            # control-flow signals (e.g. Streamlit rerun/stop) are not swallowed.
            logging.getLogger(__name__).warning("PDF search failed", exc_info=True)
            pdf_result = None
        if pdf_result and "No relevant" not in pdf_result:
            context = f"Document context:\n{pdf_result}\n\n"
            source = "pdf"

    # Step 2: no document context, so try the web.
    if not context:
        web_result = web_search(prompt)
        if web_result:
            context = f"Web search results:\n{web_result}\n\n"
            source = "web"

    # Step 3: ask Groq directly.
    system_prompt = "You are a helpful research assistant. Answer questions concisely based on the provided context. If no context is provided, answer from your knowledge. Keep answers under 200 words."
    user_message = f"{context}Question: {prompt}"
    response = client.chat.completions.create(
        model="llama-3.1-8b-instant",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message},
        ],
        max_tokens=300,
        temperature=0.1,
    )
    # Callers treat the answer as a string, so normalise a missing body to "".
    return response.choices[0].message.content or "", source
# ── Sidebar ───────────────────────────────────────────────────────────────────
# NOTE(review): the section-label and history icons in this block were
# reconstructed from a mis-encoded copy of the file — confirm the intended glyphs.
with st.sidebar:
    # Brand block.
    st.markdown("""
    <div style="padding:0.2rem 0 1rem;">
    <div style="display:flex;align-items:center;gap:10px;margin-bottom:4px;">
    <div style="width:30px;height:30px;background:linear-gradient(135deg,#00e5ff,#7c5cfc);border-radius:8px;display:flex;align-items:center;justify-content:center;font-weight:800;color:#000;font-size:14px;">🧠</div>
    <span style="font-family:'Syne',sans-serif;font-size:1rem;font-weight:700;background:linear-gradient(135deg,#00e5ff,#7c5cfc);-webkit-background-clip:text;-webkit-text-fill-color:transparent;">NeuralDocs</span>
    </div>
    <div style="font-family:'JetBrains Mono',monospace;font-size:0.6rem;letter-spacing:2px;color:#3d5a75;text-transform:uppercase;">Agentic RAG · Groq + LLaMA 3</div>
    </div>
    """, unsafe_allow_html=True)

    # Document upload and indexing.
    st.markdown('<div class="sidebar-section-label">📄 Document</div>', unsafe_allow_html=True)
    uploaded_file = st.file_uploader("Upload PDF", type=["pdf"], label_visibility="collapsed")
    if uploaded_file is not None:
        # Re-index only when nothing is indexed yet or a different file arrived.
        if st.session_state.pdf_tool is None or st.session_state.pdf_name != uploaded_file.name:
            with tempfile.TemporaryDirectory() as tmp:
                # basename() keeps the write inside the temp dir even if the
                # client-supplied name contains path separators.
                path = os.path.join(tmp, os.path.basename(uploaded_file.name))
                with open(path, "wb") as f:
                    f.write(uploaded_file.getvalue())
                with st.spinner("Indexing document…"):
                    st.session_state.pdf_tool = DocumentSearchTool(file_path=path)
            st.session_state.pdf_name = uploaded_file.name
            # A new document starts a new conversation.
            st.session_state.messages = []
            st.session_state.pending_followups = []
        # The file name is user-controlled and is rendered as raw HTML: truncate
        # first, then escape, so an entity is never cut in half.
        shown_name = html.escape(uploaded_file.name[:28]) + ("…" if len(uploaded_file.name) > 28 else "")
        st.markdown(f'<div class="status-pill status-ready"><span class="status-dot"></span>{shown_name}</div>', unsafe_allow_html=True)
        with st.expander("Preview PDF", expanded=False):
            display_pdf(uploaded_file.getvalue(), uploaded_file.name)
    else:
        st.markdown('<div class="status-pill status-waiting"><span class="status-dot pulse"></span>No document loaded</div>', unsafe_allow_html=True)

    # Session statistics.
    st.markdown('<div class="sidebar-section-label">📊 Session Stats</div>', unsafe_allow_html=True)
    total = st.session_state.total_queries or 0
    pdf_h = st.session_state.pdf_hits or 0
    web_h = st.session_state.web_hits or 0
    # Guard the percentages against division by zero before the first query.
    pdf_pct = int((pdf_h / total) * 100) if total else 0
    web_pct = int((web_h / total) * 100) if total else 0
    st.markdown(f"""
    <div class="metrics-row">
    <div class="metric-card"><div class="metric-value">{total}</div><div class="metric-label">Queries</div></div>
    <div class="metric-card"><div class="metric-value" style="color:var(--success)">{pdf_pct}%</div><div class="metric-label">PDF hits</div></div>
    </div>
    <div class="metrics-row">
    <div class="metric-card"><div class="metric-value" style="color:var(--accent2)">{web_pct}%</div><div class="metric-label">Web hits</div></div>
    <div class="metric-card"><div class="metric-value" style="color:var(--warning)">{len(st.session_state.messages)//2}</div><div class="metric-label">Exchanges</div></div>
    </div>
    """, unsafe_allow_html=True)

    # Last six queries, newest first.
    if st.session_state.query_log:
        st.markdown('<div class="sidebar-section-label">🕘 Query History</div>', unsafe_allow_html=True)
        for item in reversed(st.session_state.query_log[-6:]):
            src_icon = "🌐" if item["source"] == "web" else "📄"
            # Query text is user input rendered as raw HTML, so escape it.
            shown_q = html.escape(item["q"][:48]) + ("…" if len(item["q"]) > 48 else "")
            st.markdown(f'<div class="history-item"><div class="history-q">{src_icon} {shown_q}</div><div class="history-time">{item["ts"]}</div></div>', unsafe_allow_html=True)

    # Reset controls.
    st.markdown('<div class="sidebar-section-label">⚙ Controls</div>', unsafe_allow_html=True)
    col1, col2 = st.columns(2)
    with col1:
        st.button("Clear Chat", on_click=reset_chat, use_container_width=True)
    with col2:
        st.button("Full Reset", on_click=full_reset, use_container_width=True)
# ── Main ──────────────────────────────────────────────────────────────────────
# Page header: logo, title, subtitle and the model badge.
st.markdown("""
<div class="app-header">
<div class="app-logo">🧠</div>
<div>
<div class="app-title">NeuralDocs</div>
<div class="app-subtitle">Agentic RAG · PDF + Web Intelligence</div>
</div>
<div class="header-badge">GROQ · LLAMA 3</div>
</div>
""", unsafe_allow_html=True)

# Placeholder shown until the conversation has at least one message.
if not st.session_state.messages:
    st.markdown("""
    <div class="empty-state">
    <div class="empty-icon">🧠</div>
    <div class="empty-text">Upload a PDF in the sidebar, then ask anything.<br>The agent searches your document first — then the web.</div>
    </div>
    """, unsafe_allow_html=True)
# Replay the stored conversation. Only the most recent assistant message gets
# the source/confidence footer and the follow-up buttons, and only while
# suggestions are pending.
# NOTE(review): the Web/PDF icons below were reconstructed from a mis-encoded
# copy of the file — confirm the intended glyphs.
for i, message in enumerate(st.session_state.messages):
    with st.chat_message(message["role"]):
        st.markdown(message["content"])
        is_last = i == len(st.session_state.messages) - 1
        if message["role"] == "assistant" and is_last and st.session_state.pending_followups:
            conf = estimate_confidence(message["content"])
            src = message.get("source", "pdf")
            src_label = "🌐 Web" if src == "web" else "📄 PDF"
            st.markdown(f"""
            <div class="answer-meta">
            <span style="font-family:var(--font-mono);font-size:0.65rem;color:var(--accent);">{src_label}</span>
            <div class="confidence-bar-wrap" style="display:flex;align-items:center;gap:6px;font-family:var(--font-mono);font-size:0.65rem;color:var(--text-muted);">
            <span>Confidence</span>
            <div class="confidence-bar"><div class="confidence-fill" style="width:{conf}%"></div></div>
            <span>{conf}%</span>
            </div>
            </div>
            """, unsafe_allow_html=True)
            st.markdown('<div style="font-family:var(--font-mono);font-size:0.6rem;color:var(--text-muted);margin:8px 0 4px;">💡 FOLLOW-UP SUGGESTIONS</div>', unsafe_allow_html=True)
            # One column per suggestion; a click queues the text as the next prompt.
            cols = st.columns(len(st.session_state.pending_followups))
            for j, (col, q) in enumerate(zip(cols, st.session_state.pending_followups)):
                with col:
                    if st.button(q, key=f"followup_{i}_{j}", use_container_width=True):
                        st.session_state.pending_prompt = q
def handle_query(prompt):
    """Run one question/answer turn and record it in session state.

    Appends the user turn, renders both chat bubbles, updates the hit counters
    and query log, stores fresh follow-up suggestions, then triggers a rerun so
    the page is redrawn from session state.

    If answering fails, the user turn is removed again, an error is shown, and
    the function returns without rerunning.

    Args:
        prompt: The question text to answer.
    """
    st.session_state.pending_followups = []
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)
    with st.chat_message("assistant"):
        with st.spinner("Researching…"):
            try:
                result, src = run_query(prompt)
            except Exception as exc:
                # Keep history as strict user/assistant pairs: drop the turn
                # that never got an answer instead of leaving it orphaned.
                st.session_state.messages.pop()
                st.error(f"Could not answer the question: {exc}")
                return
        st.markdown(result)

    ts = datetime.now().strftime("%H:%M")
    st.session_state.total_queries += 1
    if src == "pdf":
        st.session_state.pdf_hits += 1
    else:
        st.session_state.web_hits += 1
    st.session_state.query_log.append({"q": prompt, "ts": ts, "source": src})
    st.session_state.messages.append({"role": "assistant", "content": result, "source": src})
    st.session_state.pending_followups = generate_followups(prompt, result)
    st.rerun()
# A follow-up button clicked earlier in this run queues its text in
# `pending_prompt`; consume it (clearing the slot first so it runs only once).
if st.session_state.pending_prompt:
    prompt = st.session_state.pending_prompt
    st.session_state.pending_prompt = None
    handle_query(prompt)

# Free-text question typed into the chat box.
prompt = st.chat_input("Ask anything about your document or search the web…")
if prompt:
    handle_query(prompt)