Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
| from core.retriever import Retriever | |
| from core.graph import RAGAgent | |
| from core.podcast import PodcastGenerator | |
| from core.visualizer import KnowledgeGraphGenerator | |
| from core.summarizer import Summarizer | |
# Browser-tab title/icon and overall layout for the app.
# NOTE(review): the page_icon glyph looks mis-encoded (probably an emoji
# in the original file) -- confirm against the real source.
_PAGE_SETTINGS = {
    "page_title": "AI Knowledge Assistant",
    "page_icon": "π",
    "layout": "wide",
    "initial_sidebar_state": "collapsed",
}
st.set_page_config(**_PAGE_SETTINGS)
# Global stylesheet for the landing page and the app view. It is injected as
# raw HTML, which is why unsafe_allow_html is needed on the call below.
_CUSTOM_CSS = """
<style>
.main { background-color: #f8f9fa; }
/* Typography */
h1, h2, h3, h4 { font-family: 'Helvetica Neue', 'Inter', sans-serif; color: #385A7C; }
p, li { color: #424242; line-height: 1.6; }
/* Hero Section */
.hero-title {
font-size: 3.5rem;
font-weight: 800;
color: #385A7C;
text-align: center;
margin-bottom: 0.5rem;
background: -webkit-linear-gradient(#4A6D8C, #385A7C);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
}
.hero-subtitle {
font-size: 1.5rem;
color: #607d8b;
text-align: center;
margin-bottom: 2rem;
}
/* Section Headers */
.section-header {
font-size: 1.8rem;
font-weight: 700;
color: #385A7C;
margin-top: 2rem;
margin-bottom: 1rem;
border-left: 5px solid #4A6D8C;
padding-left: 15px;
}
/* Card Style */
.stCard {
background-color: #ffffff;
padding: 24px;
border-radius: 16px;
box-shadow: 0 8px 20px rgba(56, 90, 124, 0.05);
margin-bottom: 20px;
border: 1px solid #e1e8ed;
transition: transform 0.3s ease;
}
.stCard:hover {
transform: translateY(-5px);
box-shadow: 0 12px 30px rgba(56, 90, 124, 0.1);
}
/* Feature Badge */
.feature-badge {
background-color: #eef2f6;
color: #4A6D8C;
padding: 4px 12px;
border-radius: 20px;
font-size: 0.8rem;
font-weight: 700;
text-transform: uppercase;
margin-bottom: 10px;
display: inline-block;
}
/* Button Styling */
div.stButton > button {
border-radius: 30px !important;
padding: 10px 25px !important;
background-color: #4A6D8C !important;
color: white !important;
border: none !important;
box-shadow: 0 4px 12px rgba(74, 109, 140, 0.2) !important;
font-size: 1rem !important;
font-weight: 700 !important;
}
div.stButton > button:hover {
background-color: #385A7C !important;
color: white !important;
box-shadow: 0 6px 18px rgba(74, 109, 140, 0.3) !important;
}
/* Force white text for button labels */
div.stButton > button p {
color: white !important;
}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# Session State
# Each entry maps a session key to a zero-argument factory. Factories are only
# called when the key is missing, so a Retriever is not rebuilt on every rerun
# and mutable defaults (list / set) are never shared.
_SESSION_DEFAULTS = {
    "page": lambda: "home",
    "agent": lambda: None,
    "pdf_processor": Retriever,
    "messages": list,
    "full_text": lambda: "",
    "uploader_key": lambda: 0,
    "processed_files": set,
    "deep_summary": lambda: None,
    "graph_dot": lambda: None,
    "podcast_audio": lambda: None,
}
for _state_key, _make_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _make_default()
def switch_page(page_name):
    """Store ``page_name`` as the active page and rerun the script immediately."""
    st.session_state["page"] = page_name
    st.rerun()
# ---- Static markup used by the landing page --------------------------------

_MOTIVATION_CARD = """
<div class="stCard">
<p><b>Secure & Efficient Knowledge Management</b></p>
<ul>
<li><b>Privacy & Data Sovereignty:</b> Handling sensitive or proprietary documents without uploading to public cloud ecosystems.</li>
<li><b>Efficiency via SLMs:</b> Moving away from expensive, giant models towards cost-effective agents that run on edge/consumer hardware.</li>
<li><b>Information Overload:</b> Addressing the massive volume of unstructured files with tools that are both smart and private.</li>
</ul>
</div>
"""

_PROBLEM_CARD = """
<div class="stCard">
<p><b>The Limitations of Standard Cloud RAG</b></p>
<ul>
<li><b>Data Privacy Risks:</b> External, cloud-hosted Vector DBs force sensitive data to leave the user's control.</li>
<li><b>Context Window Constraints:</b> Single-pass processing fails on long docs (1000 pages) without losing critical detail.</li>
<li><b>Naive RAG Failures:</b> Basic retrieval lacks self-correction, leading to hallucinations even with large models.</li>
</ul>
</div>
"""

# All four feature cards share this markup; only badge, title and blurb vary.
_FEATURE_CARD_TEMPLATE = """
<div class="stCard" style="min-height: 240px;">
<span class="feature-badge">{badge}</span>
<h4>{title}</h4>
<p style="font-size: 0.9rem;">{blurb}</p>
</div>
"""

# (badge, title, blurb) for each card, in left-to-right column order.
_FEATURE_CARDS = (
    (
        "Conversational",
        "Reflective RAG",
        "A <b>LangGraph</b> state machine that retrieves and self-corrects via reasoning loops for grounded Q&A.",
    ),
    (
        "Synthesis",
        "Deep Summary",
        "Utilizes <b>Map-Reduce</b> logic to distill long documents into high-density atomic facts and briefings.",
    ),
    (
        "Audio",
        "AI Podcast",
        "Transforms facts into natural narration using <b>NVIDIA Riva TTS</b> technology.",
    ),
    (
        "Visual",
        "Knowledge Graph",
        "Maps relationships from summaries into hierarchical, interactive <b>DOT visuals</b> for structural insight.",
    ),
)

_ARCHITECTURE_CARD = """
<div class="stCard">
<div style="display: grid; grid-template-columns: repeat(4, 1fr); gap: 10px; text-align: center;">
<div>
<h4>π¨ Frontend</h4>
<p style="font-size: 0.85rem; color: #666;">Streamlit Dashboard<br>Responsive UI Components<br>Multi-modal Displays</p>
</div>
<div style="border-left: 1px solid #eee;">
<h4>π§ Brain</h4>
<p style="font-size: 0.85rem; color: #666;">LangChain / LangGraph<br>Agentic Workflows<br>Task Orchestration</p>
</div>
<div style="border-left: 1px solid #eee;">
<h4>πΎ Data</h4>
<p style="font-size: 0.85rem; color: #666;">ChromaDB Vector Store<br>Persistent Metadata<br>Hierarchical Retrieval</p>
</div>
<div style="border-left: 1px solid #eee;">
<h4>𧬠Models</h4>
<p style="font-size: 0.85rem; color: #666;">NVIDIA Nemotron-3 (Reasoning)<br>NVIDIA Nemotron Embed-1B (Vector)<br>NVIDIA Riva TTS (Audio)</p>
</div>
</div>
</div>
"""

# (tab label, info banner, bullet list) for the "Implementation Details" tabs.
_IMPLEMENTATION_TABS = (
    (
        "π»Reflective RAG",
        "**Cyclic State Machine**",
        """
- Executes a reasoning loop: **Retrieve β Draft β Grade β Rewrite**.
- Powered by **LangGraph** to ensure answers are strictly evidence-based.
""",
    ),
    (
        "π Smart Summary",
        "**Synthesis Pipeline**",
        """
- Seamlessly handles ultra-long documents by chunking and parallel summarizing.
- Provides the analytical foundation for deep-dive tools.
""",
    ),
    (
        "π οΈ Tools & Visuals",
        "**Multi-Modal Outputs**",
        """
- **Podcast:** Natural audio briefings using NVIDIA Riva TTS.
- **Knowledge Graph:** Structural relationship mapping via DOT syntax.
""",
    ),
)


def _html(markup):
    """Render ``markup`` through st.markdown with raw HTML enabled."""
    st.markdown(markup, unsafe_allow_html=True)


def _section_header(title):
    """Render a numbered landing-page heading styled by the section-header CSS class."""
    _html(f"<div class='section-header'>{title}</div>")


def show_home():
    """Render the landing page: hero, launch button and the five info sections."""
    # Hero banner.
    _html("<h1 class='hero-title'>π AI Knowledge Assistant</h1>")
    _html("<p class='hero-subtitle'>Transforming Complex Documents into Dynamic Multi-Modal Insights</p>")

    # The launch button sits in the middle of three equal columns to centre it.
    _, launch_col, _ = st.columns([1, 1, 1])
    with launch_col:
        launch_clicked = st.button("π Launch Application", type="primary", width='stretch')
        if launch_clicked:
            switch_page("app")
    st.markdown("---")

    # Sections 1 and 2 side by side.
    motivation_col, problem_col = st.columns(2, gap="large")
    with motivation_col:
        _section_header("π 1. Motivation: Secure & Efficient KM")
        _html(_MOTIVATION_CARD)
    with problem_col:
        _section_header("β 2. Problem: Cloud RAG Limitations")
        _html(_PROBLEM_CARD)

    # Section 3: one card per column.
    _section_header("π‘ 3. Versatile Multi-Agent Suite")
    card_columns = st.columns(4, gap="small")
    for column, (badge, title, blurb) in zip(card_columns, _FEATURE_CARDS):
        with column:
            _html(_FEATURE_CARD_TEMPLATE.format(badge=badge, title=title, blurb=blurb))

    # Section 4.
    _section_header("ποΈ 4. System Architecture")
    _html(_ARCHITECTURE_CARD)

    # Section 5: one tab per implementation area.
    _section_header("βοΈ 5. Implementation Details")
    tabs = st.tabs([label for label, _, _ in _IMPLEMENTATION_TABS])
    for tab, (_, banner, bullets) in zip(tabs, _IMPLEMENTATION_TABS):
        with tab:
            st.info(banner)
            st.markdown(bullets)

    # Bottom spacing.
    _html("<br><br>")
def ensure_deep_summary():
    """Return the cached deep summary, generating it first when possible.

    A summary is generated only when none is cached and ``full_text`` is
    non-empty. If there is no text to summarise, the cached value (``None``
    or an empty result) is returned unchanged.
    """
    state = st.session_state
    if "deep_summary" not in state:
        state.deep_summary = None

    # Guard: nothing to do when a summary exists or there is no text yet.
    if state.deep_summary or not state.full_text:
        return state.deep_summary

    with st.spinner("Analyzing Document (Deep Summary)..."):
        summarizer = Summarizer()
        state.deep_summary = summarizer.generate_deep_summary(state.full_text)
    return state.deep_summary
# Pick the dialog decorator this Streamlit build provides: the stable
# ``st.dialog`` first, then ``st.experimental_dialog``.
dialog_decorator = getattr(st, "dialog", None) or getattr(st, "experimental_dialog", None)

if dialog_decorator is None:
    def dialog_decorator(*args, **kwargs):
        """No-op stand-in: accept any decorator arguments, leave the function as is."""
        def _passthrough(func):
            return func
        return _passthrough
# Without the decorator the summary was written inline into the narrow tools
# expander and `dialog_decorator` was never used.
@dialog_decorator("Deep Document Summary", width="large")
def view_summary_dialog(text):
    """Show the deep summary ``text`` in a dialog.

    When this Streamlit build has no dialog API, ``dialog_decorator`` is a
    no-op and the content renders inline; in that case a heading is written
    first, because there is no dialog title to carry it.
    """
    if not hasattr(st, "dialog") and not hasattr(st, "experimental_dialog"):
        st.info("### Deep Document Summary")
    st.markdown(text)
# Without the decorator the chart was drawn inline in the narrow tools
# expander and `dialog_decorator` was never used.
@dialog_decorator("Knowledge Graph", width="large")
def view_graph_dialog(dot_code):
    """Render the Graphviz DOT source ``dot_code`` as a chart in a dialog.

    Falls back to inline rendering when ``dialog_decorator`` is the no-op
    stand-in (Streamlit build without a dialog API).
    """
    st.graphviz_chart(dot_code, width="stretch")
def _clear_derived_outputs():
    """Drop every artifact generated from the document text.

    The deep summary, knowledge graph and podcast are all produced from
    ``full_text`` (directly or via the summary). Once the corpus changes they
    describe the wrong documents and must be regenerated on demand.
    """
    st.session_state.deep_summary = None
    st.session_state.graph_dot = None
    st.session_state.podcast_audio = None


def _describe_graph_step(node_name, state):
    """Return the reasoning-log line for one agent node, or "" for unknown nodes."""
    if node_name == "retriever":
        return f"π **Retrieving** context for query: *'{state.get('current_query', '...')}'*"
    if node_name == "generator":
        return "π§ **Generating** answer..."
    if node_name == "reflector":
        if state.get("reflection_score") == "yes":
            return "β **Reflection Passed**: Answer is grounded."
        return "β **Reflection Failed**: Hallucination/Irrelevance detected."
    if node_name == "rewriter":
        return "π **Rewriting Query** to improve results..."
    return ""


def _render_sidebar():
    """Render navigation, the PDF uploader and the reset button."""
    if st.button("π Home"):
        switch_page("home")
    st.header("π Upload")

    # Upload Status Message
    upload_status = st.session_state.get("upload_status")
    if upload_status:
        st.success(upload_status)

    uploaded_files = st.file_uploader(
        "Upload PDF(s)",
        type="pdf",
        accept_multiple_files=True,
        key=f"uploader_{st.session_state.uploader_key}",
    )
    if uploaded_files:
        # Files are de-duplicated by name only.
        new_files = [f for f in uploaded_files if f.name not in st.session_state.processed_files]
        if new_files:
            processor = st.session_state.pdf_processor
            with st.spinner(f"Analyzing {len(new_files)} new file(s)..."):
                total_tokens = processor.process_pdf(new_files)
                st.session_state.full_text = processor.get_full_text()
                st.session_state.agent = RAGAgent(processor.get_retriever())
            # The corpus changed, so any cached summary / graph / podcast is stale.
            _clear_derived_outputs()
            # Mark as processed
            st.session_state.processed_files.update(f.name for f in new_files)
            st.session_state.upload_status = (
                f"Successfully indexed ~{total_tokens:,} tokens from {len(new_files)} new file(s)."
            )
            # A new widget key makes the uploader come back empty after the rerun.
            st.session_state.uploader_key += 1
            st.rerun()

    if st.session_state.full_text:
        st.success("Analysis Ready")
    if st.button("π Reset / Clear All", type="primary"):
        st.session_state.pdf_processor = Retriever()
        st.session_state.agent = None
        st.session_state.messages = []
        st.session_state.full_text = ""
        st.session_state.processed_files = set()
        st.session_state.upload_status = ""
        # Previously only the podcast was cleared, so the old summary and graph
        # reappeared as "Ready" after the next upload.
        _clear_derived_outputs()
        st.session_state.uploader_key += 1
        st.rerun()


def _render_chat():
    """Replay the chat history and answer a newly submitted question."""
    st.subheader("π¬ Chat")
    for msg in st.session_state.messages:
        with st.chat_message(msg["role"]):
            if msg.get("thoughts"):
                with st.expander("βοΈ Reasoning Log", expanded=False):
                    for log in msg["thoughts"]:
                        st.write(log)
            st.markdown(msg["content"])

    if prompt := st.chat_input("Ask about the document..."):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)
        with st.chat_message("assistant"):
            if not st.session_state.agent:
                st.warning("Please upload a PDF first.")
                return

            thoughts = []
            with st.status("Agent Reasoning...", expanded=True) as status:

                def graph_callback(node_name, state):
                    # Show each step live and keep it for the history replay.
                    line = _describe_graph_step(node_name, state)
                    if line:
                        status.write(line)
                        thoughts.append(line)

                result = st.session_state.agent.run(prompt, callback=graph_callback)
                status.update(label="Response Ready", state="complete", expanded=False)

            response = result["generation"]
            with st.expander("π Final Stats", expanded=False):
                st.write(
                    f"**Reflected:** {result.get('reflection_score')} | "
                    f"**Total Iter:** {result.get('iterations')}"
                )
            st.markdown(response)
            st.session_state.messages.append({
                "role": "assistant",
                "content": response,
                "thoughts": thoughts,
            })


def _render_tools():
    """Render the summary, podcast and knowledge-graph tool panels."""
    st.subheader("π Tools")
    if not st.session_state.full_text:
        st.info("Upload PDF to enable tools.")
        return

    with st.expander("π Summary", expanded=False):
        if not st.session_state.deep_summary:
            if st.button("Generate Deep Summary"):
                ensure_deep_summary()
                st.rerun()
        else:
            st.success("Summary Ready!")
            if st.button("π View Full Summary", type="primary", width='stretch'):
                view_summary_dialog(st.session_state.deep_summary)
            if st.button("π Regenerate"):
                st.session_state.deep_summary = None
                st.rerun()

    with st.expander("π§ Podcast", expanded=False):
        if not st.session_state.podcast_audio:
            if st.button("Generate Audio"):
                briefing = ensure_deep_summary()
                with st.spinner("Scripting & Synthesizing..."):
                    p_gen = PodcastGenerator()
                    script = p_gen.generate_audio_script(briefing)
                    audio_path = p_gen.generate_audio_file(script)
                if audio_path:
                    st.session_state.podcast_audio = audio_path
                    st.rerun()
                else:
                    st.error("Audio generation failed.")
        else:
            st.success("Podcast Ready!")
            st.audio(st.session_state.podcast_audio)
            if st.button("π Regenerate Podcast"):
                st.session_state.podcast_audio = None
                st.rerun()

    with st.expander("πΈοΈ Knowledge Graph", expanded=False):
        if not st.session_state.graph_dot:
            if st.button("Generate Graph"):
                summary_text = ensure_deep_summary()
                with st.spinner("Building Graph structure..."):
                    kg_gen = KnowledgeGraphGenerator()
                    raw_dot = kg_gen.generate_graph(summary_text)
                if raw_dot:
                    st.session_state.graph_dot = raw_dot
                    st.rerun()
                else:
                    # Match the podcast panel: report an empty result
                    # instead of silently rerunning.
                    st.error("Graph generation failed.")
        else:
            st.success("Graph Ready!")
            if st.button("ποΈ View Knowledge Graph", type="primary", width='stretch'):
                view_graph_dialog(st.session_state.graph_dot)
            if st.button("π Regenerate Graph"):
                st.session_state.graph_dot = None
                st.rerun()


def show_app():
    """Render the working view: upload sidebar, chat column and tools column."""
    # Sidebar: Clean, just for upload and nav
    with st.sidebar:
        _render_sidebar()

    col_chat, col_tools = st.columns([3, 1.3])
    with col_chat:
        _render_chat()
    with col_tools:
        _render_tools()
# Entry point: the landing page for "home", the working view for anything else.
_render_page = show_home if st.session_state.page == "home" else show_app
_render_page()