"""Streamlit front end for the AI Knowledge Assistant.

The script renders one of two pages, selected by ``st.session_state.page``:

* ``show_home`` - a static landing page describing the project.
* ``show_app``  - the working page: PDF upload sidebar, chat column and a
  tools column (deep summary, podcast audio, knowledge graph).
"""
import streamlit as st
from dotenv import load_dotenv

# Kept ahead of the ``core`` imports, as in the original ordering.
# NOTE(review): presumably the core modules read environment variables at
# import time - confirm before reordering.
load_dotenv()

from core.retriever import Retriever
from core.graph import RAGAgent
from core.podcast import PodcastGenerator
from core.visualizer import KnowledgeGraphGenerator
from core.summarizer import Summarizer

st.set_page_config(
    page_title="AI Knowledge Assistant",
    page_icon="🎓",
    layout="wide",
    initial_sidebar_state="collapsed",
)

# NOTE(review): this markup payload is blank in the reviewed source -
# presumably a custom CSS block belongs here; confirm against the real file.
st.markdown(""" """, unsafe_allow_html=True)


# ---------------------------------------------------------------------------
# Landing-page copy
# NOTE(review): the strings below are reproduced as they appear in the
# reviewed source, where any inline HTML tags were not visible.
# ---------------------------------------------------------------------------

_MOTIVATION_CARD = """

Secure & Efficient Knowledge Management

Privacy & Data Sovereignty: Handling sensitive or proprietary documents without uploading to public cloud ecosystems.
Efficiency via SLMs: Moving away from expensive, giant models towards cost-effective agents that run on edge/consumer hardware.
Information Overload: Addressing the massive volume of unstructured files with tools that are both smart and private.

"""

_PROBLEM_CARD = """

The Limitations of Standard Cloud RAG

Data Privacy Risks: External, cloud-hosted Vector DBs force sensitive data to leave the user's control.
Context Window Constraints: Single-pass processing fails on long docs (1000 pages) without losing critical detail.
Naive RAG Failures: Basic retrieval lacks self-correction, leading to hallucinations even with large models.

"""

# One entry per column of the "Versatile Multi-Agent Suite" row, left to right.
_FEATURE_CARDS = (
    """

Conversational

Reflective RAG

A LangGraph state machine that retrieves and self-corrects via reasoning loops for grounded Q&A.

""",
    """

Synthesis

Deep Summary

Utilizes Map-Reduce logic to distill long documents into high-density atomic facts and briefings.

""",
    """

Audio

AI Podcast

Transforms facts into natural narration using NVIDIA Riva TTS technology.

""",
    """

Visual

Knowledge Graph

Maps relationships from summaries into hierarchical, interactive DOT visuals for structural insight.

""",
)

_ARCHITECTURE_OVERVIEW = """

🎨 Frontend

Streamlit Dashboard
Responsive UI Components
Multi-modal Displays

🧠 Brain

LangChain / LangGraph
Agentic Workflows
Task Orchestration

💾 Data

ChromaDB Vector Store
Persistent Metadata
Hierarchical Retrieval

🧬 Models

NVIDIA Nemotron-3 (Reasoning)
NVIDIA Nemotron Embed-1B (Vector)
NVIDIA Riva TTS (Audio)

"""

# (tab label, info banner, markdown bullets) for "Implementation Details".
# NOTE(review): bullet line breaks were restored from a whitespace-collapsed
# source; confirm they match the intended layout.
_IMPLEMENTATION_TABS = (
    (
        "💻Reflective RAG",
        "**Cyclic State Machine**",
        """
- Executes a reasoning loop: **Retrieve → Draft → Grade → Rewrite**.
- Powered by **LangGraph** to ensure answers are strictly evidence-based.
""",
    ),
    (
        "📄 Smart Summary",
        "**Synthesis Pipeline**",
        """
- Seamlessly handles ultra-long documents by chunking and parallel summarizing.
- Provides the analytical foundation for deep-dive tools.
""",
    ),
    (
        "🛠️ Tools & Visuals",
        "**Multi-Modal Outputs**",
        """
- **Podcast:** Natural audio briefings using NVIDIA Riva TTS.
- **Knowledge Graph:** Structural relationship mapping via DOT syntax.
""",
    ),
)


# ---------------------------------------------------------------------------
# Session state
# ---------------------------------------------------------------------------

def _init_session_state() -> None:
    """Seed every session key this script reads, leaving existing values alone.

    Defaults are produced by factories so that ``Retriever()`` is only
    constructed when the key is actually missing, and so each session gets
    its own list/set instances.
    """
    factories = {
        "page": lambda: "home",
        "agent": lambda: None,
        "pdf_processor": Retriever,
        "messages": list,
        "full_text": lambda: "",
        "uploader_key": lambda: 0,
        "processed_files": set,
        # Previously created only on first upload; seeded here so readers
        # need no membership check.
        "upload_status": lambda: "",
        "deep_summary": lambda: None,
        "graph_dot": lambda: None,
        "podcast_audio": lambda: None,
    }
    for key, make_default in factories.items():
        if key not in st.session_state:
            st.session_state[key] = make_default()


def _clear_derived_outputs() -> None:
    """Drop the cached summary, graph and podcast.

    All three are generated from ``full_text`` (directly or via the deep
    summary), so they must be discarded whenever the indexed text changes.
    """
    st.session_state.deep_summary = None
    st.session_state.graph_dot = None
    st.session_state.podcast_audio = None


_init_session_state()


def switch_page(page_name):
    """Store *page_name* as the active page and rerun the script."""
    st.session_state.page = page_name
    st.rerun()


# ---------------------------------------------------------------------------
# Home page
# ---------------------------------------------------------------------------

def _render_html(markup: str) -> None:
    """Render *markup* through ``st.markdown`` with raw HTML enabled."""
    st.markdown(markup, unsafe_allow_html=True)


def _render_heading(title: str) -> None:
    """Render *title* surrounded by the blank-line padding used by every heading."""
    _render_html(f"\n\n{title}\n\n")


def show_home():
    """Render the landing page: hero, launch button and five info sections."""
    _render_heading("🎓 AI Knowledge Assistant")
    _render_heading("Transforming Complex Documents into Dynamic Multi-Modal Insights")

    # Three equal columns; only the middle one is used, which centres the button.
    _, col_cta, _ = st.columns([1, 1, 1])
    with col_cta:
        if st.button("🚀 Launch Application", type="primary", width='stretch'):
            switch_page("app")

    st.markdown("---")

    col_motivation, col_problem = st.columns(2, gap="large")
    with col_motivation:
        _render_heading("🌟 1. Motivation: Secure & Efficient KM")
        _render_html(_MOTIVATION_CARD)
    with col_problem:
        _render_heading("❓ 2. Problem: Cloud RAG Limitations")
        _render_html(_PROBLEM_CARD)

    _render_heading("💡 3. Versatile Multi-Agent Suite")
    for column, card in zip(st.columns(4, gap="small"), _FEATURE_CARDS):
        with column:
            _render_html(card)

    _render_heading("🏗️ 4. System Architecture")
    _render_html(_ARCHITECTURE_OVERVIEW)

    _render_heading("⚙️ 5. Implementation Details")
    tabs = st.tabs([label for label, _, _ in _IMPLEMENTATION_TABS])
    for tab, (_, banner, bullets) in zip(tabs, _IMPLEMENTATION_TABS):
        with tab:
            st.info(banner)
            st.markdown(bullets)

    # Trailing spacer.
    _render_html("\n\n")


# ---------------------------------------------------------------------------
# Shared helpers for the application page
# ---------------------------------------------------------------------------

def ensure_deep_summary():
    """Return the cached deep summary, generating it first if necessary.

    A summary is generated only when none is cached and ``full_text`` is
    non-empty; otherwise the current cached value (possibly ``None``) is
    returned unchanged.
    """
    if "deep_summary" not in st.session_state:
        st.session_state.deep_summary = None

    if not st.session_state.deep_summary and st.session_state.full_text:
        with st.spinner("Analyzing Document (Deep Summary)..."):
            summarizer = Summarizer()
            st.session_state.deep_summary = summarizer.generate_deep_summary(
                st.session_state.full_text
            )
    return st.session_state.deep_summary


# Pick whichever dialog decorator this Streamlit build exposes; fall back to
# a no-op decorator so the "dialog" functions render inline instead.
if hasattr(st, "dialog"):
    dialog_decorator = st.dialog
elif hasattr(st, "experimental_dialog"):
    dialog_decorator = st.experimental_dialog
else:
    def dialog_decorator(*args, **kwargs):
        """Stand-in for ``st.dialog``: accept its arguments, leave the function as is."""
        def decorator(func):
            return func
        return decorator


@dialog_decorator("Deep Document Summary", width="large")
def view_summary_dialog(text):
    """Show *text* as markdown in a dialog (or inline when dialogs are unavailable)."""
    # Without a real dialog there is no title bar, so emit a heading instead.
    if not hasattr(st, "dialog") and not hasattr(st, "experimental_dialog"):
        st.info("### Deep Document Summary")
    st.markdown(text)


@dialog_decorator("Knowledge Graph Visualization", width="large")
def view_graph_dialog(dot_code):
    """Render *dot_code* with ``st.graphviz_chart`` inside a dialog."""
    st.graphviz_chart(dot_code, width="stretch")


def _describe_node(node_name, state) -> str:
    """Return the progress line for a graph node, or ``""`` for unknown nodes."""
    if node_name == "retriever":
        return f"🔍 **Retrieving** context for query: *'{state.get('current_query', '...')}'*"
    if node_name == "generator":
        return "🧠 **Generating** answer..."
    if node_name == "reflector":
        if state.get("reflection_score") == "yes":
            return "✅ **Reflection Passed**: Answer is grounded."
        return "❌ **Reflection Failed**: Hallucination/Irrelevance detected."
    if node_name == "rewriter":
        return "🔄 **Rewriting Query** to improve results..."
    return ""


# ---------------------------------------------------------------------------
# Application page
# ---------------------------------------------------------------------------

def _render_sidebar() -> None:
    """Render navigation, the PDF uploader and the reset button."""
    state = st.session_state

    with st.sidebar:
        if st.button("🏠 Home"):
            switch_page("home")

        st.header("📄 Upload")

        # Outcome of the previous upload; survives the rerun triggered below.
        if state.upload_status:
            st.success(state.upload_status)

        # The key changes after every upload/reset, which gives a fresh,
        # empty uploader widget on the next run.
        uploaded_files = st.file_uploader(
            "Upload PDF(s)",
            type="pdf",
            accept_multiple_files=True,
            key=f"uploader_{state.uploader_key}",
        )

        if uploaded_files:
            new_files = [f for f in uploaded_files if f.name not in state.processed_files]
            if new_files:
                with st.spinner(f"Analyzing {len(new_files)} new file(s)..."):
                    total_tokens = state.pdf_processor.process_pdf(new_files)
                    state.full_text = state.pdf_processor.get_full_text()
                    state.agent = RAGAgent(state.pdf_processor.get_retriever())

                    # Mark as processed
                    state.processed_files.update(f.name for f in new_files)

                    # The indexed text just changed, so anything generated
                    # from the previous text is out of date.
                    _clear_derived_outputs()

                    state.upload_status = (
                        f"Successfully indexed ~{total_tokens:,} tokens "
                        f"from {len(new_files)} new file(s)."
                    )
                    state.uploader_key += 1
                    st.rerun()

        if state.full_text:
            st.success("Analysis Ready")

        # NOTE(review): nesting of this button relative to the check above is
        # not recoverable from the collapsed source; kept always visible.
        if st.button("🔄 Reset / Clear All", type="primary"):
            state.pdf_processor = Retriever()
            state.agent = None
            state.messages = []
            state.full_text = ""
            state.processed_files = set()
            state.upload_status = ""
            # Clears summary and graph as well as the podcast, so the next
            # document never shows the previous document's outputs.
            _clear_derived_outputs()
            state.uploader_key += 1
            st.rerun()


def _render_chat() -> None:
    """Replay the stored conversation and handle one new prompt, if any."""
    state = st.session_state

    st.subheader("💬 Chat")

    for entry in state.messages:
        with st.chat_message(entry["role"]):
            logged_thoughts = entry.get("thoughts")
            if logged_thoughts:
                with st.expander("⛓️ Reasoning Log", expanded=False):
                    for line in logged_thoughts:
                        st.write(line)
            st.markdown(entry["content"])

    prompt = st.chat_input("Ask about the document...")
    if not prompt:
        return

    state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.chat_message("assistant"):
        if not state.agent:
            st.warning("Please upload a PDF first.")
            return

        thoughts = []
        with st.status("Agent Reasoning...", expanded=True) as status:

            def graph_callback(node_name, node_state):
                """Echo each node's progress line into the status box and the log."""
                line = _describe_node(node_name, node_state)
                if line:
                    status.write(line)
                    thoughts.append(line)

            result = state.agent.run(prompt, callback=graph_callback)
            status.update(label="Response Ready", state="complete", expanded=False)

        response = result["generation"]
        with st.expander("📊 Final Stats", expanded=False):
            st.write(
                f"**Reflected:** {result.get('reflection_score')} | "
                f"**Total Iter:** {result.get('iterations')}"
            )
        st.markdown(response)
        state.messages.append({
            "role": "assistant",
            "content": response,
            "thoughts": thoughts,
        })


def _render_tools() -> None:
    """Render the summary, podcast and knowledge-graph panels."""
    state = st.session_state

    st.subheader("🛠 Tools")

    if not state.full_text:
        st.info("Upload PDF to enable tools.")
        return

    with st.expander("📝 Summary", expanded=False):
        if not state.deep_summary:
            if st.button("Generate Deep Summary"):
                ensure_deep_summary()
                st.rerun()
        else:
            st.success("Summary Ready!")
            if st.button("📖 View Full Summary", type="primary", width='stretch'):
                view_summary_dialog(state.deep_summary)
            if st.button("🔄 Regenerate"):
                state.deep_summary = None
                st.rerun()

    with st.expander("🎧 Podcast", expanded=False):
        if not state.podcast_audio:
            if st.button("Generate Audio"):
                # The podcast script is written from the deep summary.
                briefing = ensure_deep_summary()
                with st.spinner("Scripting & Synthesizing..."):
                    podcast = PodcastGenerator()
                    script = podcast.generate_audio_script(briefing)
                    audio_path = podcast.generate_audio_file(script)
                if audio_path:
                    state.podcast_audio = audio_path
                    st.rerun()
                else:
                    st.error("Audio generation failed.")
        else:
            st.success("Podcast Ready!")
            st.audio(state.podcast_audio)
            if st.button("🔄 Regenerate Podcast"):
                state.podcast_audio = None
                st.rerun()

    with st.expander("🕸️ Knowledge Graph", expanded=False):
        if not state.graph_dot:
            if st.button("Generate Graph"):
                # The graph is built from the deep summary as well.
                summary_text = ensure_deep_summary()
                with st.spinner("Building Graph structure..."):
                    graph_builder = KnowledgeGraphGenerator()
                    state.graph_dot = graph_builder.generate_graph(summary_text)
                st.rerun()
        else:
            st.success("Graph Ready!")
            if st.button("👁️ View Knowledge Graph", type="primary", width='stretch'):
                view_graph_dialog(state.graph_dot)
            if st.button("🔄 Regenerate Graph"):
                state.graph_dot = None
                st.rerun()


def show_app():
    """Render the working page: upload sidebar, chat column and tools column."""
    # Sidebar: Clean, just for upload and nav
    _render_sidebar()

    col_chat, col_tools = st.columns([3, 1.3])
    with col_chat:
        _render_chat()
    with col_tools:
        _render_tools()


if st.session_state.page == "home":
    show_home()
else:
    show_app()