import streamlit as st
from dotenv import load_dotenv
load_dotenv()
from core.retriever import Retriever
from core.graph import RAGAgent
from core.podcast import PodcastGenerator
from core.visualizer import KnowledgeGraphGenerator
from core.summarizer import Summarizer
# Browser-tab metadata and overall layout for the app.
_page_settings = {
    "page_title": "AI Knowledge Assistant",
    "page_icon": "π",
    "layout": "wide",
    "initial_sidebar_state": "collapsed",
}
st.set_page_config(**_page_settings)

# Raw-HTML injection point; the payload is currently nothing but a newline.
st.markdown("""
""", unsafe_allow_html=True)
# Session State
def _init_session_state():
    """Seed every session key the app reads, leaving existing values untouched."""
    # Factories rather than ready-made values: Retriever() and the mutable
    # containers are only constructed when their key is actually missing.
    default_factories = {
        "page": lambda: "home",
        "agent": lambda: None,
        "pdf_processor": Retriever,
        "messages": list,
        "full_text": lambda: "",
        "uploader_key": lambda: 0,
        "processed_files": set,
        "deep_summary": lambda: None,
        "graph_dot": lambda: None,
        "podcast_audio": lambda: None,
    }
    for key, make_default in default_factories.items():
        if key not in st.session_state:
            st.session_state[key] = make_default()


_init_session_state()
def switch_page(page_name):
    """Make ``page_name`` the active page and restart the script run.

    Args:
        page_name: Value stored under the ``page`` session key.
    """
    st.session_state["page"] = page_name
    # Rerun immediately so the new page is drawn in this interaction.
    st.rerun()
def show_home():
    """Render the landing page.

    Draws, in order: the title and tagline, a centred launch button that
    switches to the ``"app"`` page, two overview columns, four feature cards,
    the tech-stack summary and three explainer tabs.
    """
    # NOTE(review): glyphs such as "π" in the literals below look like emoji
    # that were decoded with the wrong character set. They are reproduced
    # unchanged; restore them from a known-good copy of the file.

    # The title, tagline and closing markup were single-quoted literals spanning
    # several physical lines (a syntax error); they are triple-quoted here with
    # their text unchanged.
    st.markdown("""
π AI Knowledge Assistant
""", unsafe_allow_html=True)
    st.markdown("""Transforming Complex Documents into Dynamic Multi-Modal Insights
""", unsafe_allow_html=True)

    # Three equal columns; only the middle one is used, which centres the button.
    _, cta_col, _ = st.columns([1, 1, 1])
    with cta_col:
        if st.button("π Launch Application", type="primary", width='stretch'):
            switch_page("app")

    st.markdown("---")

    motivation_col, limitations_col = st.columns(2, gap="large")
    with motivation_col:
        st.markdown("", unsafe_allow_html=True)
        st.markdown("""
Secure & Efficient Knowledge Management
- Privacy & Data Sovereignty: Handling sensitive or proprietary documents without uploading to public cloud ecosystems.
- Efficiency via SLMs: Moving away from expensive, giant models towards cost-effective agents that run on edge/consumer hardware.
- Information Overload: Addressing the massive volume of unstructured files with tools that are both smart and private.
""", unsafe_allow_html=True)
    with limitations_col:
        st.markdown("", unsafe_allow_html=True)
        st.markdown("""
The Limitations of Standard Cloud RAG
- Data Privacy Risks: External, cloud-hosted Vector DBs force sensitive data to leave the user's control.
- Context Window Constraints: Single-pass processing fails on long docs (1000 pages) without losing critical detail.
- Naive RAG Failures: Basic retrieval lacks self-correction, leading to hallucinations even with large models.
""", unsafe_allow_html=True)

    st.markdown("", unsafe_allow_html=True)

    # One card per column, left to right.
    feature_cards = (
        """
Conversational
Reflective RAG
A LangGraph state machine that retrieves and self-corrects via reasoning loops for grounded Q&A.
""",
        """
Synthesis
Deep Summary
Utilizes Map-Reduce logic to distill long documents into high-density atomic facts and briefings.
""",
        """
Audio
AI Podcast
Transforms facts into natural narration using NVIDIA Riva TTS technology.
""",
        """
Visual
Knowledge Graph
Maps relationships from summaries into hierarchical, interactive DOT visuals for structural insight.
""",
    )
    for card_col, card_body in zip(st.columns(4, gap="small"), feature_cards):
        with card_col:
            st.markdown(card_body, unsafe_allow_html=True)

    st.markdown("", unsafe_allow_html=True)
    st.markdown("""
π¨ Frontend
Streamlit Dashboard
Responsive UI Components
Multi-modal Displays
π§ Brain
LangChain / LangGraph
Agentic Workflows
Task Orchestration
πΎ Data
ChromaDB Vector Store
Persistent Metadata
Hierarchical Retrieval
𧬠Models
NVIDIA Nemotron-3 (Reasoning)
NVIDIA Nemotron Embed-1B (Vector)
NVIDIA Riva TTS (Audio)
""", unsafe_allow_html=True)
    st.markdown("", unsafe_allow_html=True)

    # (tab label, info headline, markdown details) for each explainer tab.
    tab_specs = (
        (
            "π»Reflective RAG",
            "**Cyclic State Machine**",
            """
- Executes a reasoning loop: **Retrieve β Draft β Grade β Rewrite**.
- Powered by **LangGraph** to ensure answers are strictly evidence-based.
""",
        ),
        (
            "π Smart Summary",
            "**Synthesis Pipeline**",
            """
- Seamlessly handles ultra-long documents by chunking and parallel summarizing.
- Provides the analytical foundation for deep-dive tools.
""",
        ),
        (
            "π οΈ Tools & Visuals",
            "**Multi-Modal Outputs**",
            """
- **Podcast:** Natural audio briefings using NVIDIA Riva TTS.
- **Knowledge Graph:** Structural relationship mapping via DOT syntax.
""",
        ),
    )
    tabs = st.tabs([label for label, _, _ in tab_specs])
    for tab, (_, headline, details) in zip(tabs, tab_specs):
        with tab:
            st.info(headline)
            st.markdown(details)

    st.markdown("""
""", unsafe_allow_html=True)
def ensure_deep_summary():
    """Return the cached deep summary, generating it first when possible.

    A summary is generated only when none is cached and ``full_text`` is
    non-empty; the result is stored under the ``deep_summary`` session key.

    Returns:
        The value of ``st.session_state.deep_summary`` after the call, which is
        still ``None`` when there was no text to summarise.
    """
    state = st.session_state
    if "deep_summary" not in state:
        state.deep_summary = None

    # Short-circuit keeps full_text from being read when a summary is cached.
    if not state.deep_summary and state.full_text:
        with st.spinner("Analyzing Document (Deep Summary)..."):
            state.deep_summary = Summarizer().generate_deep_summary(state.full_text)

    return state.deep_summary
# Pick the first dialog API this Streamlit build exposes, preferring the
# stable name over the experimental one.
for _dialog_api in ("dialog", "experimental_dialog"):
    if hasattr(st, _dialog_api):
        dialog_decorator = getattr(st, _dialog_api)
        break
else:
    def dialog_decorator(*args, **kwargs):
        """Stand-in used when no dialog API exists: leaves the function as is."""
        return lambda func: func
@dialog_decorator("Deep Document Summary", width="large")
def view_summary_dialog(text):
    """Show ``text`` as markdown under the "Deep Document Summary" title.

    Args:
        text: Markdown source of the summary.
    """
    has_dialog_api = hasattr(st, "dialog") or hasattr(st, "experimental_dialog")
    if not has_dialog_api:
        # Without a dialog API the decorator title is never shown, so print
        # a heading inline instead.
        st.info("### Deep Document Summary")
    st.markdown(text)
@dialog_decorator("Knowledge Graph Visualization", width="large")
def view_graph_dialog(dot_code):
    """Render ``dot_code`` as a Graphviz chart stretched to the available width.

    Args:
        dot_code: Graph description handed straight to ``st.graphviz_chart``.
    """
    st.graphviz_chart(dot_code, width="stretch")
def _reset_workspace():
    """Drop every document-derived value so the next upload starts clean."""
    state = st.session_state
    state.pdf_processor = Retriever()
    state.agent = None
    state.messages = []
    state.full_text = ""
    state.processed_files = set()
    state.upload_status = ""
    # The summary and graph are derived from the document as well; leaving them
    # set would present the previous document's results as "Ready" after the
    # next upload.
    state.deep_summary = None
    state.graph_dot = None
    state.podcast_audio = None
    # The uploader's widget key is built from this counter, so bumping it
    # yields a fresh uploader with no files selected.
    state.uploader_key += 1


def _render_sidebar():
    """Draw the sidebar: home link, PDF upload/indexing and the reset button."""
    state = st.session_state
    with st.sidebar:
        if st.button("π Home"):
            switch_page("home")
        st.header("π Upload")

        # Upload Status Message
        if state.get("upload_status"):
            st.success(state.upload_status)

        uploaded_files = st.file_uploader(
            "Upload PDF(s)",
            type="pdf",
            accept_multiple_files=True,
            key=f"uploader_{state.uploader_key}",
        )
        if uploaded_files:
            # Files are tracked by name; anything already indexed is skipped.
            new_files = [f for f in uploaded_files if f.name not in state.processed_files]
            if new_files:
                with st.spinner(f"Analyzing {len(new_files)} new file(s)..."):
                    total_tokens = state.pdf_processor.process_pdf(new_files)
                    state.full_text = state.pdf_processor.get_full_text()
                    state.agent = RAGAgent(state.pdf_processor.get_retriever())
                # Mark as processed
                state.processed_files.update(f.name for f in new_files)
                state.upload_status = f"Successfully indexed ~{total_tokens:,} tokens from {len(new_files)} new file(s)."
                state.uploader_key += 1
                st.rerun()

        if state.full_text:
            st.success("Analysis Ready")
            if st.button("π Reset / Clear All", type="primary"):
                _reset_workspace()
                st.rerun()


def _describe_graph_step(node_name, state):
    """Return the reasoning-log line for an agent node, or "" for other nodes."""
    if node_name == "retriever":
        return f"π **Retrieving** context for query: *'{state.get('current_query', '...')}'*"
    if node_name == "generator":
        return "π§ **Generating** answer..."
    if node_name == "reflector":
        if state.get("reflection_score") == "yes":
            # NOTE(review): this literal was split across two physical lines (an
            # unterminated string). It is rejoined with a single space to match
            # the failure message below. The leading glyph is presumably a
            # mis-decoded check mark - confirm against a clean copy.
            return "β **Reflection Passed**: Answer is grounded."
        return "β **Reflection Failed**: Hallucination/Irrelevance detected."
    if node_name == "rewriter":
        return "π **Rewriting Query** to improve results..."
    return ""


def _render_chat():
    """Replay the stored conversation and answer a newly submitted prompt."""
    state = st.session_state
    st.subheader("π¬ Chat")

    for past in state.messages:
        with st.chat_message(past["role"]):
            reasoning = past.get("thoughts")
            if reasoning:
                with st.expander("βοΈ Reasoning Log", expanded=False):
                    for entry in reasoning:
                        st.write(entry)
            st.markdown(past["content"])

    prompt = st.chat_input("Ask about the document...")
    if not prompt:
        return

    state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.chat_message("assistant"):
        if not state.agent:
            st.warning("Please upload a PDF first.")
            return

        thoughts = []
        with st.status("Agent Reasoning...", expanded=True) as status:

            def graph_callback(node_name, state):
                # Mirror each recognised step into the live status box and keep
                # it so the log can be replayed with the stored message.
                msg = _describe_graph_step(node_name, state)
                if msg:
                    status.write(msg)
                    thoughts.append(msg)

            result = st.session_state.agent.run(prompt, callback=graph_callback)
            status.update(label="Response Ready", state="complete", expanded=False)

        response = result["generation"]
        with st.expander("π Final Stats", expanded=False):
            st.write(f"**Reflected:** {result.get('reflection_score')} | **Total Iter:** {result.get('iterations')}")
        st.markdown(response)
        state.messages.append({
            "role": "assistant",
            "content": response,
            "thoughts": thoughts,
        })


def _render_tools():
    """Draw the summary, podcast and knowledge-graph tools for the loaded text."""
    state = st.session_state
    st.subheader("π Tools")
    if not state.full_text:
        st.info("Upload PDF to enable tools.")
        return

    with st.expander("π Summary", expanded=False):
        if not state.deep_summary:
            if st.button("Generate Deep Summary"):
                ensure_deep_summary()
                st.rerun()
        else:
            st.success("Summary Ready!")
            if st.button("π View Full Summary", type="primary", width='stretch'):
                view_summary_dialog(state.deep_summary)
            if st.button("π Regenerate"):
                state.deep_summary = None
                st.rerun()

    with st.expander("π§ Podcast", expanded=False):
        if not state.podcast_audio:
            if st.button("Generate Audio"):
                # The podcast script is written from the deep summary.
                briefing = ensure_deep_summary()
                with st.spinner("Scripting & Synthesizing..."):
                    p_gen = PodcastGenerator()
                    script = p_gen.generate_audio_script(briefing)
                    audio_path = p_gen.generate_audio_file(script)
                if audio_path:
                    state.podcast_audio = audio_path
                    st.rerun()
                else:
                    st.error("Audio generation failed.")
        else:
            st.success("Podcast Ready!")
            st.audio(state.podcast_audio)
            if st.button("π Regenerate Podcast"):
                state.podcast_audio = None
                st.rerun()

    with st.expander("πΈοΈ Knowledge Graph", expanded=False):
        if not state.graph_dot:
            if st.button("Generate Graph"):
                # The graph is built from the deep summary as well.
                summary_text = ensure_deep_summary()
                with st.spinner("Building Graph structure..."):
                    kg_gen = KnowledgeGraphGenerator()
                    state.graph_dot = kg_gen.generate_graph(summary_text)
                st.rerun()
        else:
            st.success("Graph Ready!")
            if st.button("ποΈ View Knowledge Graph", type="primary", width='stretch'):
                view_graph_dialog(state.graph_dot)
            if st.button("π Regenerate Graph"):
                state.graph_dot = None
                st.rerun()


def show_app():
    """Render the main application page.

    Layout: a sidebar for navigation, upload and reset, then a wide chat column
    next to a narrower tools column.
    """
    # NOTE(review): glyphs such as "π" in the labels used by this page look like
    # emoji decoded with the wrong character set. They are reproduced
    # unchanged; restore them from a known-good copy of the file.

    # Sidebar: Clean, just for upload and nav
    _render_sidebar()

    chat_col, tools_col = st.columns([3, 1.3])
    with chat_col:
        _render_chat()
    with tools_col:
        _render_tools()
# Route on the stored page name; anything other than "home" shows the app.
_render_page = show_home if st.session_state.page == "home" else show_app
_render_page()