"""Naresh AI DocuPulse — Streamlit UI for PDF RAG, summarization and analytics.

Wires a Groq-backed PDF agent (``agent.agent.LlamaPDFAgent``) into four tabs:
conversational Q&A with citations, an executive summary, a deep strategic
insight scan (KPIs + SWOT + downloadable report) and a raw table explorer.
All cross-rerun state lives in ``st.session_state``.
"""

import json
import os
import time
import traceback

import pandas as pd
import streamlit as st
from dotenv import load_dotenv

from agent.agent import AgentRateLimitError, LlamaPDFAgent as PDFAgent
from agent.llm_client import GroqClient

# Load environment variables (e.g. GROQ_API_KEY) from a local .env file.
load_dotenv()

# ---------------------------------------------------------------------------
# Page configuration & styling
# ---------------------------------------------------------------------------
st.set_page_config(
    page_title="Naresh AI DocuPulse Submission - PDF Intelligence",
    page_icon="📄",
    layout="wide",
)

# Custom styling for a premium dark mode (consistent with Challenge A).
# NOTE(review): the CSS payload was empty in the reviewed copy — the original
# stylesheet appears to have been lost in transit; restore it here.
st.markdown("""
""", unsafe_allow_html=True)

# ---------------------------------------------------------------------------
# Session state
# ---------------------------------------------------------------------------
if "pdf_agent" not in st.session_state:
    st.session_state.pdf_agent = None          # active PDFAgent instance (or None)
if "messages" not in st.session_state:
    st.session_state.messages = []             # chat history dicts: role/content/sources
if "deep_insights" not in st.session_state:
    st.session_state.deep_insights = {}        # cached deep-scan results

# Model preselected in the architecture dropdown when available.
PREFERRED_MODEL = "meta-llama/llama-4-scout-17b-16e-instruct"

# ---------------------------------------------------------------------------
# Sidebar
# ---------------------------------------------------------------------------
with st.sidebar:
    # NOTE(review): the HTML wrapper for this banner was lost in transit;
    # only the text content survives.
    st.markdown("NARESH AI", unsafe_allow_html=True)
    st.title("Settings")

    # API key input — falls back to the environment for convenience.
    groq_api_key = st.text_input(
        "Groq API Key", type="password", value=os.getenv("GROQ_API_KEY", "")
    )

    # Dynamic model fetching: try the live model list; keep the static
    # fallback on any failure (bad key, network issue, ...).
    available_models = [
        PREFERRED_MODEL,
        "llama-3.3-70b-versatile",
        "mixtral-8x7b-32768",
    ]
    if groq_api_key:
        try:
            temp_client = GroqClient(api_key=groq_api_key)
            fetched_models = temp_client.list_models()
            if fetched_models:
                available_models = fetched_models
        except Exception:
            # Best-effort: silently keep the static fallback list.
            pass

    model_choice = st.selectbox(
        "Model Architecture",
        available_models,
        index=(
            available_models.index(PREFERRED_MODEL)
            if PREFERRED_MODEL in available_models
            else 0
        ),
    )

    st.divider()
    st.markdown("### 🗂️ Document Library")

    # Drop a stale agent object from an older session that predates the
    # library API.
    if st.session_state.get("pdf_agent") and not hasattr(
        st.session_state.pdf_agent, "get_library"
    ):
        st.session_state.pdf_agent = None

    # Ensure an agent exists so the library can be browsed before any upload.
    if not st.session_state.pdf_agent:
        st.session_state.pdf_agent = PDFAgent(
            api_key=groq_api_key or os.getenv("GROQ_API_KEY"), model=model_choice
        )

    library = st.session_state.pdf_agent.get_library()
    if not library:
        st.caption("No documents in library.")
    else:
        for doc in library:
            col1, col2 = st.columns([0.8, 0.2])
            with col1:
                st.markdown(f"**{doc['filename']}**")
            with col2:
                if st.button("🗑️", key=f"del_{doc['hash']}", help="Delete vectors"):
                    if st.session_state.pdf_agent.delete_document(doc['hash']):
                        # Force re-init in case the active document was deleted.
                        st.session_state.pdf_agent = None
                        st.rerun()

    st.info(
        "To switch document, simply upload it again. "
        "It will load instantly from the library."
    )

    st.divider()
    st.markdown("### Document Controls")
    if st.button("Reset Session"):
        st.session_state.pdf_agent = None
        st.session_state.messages = []
        st.session_state.deep_insights = {}
        st.rerun()

    st.divider()
    st.markdown("### Profile")
    st.write("**Built by:** Naresh Kumar Lahajal")
    st.write("**Role:** GenAI Enthusiast")
    st.info("High-speed PDF intelligence powered by Groq and FastEmbed.")

# ---------------------------------------------------------------------------
# Header & ingestion
# ---------------------------------------------------------------------------
st.title("Naresh AI DocuPulse - Submission")
st.subheader("Challenge B: PDF RAG & Summarization")

uploaded_file = st.file_uploader("Upload a PDF document", type=["pdf"])

# (Re-)ingest only when a new file arrives or no document-bound agent exists.
if uploaded_file and (
    st.session_state.pdf_agent is None
    or uploaded_file.name != st.session_state.get("last_uploaded_file")
):
    with st.status("Ingesting document and indexing knowledge...", expanded=True) as status:
        try:
            agent = PDFAgent(api_key=groq_api_key, model=model_choice)
            status_msg = agent.ingest_pdf(uploaded_file)
            st.session_state.pdf_agent = agent
            st.session_state.last_uploaded_file = uploaded_file.name
            # Sync extracted tables for the Table Explorer tab.
            st.session_state.extracted_tables = agent.tables
            # A new document invalidates the old conversation and insights.
            st.session_state.messages = []
            st.session_state.deep_insights = {}
            status.update(label=f"✅ {status_msg}", state="complete", expanded=False)
            st.toast("Intelligence Engine Initialized", icon="🧠")
        except Exception as e:
            st.error(f"Critical Ingestion Error: {e}")
            with st.expander("Show Traceback"):
                st.code(traceback.format_exc())


def run_with_exact_backoff(func, *args, **kwargs):
    """Call *func*, retrying on :class:`AgentRateLimitError` with a UI countdown.

    The error's ``wait_time`` (seconds, as reported by the agent) plus a one
    second buffer is honoured with a per-second visual countdown.  After
    three failed attempts the error is surfaced to the user and re-raised.

    Returns *func*'s result, or ``None`` if no attempt succeeded.
    """
    max_attempts = 3
    for attempt in range(max_attempts):
        try:
            return func(*args, **kwargs)
        except AgentRateLimitError as e:
            if attempt == max_attempts - 1:
                st.error(
                    f"Failed after {max_attempts} attempts due to Persistent "
                    "Rate Limits. Please wait a few minutes."
                )
                raise
            # Precise wait + 1s safety buffer.
            wait_time = int(e.wait_time) + 1
            st.toast(f"Rate Limit Hit! Waiting {wait_time}s to retry...", icon="⏳")
            placeholder = st.empty()
            for remaining in range(wait_time, 0, -1):
                placeholder.warning(f"⚠️ API Cooldown: Retrying in {remaining} seconds...")
                time.sleep(1)
            placeholder.empty()
    return None


if st.session_state.pdf_agent:
    tab1, tab2, tab3, tab4 = st.tabs(
        ["💬 Ask Questions", "📝 Auto-Summary", "🧠 Deep Intelligence", "📋 Table Explorer"]
    )

    # -- Tab 1: conversational Q&A ------------------------------------------
    with tab1:
        st.markdown("### 💬 Document Conversation")
        st.caption("Ask questions about the document and maintain a conversation thread.")

        # Replay the chat history, including cited sources.
        for message in st.session_state.messages:
            with st.chat_message(message["role"]):
                st.markdown(message["content"])
                if message.get("sources"):
                    with st.expander("🔗 Sources & Citations", expanded=False):
                        for i, src in enumerate(message["sources"]):
                            page_text = f"Page {src['page']}" if src['page'] else "Unknown Page"
                            st.markdown(f"**[{i+1}] {page_text}**")
                            st.caption(f"_{src['text']}_")
                            st.divider()

        if prompt := st.chat_input("What would you like to know?"):
            st.session_state.messages.append({"role": "user", "content": prompt})
            with st.chat_message("user"):
                st.markdown(prompt)

            with st.chat_message("assistant"):
                with st.spinner("Analyzing document context..."):
                    response_data = run_with_exact_backoff(
                        st.session_state.pdf_agent.answer_question, prompt
                    )
                if response_data:
                    # Stream the answer for a typing effect.
                    answer = st.write_stream(response_data['answer_gen'])
                    sources = response_data.get("sources", [])
                    if sources:
                        with st.expander("🔗 Sources & Citations", expanded=False):
                            for i, src in enumerate(sources):
                                page_text = f"Page {src['page']}" if src['page'] else "Unknown Page"
                                st.markdown(f"**[{i+1}] {page_text}**")
                                st.caption(f"_{src['text']}_")
                                st.divider()
                    # BUGFIX: record the assistant turn only when a response
                    # was produced — otherwise `answer`/`sources` are unbound
                    # and this append would raise NameError.
                    st.session_state.messages.append(
                        {"role": "assistant", "content": answer, "sources": sources}
                    )

    # -- Tab 2: executive summary -------------------------------------------
    with tab2:
        if st.button("Generate Executive Summary"):
            with st.spinner("Synthesizing document overview..."):
                streaming_response = run_with_exact_backoff(
                    st.session_state.pdf_agent.summarize_document
                )
                if streaming_response:
                    # NOTE(review): decorative HTML card wrapper lost in transit.
                    st.markdown("", unsafe_allow_html=True)
                    st.markdown("### 📝 Document Summary")
                    st.write_stream(streaming_response.response_gen)
                    st.markdown("", unsafe_allow_html=True)

    # -- Tab 3: deep intelligence -------------------------------------------
    with tab3:
        st.markdown("### 🚀 Strategic Deep Analysis")
        st.info(
            "This mode uses multi-stage recursive retrieval to extract deep "
            "strategic insights and KPIs."
        )

        if st.button("Run Deep Intelligence Scan"):
            with st.status("Analyzing document layers...", expanded=True) as status:
                st.write("🔍 Extracting Strategic Vision...")
                insights = run_with_exact_backoff(st.session_state.pdf_agent.get_deep_insights)
                if insights:
                    st.session_state.deep_insights = insights
                    # Fetch KPI visualization data for the dashboard below.
                    st.write("📊 Generating Visual Analytics...")
                    viz_data = run_with_exact_backoff(st.session_state.pdf_agent.get_kpi_viz_data)
                    st.session_state.kpi_viz_data = viz_data
                    status.update(label="✅ Deep Analysis Complete", state="complete", expanded=False)
                else:
                    status.update(label="❌ Failed after retries", state="error", expanded=False)

        if st.session_state.deep_insights:
            insights = st.session_state.deep_insights

            # 1. Strategic vision.
            # NOTE(review): decorative HTML card wrappers lost in transit.
            st.markdown("", unsafe_allow_html=True)
            st.markdown("#### 🎯 Strategic Vision")
            st.write(insights.get("strategic_vision", "N/A"))
            st.markdown("", unsafe_allow_html=True)

            col1, col2 = st.columns(2)
            with col1:
                # 2. Key metrics.
                st.markdown("#### 📊 Key Performance Indicators")
                metrics_text = insights.get("key_metrics", "")
                st.markdown(metrics_text if metrics_text else "No metrics extracted.")
            with col2:
                # 3. Risks.
                st.markdown("#### ⚠️ Risks & Challenges")
                risks_text = insights.get("risks_and_challenges", "")
                st.markdown(risks_text if risks_text else "No risks identified.")

            # Visual dashboard section.
            if st.session_state.get("kpi_viz_data"):
                st.divider()
                st.markdown("#### 📈 Key Trends & Metrics")
                viz_df = pd.DataFrame(st.session_state.kpi_viz_data)
                # Heuristic: time-like labels (years / quarters) -> line chart.
                time_tokens = ("year", "q1", "q2", "q3", "q4")
                is_time_series = any(
                    any(tok in str(label).lower() for tok in time_tokens)
                    for label in viz_df['label']
                )
                if is_time_series:
                    st.line_chart(viz_df.set_index('label'), color="#3b82f6")
                    st.caption("Auto-detected Time Series data.")
                else:
                    st.bar_chart(viz_df.set_index('label'), color="#60a5fa")
                    st.caption("Bar chart representation of extracted KPIs.")

            # 4. SWOT analysis.
            st.divider()
            st.markdown("#### 🛠️ Automated SWOT Analysis")
            swot_raw = insights.get("swot_analysis", "{}")
            # BUGFIX: defined up-front so the report export below can always
            # call .get() safely (the old `'swot_data' in locals()` check
            # crashed when json.loads succeeded with a non-dict value).
            swot_data = {}
            try:
                # Strip markdown code fences / surrounding prose around the JSON.
                if "```json" in swot_raw:
                    swot_raw = swot_raw.split("```json")[1].split("```")[0].strip()
                elif "{" in swot_raw:
                    swot_raw = "{" + swot_raw.split("{", 1)[1].rsplit("}", 1)[0] + "}"
                parsed = json.loads(swot_raw)
                swot_data = parsed if isinstance(parsed, dict) else {}

                # Display SWOT in a 2x2 grid.
                s_col1, s_col2 = st.columns(2)
                with s_col1:
                    st.success(f"**Strengths**\n\n{swot_data.get('S', 'N/A')}")
                    st.info(f"**Opportunities**\n\n{swot_data.get('O', 'N/A')}")
                with s_col2:
                    st.warning(f"**Weaknesses**\n\n{swot_data.get('W', 'N/A')}")
                    st.error(f"**Threats**\n\n{swot_data.get('T', 'N/A')}")
            except Exception:
                # Unparseable model output: show it verbatim instead of failing.
                st.write("Raw SWOT Insight:")
                st.write(swot_raw)

            # Report export.
            st.divider()
            # BUGFIX: .get() with a fallback — last_uploaded_file is unset when
            # the agent was restored from the library without a fresh upload.
            doc_name = st.session_state.get("last_uploaded_file", "Document")
            report_md = f"""# Executive Intelligence Report: {doc_name}

## 🎯 Strategic Vision
{insights.get('strategic_vision', 'N/A')}

## 📊 Key Performance Indicators
{insights.get('key_metrics', 'N/A')}

## ⚠️ Risks & Challenges
{insights.get('risks_and_challenges', 'N/A')}

## 🛠️ SWOT Analysis
### Strengths
{swot_data.get('S', 'N/A')}
### Weaknesses
{swot_data.get('W', 'N/A')}
### Opportunities
{swot_data.get('O', 'N/A')}
### Threats
{swot_data.get('T', 'N/A')}

---
*Report generated by Naresh AI DocuPulse*
"""
            st.download_button(
                label="📥 Download Executive Intelligence Report",
                data=report_md,
                file_name=f"Intelligence_Report_{doc_name.replace('.pdf', '')}.md",
                mime="text/markdown",
            )

    # -- Tab 4: table explorer ----------------------------------------------
    with tab4:
        st.markdown("### 📋 PDF Table Explorer")
        st.info(
            "Direct extraction of tabular data from the document. "
            "Select a table to explore."
        )
        tables = st.session_state.pdf_agent.tables
        if not tables:
            st.warning("No structured tables were detected in the document.")
        else:
            table_labels = [f"{t['label']} (Page Grounded)" for t in tables]
            selected_label = st.selectbox("Select Table", table_labels)
            selected_table = tables[table_labels.index(selected_label)]

            st.markdown(f"#### {selected_table['label']}")
            st.dataframe(selected_table['df'], width="stretch")

            # Offer the table as a CSV download.
            csv = selected_table['df'].to_csv(index=False).encode('utf-8')
            st.download_button(
                label=f"📥 Download {selected_table['label']} as CSV",
                data=csv,
                file_name=f"{selected_table['label'].replace(' ', '_')}.csv",
                mime="text/csv",
            )
else:
    st.info("Please upload a PDF document to begin analysis.")

# ---------------------------------------------------------------------------
# Footer
# ---------------------------------------------------------------------------
# NOTE(review): the original footer HTML markup was lost in transit; only the
# text content survives.
st.markdown(
    """
© 2026 Naresh Kumar Lahajal. All Rights Reserved.
Powered by Groq and Retrieval-Augmented Generation
""",
    unsafe_allow_html=True,
)