# Hugging Face Space: NEXAS/challenge-b (status: Running)
# File size: 18,582 bytes

import streamlit as st
import os
import traceback
import pandas as pd
import json
import time

from dotenv import load_dotenv
from agent.llm_client import GroqClient
from agent.agent import LlamaPDFAgent as PDFAgent, AgentRateLimitError

# Read key/value pairs from a local .env file into the process environment
# before anything below looks up GROQ_API_KEY.
load_dotenv()

# Browser-tab title, icon and layout for the app page.
_page_settings = {
    "page_title": "Naresh AI DocuPulse Submission - PDF Intelligence",
    "page_icon": "📄",
    "layout": "wide",
}
st.set_page_config(**_page_settings)

# Custom Styling for a Premium Dark Mode (Consistent with Challenge A)
# Injects a single <style> block through st.markdown (unsafe_allow_html=True is
# required for the raw HTML to be emitted). The rules below restyle the app
# background, h1/h3 headings, text inputs, buttons, the sidebar, general text
# colours, and define two custom classes used elsewhere in this script:
# `.answer-container` (result cards) and `.brand-text` (sidebar branding).
st.markdown("""
<style>
    /* Main container styling - Deep Dark Gradient */
    .stApp {
        background: radial-gradient(circle at top left, #1e293b 0%, #0f172a 100%) !important;
        color: #f1f5f9 !important;
    }
    
    /* Header and Title styling - Neon Blue */
    h1 {
        color: #60a5fa !important;
        font-family: 'Outfit', sans-serif;
        font-weight: 800 !important;
        letter-spacing: -0.05rem;
        text-shadow: 0 0 20px rgba(96, 165, 250, 0.3);
    }
    
    h3 {
        color: #94a3b8 !important;
        font-weight: 400 !important;
    }

    /* Input styling - Darker Glass */
    .stTextInput>div>div>input {
        background-color: rgba(30, 41, 59, 0.7) !important;
        color: white !important;
        border: 1px solid rgba(96, 165, 250, 0.5) !important;
        border-radius: 12px !important;
        padding: 12px 20px !important;
        font-size: 1.1rem !important;
    }
    
    /* Button styling - Glowing Blue */
    .stButton>button {
        background: linear-gradient(90deg, #2563eb 0%, #3b82f6 100%) !important;
        color: white !important;
        border: none !important;
        border-radius: 12px !important;
        padding: 15px 30px !important;
        font-weight: 700 !important;
        font-size: 1.1rem !important;
        transition: all 0.3s ease !important;
        box-shadow: 0 0 15px rgba(37, 99, 235, 0.4) !important;
        width: 100% !important;
    }
    
    .stButton>button:hover {
        transform: translateY(-2px) !important;
        box-shadow: 0 0 30px rgba(59, 130, 246, 0.6) !important;
    }

    /* Result Card styling - Dark Inset */
    .answer-container {
        background-color: rgba(30, 41, 59, 0.5);
        padding: 30px;
        border-radius: 20px;
        backdrop-filter: blur(20px);
        border: 1px solid rgba(255, 255, 255, 0.1);
        box-shadow: inset 0 0 20px rgba(0, 0, 0, 0.2);
        border-left: 8px solid #2563eb;
        margin-top: 25px;
    }
    
    /* Sidebar Dark Glass */
    section[data-testid="stSidebar"] {
        background-color: rgba(15, 23, 42, 0.95) !important;
        backdrop-filter: blur(20px) !important;
        border-right: 1px solid rgba(255, 255, 255, 0.1) !important;
    }
    
    .brand-text {
        font-size: 1.5rem;
        font-weight: 900;
        background: linear-gradient(90deg, #60a5fa, #3b82f6);
        -webkit-background-clip: text;
        -webkit-text-fill-color: transparent;
        margin-bottom: 20px;
    }
    
    /* Standard Text Color Fixes */
    .stMarkdown, p, li {
        color: #cbd5e1 !important;
    }
    
    strong {
        color: #f1f5f9 !important;
    }
</style>
""", unsafe_allow_html=True)

# Seed the per-session slots this script relies on. Each entry maps a state key
# to a factory so every session gets its own fresh list/dict rather than a
# shared object; keys that already exist are left untouched.
_session_defaults = {
    "pdf_agent": lambda: None,
    "messages": list,
    "deep_insights": dict,
}
for _state_key, _make_default in _session_defaults.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _make_default()


# Sidebar: API key + model selection, the document library, session reset and
# the author profile. Also guarantees that st.session_state.pdf_agent holds an
# agent exposing `get_library` by the time the main page renders.
with st.sidebar:
    st.markdown('<div class="brand-text">NARESH AI</div>', unsafe_allow_html=True)
    st.title("Settings")

    # API Key Input (pre-filled from the GROQ_API_KEY environment variable)
    groq_api_key = st.text_input("Groq API Key", type="password", value=os.getenv("GROQ_API_KEY", ""))

    # Dynamic Model Fetching: start from a built-in list and replace it with
    # the provider's list when a key is available and the lookup succeeds.
    default_model = "meta-llama/llama-4-scout-17b-16e-instruct"
    available_models = [default_model, "llama-3.3-70b-versatile", "mixtral-8x7b-32768"]
    if groq_api_key:
        try:
            fetched_models = GroqClient(api_key=groq_api_key).list_models()
        except Exception:
            # Best-effort lookup: keep the built-in list, but tell the user
            # instead of failing silently (e.g. bad key or network problem).
            fetched_models = None
            st.caption("⚠️ Could not fetch the live model list; showing default models.")
        if fetched_models:
            available_models = fetched_models

    # Pre-select the default model when it is offered, otherwise the first entry.
    default_index = available_models.index(default_model) if default_model in available_models else 0
    model_choice = st.selectbox(
        "Model Architecture",
        available_models,
        index=default_index,
    )

    st.divider()
    st.markdown("### 🗂️ Document Library")

    # Discard an agent object that lacks `get_library` (stale object), then
    # make sure an agent exists so the library can be listed. PDFAgent is the
    # module-level import; no local re-import is needed.
    current_agent = st.session_state.get("pdf_agent")
    if current_agent and not hasattr(current_agent, "get_library"):
        st.session_state.pdf_agent = None  # Clear stale object

    if not st.session_state.get("pdf_agent"):
        st.session_state.pdf_agent = PDFAgent(api_key=groq_api_key or os.getenv("GROQ_API_KEY"), model=model_choice)

    library = st.session_state.pdf_agent.get_library()
    if not library:
        st.caption("No documents in library.")
    else:
        for doc in library:
            name_col, delete_col = st.columns([0.8, 0.2])
            with name_col:
                st.markdown(f"**{doc['filename']}**")
            with delete_col:
                if st.button("🗑️", key=f"del_{doc['hash']}", help="Delete vectors"):
                    if st.session_state.pdf_agent.delete_document(doc['hash']):
                        st.session_state.pdf_agent = None  # Force re-init if active one deleted
                        st.rerun()
        st.info("To switch document, simply upload it again. It will load instantly from the library.")

    st.divider()
    st.markdown("### Document Controls")
    if st.button("Reset Session"):
        st.session_state.pdf_agent = None
        st.session_state.messages = []
        st.session_state.deep_insights = {}
        # Also drop cached chart data so a later scan cannot show old KPIs.
        st.session_state.kpi_viz_data = None
        st.rerun()

    st.divider()
    st.markdown("### Profile")
    st.write("**Built by:** Naresh Kumar Lahajal")
    st.write("**Role:** GenAI Enthusiast")
    st.info("High-speed PDF intelligence powered by Groq and FastEmbed.")

# Header
st.title("Naresh AI DocuPulse - Submission")
st.subheader("Challenge B: PDF RAG & Summarization")

# File Upload
uploaded_file = st.file_uploader("Upload a PDF document", type=["pdf"])

# Ingest when a file is present and either no agent exists or the file name
# differs from the one recorded for the last successful ingestion.
needs_ingest = bool(uploaded_file) and (
    st.session_state.pdf_agent is None
    or uploaded_file.name != st.session_state.get("last_uploaded_file")
)

if needs_ingest:
    ingest_failure = None  # (exception, formatted traceback) when ingestion fails
    with st.status("Ingesting document and indexing knowledge...", expanded=True) as status:
        try:
            # Same key fallback as the sidebar: typed key first, then environment.
            agent = PDFAgent(api_key=groq_api_key or os.getenv("GROQ_API_KEY"), model=model_choice)
            status_msg = agent.ingest_pdf(uploaded_file)
        except Exception as exc:
            # Capture the traceback here (it is only available inside the
            # handler) and flag the status so it does not look successful.
            ingest_failure = (exc, traceback.format_exc())
            status.update(label="❌ Document ingestion failed", state="error", expanded=False)
        else:
            st.session_state.pdf_agent = agent
            st.session_state.last_uploaded_file = uploaded_file.name
            # Sync tables for explorer
            st.session_state.extracted_tables = agent.tables
            # Auto-Clear History on New Upload (chat, insights and cached
            # chart data all belong to the previous document)
            st.session_state.messages = []
            st.session_state.deep_insights = {}
            st.session_state.kpi_viz_data = None
            status.update(label=f"✅ {status_msg}", state="complete", expanded=False)
            st.toast("Intelligence Engine Initialized", icon="🧠")

    # Report the failure outside the status container so the message stays
    # visible and the traceback expander is not nested inside it.
    if ingest_failure is not None:
        ingest_exc, ingest_traceback = ingest_failure
        st.error(f"Critical Ingestion Error: {ingest_exc}")
        with st.expander("Show Traceback"):
            st.code(ingest_traceback)


# Helper for Exact Backoff
def run_with_exact_backoff(func, *args, **kwargs):
    """
    Call ``func(*args, **kwargs)``, retrying on AgentRateLimitError with a UI countdown.

    Up to three attempts are made. After a rate-limit error on any attempt but
    the last, the function waits ``int(error.wait_time) + 1`` seconds, showing
    a toast and a per-second countdown warning, then tries again.

    Args:
        func: Callable to invoke.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns on the first attempt that does not raise
        AgentRateLimitError.

    Raises:
        AgentRateLimitError: Re-raised (after showing ``st.error``) when the
            final attempt is still rate limited.
        Exception: Any other exception from ``func`` propagates immediately.
    """
    max_attempts = 3
    for attempt in range(1, max_attempts + 1):
        try:
            return func(*args, **kwargs)
        except AgentRateLimitError as exc:
            if attempt == max_attempts:
                st.error(f"Failed after {max_attempts} attempts due to Persistent Rate Limits. Please wait a few minutes.")
                # Bare raise keeps the original exception and traceback intact.
                raise

            # Precise wait + 1s buffer. If the error carries no usable
            # wait_time, fall back to the 1s buffer alone rather than failing
            # inside the handler with an unrelated error.
            try:
                wait_time = int(exc.wait_time) + 1
            except (AttributeError, TypeError, ValueError, OverflowError):
                wait_time = 1
            st.toast(f"Rate Limit Hit! Waiting {wait_time}s to retry...", icon="⏳")

            # Visual Countdown; always clear the placeholder, even if the wait
            # is interrupted, so no stale warning is left on the page.
            placeholder = st.empty()
            try:
                for remaining in range(wait_time, 0, -1):
                    placeholder.warning(f"⚠️ API Cooldown: Retrying in {remaining} seconds...")
                    time.sleep(1)
            finally:
                placeholder.empty()
    return None

# Main workspace: rendered whenever the session holds a truthy agent object.
# NOTE(review): the sidebar assigns a PDFAgent whenever this slot is empty, so
# this condition appears to hold even before any PDF is uploaded — confirm intent.
if st.session_state.pdf_agent:

    # Action Tabs: chat Q&A, summary, deep analysis and extracted-table browser
    tab1, tab2, tab3, tab4 = st.tabs(["💬 Ask Questions", "📝 Auto-Summary", "🧠 Deep Intelligence", "📋 Table Explorer"])

    
    with tab1:
        st.markdown("### 💬 Document Conversation")
        st.caption("Ask questions about the document and maintain a conversation thread.")

        def _render_sources(sources):
            """Render the numbered citation expander for one assistant message.

            Does nothing when ``sources`` is empty or None. Each entry is read
            via its 'page' and 'text' keys.
            """
            if not sources:
                return
            with st.expander("🔗 Sources & Citations", expanded=False):
                for number, src in enumerate(sources, start=1):
                    page_text = f"Page {src['page']}" if src['page'] else "Unknown Page"
                    st.markdown(f"**[{number}] {page_text}**")
                    st.caption(f"_{src['text']}_")
                    st.divider()

        # Display Chat History
        for message in st.session_state.messages:
            with st.chat_message(message["role"]):
                st.markdown(message["content"])
                _render_sources(message.get("sources"))

        # Chat Input
        if prompt := st.chat_input("What would you like to know?"):
            # Add user message to history
            st.session_state.messages.append({"role": "user", "content": prompt})
            with st.chat_message("user"):
                st.markdown(prompt)

            # Generate AI response
            with st.chat_message("assistant"):
                with st.spinner("Analyzing document context..."):
                    try:
                        response_data = run_with_exact_backoff(st.session_state.pdf_agent.answer_question, prompt)
                    except AgentRateLimitError:
                        # The helper has already shown the error message; stop
                        # here so the rest of the page still renders.
                        response_data = None
                    if response_data:
                        # Use st.write_stream for typing effect
                        answer = st.write_stream(response_data['answer_gen'])
                        sources = response_data.get("sources", [])
                        _render_sources(sources)

                        # Add assistant response to history
                        st.session_state.messages.append({
                            "role": "assistant",
                            "content": answer,
                            "sources": sources,
                        })




                
    with tab2:
        # One-shot summary: generated on demand and streamed into the page.
        if st.button("Generate Executive Summary"):
            with st.spinner("Synthesizing document overview..."):
                try:
                    streaming_response = run_with_exact_backoff(st.session_state.pdf_agent.summarize_document)
                except AgentRateLimitError:
                    # The helper has already shown the error message; skip the
                    # summary so the remaining tabs and footer still render.
                    streaming_response = None
                if streaming_response:
                    # NOTE(review): the opening and closing <div> are emitted by
                    # separate st.markdown calls, so they may not actually wrap
                    # the streamed text — confirm visually.
                    st.markdown('<div class="answer-container" style="border-left: 8px solid #60a5fa;">', unsafe_allow_html=True)
                    st.markdown("### 📝 Document Summary")
                    st.write_stream(streaming_response.response_gen)
                    st.markdown('</div>', unsafe_allow_html=True)



    with tab3:
        st.markdown("### 🚀 Strategic Deep Analysis")
        st.info("This mode uses multi-stage recursive retrieval to extract deep strategic insights and KPIs.")
        
        if st.button("Run Deep Intelligence Scan"):
            with st.status("Analyzing document layers...", expanded=True) as status:
                st.write("🔍 Extracting Strategic Vision...")
                insights = run_with_exact_backoff(st.session_state.pdf_agent.get_deep_insights)
                if insights:
                    st.session_state.deep_insights = insights
                    
                    # Fetch KPI visualization data
                    st.write("📊 Generating Visual Analytics...")
                    viz_data = run_with_exact_backoff(st.session_state.pdf_agent.get_kpi_viz_data)
                    st.session_state.kpi_viz_data = viz_data
                    
                    status.update(label="✅ Deep Analysis Complete", state="complete", expanded=False)
                else:
                    status.update(label="❌ Failed after retries", state="error", expanded=False)


            
        if st.session_state.deep_insights:
            insights = st.session_state.deep_insights
            
            # 1. Strategic Vision
            st.markdown('<div class="answer-container" style="border-left: 8px solid #8b5cf6;">', unsafe_allow_html=True)
            st.markdown("#### 🎯 Strategic Vision")
            st.write(insights.get("strategic_vision", "N/A"))
            st.markdown('</div>', unsafe_allow_html=True)
            
            col1, col2 = st.columns(2)
            
            with col1:
                # 2. Key Metrics
                st.markdown("#### 📊 Key Performance Indicators")
                metrics_text = insights.get("key_metrics", "")
                st.markdown(metrics_text if metrics_text else "No metrics extracted.")
                
            with col2:
                # 3. Risks
                st.markdown("#### ⚠️ Risks & Challenges")
                risks_text = insights.get("risks_and_challenges", "")
                st.markdown(risks_text if risks_text else "No risks identified.")
            
            # Visual Dashboard Section
            if st.session_state.get("kpi_viz_data"):
                st.divider()
                st.markdown("#### 📈 Key Trends & Metrics")
                viz_df = pd.DataFrame(st.session_state.kpi_viz_data)
                
                # Heuristic for chart type
                if any("year" in str(l).lower() or "q1" in str(l).lower() or "q2" in str(l).lower() or "q3" in str(l).lower() or "q4" in str(l).lower() for l in viz_df['label']):
                    st.line_chart(viz_df.set_index('label'), color="#3b82f6")
                    st.caption("Auto-detected Time Series data.")
                else:
                    st.bar_chart(viz_df.set_index('label'), color="#60a5fa")
                    st.caption("Bar chart representation of extracted KPIs.")

            # 4. SWOT Analysis

            st.divider()
            st.markdown("#### 🛠️ Automated SWOT Analysis")
            swot_raw = insights.get("swot_analysis", "{}")
            try:
                # Attempt to clean potential markdown artifacts around JSON
                if "```json" in swot_raw:
                    swot_raw = swot_raw.split("```json")[1].split("```")[0].strip()
                elif "{" in swot_raw:
                    swot_raw = "{" + swot_raw.split("{", 1)[1].rsplit("}", 1)[0] + "}"
                
                swot_data = json.loads(swot_raw)
                
                # Display SWOT in a grid
                s_col1, s_col2 = st.columns(2)
                with s_col1:
                    st.success(f"**Strengths**\n\n{swot_data.get('S', 'N/A')}")
                    st.info(f"**Opportunities**\n\n{swot_data.get('O', 'N/A')}")
                with s_col2:
                    st.warning(f"**Weaknesses**\n\n{swot_data.get('W', 'N/A')}")
                    st.error(f"**Threats**\n\n{swot_data.get('T', 'N/A')}")
            except Exception as e:
                st.write("Raw SWOT Insight:")
                st.write(swot_raw)
            
            # Report Export
            st.divider()
            report_md = f"""# Executive Intelligence Report: {st.session_state.last_uploaded_file}
            
## 🎯 Strategic Vision
{insights.get('strategic_vision', 'N/A')}

## 📊 Key Performance Indicators
{insights.get('key_metrics', 'N/A')}

## ⚠️ Risks & Challenges
{insights.get('risks_and_challenges', 'N/A')}

## 🛠️ SWOT Analysis
### Strengths
{swot_data.get('S', 'N/A') if 'swot_data' in locals() else 'N/A'}

### Weaknesses
{swot_data.get('W', 'N/A') if 'swot_data' in locals() else 'N/A'}

### Opportunities
{swot_data.get('O', 'N/A') if 'swot_data' in locals() else 'N/A'}

### Threats
{swot_data.get('T', 'N/A') if 'swot_data' in locals() else 'N/A'}

---
*Report generated by Naresh AI DocuPulse*
"""
            st.download_button(
                label="📥 Download Executive Intelligence Report",
                data=report_md,
                file_name=f"Intelligence_Report_{st.session_state.last_uploaded_file.replace('.pdf', '')}.md",
                mime="text/markdown"
            )

    with tab4:
        st.markdown("### 📋 PDF Table Explorer")
        st.info("Direct extraction of tabular data from the document. Select a table to explore.")

        tables = st.session_state.pdf_agent.tables
        if not tables:
            st.warning("No structured tables were detected in the document.")
        else:
            table_labels = [f"{t['label']} (Page Grounded)" for t in tables]

            # Select by position and only format with the label: looking the
            # chosen label text back up would always resolve to the first
            # table when two tables share the same label.
            selected_idx = st.selectbox(
                "Select Table",
                range(len(table_labels)),
                format_func=lambda idx: table_labels[idx],
            )
            selected_table = tables[selected_idx]

            st.markdown(f"#### {selected_table['label']}")
            st.dataframe(selected_table['df'], width="stretch")

            # Download as CSV
            csv = selected_table['df'].to_csv(index=False).encode('utf-8')
            st.download_button(
                label=f"📥 Download {selected_table['label']} as CSV",
                data=csv,
                file_name=f"{selected_table['label'].replace(' ', '_')}.csv",
                mime="text/csv"
            )




else:
    # Shown only when st.session_state.pdf_agent is falsy (no agent object held).
    st.info("Please upload a PDF document to begin analysis.")


# Footer: a divider followed by a centred copyright / attribution line,
# emitted as raw HTML.
st.divider()
_footer_html = """
    <div style="text-align: center; color: #64748b; padding: 20px;">
        © 2026 <b>Naresh Kumar Lahajal</b>. All Rights Reserved.<br>
        <small>Powered by Groq and Retrieval-Augmented Generation</small>
    </div>
    """
st.markdown(_footer_html, unsafe_allow_html=True)