# challenge-b / app.py
# NEXAS's picture
# Upload 23 files
# 109bdd3 verified
import streamlit as st
import os
import traceback
import pandas as pd
import json
import time
from dotenv import load_dotenv
from agent.llm_client import GroqClient
from agent.agent import LlamaPDFAgent as PDFAgent, AgentRateLimitError
# Read a local .env file into the process environment before any setting is
# looked up with os.getenv().
load_dotenv()

# Browser-tab title, icon and page layout. Collected in one mapping so the
# page settings can be read at a glance.
_PAGE_OPTIONS = {
    "page_title": "Naresh AI DocuPulse Submission - PDF Intelligence",
    "page_icon": "📄",
    "layout": "wide",
}
st.set_page_config(**_PAGE_OPTIONS)
# Premium dark-mode theme (kept consistent with Challenge A). The stylesheet
# lives in a named constant and is injected once per script run as raw HTML.
_DARK_THEME_CSS = """
<style>
/* Main container styling - Deep Dark Gradient */
.stApp {
background: radial-gradient(circle at top left, #1e293b 0%, #0f172a 100%) !important;
color: #f1f5f9 !important;
}
/* Header and Title styling - Neon Blue */
h1 {
color: #60a5fa !important;
font-family: 'Outfit', sans-serif;
font-weight: 800 !important;
letter-spacing: -0.05rem;
text-shadow: 0 0 20px rgba(96, 165, 250, 0.3);
}
h3 {
color: #94a3b8 !important;
font-weight: 400 !important;
}
/* Input styling - Darker Glass */
.stTextInput>div>div>input {
background-color: rgba(30, 41, 59, 0.7) !important;
color: white !important;
border: 1px solid rgba(96, 165, 250, 0.5) !important;
border-radius: 12px !important;
padding: 12px 20px !important;
font-size: 1.1rem !important;
}
/* Button styling - Glowing Blue */
.stButton>button {
background: linear-gradient(90deg, #2563eb 0%, #3b82f6 100%) !important;
color: white !important;
border: none !important;
border-radius: 12px !important;
padding: 15px 30px !important;
font-weight: 700 !important;
font-size: 1.1rem !important;
transition: all 0.3s ease !important;
box-shadow: 0 0 15px rgba(37, 99, 235, 0.4) !important;
width: 100% !important;
}
.stButton>button:hover {
transform: translateY(-2px) !important;
box-shadow: 0 0 30px rgba(59, 130, 246, 0.6) !important;
}
/* Result Card styling - Dark Inset */
.answer-container {
background-color: rgba(30, 41, 59, 0.5);
padding: 30px;
border-radius: 20px;
backdrop-filter: blur(20px);
border: 1px solid rgba(255, 255, 255, 0.1);
box-shadow: inset 0 0 20px rgba(0, 0, 0, 0.2);
border-left: 8px solid #2563eb;
margin-top: 25px;
}
/* Sidebar Dark Glass */
section[data-testid="stSidebar"] {
background-color: rgba(15, 23, 42, 0.95) !important;
backdrop-filter: blur(20px) !important;
border-right: 1px solid rgba(255, 255, 255, 0.1) !important;
}
.brand-text {
font-size: 1.5rem;
font-weight: 900;
background: linear-gradient(90deg, #60a5fa, #3b82f6);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
margin-bottom: 20px;
}
/* Standard Text Color Fixes */
.stMarkdown, p, li {
color: #cbd5e1 !important;
}
strong {
color: #f1f5f9 !important;
}
</style>
"""
st.markdown(_DARK_THEME_CSS, unsafe_allow_html=True)
# Seed the session keys this script reads on every run. A key that is already
# present (i.e. on a rerun) keeps its current value.
_SESSION_DEFAULTS = (
    ("pdf_agent", None),
    ("messages", []),
    ("deep_insights", {}),
)
for _state_key, _initial_value in _SESSION_DEFAULTS:
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
# Sidebar: API key, model selection, document library, session controls and
# author profile.
with st.sidebar:
    st.markdown('<div class="brand-text">NARESH AI</div>', unsafe_allow_html=True)
    st.title("Settings")

    # API Key Input (pre-filled from the GROQ_API_KEY environment variable).
    groq_api_key = st.text_input("Groq API Key", type="password", value=os.getenv("GROQ_API_KEY", ""))

    # Dynamic Model Fetching: start from a static fallback list and replace it
    # with the list reported by the client when a key is available.
    preferred_model = "meta-llama/llama-4-scout-17b-16e-instruct"
    available_models = [preferred_model, "llama-3.3-70b-versatile", "mixtral-8x7b-32768"]
    if groq_api_key:
        try:
            fetched_models = GroqClient(api_key=groq_api_key).list_models()
        except Exception as exc:
            # Best effort: any failure keeps the fallback list, but the user is
            # told about it instead of the error being swallowed silently.
            st.caption(f"⚠️ Could not fetch the live model list ({type(exc).__name__}); using defaults.")
        else:
            if fetched_models:
                available_models = fetched_models

    # Pre-select the preferred model when it is offered, else the first entry.
    default_index = available_models.index(preferred_model) if preferred_model in available_models else 0
    model_choice = st.selectbox(
        "Model Architecture",
        available_models,
        index=default_index,
    )

    st.divider()
    st.markdown("### 🗂️ Document Library")

    # Initialize agent if not exist (for library access). An agent object
    # without get_library() is treated as stale and replaced.
    existing_agent = st.session_state.get("pdf_agent")
    if existing_agent and not hasattr(existing_agent, "get_library"):
        st.session_state.pdf_agent = None  # Clear stale object
    if not st.session_state.pdf_agent:
        # PDFAgent is already imported at the top of the file.
        st.session_state.pdf_agent = PDFAgent(api_key=groq_api_key or os.getenv("GROQ_API_KEY"), model=model_choice)

    library = st.session_state.pdf_agent.get_library()
    if not library:
        st.caption("No documents in library.")
    else:
        for doc in library:
            col1, col2 = st.columns([0.8, 0.2])
            with col1:
                st.markdown(f"**{doc['filename']}**")
            with col2:
                if st.button("🗑️", key=f"del_{doc['hash']}", help="Delete vectors"):
                    if st.session_state.pdf_agent.delete_document(doc['hash']):
                        st.session_state.pdf_agent = None  # Force re-init if active one deleted
                        st.rerun()
        st.info("To switch document, simply upload it again. It will load instantly from the library.")

    st.divider()
    st.markdown("### Document Controls")
    if st.button("Reset Session"):
        st.session_state.pdf_agent = None
        st.session_state.messages = []
        st.session_state.deep_insights = {}
        # Also forget per-document state. In particular, dropping
        # last_uploaded_file lets the upload step ingest a file that is still
        # sitting in the uploader again; otherwise its name matches the
        # remembered one and the freshly created agent stays empty.
        for stale_key in ("last_uploaded_file", "extracted_tables", "kpi_viz_data"):
            if stale_key in st.session_state:
                del st.session_state[stale_key]
        st.rerun()

    st.divider()
    st.markdown("### Profile")
    st.write("**Built by:** Naresh Kumar Lahajal")
    st.write("**Role:** GenAI Enthusiast")
    st.info("High-speed PDF intelligence powered by Groq and FastEmbed.")
# Header
st.title("Naresh AI DocuPulse - Submission")
st.subheader("Challenge B: PDF RAG & Summarization")

# File Upload
uploaded_file = st.file_uploader("Upload a PDF document", type=["pdf"])

# Ingest when there is no agent yet, or when the uploaded file's name differs
# from the one remembered from the last successful ingestion.
if uploaded_file and (
    st.session_state.pdf_agent is None
    or uploaded_file.name != st.session_state.get("last_uploaded_file")
):
    with st.status("Ingesting document and indexing knowledge...", expanded=True) as status:
        try:
            # Same key fallback as the sidebar's library agent.
            agent = PDFAgent(api_key=groq_api_key or os.getenv("GROQ_API_KEY"), model=model_choice)
            status_msg = agent.ingest_pdf(uploaded_file)
            st.session_state.pdf_agent = agent
            st.session_state.last_uploaded_file = uploaded_file.name
            # Sync tables for explorer
            st.session_state.extracted_tables = agent.tables
            # Auto-Clear History on New Upload
            st.session_state.messages = []
            st.session_state.deep_insights = {}
            status.update(label=f"✅ {status_msg}", state="complete", expanded=False)
            st.toast("Intelligence Engine Initialized", icon="🧠")
        except Exception as e:
            # The exception is handled here, so the status box would otherwise
            # finish as "complete" while still showing the "Ingesting..."
            # label. Mark it as failed explicitly and keep it open so the
            # error below stays visible.
            status.update(label="❌ Ingestion failed", state="error", expanded=True)
            st.error(f"Critical Ingestion Error: {e}")
            with st.expander("Show Traceback"):
                st.code(traceback.format_exc())
# Helper for Exact Backoff
def run_with_exact_backoff(func, *args, **kwargs):
    """Call ``func`` and retry after a visible countdown when it is rate-limited.

    ``func(*args, **kwargs)`` is attempted up to three times. Only
    ``AgentRateLimitError`` triggers a retry; any other exception propagates
    immediately.

    Args:
        func: The callable to run.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns on the first attempt that succeeds.

    Raises:
        AgentRateLimitError: Re-raised (after showing an error message) when
            the final attempt is still rate-limited.
    """
    max_attempts = 3
    for attempt in range(1, max_attempts + 1):
        try:
            return func(*args, **kwargs)
        except AgentRateLimitError as e:
            if attempt == max_attempts:
                st.error(f"Failed after {max_attempts} attempts due to Persistent Rate Limits. Please wait a few minutes.")
                # Bare raise re-raises the active exception unchanged.
                raise

            # Precise wait + 1s buffer. The value is read from the exception;
            # if it is missing or not numeric, fall back to the 1s buffer
            # rather than letting a conversion error mask the rate limit.
            # A negative value is clamped so a retry never fires instantly.
            try:
                wait_time = max(int(e.wait_time), 0) + 1
            except (AttributeError, TypeError, ValueError, OverflowError):
                wait_time = 1
            st.toast(f"Rate Limit Hit! Waiting {wait_time}s to retry...", icon="⏳")

            # Visual Countdown: one placeholder, rewritten once per second.
            placeholder = st.empty()
            for remaining in range(wait_time, 0, -1):
                placeholder.warning(f"⚠️ API Cooldown: Retrying in {remaining} seconds...")
                time.sleep(1)
            placeholder.empty()
    # Not reached: the last iteration above either returns or raises.
# Main workspace: the four analysis tabs, shown once a document was ingested.


def _render_sources(sources):
    """Render source passages in a collapsed citations expander (no-op when empty)."""
    if not sources:
        return
    with st.expander("🔗 Sources & Citations", expanded=False):
        for number, src in enumerate(sources, start=1):
            page_text = f"Page {src['page']}" if src['page'] else "Unknown Page"
            st.markdown(f"**[{number}] {page_text}**")
            st.caption(f"_{src['text']}_")
            st.divider()


def _parse_swot(raw):
    """Return the SWOT mapping contained in ``raw``, or None if it cannot be parsed.

    ``raw`` may already be a dict, or a string holding a JSON object that is
    optionally wrapped in a ```json fence or surrounded by other text.
    Anything that does not decode to a JSON object yields None.
    """
    if isinstance(raw, dict):
        return raw
    if not isinstance(raw, str):
        return None
    text = raw
    # Attempt to clean potential markdown artifacts around JSON
    if "```json" in text:
        text = text.split("```json")[1].split("```")[0].strip()
    elif "{" in text:
        text = "{" + text.split("{", 1)[1].rsplit("}", 1)[0] + "}"
    try:
        parsed = json.loads(text)
    except ValueError:  # json.JSONDecodeError is a ValueError
        return None
    # A JSON list/number/string has no .get(); treat it as unparseable.
    return parsed if isinstance(parsed, dict) else None


def _looks_like_time_series(labels):
    """Return True when any label mentions a year or a quarter (q1-q4)."""
    tokens = ("year", "q1", "q2", "q3", "q4")
    return any(token in str(label).lower() for label in labels for token in tokens)


def _build_report(document_name, insights, swot):
    """Assemble the downloadable Markdown report from the extracted insights."""
    swot = swot or {}
    lines = [
        f"# Executive Intelligence Report: {document_name}",
        "## 🎯 Strategic Vision",
        str(insights.get('strategic_vision', 'N/A')),
        "## 📊 Key Performance Indicators",
        str(insights.get('key_metrics', 'N/A')),
        "## ⚠️ Risks & Challenges",
        str(insights.get('risks_and_challenges', 'N/A')),
        "## 🛠️ SWOT Analysis",
        "### Strengths",
        str(swot.get('S', 'N/A')),
        "### Weaknesses",
        str(swot.get('W', 'N/A')),
        "### Opportunities",
        str(swot.get('O', 'N/A')),
        "### Threats",
        str(swot.get('T', 'N/A')),
        # Blank line: a '---' directly under a text line is a Markdown setext
        # heading underline, not a horizontal rule.
        "",
        "---",
        "*Report generated by Naresh AI DocuPulse*",
    ]
    return "\n".join(lines) + "\n"


# The sidebar section of this file creates an agent for library access even
# before anything is uploaded, so the agent alone does not show that a
# document is loaded. last_uploaded_file is only set after a successful
# ingestion, which makes it the reliable signal (and guarantees the report
# export below has a name to use).
active_document = st.session_state.get("last_uploaded_file")

if st.session_state.pdf_agent and active_document:
    # Action Tabs
    tab1, tab2, tab3, tab4 = st.tabs(["💬 Ask Questions", "📝 Auto-Summary", "🧠 Deep Intelligence", "📋 Table Explorer"])

    with tab1:
        st.markdown("### 💬 Document Conversation")
        st.caption("Ask questions about the document and maintain a conversation thread.")

        # Display Chat History
        for message in st.session_state.messages:
            with st.chat_message(message["role"]):
                st.markdown(message["content"])
                _render_sources(message.get("sources"))

        # Chat Input
        if prompt := st.chat_input("What would you like to know?"):
            # Add user message to history
            st.session_state.messages.append({"role": "user", "content": prompt})
            with st.chat_message("user"):
                st.markdown(prompt)

            # Generate AI response
            with st.chat_message("assistant"):
                with st.spinner("Analyzing document context..."):
                    response_data = run_with_exact_backoff(st.session_state.pdf_agent.answer_question, prompt)
                if response_data:
                    # Use st.write_stream for typing effect
                    answer = st.write_stream(response_data['answer_gen'])
                    sources = response_data.get("sources", [])
                    _render_sources(sources)
                    # Add assistant response to history
                    st.session_state.messages.append({
                        "role": "assistant",
                        "content": answer,
                        "sources": sources,
                    })

    with tab2:
        if st.button("Generate Executive Summary"):
            with st.spinner("Synthesizing document overview..."):
                streaming_response = run_with_exact_backoff(st.session_state.pdf_agent.summarize_document)
            if streaming_response:
                st.markdown('<div class="answer-container" style="border-left: 8px solid #60a5fa;">', unsafe_allow_html=True)
                st.markdown("### 📝 Document Summary")
                st.write_stream(streaming_response.response_gen)
                st.markdown('</div>', unsafe_allow_html=True)

    with tab3:
        st.markdown("### 🚀 Strategic Deep Analysis")
        st.info("This mode uses multi-stage recursive retrieval to extract deep strategic insights and KPIs.")

        if st.button("Run Deep Intelligence Scan"):
            with st.status("Analyzing document layers...", expanded=True) as status:
                st.write("🔍 Extracting Strategic Vision...")
                insights = run_with_exact_backoff(st.session_state.pdf_agent.get_deep_insights)
                if insights:
                    st.session_state.deep_insights = insights
                    # Fetch KPI visualization data
                    st.write("📊 Generating Visual Analytics...")
                    viz_data = run_with_exact_backoff(st.session_state.pdf_agent.get_kpi_viz_data)
                    st.session_state.kpi_viz_data = viz_data
                    status.update(label="✅ Deep Analysis Complete", state="complete", expanded=False)
                else:
                    status.update(label="❌ Failed after retries", state="error", expanded=False)

        if st.session_state.deep_insights:
            insights = st.session_state.deep_insights

            # 1. Strategic Vision
            st.markdown('<div class="answer-container" style="border-left: 8px solid #8b5cf6;">', unsafe_allow_html=True)
            st.markdown("#### 🎯 Strategic Vision")
            st.write(insights.get("strategic_vision", "N/A"))
            st.markdown('</div>', unsafe_allow_html=True)

            col1, col2 = st.columns(2)
            with col1:
                # 2. Key Metrics
                st.markdown("#### 📊 Key Performance Indicators")
                metrics_text = insights.get("key_metrics", "")
                st.markdown(metrics_text if metrics_text else "No metrics extracted.")
            with col2:
                # 3. Risks
                st.markdown("#### ⚠️ Risks & Challenges")
                risks_text = insights.get("risks_and_challenges", "")
                st.markdown(risks_text if risks_text else "No risks identified.")

            # Visual Dashboard Section. Skipped when the data has no 'label'
            # column, since both charts index on it.
            kpi_viz_data = st.session_state.get("kpi_viz_data")
            if kpi_viz_data:
                viz_df = pd.DataFrame(kpi_viz_data)
                if "label" in viz_df.columns:
                    st.divider()
                    st.markdown("#### 📈 Key Trends & Metrics")
                    # Heuristic for chart type
                    if _looks_like_time_series(viz_df['label']):
                        st.line_chart(viz_df.set_index('label'), color="#3b82f6")
                        st.caption("Auto-detected Time Series data.")
                    else:
                        st.bar_chart(viz_df.set_index('label'), color="#60a5fa")
                        st.caption("Bar chart representation of extracted KPIs.")

            # 4. SWOT Analysis
            st.divider()
            st.markdown("#### 🛠️ Automated SWOT Analysis")
            swot_raw = insights.get("swot_analysis", "{}")
            swot_data = _parse_swot(swot_raw)
            if swot_data is not None:
                # Display SWOT in a grid
                s_col1, s_col2 = st.columns(2)
                with s_col1:
                    st.success(f"**Strengths**\n\n{swot_data.get('S', 'N/A')}")
                    st.info(f"**Opportunities**\n\n{swot_data.get('O', 'N/A')}")
                with s_col2:
                    st.warning(f"**Weaknesses**\n\n{swot_data.get('W', 'N/A')}")
                    st.error(f"**Threats**\n\n{swot_data.get('T', 'N/A')}")
            else:
                # Could not be parsed as a JSON object: show it untouched.
                st.write("Raw SWOT Insight:")
                st.write(swot_raw)

            # Report Export
            st.divider()
            report_md = _build_report(active_document, insights, swot_data)
            # splitext drops the extension regardless of case and leaves any
            # ".pdf" inside the name alone.
            report_stem = os.path.splitext(active_document)[0]
            st.download_button(
                label="📥 Download Executive Intelligence Report",
                data=report_md,
                file_name=f"Intelligence_Report_{report_stem}.md",
                mime="text/markdown",
            )

    with tab4:
        st.markdown("### 📋 PDF Table Explorer")
        st.info("Direct extraction of tabular data from the document. Select a table to explore.")
        tables = st.session_state.pdf_agent.tables
        if not tables:
            st.warning("No structured tables were detected in the document.")
        else:
            # Select by position so two tables sharing a label stay
            # distinguishable; the label is only used for display.
            selected_idx = st.selectbox(
                "Select Table",
                range(len(tables)),
                format_func=lambda idx: f"{tables[idx]['label']} (Page Grounded)",
            )
            selected_table = tables[selected_idx]
            st.markdown(f"#### {selected_table['label']}")
            st.dataframe(selected_table['df'], width="stretch")

            # Download as CSV
            csv = selected_table['df'].to_csv(index=False).encode('utf-8')
            st.download_button(
                label=f"📥 Download {selected_table['label']} as CSV",
                data=csv,
                file_name=f"{selected_table['label'].replace(' ', '_')}.csv",
                mime="text/csv",
            )
else:
    st.info("Please upload a PDF document to begin analysis.")
# Footer: a divider followed by a centered copyright / attribution line,
# rendered as raw HTML.
_FOOTER_HTML = """
<div style="text-align: center; color: #64748b; padding: 20px;">
© 2026 <b>Naresh Kumar Lahajal</b>. All Rights Reserved.<br>
<small>Powered by Groq and Retrieval-Augmented Generation</small>
</div>
"""
st.divider()
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)