""" FinAgent — Infosys AR 2024-25 | Streamlit Q&A Interface Run: streamlit run app.py """ import json import re import time import streamlit as st # ── Page config (must be first Streamlit call) ──────────────────────────────── st.set_page_config( page_title="FinAgent · Infosys AR Q&A", page_icon="📊", layout="wide", initial_sidebar_state="expanded", ) # ── Custom CSS ──────────────────────────────────────────────────────────────── st.markdown( """ """, unsafe_allow_html=True, ) # ── Helper: colour-coded intent badge ──────────────────────────────────────── INTENT_BADGE = { "financial_metrics": ("📈", "badge-blue", "Financial Metrics"), "compliance_check": ("⚖️", "badge-amber", "Compliance Check"), "investment_alert": ("🔔", "badge-green", "Investment Alert"), "analyst_review": ("📅", "badge-blue", "Analyst Review"), "rag_query": ("🔍", "badge-blue", "General Query"), } def intent_badge(intent: str) -> str: icon, cls, label = INTENT_BADGE.get( intent, ("❓", "badge-blue", intent or "Unknown") ) return f'{icon} {label}' def score_colour(score: float) -> str: if score >= 0.8: return "badge-green" if score >= 0.5: return "badge-amber" return "badge-red" # ── Cached resource: heavy models loaded once per session ───────────────────── @st.cache_resource(show_spinner=False) def load_pipeline(): """ Load Phase 4 LangGraph pipeline. Cached by Streamlit so models are only loaded once. """ from phase4_tools import build_phase4_components, build_phase4_graph components = build_phase4_components() app = build_phase4_graph(components) return components, app def run_query(query: str, components: dict, app) -> dict: from phase3_agent import make_initial_state state = make_initial_state(query) result = app.invoke(state) return result def _parse_retry_seconds(error_msg: str) -> str: """Extract human-readable wait time from a Groq rate-limit error message.""" match = re.search(r"try again in ([\d]+m[\d.]+s|[\d.]+s|[\d]+m)", str(error_msg)) return match.group(1) if match else "a few minutes" # ── Sidebar ─────────────────────────────────────────────────────────────────── with st.sidebar: st.markdown("## 📊 FinAgent") st.markdown("**Document:** Infosys Integrated Annual Report 2024-25") st.divider() st.markdown("### Pipeline") st.markdown( "- 🔍 **Hybrid retrieval** — BM25 + Dense (HyDE)\n" "- 🏆 **Cross-encoder reranking** — top-4 chunks\n" "- 🤖 **LangGraph agents** — intent-routed\n" "- 💹 **FinBERT** — domain sentiment\n" "- ♻️ **Self-RAG** — inline faithfulness check" ) st.divider() st.markdown("### Agent Intents") for icon, label, desc in [ ("📈", "Financial Metrics", "Revenue, margins, FCF, RoE"), ("⚖️", "Compliance Check", "SEBI, risk factors, ESG"), ("🔔", "Investment Alert", "Signals with FinBERT sentiment"), ("📅", "Analyst Review", "Schedule review meetings"), ("🔍", "General Query", "Free-form document Q&A"), ]: st.markdown(f"**{icon} {label}** — {desc}") st.divider() st.caption("Powered by Groq · LangGraph · ChromaDB · HuggingFace") # ── Main header ─────────────────────────────────────────────────────────────── st.markdown('

📊 FinAgent

', unsafe_allow_html=True) st.markdown( '

Ask anything about the Infosys Annual Report 2024-25

', unsafe_allow_html=True, ) # ── Example queries ─────────────────────────────────────────────────────────── st.markdown("##### Try an example") example_cols = st.columns(3) example_queries = [ "What was Infosys revenue and operating margin in FY25?", "Are there any SEBI compliance issues mentioned in the annual report?", "Generate an investment alert for Infosys based on FY25 performance.", "What is Infosys AI strategy and Topaz platform?", "What were the major risk factors disclosed by Infosys?", "Schedule an analyst review for Infosys credit assessment.", ] # The query lives in session_state so example clicks persist across reruns # (otherwise clicking an example then "Ask" submits an empty box). if "query_text" not in st.session_state: st.session_state.query_text = "" for i, col in enumerate(example_cols): with col: if st.button(f"💬 {example_queries[i*2][:45]}…", key=f"ex_{i*2}", use_container_width=True): st.session_state.query_text = example_queries[i * 2] if st.button(f"💬 {example_queries[i*2+1][:45]}…", key=f"ex_{i*2+1}", use_container_width=True): st.session_state.query_text = example_queries[i * 2 + 1] st.divider() # ── Query input ─────────────────────────────────────────────────────────────── # Bound to session_state via key — the widget owns the value, so example # clicks above pre-fill it and the text survives the "Ask" rerun. query = st.text_area( "Enter your question", key="query_text", height=80, placeholder="e.g. What was Infosys revenue in FY25?", label_visibility="collapsed", ) col_btn, col_clear = st.columns([1, 6]) with col_btn: submit = st.button("🔍 Ask FinAgent", type="primary", use_container_width=True) with col_clear: st.empty() # ── Pipeline loader ─────────────────────────────────────────────────────────── if "pipeline_ready" not in st.session_state: st.session_state.pipeline_ready = False if not st.session_state.pipeline_ready: with st.status("⚙️ Loading models (first run only — takes ~30s)…", expanded=True) as status: st.write("Loading embedding model (all-mpnet-base-v2)…") st.write("Connecting to ChromaDB vector store…") st.write("Loading FinBERT sentiment model…") components, app = load_pipeline() st.session_state.pipeline_ready = True status.update(label="✅ Pipeline ready", state="complete", expanded=False) else: components, app = load_pipeline() # ── Run query ───────────────────────────────────────────────────────────────── if submit and query.strip(): with st.spinner("🤔 Retrieving and reasoning…"): t0 = time.time() try: result = run_query(query.strip(), components, app) except Exception as e: err_str = str(e) if "rate_limit_exceeded" in err_str or "429" in err_str: wait = _parse_retry_seconds(err_str) st.error( f"⏳ **Groq daily token limit reached.**\n\n" f"Your free-tier quota (100,000 tokens/day) is exhausted. " f"Please try again in **{wait}** — the limit resets at midnight UTC.\n\n" f"**To avoid this:** upgrade to Groq Dev Tier at " f"https://console.groq.com/settings/billing" ) else: st.error(f"❌ Unexpected error: {err_str}") st.stop() elapsed = round(time.time() - t0, 1) # ── Extract fields ──────────────────────────────────────────────────────── answer = result.get("final_answer", "No answer generated.") intent = result.get("intent", "") faith_score = result.get("faithfulness_score", 0.0) or 0.0 citations = result.get("citations", []) doc_dicts = result.get("retrieved_docs", []) tool_name = result.get("tool_name", "") tool_output = result.get("tool_output", {}) iteration = result.get("iteration_count", 0) # ── Top metrics row ─────────────────────────────────────────────────────── m1, m2, m3, m4 = st.columns(4) m1.metric("🎯 Intent", intent.replace("_", " ").title() if intent else "—") m2.metric("⚡ Latency", f"{elapsed}s") m3.metric("✅ Faithfulness", f"{faith_score:.0%}") m4.metric("🔄 Self-RAG iters", f"{iteration + 1}") st.divider() # ── Answer ──────────────────────────────────────────────────────────────── st.markdown("### 💡 Answer") st.markdown( f'
{answer}
', unsafe_allow_html=True, ) # ── Citations ───────────────────────────────────────────────────────────── if citations: chips = " ".join( f'📄 Page {p}' for p in sorted(set(citations)) if p is not None ) st.markdown(f"**Citations:** {chips}", unsafe_allow_html=True) st.divider() # ── Retrieved context ───────────────────────────────────────────────────── if doc_dicts: with st.expander(f"📚 Retrieved context ({len(doc_dicts)} chunks)", expanded=False): for i, doc in enumerate(doc_dicts, 1): page = doc.get("metadata", {}).get("page", "?") text = doc.get("page_content", "").strip() st.markdown( f'
' f'Chunk {i} — Page {page}
{text[:600]}' f'{"…" if len(text) > 600 else ""}' f"
", unsafe_allow_html=True, ) # ── Tool output ─────────────────────────────────────────────────────────── if tool_output and tool_name: with st.expander(f"🔧 Tool output — `{tool_name}`", expanded=False): st.json(tool_output) # ── FinBERT sentiment (if investment alert) ─────────────────────────────── finbert = None if isinstance(tool_output, dict): finbert = tool_output.get("finbert_sentiment") if finbert: with st.expander("💹 FinBERT Sentiment Breakdown", expanded=False): label = finbert.get("label", "neutral").capitalize() score = finbert.get("score", 0.0) scores = finbert.get("all_scores", {}) fcol1, fcol2 = st.columns([1, 2]) with fcol1: colour = {"Positive": "🟢", "Negative": "🔴", "Neutral": "🟡"}.get(label, "⚪") st.metric(f"{colour} Dominant sentiment", f"{label} ({score:.0%})") with fcol2: if scores: for sent, val in scores.items(): st.progress(val, text=f"{sent.capitalize()}: {val:.1%}") elif submit and not query.strip(): st.warning("Please enter a question before clicking Ask.") # ── Footer ──────────────────────────────────────────────────────────────────── st.divider() st.caption( "FinAgent · RAG + LangGraph + FinBERT · " "Groq llama-3.3-70b-versatile · ChromaDB 3,607 chunks · " "Infosys AR 2024-25" )