Spaces:
Sleeping
Sleeping
| """ | |
| FinAgent — Infosys AR 2024-25 | Streamlit Q&A Interface | |
| Run: | |
| streamlit run app.py | |
| """ | |
import html
import json
import re
import time

import streamlit as st
# -- Page config (must be the first Streamlit call) ---------------------------
# Settings are collected in one mapping and unpacked into the call.
_PAGE_SETTINGS = {
    "page_title": "FinAgent Β· Infosys AR Q&A",
    "page_icon": "π",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)
# -- Custom CSS ----------------------------------------------------------------
# Stylesheet for the title/sub-title, metric cards, badges, citation chips and
# the context/answer boxes. Injected once as raw HTML.
_CUSTOM_CSS = """
<style>
.main-title { font-size:2.2rem; font-weight:700; color:#1a3c5e; margin-bottom:0; }
.sub-title { font-size:1rem; color:#5a7fa6; margin-top:0; margin-bottom:1.5rem; }
.metric-card { background:#f0f6ff; border-radius:10px; padding:12px 18px;
border-left:4px solid #1a3c5e; margin-bottom:8px; }
.badge { display:inline-block; padding:2px 10px; border-radius:12px;
font-size:0.78rem; font-weight:600; margin-right:6px; }
.badge-blue { background:#dbeafe; color:#1e40af; }
.badge-green { background:#dcfce7; color:#166534; }
.badge-amber { background:#fef9c3; color:#854d0e; }
.badge-red { background:#fee2e2; color:#991b1b; }
.cite-chip { display:inline-block; background:#e0e7ff; color:#3730a3;
border-radius:6px; padding:1px 8px; font-size:0.8rem;
margin:2px; font-weight:500; }
.context-box { background:#fafafa; border:1px solid #e5e7eb; border-radius:8px;
padding:10px 14px; margin:6px 0; font-size:0.85rem; }
.answer-box { background:#f8faff; border-left:4px solid #3b82f6;
padding:16px 20px; border-radius:8px; }
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# -- Helper: colour-coded intent badge -----------------------------------------
# Maps an intent key to (icon, CSS badge class, display label).
INTENT_BADGE = {
    "financial_metrics": ("π", "badge-blue", "Financial Metrics"),
    "compliance_check": ("βοΈ", "badge-amber", "Compliance Check"),
    "investment_alert": ("π", "badge-green", "Investment Alert"),
    "analyst_review": ("π ", "badge-blue", "Analyst Review"),
    "rag_query": ("π", "badge-blue", "General Query"),
}


def intent_badge(intent: str) -> str:
    """Return an HTML ``<span>`` badge for *intent*.

    Known intents use the icon, CSS class and label stored in
    ``INTENT_BADGE``. Any other value falls back to a blue badge labelled
    with the raw intent text, or ``"Unknown"`` when the intent is empty or
    ``None``.

    The result is raw HTML markup, so the free-form fallback label is
    HTML-escaped; the labels in ``INTENT_BADGE`` are constants and are
    emitted unchanged.
    """
    known = INTENT_BADGE.get(intent)
    if known is None:
        # Unrecognised intent text does not come from this file's constants,
        # so it must not be able to inject markup into the badge.
        icon, cls = "β", "badge-blue"
        label = html.escape(str(intent or "Unknown"))
    else:
        icon, cls, label = known
    return f'<span class="badge {cls}">{icon} {label}</span>'
def score_colour(score: float, *, good: float = 0.8, fair: float = 0.5) -> str:
    """Return the CSS badge class that colour-codes *score*.

    Args:
        score: The score to classify. ``None`` is accepted and treated as the
            lowest band, so a missing score does not raise.
        good: Inclusive lower bound for ``"badge-green"`` (default 0.8).
        fair: Inclusive lower bound for ``"badge-amber"`` (default 0.5).

    Returns:
        ``"badge-green"``, ``"badge-amber"`` or ``"badge-red"``.
    """
    if score is None:
        return "badge-red"
    if score >= good:
        return "badge-green"
    if score >= fair:
        return "badge-amber"
    return "badge-red"
# -- Cached resource: heavy models are built once and then reused --------------
@st.cache_resource(show_spinner=False)
def load_pipeline():
    """Build the Phase 4 components and graph and return them as a pair.

    The function is wrapped in ``st.cache_resource`` so the work runs only on
    the first call; later calls (this script calls it on every rerun) get the
    same cached objects back. Streamlit's default cache spinner is turned off
    because the caller shows its own ``st.status`` box while loading.

    NOTE(review): ``st.cache_resource`` shares the returned objects across all
    sessions of the app -- confirm the pipeline objects tolerate that.

    Returns:
        ``(components, app)``: the value of ``build_phase4_components()`` and
        the graph built from it by ``build_phase4_graph``.
    """
    # Imported lazily so importing this module does not pull in the pipeline.
    from phase4_tools import build_phase4_components, build_phase4_graph

    components = build_phase4_components()
    graph = build_phase4_graph(components)
    return components, graph
def run_query(query: str, components: dict, app) -> dict:
    """Invoke *app* on a fresh initial state for *query* and return the result.

    ``components`` is part of the signature but is not used by this function.
    """
    from phase3_agent import make_initial_state

    return app.invoke(make_initial_state(query))
| def _parse_retry_seconds(error_msg: str) -> str: | |
| """Extract human-readable wait time from a Groq rate-limit error message.""" | |
| match = re.search(r"try again in ([\d]+m[\d.]+s|[\d.]+s|[\d]+m)", str(error_msg)) | |
| return match.group(1) if match else "a few minutes" | |
# -- Sidebar -------------------------------------------------------------------
# Static description of the document, the pipeline stages and the intents.
st.sidebar.markdown("## π FinAgent")
st.sidebar.markdown("**Document:** Infosys Integrated Annual Report 2024-25")
st.sidebar.divider()

st.sidebar.markdown("### Pipeline")
_PIPELINE_BULLETS = (
    "- π **Hybrid retrieval** β BM25 + Dense (HyDE)",
    "- π **Cross-encoder reranking** β top-4 chunks",
    "- π€ **LangGraph agents** β intent-routed",
    "- πΉ **FinBERT** β domain sentiment",
    "- β»οΈ **Self-RAG** β inline faithfulness check",
)
# Rendered as a single markdown list, one bullet per line.
st.sidebar.markdown("\n".join(_PIPELINE_BULLETS))
st.sidebar.divider()

st.sidebar.markdown("### Agent Intents")
_SIDEBAR_INTENTS = (
    ("π", "Financial Metrics", "Revenue, margins, FCF, RoE"),
    ("βοΈ", "Compliance Check", "SEBI, risk factors, ESG"),
    ("π", "Investment Alert", "Signals with FinBERT sentiment"),
    ("π ", "Analyst Review", "Schedule review meetings"),
    ("π", "General Query", "Free-form document Q&A"),
)
for _icon, _name, _blurb in _SIDEBAR_INTENTS:
    st.sidebar.markdown(f"**{_icon} {_name}** β {_blurb}")
st.sidebar.divider()

st.sidebar.caption("Powered by Groq Β· LangGraph Β· ChromaDB Β· HuggingFace")
# -- Main header ---------------------------------------------------------------
# Title and sub-title, styled by the .main-title / .sub-title CSS classes.
_HEADER_HTML = (
    '<p class="main-title">π FinAgent</p>',
    '<p class="sub-title">Ask anything about the <b>Infosys Annual Report 2024-25</b></p>',
)
for _snippet in _HEADER_HTML:
    st.markdown(_snippet, unsafe_allow_html=True)
# -- Example queries -----------------------------------------------------------
st.markdown("##### Try an example")
example_cols = st.columns(3)
example_queries = [
    "What was Infosys revenue and operating margin in FY25?",
    "Are there any SEBI compliance issues mentioned in the annual report?",
    "Generate an investment alert for Infosys based on FY25 performance.",
    "What is Infosys AI strategy and Topaz platform?",
    "What were the major risk factors disclosed by Infosys?",
    "Schedule an analyst review for Infosys credit assessment.",
]

# The query text is kept in session_state so that a clicked example is still
# there on the rerun triggered by pressing "Ask".
if "query_text" not in st.session_state:
    st.session_state["query_text"] = ""

# Two buttons per column: column c shows examples 2c and 2c + 1.
for col_idx, column in enumerate(example_cols):
    for q_idx in (2 * col_idx, 2 * col_idx + 1):
        example = example_queries[q_idx]
        clicked = column.button(
            f"π¬ {example[:45]}β¦",
            key=f"ex_{q_idx}",
            use_container_width=True,
        )
        if clicked:
            st.session_state["query_text"] = example

st.divider()
# -- Query input ---------------------------------------------------------------
# The text area shares the "query_text" session_state key written by the
# example buttons, so a clicked example pre-fills the box.
query = st.text_area(
    label="Enter your question",
    key="query_text",
    height=80,
    placeholder="e.g. What was Infosys revenue in FY25?",
    label_visibility="collapsed",
)

# Narrow column for the submit button; the wide one holds an empty placeholder.
col_btn, col_clear = st.columns([1, 6])
submit = col_btn.button("π Ask FinAgent", type="primary", use_container_width=True)
col_clear.empty()
# -- Pipeline loader -----------------------------------------------------------
# The first run of a session shows a status box while loading; later reruns
# call load_pipeline() directly without the status box.
if "pipeline_ready" not in st.session_state:
    st.session_state["pipeline_ready"] = False

if st.session_state["pipeline_ready"]:
    components, app = load_pipeline()
else:
    with st.status("βοΈ Loading models (first run only β takes ~30s)β¦", expanded=True) as status:
        for _step in (
            "Loading embedding model (all-mpnet-base-v2)β¦",
            "Connecting to ChromaDB vector storeβ¦",
            "Loading FinBERT sentiment modelβ¦",
        ):
            st.write(_step)
        components, app = load_pipeline()
        st.session_state["pipeline_ready"] = True
        status.update(label="β Pipeline ready", state="complete", expanded=False)
# -- Run query -----------------------------------------------------------------
# On submit: run the pipeline, then render metrics, the answer, citations,
# retrieved chunks, tool output and (when present) the FinBERT breakdown.
if submit and query.strip():
    with st.spinner("π€ Retrieving and reasoningβ¦"):
        t0 = time.time()
        try:
            result = run_query(query.strip(), components, app)
        except Exception as e:  # UI boundary: show the error and halt this run
            err_str = str(e)
            if "rate_limit_exceeded" in err_str or "429" in err_str:
                wait = _parse_retry_seconds(err_str)
                st.error(
                    f"β³ **Groq daily token limit reached.**\n\n"
                    f"Your free-tier quota (100,000 tokens/day) is exhausted. "
                    f"Please try again in **{wait}** β the limit resets at midnight UTC.\n\n"
                    f"**To avoid this:** upgrade to Groq Dev Tier at "
                    f"https://console.groq.com/settings/billing"
                )
            else:
                st.error(f"β Unexpected error: {err_str}")
            st.stop()
        elapsed = round(time.time() - t0, 1)

    # -- Extract fields ---------------------------------------------------------
    # ``or`` fallbacks cover keys that are present but set to None.
    answer = result.get("final_answer") or "No answer generated."
    intent = result.get("intent", "")
    faith_score = result.get("faithfulness_score", 0.0) or 0.0
    citations = result.get("citations") or []
    doc_dicts = result.get("retrieved_docs") or []
    tool_name = result.get("tool_name", "")
    tool_output = result.get("tool_output", {})
    iteration = result.get("iteration_count", 0) or 0

    # -- Top metrics row --------------------------------------------------------
    m1, m2, m3, m4 = st.columns(4)
    m1.metric("π― Intent", intent.replace("_", " ").title() if intent else "β")
    m2.metric("β‘ Latency", f"{elapsed}s")
    m3.metric("β Faithfulness", f"{faith_score:.0%}")
    m4.metric("π Self-RAG iters", f"{iteration + 1}")
    st.divider()

    # -- Answer -----------------------------------------------------------------
    st.markdown("### π‘ Answer")
    # The answer is generated text placed inside raw HTML, so escape <, > and &
    # to keep it from injecting markup into the page.
    st.markdown(
        f'<div class="answer-box">{html.escape(str(answer), quote=False)}</div>',
        unsafe_allow_html=True,
    )

    # -- Citations --------------------------------------------------------------
    # Drop None *before* sorting: sorting a mix of None and page numbers raises
    # TypeError.
    cited_pages = sorted({p for p in citations if p is not None})
    if cited_pages:
        chips = " ".join(
            f'<span class="cite-chip">π Page {html.escape(str(p), quote=False)}</span>'
            for p in cited_pages
        )
        st.markdown(f"**Citations:** {chips}", unsafe_allow_html=True)
    st.divider()

    # -- Retrieved context ------------------------------------------------------
    if doc_dicts:
        with st.expander(f"π Retrieved context ({len(doc_dicts)} chunks)", expanded=False):
            for i, doc in enumerate(doc_dicts, 1):
                page = doc.get("metadata", {}).get("page", "?")
                text = doc.get("page_content", "").strip()
                # Show at most 600 characters; truncate first, then escape, so
                # an HTML entity is never cut in half.
                excerpt = html.escape(text[:600], quote=False)
                more = "β¦" if len(text) > 600 else ""
                st.markdown(
                    f'<div class="context-box">'
                    f'<b>Chunk {i} β Page {html.escape(str(page), quote=False)}</b><br>{excerpt}'
                    f"{more}"
                    f"</div>",
                    unsafe_allow_html=True,
                )

    # -- Tool output ------------------------------------------------------------
    if tool_output and tool_name:
        with st.expander(f"π§ Tool output β `{tool_name}`", expanded=False):
            st.json(tool_output)

    # -- FinBERT sentiment (if investment alert) --------------------------------
    finbert = tool_output.get("finbert_sentiment") if isinstance(tool_output, dict) else None
    if finbert:
        with st.expander("πΉ FinBERT Sentiment Breakdown", expanded=False):
            label = (finbert.get("label") or "neutral").capitalize()
            score = finbert.get("score") or 0.0
            scores = finbert.get("all_scores") or {}
            fcol1, fcol2 = st.columns([1, 2])
            with fcol1:
                colour = {"Positive": "π’", "Negative": "π΄", "Neutral": "π‘"}.get(label, "βͺ")
                st.metric(f"{colour} Dominant sentiment", f"{label} ({score:.0%})")
            with fcol2:
                for sent, val in scores.items():
                    val = float(val)
                    # st.progress only accepts a float within [0.0, 1.0];
                    # clamp the bar but keep the real value in the caption.
                    st.progress(
                        min(max(val, 0.0), 1.0),
                        text=f"{sent.capitalize()}: {val:.1%}",
                    )
elif submit:
    # Submitted with an empty or whitespace-only question.
    st.warning("Please enter a question before clicking Ask.")
# -- Footer --------------------------------------------------------------------
st.divider()
_FOOTER_PARTS = (
    "FinAgent",
    "RAG + LangGraph + FinBERT",
    "Groq llama-3.3-70b-versatile",
    "ChromaDB 3,607 chunks",
    "Infosys AR 2024-25",
)
# Parts are joined with the same separator the caption has always used.
st.caption(" Β· ".join(_FOOTER_PARTS))