Spaces:

sumitnewold
/

RAG

Sleeping

App Files Files Community

RAG / app.py

sumitnewold

Upload 10 files

76bd1fc verified 14 days ago

Raw

History Blame Contribute Delete

14.1 kB

	"""
	FinAgent — Infosys AR 2024-25 | Streamlit Q&A Interface

	Run:
	streamlit run app.py
	"""

	import html
	import json
	import re
	import time

	import streamlit as st

	# ── Page config (must be first Streamlit call) ────────────────────────────────
	# Browser-tab title and icon, wide layout, sidebar open on load.
	_PAGE_SETTINGS = {
	    "page_title": "FinAgent · Infosys AR Q&A",
	    "page_icon": "📊",
	    "layout": "wide",
	    "initial_sidebar_state": "expanded",
	}
	st.set_page_config(**_PAGE_SETTINGS)

	# ── Custom CSS ────────────────────────────────────────────────────────────────
	# Injects a single <style> block with the classes used by the HTML snippets
	# this script renders: .main-title / .sub-title (page header), .badge and its
	# colour variants (returned by intent_badge / score_colour), .cite-chip
	# (citation chips), .context-box (retrieved chunks) and .answer-box (answer
	# panel). .metric-card is defined here but not referenced by the code visible
	# in this file. unsafe_allow_html=True is passed so the markup is emitted as
	# HTML rather than shown as text.
	st.markdown(
	"""
	<style>
	.main-title { font-size:2.2rem; font-weight:700; color:#1a3c5e; margin-bottom:0; }
	.sub-title { font-size:1rem; color:#5a7fa6; margin-top:0; margin-bottom:1.5rem; }
	.metric-card { background:#f0f6ff; border-radius:10px; padding:12px 18px;
	border-left:4px solid #1a3c5e; margin-bottom:8px; }
	.badge { display:inline-block; padding:2px 10px; border-radius:12px;
	font-size:0.78rem; font-weight:600; margin-right:6px; }
	.badge-blue { background:#dbeafe; color:#1e40af; }
	.badge-green { background:#dcfce7; color:#166534; }
	.badge-amber { background:#fef9c3; color:#854d0e; }
	.badge-red { background:#fee2e2; color:#991b1b; }
	.cite-chip { display:inline-block; background:#e0e7ff; color:#3730a3;
	border-radius:6px; padding:1px 8px; font-size:0.8rem;
	margin:2px; font-weight:500; }
	.context-box { background:#fafafa; border:1px solid #e5e7eb; border-radius:8px;
	padding:10px 14px; margin:6px 0; font-size:0.85rem; }
	.answer-box { background:#f8faff; border-left:4px solid #3b82f6;
	padding:16px 20px; border-radius:8px; }
	</style>
	""",
	unsafe_allow_html=True,
	)


	# ── Helper: colour-coded intent badge ────────────────────────────────────────
	# Maps a pipeline intent key to (icon, CSS badge class, display label).
	INTENT_BADGE = {
	    "financial_metrics": ("📈", "badge-blue", "Financial Metrics"),
	    "compliance_check": ("⚖️", "badge-amber", "Compliance Check"),
	    "investment_alert": ("🔔", "badge-green", "Investment Alert"),
	    "analyst_review": ("📅", "badge-blue", "Analyst Review"),
	    "rag_query": ("🔍", "badge-blue", "General Query"),
	}


	def intent_badge(intent: str) -> str:
	    """Return an HTML ``<span>`` badge for *intent*.

	    Known intents (keys of ``INTENT_BADGE``) get their configured icon, CSS
	    class and label. Any other value falls back to a blue "❓" badge whose
	    label is the intent text itself, or "Unknown" when the intent is empty.

	    Args:
	        intent: Intent key produced by the pipeline; may be empty.

	    Returns:
	        An HTML snippet meant for ``st.markdown(..., unsafe_allow_html=True)``.
	    """
	    fallback = ("❓", "badge-blue", intent or "Unknown")
	    icon, css_class, label = INTENT_BADGE.get(intent, fallback)
	    # The fallback label is arbitrary text, so escape it before it is placed
	    # in markup. The built-in labels contain no special characters and are
	    # unaffected.
	    safe_label = html.escape(str(label))
	    return f'<span class="badge {css_class}">{icon} {safe_label}</span>'


	def score_colour(score: float) -> str:
	    """Pick the badge CSS class for a numeric score.

	    Scores of at least 0.8 map to green, at least 0.5 to amber, and
	    everything else to red.
	    """
	    # Thresholds are checked from highest to lowest; the first one met wins.
	    for threshold, css_class in ((0.8, "badge-green"), (0.5, "badge-amber")):
	        if score >= threshold:
	            return css_class
	    return "badge-red"


	# ── Cached resource: heavy models are built once and then reused ─────────────
	@st.cache_resource(show_spinner=False)
	def load_pipeline():
	    """Build the Phase 4 components and the LangGraph app built from them.

	    Decorated with ``st.cache_resource`` so that repeated calls reuse the
	    already-built objects instead of loading the models again.

	    Returns:
	        A ``(components, app)`` tuple as produced by ``phase4_tools``.
	    """
	    # Imported lazily so the module is only loaded when the pipeline is built.
	    from phase4_tools import build_phase4_components, build_phase4_graph

	    pipeline_components = build_phase4_components()
	    graph = build_phase4_graph(pipeline_components)
	    return pipeline_components, graph


	def run_query(query: str, components: dict, app) -> dict:
	    """Run one question through the compiled graph and return its final state.

	    Args:
	        query: The user's question.
	        components: Pipeline components; accepted for interface compatibility
	            but not used by this function.
	        app: Object exposing ``invoke(state)``.

	    Returns:
	        Whatever ``app.invoke`` returns for the freshly created initial state.
	    """
	    from phase3_agent import make_initial_state

	    return app.invoke(make_initial_state(query))


	def _parse_retry_seconds(error_msg: str) -> str:
	"""Extract human-readable wait time from a Groq rate-limit error message."""
	match = re.search(r"try again in ([\d]+m[\d.]+s\|[\d.]+s\|[\d]+m)", str(error_msg))
	return match.group(1) if match else "a few minutes"


	# ── Sidebar ───────────────────────────────────────────────────────────────────
	# Static description of the document, the pipeline stages and the agent intents.
	_PIPELINE_STAGES = (
	    "- 🔍 Hybrid retrieval — BM25 + Dense (HyDE)",
	    "- 🏆 Cross-encoder reranking — top-4 chunks",
	    "- 🤖 LangGraph agents — intent-routed",
	    "- 💹 FinBERT — domain sentiment",
	    "- ♻️ Self-RAG — inline faithfulness check",
	)
	_AGENT_INTENTS = (
	    ("📈", "Financial Metrics", "Revenue, margins, FCF, RoE"),
	    ("⚖️", "Compliance Check", "SEBI, risk factors, ESG"),
	    ("🔔", "Investment Alert", "Signals with FinBERT sentiment"),
	    ("📅", "Analyst Review", "Schedule review meetings"),
	    ("🔍", "General Query", "Free-form document Q&A"),
	)

	with st.sidebar:
	    st.markdown("## 📊 FinAgent")
	    st.markdown("Document: Infosys Integrated Annual Report 2024-25")
	    st.divider()

	    st.markdown("### Pipeline")
	    # One markdown call so the bullets render as a single list.
	    st.markdown("\n".join(_PIPELINE_STAGES))
	    st.divider()

	    st.markdown("### Agent Intents")
	    for intent_icon, intent_name, intent_desc in _AGENT_INTENTS:
	        st.markdown(f"{intent_icon} {intent_name} — {intent_desc}")

	    st.divider()
	    st.caption("Powered by Groq · LangGraph · ChromaDB · HuggingFace")


	# ── Main header ───────────────────────────────────────────────────────────────
	# Title and subtitle are raw HTML so they use the .main-title / .sub-title classes.
	_TITLE_HTML = '<p class="main-title">📊 FinAgent</p>'
	_SUBTITLE_HTML = (
	    '<p class="sub-title">Ask anything about the <b>Infosys Annual Report 2024-25</b></p>'
	)
	for _header_html in (_TITLE_HTML, _SUBTITLE_HTML):
	    st.markdown(_header_html, unsafe_allow_html=True)

	# ── Example queries ───────────────────────────────────────────────────────────
	st.markdown("##### Try an example")
	example_cols = st.columns(3)
	example_queries = [
	    "What was Infosys revenue and operating margin in FY25?",
	    "Are there any SEBI compliance issues mentioned in the annual report?",
	    "Generate an investment alert for Infosys based on FY25 performance.",
	    "What is Infosys AI strategy and Topaz platform?",
	    "What were the major risk factors disclosed by Infosys?",
	    "Schedule an analyst review for Infosys credit assessment.",
	]

	# The query lives in session_state so example clicks persist across reruns
	# (otherwise clicking an example then "Ask" submits an empty box).
	if "query_text" not in st.session_state:
	    st.session_state.query_text = ""

	# Two stacked buttons per column: column i shows examples 2*i and 2*i + 1.
	# The index is computed explicitly; the previous code referenced an undefined
	# name (`i2`) and raised NameError as soon as the page rendered.
	for i, col in enumerate(example_cols):
	    with col:
	        for idx in (i * 2, i * 2 + 1):
	            # Button labels are truncated to 45 characters; a click stores the
	            # full question under the key the text area below is bound to.
	            if st.button(
	                f"💬 {example_queries[idx][:45]}…",
	                key=f"ex_{idx}",
	                use_container_width=True,
	            ):
	                st.session_state.query_text = example_queries[idx]

	st.divider()

	# ── Query input ───────────────────────────────────────────────────────────────
	# The text area is keyed to "query_text" in session_state, so a value stored
	# there by an example button shows up here and survives the "Ask" rerun.
	query = st.text_area(
	    "Enter your question",
	    key="query_text",
	    height=80,
	    placeholder="e.g. What was Infosys revenue in FY25?",
	    label_visibility="collapsed",
	)

	# Narrow column for the submit button; the wide one only holds a placeholder.
	col_btn, col_clear = st.columns([1, 6])
	submit = col_btn.button("🔍 Ask FinAgent", type="primary", use_container_width=True)
	col_clear.empty()

	# ── Pipeline loader ───────────────────────────────────────────────────────────
	# `pipeline_ready` records whether this session has already shown the loading
	# status panel; load_pipeline() is called on every run either way.
	if "pipeline_ready" not in st.session_state:
	    st.session_state.pipeline_ready = False

	if st.session_state.pipeline_ready:
	    components, app = load_pipeline()
	else:
	    with st.status("⚙️ Loading models (first run only — takes ~30s)…", expanded=True) as status:
	        for _step in (
	            "Loading embedding model (all-mpnet-base-v2)…",
	            "Connecting to ChromaDB vector store…",
	            "Loading FinBERT sentiment model…",
	        ):
	            st.write(_step)
	        components, app = load_pipeline()
	        st.session_state.pipeline_ready = True
	        status.update(label="✅ Pipeline ready", state="complete", expanded=False)

	# ── Run query ─────────────────────────────────────────────────────────────────
	# On submit: run the question through the pipeline, then render the metrics
	# row, answer, citations, retrieved context, tool output and (when present)
	# the FinBERT sentiment breakdown.
	if submit and query.strip():
	    with st.spinner("🤔 Retrieving and reasoning…"):
	        t0 = time.perf_counter()
	        try:
	            result = run_query(query.strip(), components, app)
	        except Exception as e:  # top-level UI boundary: report the error and halt this run
	            err_str = str(e)
	            if "rate_limit_exceeded" in err_str or "429" in err_str:
	                wait = _parse_retry_seconds(err_str)
	                st.error(
	                    "⏳ Groq daily token limit reached.\n\n"
	                    "Your free-tier quota (100,000 tokens/day) is exhausted. "
	                    f"Please try again in {wait} — the limit resets at midnight UTC.\n\n"
	                    "To avoid this: upgrade to Groq Dev Tier at "
	                    "https://console.groq.com/settings/billing"
	                )
	            else:
	                st.error(f"❌ Unexpected error: {err_str}")
	            # Stop in both error cases: `result` is undefined past this point.
	            st.stop()
	        elapsed = round(time.perf_counter() - t0, 1)

	    # ── Extract fields ────────────────────────────────────────────────────────
	    # `or <default>` also covers keys that are present but set to None/empty,
	    # which a plain .get(key, default) would pass through.
	    answer = result.get("final_answer") or "No answer generated."
	    intent = result.get("intent") or ""
	    faith_score = result.get("faithfulness_score") or 0.0
	    citations = result.get("citations") or []
	    doc_dicts = result.get("retrieved_docs") or []
	    tool_name = result.get("tool_name") or ""
	    tool_output = result.get("tool_output") or {}
	    iteration = result.get("iteration_count") or 0

	    # ── Top metrics row ───────────────────────────────────────────────────────
	    m1, m2, m3, m4 = st.columns(4)
	    m1.metric("🎯 Intent", intent.replace("_", " ").title() if intent else "—")
	    m2.metric("⚡ Latency", f"{elapsed}s")
	    m3.metric("✅ Faithfulness", f"{faith_score:.0%}")
	    m4.metric("🔄 Self-RAG iters", f"{iteration + 1}")

	    st.divider()

	    # ── Answer ────────────────────────────────────────────────────────────────
	    st.markdown("### 💡 Answer")
	    # NOTE(review): the answer is inserted into the HTML as-is, so any markup
	    # in the model output is interpreted by the browser. Escape it here if the
	    # answer should be treated as plain text.
	    st.markdown(
	        f'<div class="answer-box">{answer}</div>',
	        unsafe_allow_html=True,
	    )

	    # ── Citations ─────────────────────────────────────────────────────────────
	    # Drop None BEFORE sorting: sorting a collection that mixes None with page
	    # numbers raises TypeError.
	    pages = {p for p in citations if p is not None}
	    if pages:
	        try:
	            ordered_pages = sorted(pages)
	        except TypeError:
	            # Mixed page types (e.g. int and str) are not mutually orderable.
	            ordered_pages = sorted(pages, key=str)
	        chips = " ".join(
	            f'<span class="cite-chip">📄 Page {html.escape(str(p))}</span>'
	            for p in ordered_pages
	        )
	        st.markdown(f"Citations: {chips}", unsafe_allow_html=True)

	    st.divider()

	    # ── Retrieved context ─────────────────────────────────────────────────────
	    if doc_dicts:
	        with st.expander(f"📚 Retrieved context ({len(doc_dicts)} chunks)", expanded=False):
	            for i, doc in enumerate(doc_dicts, 1):
	                page = (doc.get("metadata") or {}).get("page", "?")
	                text = (doc.get("page_content") or "").strip()
	                # Chunk text is raw document text: escape it so characters
	                # such as "<" or "&" display literally instead of being
	                # parsed as markup.
	                snippet = html.escape(text[:600])
	                ellipsis = "…" if len(text) > 600 else ""
	                st.markdown(
	                    f'<div class="context-box">'
	                    f"<b>Chunk {i} — Page {html.escape(str(page))}</b><br>{snippet}"
	                    f"{ellipsis}"
	                    f"</div>",
	                    unsafe_allow_html=True,
	                )

	    # ── Tool output ───────────────────────────────────────────────────────────
	    if tool_output and tool_name:
	        with st.expander(f"🔧 Tool output — `{tool_name}`", expanded=False):
	            st.json(tool_output)

	    # ── FinBERT sentiment (if investment alert) ───────────────────────────────
	    finbert = tool_output.get("finbert_sentiment") if isinstance(tool_output, dict) else None
	    if finbert:
	        with st.expander("💹 FinBERT Sentiment Breakdown", expanded=False):
	            label = str(finbert.get("label") or "neutral").capitalize()
	            score = finbert.get("score") or 0.0
	            scores = finbert.get("all_scores") or {}

	            fcol1, fcol2 = st.columns([1, 2])
	            with fcol1:
	                colour = {"Positive": "🟢", "Negative": "🔴", "Neutral": "🟡"}.get(label, "⚪")
	                st.metric(f"{colour} Dominant sentiment", f"{label} ({score:.0%})")
	            with fcol2:
	                for sent, val in scores.items():
	                    # Coerce to a plain float and clamp to [0, 1] so numpy
	                    # scalars or slightly out-of-range values do not make
	                    # st.progress raise.
	                    fraction = float(val)
	                    st.progress(
	                        min(max(fraction, 0.0), 1.0),
	                        text=f"{str(sent).capitalize()}: {fraction:.1%}",
	                    )

	elif submit:
	    # Submit was clicked with an empty or whitespace-only question.
	    st.warning("Please enter a question before clicking Ask.")

	# ── Footer ────────────────────────────────────────────────────────────────────
	st.divider()
	# Caption segments, joined with the same " · " separator used in the sidebar.
	_FOOTER_PARTS = (
	    "FinAgent",
	    "RAG + LangGraph + FinBERT",
	    "Groq llama-3.3-70b-versatile",
	    "ChromaDB 3,607 chunks",
	    "Infosys AR 2024-25",
	)
	st.caption(" · ".join(_FOOTER_PARTS))