Spaces:
Running
Running
File size: 7,074 Bytes
"""
app.py
======
Streamlit chat UI for the Morningstar Financial Intelligence RAG Pipeline.
Deployed on Hugging Face Spaces (free CPU tier).
Vector store: Qdrant Cloud (free tier).
LLM: Google Gemini 1.5 Flash (free tier, 1M tokens/min).
Run locally:
streamlit run app.py
"""
import os
import streamlit as st
from dotenv import load_dotenv
load_dotenv()
# -- Page config ---------------------------------------------------------------
# Browser-tab title, favicon and layout for the whole app.
st.set_page_config(
    page_title="Financial Intelligence Assistant",
    # The icon literal had degraded to a lone "π", i.e. the lead byte of a
    # mis-decoded 4-byte emoji, which is not a usable favicon. It is written as
    # an escape sequence so it survives any future transcoding of this file.
    # NOTE(review): the bar-chart emoji is a best guess at the intended glyph.
    page_icon="\U0001F4CA",
    layout="wide",
)
# -- Load pipeline once (cached across sessions) -------------------------------
@st.cache_resource(show_spinner="Loading pipeline...")
def init_pipeline():
    """Build the three long-lived objects every chat turn relies on.

    The project modules are imported inside the function, so they are only
    loaded when the cached resource is actually constructed.

    Returns:
        tuple: ``(retriever, llm, guardrails)`` where ``retriever`` is a
        ``FinancialRetriever`` created with ``rerank=True``, ``llm`` is
        whatever ``get_llm()`` returns, and ``guardrails`` is a
        ``RAGGuardrails`` created with ``run_grounding=False``.
    """
    from src.retriever import FinancialRetriever
    from src.rag_chain import get_llm
    from src.guardrails import RAGGuardrails

    # Tuple elements are evaluated left to right, so construction order is
    # retriever -> llm -> guardrails.
    return (
        FinancialRetriever(rerank=True),
        get_llm(),
        RAGGuardrails(run_grounding=False),  # NLI skipped for latency
    )


retriever, llm, guardrails = init_pipeline()
# -- Sidebar -------------------------------------------------------------------
# Static "About" panel plus one-click sample questions.
with st.sidebar:
    st.title("About")
    st.markdown(
        "This assistant answers questions about **Apple Inc.** using:\n"
        "- SEC filings: 10-K, 10-Q, 8-K\n"
        "- Morningstar equity research reports\n\n"
        # Markdown needs TWO trailing spaces before the newline for a hard
        # line break; with a single space these three fields collapse into
        # one run-on paragraph.
        "**Retrieval:** Dense ANN + cross-encoder reranking  \n"
        # NOTE(review): the caption under the page title says "Gemini 1.5
        # Flash"; confirm which model get_llm() really returns and align both.
        "**LLM:** Google Gemini 2.5 Flash  \n"
        "**Vector store:** Qdrant Cloud\n\n"
        "_For informational purposes only. Not investment advice._"
    )
    st.divider()
    st.caption("Sample questions:")
    examples = [
        "What was Apple's total net sales in FY2024?",
        "What were Apple's main risk factors in the 2024 10-K?",
        "How did Apple's Services segment perform in FY2024?",
        "What is Apple's gross margin trend over the last 3 years?",
        "What did Apple say about AI in their latest filings?",
    ]
    for ex in examples:
        # A clicked button stashes its question under "prefill"; the chat
        # handler later pops that key and treats it like typed input.
        if st.button(ex, use_container_width=True):
            st.session_state["prefill"] = ex
# -- Main UI -------------------------------------------------------------------
st.title("Financial Intelligence Assistant")

# Sub-title: data sources on the left of the bar, tech stack on the right.
# NOTE(review): the sidebar text names "Gemini 2.5 Flash" while this caption
# says 1.5; confirm the deployed model and make the two agree.
header_caption = " | ".join(
    [
        "Apple SEC Filings (10-K, 10-Q, 8-K) + Morningstar Research",
        "Powered by Google Gemini 1.5 Flash + Qdrant",
    ]
)
st.caption(header_caption)
# -- Session state -------------------------------------------------------------
# The conversation lives in session state as a list of dicts carrying "role",
# "content" and, for some assistant turns, "sources".
if "messages" not in st.session_state:
    st.session_state["messages"] = []

# Replay the stored conversation so it is redrawn on every script run.
for past in st.session_state["messages"]:
    with st.chat_message(past["role"]):
        st.markdown(past["content"])
        cited = past.get("sources")
        if not cited:
            # Nothing to attribute for this message (user turns, refusals).
            continue
        with st.expander("Sources", expanded=False):
            for line in cited:
                st.caption(line)
# Handle prefilled example question from sidebar
prefill = st.session_state.pop("prefill", None)

# Block cursor appended while tokens stream in. Written as an escape because
# the literal had been mangled into "β" by a bad transcoding.
STREAM_CURSOR = "\u258c"


def _reject(message, notify):
    """Show *message*, record it as the assistant's reply, and stop the run.

    Args:
        message: Text to display and to store in the chat history.
        notify: Streamlit display callable used to render it
            (``st.warning`` or ``st.error``).
    """
    notify(message)
    st.session_state.messages.append({"role": "assistant", "content": message})
    st.stop()


def _source_label(index, chunk):
    """Return the one-line attribution caption for a retrieved chunk.

    Args:
        index: 1-based position of the chunk in the displayed list.
        chunk: Mapping with an optional ``"metadata"`` mapping and an
            optional numeric ``"score"``.

    Returns:
        str: ``"[i] <doc_type>[ FY<year>][ (filed <date>)][ | <heading>]
        | score: <x.xxx>"``; bracketed parts appear only when the
        corresponding metadata value is truthy.
    """
    # Tolerate chunks that arrive without metadata or with a null score
    # instead of crashing after the answer has already been streamed.
    meta = chunk.get("metadata") or {}
    score = chunk.get("score") or 0

    label = f"[{index}] {meta.get('doc_type', '')}"
    fiscal_year = meta.get("fiscal_year", "")
    if fiscal_year:
        label += f" FY{fiscal_year}"
    filed = meta.get("filing_date", "")
    if filed:
        label += f" (filed {filed})"
    heading = meta.get("heading_path", "") or meta.get("section_title", "")
    if heading:
        label += f" | {heading[:60]}"  # keep the caption to one short line
    return label + f" | score: {score:.3f}"


# -- Chat input ----------------------------------------------------------------
# A typed message wins; otherwise fall back to a clicked sample question.
query = st.chat_input("Ask about Apple financials...") or prefill
if query:
    # Show user message
    st.session_state.messages.append({"role": "user", "content": query})
    with st.chat_message("user"):
        st.markdown(query)

    with st.chat_message("assistant"):
        # Layer 1: input guardrail
        ok, reason = guardrails.gate_input(query)
        if not ok:
            _reject(reason, st.warning)

        # Retrieve chunks
        with st.spinner("Searching documents..."):
            chunks = retriever.retrieve(query, n_results=8)

        # Layer 2: retrieval guardrail
        ok, reason = guardrails.gate_retrieval(chunks)
        if not ok:
            _reject(reason, st.warning)

        # Build context
        context = retriever.build_context(chunks, max_chars=6000)

        # Build prompt
        from src.rag_chain import RAG_PROMPT_TEMPLATE
        from langchain_core.prompts import PromptTemplate
        from langchain_core.output_parsers import StrOutputParser

        prompt_template = PromptTemplate.from_template(RAG_PROMPT_TEMPLATE)
        chain = prompt_template | llm | StrOutputParser()

        # Stream response token by token
        full_answer = ""
        placeholder = st.empty()
        for chunk_token in chain.stream({"query": query, "context": context}):
            full_answer += chunk_token
            placeholder.markdown(full_answer + STREAM_CURSOR)
        placeholder.markdown(full_answer)

        # Layer 3: output guardrail (basic checks, no NLI for latency)
        try:
            final_answer, warnings = guardrails.gate_output(full_answer, chunks)
        except ValueError as e:
            # The guardrail refused the answer: take the streamed text off
            # the screen so it does not sit next to the error message.
            placeholder.empty()
            _reject(str(e), st.error)

        # Show exactly what is stored in history, in case the guardrail
        # returned a modified version of the streamed text.
        placeholder.markdown(final_answer)

        for w in warnings:
            st.caption(f"Note: {w}")

        # Build source attribution list (top five chunks only)
        sources = [_source_label(i, c) for i, c in enumerate(chunks[:5], 1)]
        if sources:
            with st.expander("Sources", expanded=False):
                for src in sources:
                    st.caption(src)

        # Save to session state
        st.session_state.messages.append({
            "role": "assistant",
            "content": final_answer,
            "sources": sources,
        })
|