Spaces:
Sleeping
Sleeping
Update src/streamlit_app.py
Browse files- src/streamlit_app.py +212 -76
src/streamlit_app.py
CHANGED
|
@@ -1,35 +1,92 @@
|
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import pandas as pd
|
| 3 |
import matplotlib.pyplot as plt
|
| 4 |
import google.generativeai as genai
|
| 5 |
-
import json, os, re
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
@st.cache_data
|
| 13 |
-
def process_loan_data(df: pd.DataFrame):
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
| 15 |
df["loan_status"] = df["loan_status"].astype(int)
|
| 16 |
-
|
|
|
|
|
|
|
| 17 |
pd_seg = group["loan_status"].mean()
|
| 18 |
-
lgd_seg = (1 - group["credit_score"].mean() / 850)
|
| 19 |
ead_seg = group["loan_amnt"].sum()
|
| 20 |
ecl_seg = pd_seg * lgd_seg * ead_seg
|
| 21 |
ecl_df = pd.concat([pd_seg, lgd_seg, ead_seg, ecl_seg], axis=1)
|
| 22 |
ecl_df.columns = ["PD", "LGD", "EAD", "ECL"]
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
prompt = f"""
|
| 30 |
-
You are a financial risk advisor.
|
| 31 |
-
Analyze the following loan segment and return ONLY valid JSON.
|
| 32 |
-
Schema:
|
| 33 |
{{"action":"increase_interest"|"reduce_disbursement"|"maintain","rationale":"string","confidence":float}}
|
| 34 |
|
| 35 |
Segment: {segment}
|
|
@@ -39,77 +96,156 @@ EAD: {ead_val:,.0f}
|
|
| 39 |
ECL: {ecl_val:,.0f}
|
| 40 |
|
| 41 |
Rules:
|
| 42 |
-
- PD > 0.25
|
| 43 |
-
- 0.20
|
| 44 |
-
- PD < 0.15
|
| 45 |
|
| 46 |
-
Respond with
|
| 47 |
"""
|
| 48 |
|
|
|
|
| 49 |
try:
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
#
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
match = re.search(r"\{.*\}", text, re.DOTALL)
|
| 60 |
-
if match:
|
| 61 |
-
text = match.group(0)
|
| 62 |
-
|
| 63 |
-
data = json.loads(text)
|
| 64 |
return data
|
| 65 |
-
|
| 66 |
except Exception as e:
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
#
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
if uploaded:
|
| 78 |
df = pd.read_csv(uploaded)
|
| 79 |
-
|
| 80 |
-
st.
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
col1, col2 = st.columns(2)
|
| 85 |
with col1:
|
| 86 |
st.subheader("ECL by Segment")
|
| 87 |
-
fig, ax = plt.subplots(figsize=(
|
| 88 |
-
ax.bar(ecl_df["
|
| 89 |
-
ax.set_xlabel("Segment"); ax.set_ylabel("ECL")
|
| 90 |
-
plt.xticks(rotation=45)
|
| 91 |
st.pyplot(fig)
|
| 92 |
with col2:
|
| 93 |
st.subheader("PD by Segment")
|
| 94 |
-
fig2, ax2 = plt.subplots(figsize=(
|
| 95 |
-
ax2.bar(ecl_df["
|
| 96 |
-
ax2.set_xlabel("Segment"); ax2.set_ylabel("PD")
|
| 97 |
-
plt.xticks(rotation=45)
|
| 98 |
st.pyplot(fig2)
|
| 99 |
|
| 100 |
-
#
|
| 101 |
-
st.subheader("Analyze
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
st.
|
| 112 |
-
|
| 113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
else:
|
| 115 |
-
st.info("
|
|
|
|
| 1 |
+
# src/streamlit_app.py
|
| 2 |
import streamlit as st
|
| 3 |
import pandas as pd
|
| 4 |
import matplotlib.pyplot as plt
|
| 5 |
import google.generativeai as genai
|
| 6 |
+
import json, os, re, time
|
| 7 |
+
from datetime import datetime
|
| 8 |
+
from sqlalchemy import create_engine, Column, Integer, String, Float, DateTime
|
| 9 |
+
from sqlalchemy.orm import declarative_base, sessionmaker
|
| 10 |
+
|
| 11 |
+
# ========== CONFIG ==========
|
| 12 |
+
# Page title and wide layout for the whole app.
st.set_page_config(page_title="ECL Decision Assistant", layout="wide")

# The Gemini key is read from the environment; without it the app still runs
# but only warns, and the Gemini client is left unconfigured.
GEN_API_KEY = os.getenv("GEMINI_API_KEY")
if not GEN_API_KEY:
    st.warning("GEMINI_API_KEY not found in env. Set it in HF Space secrets to enable AI decisions.")
else:
    genai.configure(api_key=GEN_API_KEY)
|
| 18 |
+
|
| 19 |
+
# Simple credential store (replace with secure store in production)
|
| 20 |
+
# Credential store: username -> {"password": str, "role": str}.
# Each password may be overridden through an environment variable
# (ECL_ANALYST_PASSWORD / ECL_CRO_PASSWORD) so a deployment does not have to
# rely on the literals committed here. An unset or empty variable falls back
# to the historical default, which keeps existing setups working.
# NOTE(review): passwords are still held and compared in plaintext.
USERS = {
    "analyst": {
        "password": os.getenv("ECL_ANALYST_PASSWORD") or "analyst123",
        "role": "analyst",
    },
    "cro": {
        "password": os.getenv("ECL_CRO_PASSWORD") or "cro123",
        "role": "cro",
    },
}
|
| 24 |
+
|
| 25 |
+
# SQLite DB for persisting reports
|
| 26 |
+
DB_FILE = "reports.db"


@st.cache_resource(show_spinner=False)
def _get_engine():
    """Build the SQLite engine once and reuse it across script reruns.

    Without caching, every rerun of this script would construct a fresh
    engine (and connection pool) for the same database file.
    """
    # check_same_thread=False lets the connection be used from a thread other
    # than the one that created it.
    return create_engine(f"sqlite:///{DB_FILE}", connect_args={"check_same_thread": False})


engine = _get_engine()
Base = declarative_base()
SessionLocal = sessionmaker(bind=engine)
|
| 30 |
+
|
| 31 |
+
class Report(Base):
    """ORM row in the ``reports`` table: one recorded decision for a segment."""

    __tablename__ = "reports"

    # Primary key.
    id = Column(Integer, primary_key=True, index=True)

    # Segment label and the metrics it was evaluated with.
    segment = Column(String)
    pd = Column(Float)
    lgd = Column(Float)
    ead = Column(Float)
    ecl = Column(Float)

    # Decision payload (action / rationale / confidence).
    action = Column(String)
    rationale = Column(String)
    confidence = Column(Float)

    # Audit fields: who generated the record and when.
    generated_by = Column(String)
    created_at = Column(DateTime)


# Create the table(s) declared on Base if they are not present yet.
Base.metadata.create_all(bind=engine)
|
| 46 |
+
|
| 47 |
+
# ========== UTILITIES ==========
|
| 48 |
@st.cache_data
|
| 49 |
+
def process_loan_data(df: pd.DataFrame, segment_col: str = "loan_intent"):
    """Compute PD, LGD, EAD and ECL for every value of ``segment_col``.

    Rows with a missing value in the segment column, ``credit_score``,
    ``loan_amnt`` or ``loan_status`` are dropped before aggregation.

    Per segment:
      * PD  = mean of ``loan_status`` (cast to int)
      * LGD = 1 - mean(``credit_score``) / 850, clipped to [0, 1]
      * EAD = sum of ``loan_amnt``
      * ECL = PD * LGD * EAD

    Args:
        df: Loan-level data. It is not modified.
        segment_col: Name of the column to group by.

    Returns:
        DataFrame with columns ``segment``, ``PD``, ``LGD``, ``EAD``, ``ECL``
        (one row per segment).

    Raises:
        KeyError: If any required column is absent from ``df``.
    """
    required = [segment_col, "credit_score", "loan_amnt", "loan_status"]
    missing = [col for col in required if col not in df.columns]
    if missing:
        # Fail early with the full list instead of a bare lookup error.
        raise KeyError(f"Missing required column(s): {', '.join(missing)}")

    # Explicit copy: the columns below are reassigned, and doing that on a
    # filtered frame can raise pandas' chained-assignment warning.
    clean = df.dropna(subset=required).copy()

    # ensure types
    clean["loan_status"] = clean["loan_status"].astype(int)
    clean["credit_score"] = clean["credit_score"].astype(float)
    clean["loan_amnt"] = clean["loan_amnt"].astype(float)

    grouped = clean.groupby(segment_col)
    pd_seg = grouped["loan_status"].mean()
    # LGD is a fraction of exposure; bound it on both sides so scores outside
    # the 0-850 range cannot produce a negative or >100% loss rate.
    lgd_seg = (1 - grouped["credit_score"].mean() / 850).clip(lower=0.0, upper=1.0)
    ead_seg = grouped["loan_amnt"].sum()
    ecl_seg = pd_seg * lgd_seg * ead_seg

    ecl_df = pd.concat([pd_seg, lgd_seg, ead_seg, ecl_seg], axis=1)
    ecl_df.columns = ["PD", "LGD", "EAD", "ECL"]
    return ecl_df.reset_index().rename(columns={segment_col: "segment"})
|
| 66 |
+
|
| 67 |
+
def sanitize_parse_json(text: str):
    """Return the first JSON object embedded in ``text`` as a dict.

    The text is scanned left to right; at every ``{`` a decode is attempted
    and the first one that succeeds wins. Surrounding material (markdown
    fences, prose, further JSON objects) is ignored, so fences need no
    separate stripping step.

    Args:
        text: Raw model output that should contain a JSON object.

    Returns:
        The decoded object (always a ``dict``).

    Raises:
        ValueError: If ``text`` is empty/blank or contains no decodable
            JSON object.
    """
    if not text or not text.strip():
        raise ValueError("Empty response")

    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            # raw_decode parses one value starting at `start` and tolerates
            # trailing characters, unlike json.loads on a greedy regex match.
            obj, _end = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            # This brace did not open valid JSON (e.g. prose); try the next.
            start = text.find("{", start + 1)
            continue
        return obj

    raise ValueError("No JSON object found in response")
|
| 81 |
+
|
| 82 |
+
def get_gemini_decision_single(segment, pd_val, lgd_val, ead_val, ecl_val):
|
| 83 |
+
"""Single Gemini call per selected segment. Robust cleaning. Returns dict."""
|
| 84 |
+
# If API key missing, return deterministic fallback
|
| 85 |
+
if not GEN_API_KEY:
|
| 86 |
+
return {"action": "maintain", "rationale": "No API key configured", "confidence": 0.0}
|
| 87 |
|
| 88 |
prompt = f"""
|
| 89 |
+
You are a financial risk advisor. Return ONLY one valid JSON object with this schema:
|
|
|
|
|
|
|
| 90 |
{{"action":"increase_interest"|"reduce_disbursement"|"maintain","rationale":"string","confidence":float}}
|
| 91 |
|
| 92 |
Segment: {segment}
|
|
|
|
| 96 |
ECL: {ecl_val:,.0f}
|
| 97 |
|
| 98 |
Rules:
|
| 99 |
+
- PD > 0.25 => increase_interest
|
| 100 |
+
- 0.20 <= PD <= 0.25 => reduce_disbursement
|
| 101 |
+
- PD < 0.15 => maintain
|
| 102 |
|
| 103 |
+
Respond with a single JSON object and nothing else.
|
| 104 |
"""
|
| 105 |
|
| 106 |
+
# Use model.generate_content with single prompt string (compat for HF)
|
| 107 |
try:
|
| 108 |
+
model = genai.GenerativeModel("gemini-2.5-flash-lite")
|
| 109 |
+
resp = model.generate_content(prompt, generation_config={"temperature": 0.05})
|
| 110 |
+
raw = resp.text if hasattr(resp, "text") else str(resp)
|
| 111 |
+
# parse
|
| 112 |
+
data = sanitize_parse_json(raw)
|
| 113 |
+
# validate keys
|
| 114 |
+
for k in ("action", "rationale", "confidence"):
|
| 115 |
+
if k not in data:
|
| 116 |
+
raise ValueError(f"Missing key: {k}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
return data
|
|
|
|
| 118 |
except Exception as e:
|
| 119 |
+
# handle rate limits explicitly
|
| 120 |
+
msg = str(e)
|
| 121 |
+
if "429" in msg or "Resource exhausted" in msg:
|
| 122 |
+
return {"action": "maintain", "rationale": "API quota exhausted - retry later", "confidence": 0.0}
|
| 123 |
+
# fallback deterministic rule as final fallback
|
| 124 |
+
if pd_val > 0.25:
|
| 125 |
+
return {"action": "increase_interest", "rationale": "PD > 0.25 (deterministic fallback)", "confidence": 0.8}
|
| 126 |
+
if 0.20 <= pd_val <= 0.25:
|
| 127 |
+
return {"action": "reduce_disbursement", "rationale": "PD in 0.20-0.25 (deterministic fallback)", "confidence": 0.7}
|
| 128 |
+
return {"action": "maintain", "rationale": "Fallback - parse or API error", "confidence": 0.0}
|
| 129 |
+
|
| 130 |
+
def save_report_to_db(row, decision, username):
    """Persist one decision as a ``Report`` row and return its id.

    Args:
        row: Mapping with keys ``segment``, ``PD``, ``LGD``, ``EAD``, ``ECL``.
        decision: Dict-like with optional ``action``, ``rationale`` and
            ``confidence`` entries.
        username: Stored in ``generated_by``.

    Returns:
        The primary key of the inserted row.

    Raises:
        Exception: Any database error is re-raised after the transaction has
            been rolled back.
    """
    # The decision may come from a model, so `confidence` can be null or
    # non-numeric; store 0.0 rather than failing the whole save.
    try:
        confidence = float(decision.get("confidence", 0.0))
    except (TypeError, ValueError):
        confidence = 0.0

    session = SessionLocal()
    try:
        report = Report(
            segment=row["segment"],
            pd=float(row["PD"]),
            lgd=float(row["LGD"]),
            ead=float(row["EAD"]),
            ecl=float(row["ECL"]),
            action=decision.get("action"),
            rationale=decision.get("rationale"),
            confidence=confidence,
            generated_by=username,
            created_at=datetime.utcnow(),
        )
        session.add(report)
        session.commit()
        # Reload so the generated primary key is populated before closing.
        session.refresh(report)
        return report.id
    except Exception:
        session.rollback()
        raise
    finally:
        # Always release the session, including on failure.
        session.close()
|
| 149 |
+
|
| 150 |
+
def load_reports_from_db(username, role):
    """Load stored reports, newest first, as a DataFrame.

    Args:
        username: Only this user's reports are returned unless ``role`` is
            ``"cro"``.
        role: ``"cro"`` sees every report; any other role is restricted to
            rows where ``generated_by == username``.

    Returns:
        DataFrame with columns id, segment, pd, lgd, ead, ecl, action,
        rationale, confidence, generated_by, created_at. The columns are
        present even when there are no rows.
    """
    columns = [
        "id", "segment", "pd", "lgd", "ead", "ecl",
        "action", "rationale", "confidence", "generated_by", "created_at",
    ]
    session = SessionLocal()
    try:
        query = session.query(Report)
        if role != "cro":
            query = query.filter(Report.generated_by == username)
        rows = query.order_by(Report.created_at.desc()).all()
        # Copy values into plain dicts while the session is still open.
        records = [{col: getattr(rec, col) for col in columns} for rec in rows]
    finally:
        # Release the session even if the query raises.
        session.close()
    return pd.DataFrame(records, columns=columns)
|
| 171 |
+
|
| 172 |
+
# ========== UI - AUTH ==========
|
| 173 |
+
# Sidebar login form. Nothing below this section runs until sign-in succeeds.
sidebar = st.sidebar
sidebar.title("Login")
username = sidebar.text_input("Username")
password = sidebar.text_input("Password", type="password")

# First run of a session: start unauthenticated.
if "auth_ok" not in st.session_state:
    st.session_state["auth_ok"] = False

if sidebar.button("Sign in"):
    user = USERS.get(username)
    if not user or user["password"] != password:
        sidebar.error("Invalid credentials")
    else:
        # Remember identity and role for the rest of the session.
        st.session_state["auth_ok"] = True
        st.session_state["username"] = username
        st.session_state["role"] = user["role"]
        sidebar.success(f"Signed in as {username} ({user['role']})")

# Halt the script here for unauthenticated visitors.
if not st.session_state["auth_ok"]:
    st.stop()
|
| 189 |
+
|
| 190 |
+
# ========== MAIN ==========
|
| 191 |
+
st.header("ECL Decision Assistant")
st.write(f"Signed in as **{st.session_state.username}** ({st.session_state.role})")


def _plot_segment_bar(ecl_df, value_col, **bar_kwargs):
    """Draw a bar chart of ``value_col`` per segment, then release the figure."""
    fig, ax = plt.subplots(figsize=(8, 3))
    ax.bar(ecl_df["segment"], ecl_df[value_col], **bar_kwargs)
    ax.set_xlabel("Segment")
    ax.set_ylabel(value_col)
    plt.xticks(rotation=45)
    st.pyplot(fig)
    # Close explicitly so figures do not pile up in pyplot's registry on
    # every rerun of the script.
    plt.close(fig)


def _render_segment_analysis(df):
    """Render summary, plots and the single-segment Gemini request for ``df``.

    Returns early with an on-page message when the upload cannot be analysed
    (missing columns, no text column to segment by, no complete rows).
    """
    st.write("Sample rows:")
    st.dataframe(df.head(), width='stretch')

    missing = [c for c in ("credit_score", "loan_amnt", "loan_status") if c not in df.columns]
    if missing:
        st.error(f"Uploaded CSV is missing required column(s): {', '.join(missing)}")
        return

    # allow user to choose segmentation column
    seg_options = [c for c in df.columns if df[c].dtype == object]
    if not seg_options:
        # A selectbox with index=0 over an empty option list would raise.
        st.error("Uploaded CSV has no text column to segment by.")
        return
    seg_col = st.selectbox("Segment by column", options=seg_options, index=0)

    try:
        ecl_df = process_loan_data(df, segment_col=seg_col)
    except (KeyError, TypeError, ValueError) as exc:
        # e.g. non-numeric values in credit_score / loan_amnt / loan_status.
        st.error(f"Could not compute ECL: {exc}")
        return
    if ecl_df.empty:
        st.warning("No rows with complete segment, credit_score, loan_amnt and loan_status values.")
        return

    st.subheader("Segment-level ECL Summary")
    st.dataframe(ecl_df, width='stretch')

    # Plots
    col1, col2 = st.columns(2)
    with col1:
        st.subheader("ECL by Segment")
        _plot_segment_bar(ecl_df, "ECL")
    with col2:
        st.subheader("PD by Segment")
        _plot_segment_bar(ecl_df, "PD", color="gray")

    # Select single segment for Gemini
    st.subheader("Analyze one segment (single API call)")
    selected = st.selectbox("Choose a segment to analyze", ecl_df["segment"].tolist())
    row = ecl_df[ecl_df["segment"] == selected].iloc[0]
    st.write(f"PD: {row.PD:.3f} | LGD: {row.LGD:.3f} | EAD: {row.EAD:,.0f} | ECL: {row.ECL:,.0f}")

    # Optionally show top segments only to reduce API usage
    n_segments = len(ecl_df)
    top_n = st.number_input(
        "Show top N segments by ECL (for reference)",
        min_value=1,
        max_value=n_segments,
        # The default must not exceed max_value, which happens with fewer
        # than five segments.
        value=min(5, n_segments),
    )
    st.write(ecl_df.sort_values("ECL", ascending=False).head(top_n))

    if st.button("Request Gemini decision for selected segment"):
        with st.spinner("Querying Gemini (single call)..."):
            decision = get_gemini_decision_single(row["segment"], row["PD"], row["LGD"], row["EAD"], row["ECL"])
        # save
        rec_id = save_report_to_db(row, decision, st.session_state.username)
        st.success("Decision recorded")
        st.json({"record_id": rec_id, "segment": row["segment"], "decision": decision})


# Upload CSV
uploaded = st.file_uploader("Upload loan CSV (must contain loan_intent, credit_score, loan_amnt, loan_status)", type=["csv"])
if uploaded:
    _render_segment_analysis(pd.read_csv(uploaded))
|
| 238 |
+
|
| 239 |
+
# Historical reports section
|
| 240 |
+
# Past reports: shown to every signed-in user; the role decides whose rows
# load_reports_from_db returns.
st.subheader("Past Reports")
reports_df = load_reports_from_db(st.session_state.username, st.session_state.role)
if not reports_df.empty:
    st.dataframe(reports_df, width='stretch')
    # allow filtering by action
    action_filter = st.selectbox("Filter by action (All / increase_interest / reduce_disbursement / maintain)", ["All", "increase_interest", "reduce_disbursement", "maintain"])
    if action_filter != "All":
        st.dataframe(reports_df[reports_df["action"] == action_filter], width='stretch')
    # Render the download button directly. Nesting it under st.button made it
    # appear only for the single rerun after the outer click, so it needed two
    # clicks and then disappeared.
    st.download_button("Download reports CSV", reports_df.to_csv(index=False).encode("utf-8"), file_name="reports.csv", mime="text/csv")
else:
    st.info("No reports recorded yet (use 'Request Gemini decision' to create one).")
|