AKKI-AFK committed on
Commit
4b71f90
·
verified ·
1 Parent(s): e2583c6

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +212 -76
src/streamlit_app.py CHANGED
@@ -1,35 +1,92 @@
 
1
  import streamlit as st
2
  import pandas as pd
3
  import matplotlib.pyplot as plt
4
  import google.generativeai as genai
5
- import json, os, re
6
-
7
- # ====== CONFIG ======
8
- st.set_page_config(page_title="ECL Risk Analyzer", layout="wide")
9
- genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
10
-
11
- # ====== HELPERS ======
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  @st.cache_data
13
- def process_loan_data(df: pd.DataFrame):
14
- df = df.dropna(subset=["loan_intent", "credit_score", "loan_amnt", "loan_status"])
 
 
 
15
  df["loan_status"] = df["loan_status"].astype(int)
16
- group = df.groupby("loan_intent")
 
 
17
  pd_seg = group["loan_status"].mean()
18
- lgd_seg = (1 - group["credit_score"].mean() / 850)
19
  ead_seg = group["loan_amnt"].sum()
20
  ecl_seg = pd_seg * lgd_seg * ead_seg
21
  ecl_df = pd.concat([pd_seg, lgd_seg, ead_seg, ecl_seg], axis=1)
22
  ecl_df.columns = ["PD", "LGD", "EAD", "ECL"]
23
- return ecl_df.reset_index()
24
-
25
- def get_gemini_decision(segment, pd_val, lgd_val, ead_val, ecl_val):
26
- """Gemini-backed risk decision. Single call, valid for all SDK versions."""
27
- model = genai.GenerativeModel("gemini-2.5-flash-lite")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  prompt = f"""
30
- You are a financial risk advisor.
31
- Analyze the following loan segment and return ONLY valid JSON.
32
- Schema:
33
  {{"action":"increase_interest"|"reduce_disbursement"|"maintain","rationale":"string","confidence":float}}
34
 
35
  Segment: {segment}
@@ -39,77 +96,156 @@ EAD: {ead_val:,.0f}
39
  ECL: {ecl_val:,.0f}
40
 
41
  Rules:
42
- - PD > 0.25 increase_interest
43
- - 0.20 PD 0.25 reduce_disbursement
44
- - PD < 0.15 maintain
45
 
46
- Respond with one JSON object only.
47
  """
48
 
 
49
  try:
50
- resp = model.generate_content(prompt, generation_config={"temperature": 0.1})
51
- text = resp.text.strip()
52
-
53
- # clean markdown fences if any
54
- text = re.sub(r"^```json", "", text)
55
- text = re.sub(r"^```", "", text)
56
- text = re.sub(r"```$", "", text)
57
-
58
- # extract first {...} JSON block
59
- match = re.search(r"\{.*\}", text, re.DOTALL)
60
- if match:
61
- text = match.group(0)
62
-
63
- data = json.loads(text)
64
  return data
65
-
66
  except Exception as e:
67
- st.warning(f"⚠️ Gemini output parse failed: {e}")
68
- st.text_area("Raw Gemini output", value=resp.text if 'resp' in locals() else "No response", height=150)
69
- return {"action": "maintain", "rationale": "Fallback - parse failure", "confidence": 0.0}
70
-
71
- # ====== UI ======
72
- st.title("📊 Expected Credit Loss (ECL) Risk Dashboard")
73
- st.write("Upload your **loan dataset**, review segment-level ECL metrics, and analyze one segment at a time with Gemini.")
74
-
75
- uploaded = st.file_uploader("Upload CSV dataset", type=["csv"])
76
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  if uploaded:
78
  df = pd.read_csv(uploaded)
79
- ecl_df = process_loan_data(df)
80
- st.success("Dataset processed successfully.")
81
- st.dataframe(ecl_df, use_container_width=True, hide_index=True)
82
-
83
- # --- Visual overview ---
 
 
 
 
84
  col1, col2 = st.columns(2)
85
  with col1:
86
  st.subheader("ECL by Segment")
87
- fig, ax = plt.subplots(figsize=(6, 3))
88
- ax.bar(ecl_df["loan_intent"], ecl_df["ECL"])
89
- ax.set_xlabel("Segment"); ax.set_ylabel("ECL")
90
- plt.xticks(rotation=45)
91
  st.pyplot(fig)
92
  with col2:
93
  st.subheader("PD by Segment")
94
- fig2, ax2 = plt.subplots(figsize=(6, 3))
95
- ax2.bar(ecl_df["loan_intent"], ecl_df["PD"], color="gray")
96
- ax2.set_xlabel("Segment"); ax2.set_ylabel("PD")
97
- plt.xticks(rotation=45)
98
  st.pyplot(fig2)
99
 
100
- # --- Segment selection ---
101
- st.subheader("Analyze Specific Segment")
102
- segments = ecl_df["loan_intent"].unique().tolist()
103
- selected_segment = st.selectbox("Choose a segment:", segments)
104
-
105
- row = ecl_df[ecl_df["loan_intent"] == selected_segment].iloc[0]
106
- st.write(f"**PD:** {row.PD:.3f} | **LGD:** {row.LGD:.3f} | **EAD:** {row.EAD:,.0f} | **ECL:** {row.ECL:,.0f}")
107
-
108
- if st.button("Generate Gemini Decision"):
109
- with st.spinner("Querying Gemini..."):
110
- decision = get_gemini_decision(row["loan_intent"], row["PD"], row["LGD"], row["EAD"], row["ECL"])
111
- st.success("Gemini Decision:")
112
- st.json(decision)
113
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  else:
115
- st.info("Upload a CSV file to begin.")
 
1
+ # app.py
2
  import streamlit as st
3
  import pandas as pd
4
  import matplotlib.pyplot as plt
5
  import google.generativeai as genai
6
+ import json, os, re, time
7
+ from datetime import datetime
8
+ from sqlalchemy import create_engine, Column, Integer, String, Float, DateTime
9
+ from sqlalchemy.orm import declarative_base, sessionmaker
10
+
11
# ========== CONFIG ==========
st.set_page_config(page_title="ECL Decision Assistant", layout="wide")

# The Gemini key is optional: without it the app still runs and only warns.
GEN_API_KEY = os.getenv("GEMINI_API_KEY")
if not GEN_API_KEY:
    st.warning("GEMINI_API_KEY not found in env. Set it in HF Space secrets to enable AI decisions.")
else:
    genai.configure(api_key=GEN_API_KEY)

# Simple credential store (replace with secure store in production)
# SECURITY NOTE(review): plaintext passwords are committed with the source.
# Anyone who can read this file can sign in as either role.
USERS = {
    "analyst": dict(password="analyst123", role="analyst"),
    "cro": dict(password="cro123", role="cro"),
}

# SQLite DB for persisting reports
DB_FILE = "reports.db"
engine = create_engine(
    "sqlite:///" + DB_FILE,
    connect_args={"check_same_thread": False},
)
Base = declarative_base()
SessionLocal = sessionmaker(bind=engine)
30
+
31
class Report(Base):
    """ORM row for one recorded segment decision, stored in table ``reports``."""

    __tablename__ = "reports"

    id = Column(Integer, primary_key=True, index=True)

    # Segment label and the risk metrics recorded with the decision.
    segment = Column(String)
    pd = Column(Float)
    lgd = Column(Float)
    ead = Column(Float)
    ecl = Column(Float)

    # Decision payload.
    action = Column(String)
    rationale = Column(String)
    confidence = Column(Float)

    # Who produced the record and when.
    generated_by = Column(String)
    created_at = Column(DateTime)


# Emit the schema for every model registered on ``Base``.
Base.metadata.create_all(bind=engine)
46
+
47
+ # ========== UTILITIES ==========
48
@st.cache_data
def process_loan_data(df: pd.DataFrame, segment_col: str = "loan_intent"):
    """Compute PD, LGD, EAD and ECL for every value of ``segment_col``.

    Rows with a missing value in the segment column, ``credit_score``,
    ``loan_amnt`` or ``loan_status`` are dropped before aggregation.

    Per segment:
        PD  = mean of ``loan_status`` (cast to int)
        LGD = 1 - mean(``credit_score``) / 850, limited to [0, 1]
        EAD = sum of ``loan_amnt``
        ECL = PD * LGD * EAD

    Args:
        df: Loan-level data. It is not modified.
        segment_col: Name of the column to group by.

    Returns:
        A DataFrame with columns ``segment``, ``PD``, ``LGD``, ``EAD``,
        ``ECL``, one row per segment.

    Raises:
        KeyError: If any required column is absent from ``df``.
    """
    required = [segment_col, "credit_score", "loan_amnt", "loan_status"]
    absent = [name for name in required if name not in df.columns]
    if absent:
        raise KeyError(f"Missing required column(s): {', '.join(map(str, absent))}")

    # Explicit copy: the casts below must never write through to the caller's
    # frame or trigger chained-assignment warnings.
    clean = df.dropna(subset=required).copy()
    clean["loan_status"] = clean["loan_status"].astype(int)
    clean["credit_score"] = clean["credit_score"].astype(float)
    clean["loan_amnt"] = clean["loan_amnt"].astype(float)

    grouped = clean.groupby(segment_col)
    pd_seg = grouped["loan_status"].mean()
    # LGD is a fraction; out-of-range scores must not push it outside [0, 1].
    lgd_seg = (1 - grouped["credit_score"].mean() / 850).clip(lower=0.0, upper=1.0)
    ead_seg = grouped["loan_amnt"].sum()
    ecl_seg = pd_seg * lgd_seg * ead_seg

    ecl_df = pd.concat([pd_seg, lgd_seg, ead_seg, ecl_seg], axis=1)
    ecl_df.columns = ["PD", "LGD", "EAD", "ECL"]
    return ecl_df.reset_index().rename(columns={segment_col: "segment"})
66
+
67
def sanitize_parse_json(text: str):
    """Extract the first JSON object embedded in ``text`` and parse it.

    The text is scanned for ``{`` and a real JSON decode is attempted at each
    candidate position. Markdown fences, leading or trailing prose, stray
    braces and additional objects after the first are therefore ignored.

    Args:
        text: Raw model output.

    Returns:
        The first decodable JSON object, as a dict.

    Raises:
        ValueError: If ``text`` is empty or blank, or contains no decodable
            JSON object.
    """
    if not text or not text.strip():
        raise ValueError("Empty response")

    decoder = json.JSONDecoder()
    last_error = None
    start = text.find("{")
    while start != -1:
        try:
            # raw_decode stops at the end of the first complete value, so
            # anything after the object (closing fence, prose) is harmless.
            parsed, _end = decoder.raw_decode(text, start)
        except json.JSONDecodeError as exc:
            last_error = exc
        else:
            return parsed
        start = text.find("{", start + 1)

    raise ValueError("No JSON object found in response") from last_error
81
+
82
def get_gemini_decision_single(segment, pd_val, lgd_val, ead_val, ecl_val):
    """Ask Gemini for a risk action on one segment and return it as a dict.

    The model reply is parsed and validated: it must be a JSON object with an
    allowed ``action``, a ``rationale`` and a numeric ``confidence``. Any
    failure (API error, unparsable or invalid reply) falls back to a fixed
    answer instead of raising.

    Args:
        segment: Segment label inserted into the prompt.
        pd_val: Probability of default for the segment.
        lgd_val: Loss given default for the segment.
        ead_val: Exposure at default for the segment.
        ecl_val: Expected credit loss for the segment.

    Returns:
        A dict with keys ``action`` (one of ``increase_interest``,
        ``reduce_disbursement``, ``maintain``), ``rationale`` (str) and
        ``confidence`` (float in [0, 1]).
    """
    # If API key missing, return deterministic fallback
    if not GEN_API_KEY:
        return {"action": "maintain", "rationale": "No API key configured", "confidence": 0.0}

    allowed_actions = ("increase_interest", "reduce_disbursement", "maintain")

    prompt = f"""
You are a financial risk advisor. Return ONLY one valid JSON object with this schema:
{{"action":"increase_interest"|"reduce_disbursement"|"maintain","rationale":"string","confidence":float}}

Segment: {segment}
PD: {pd_val:.3f}
LGD: {lgd_val:.3f}
EAD: {ead_val:,.0f}
ECL: {ecl_val:,.0f}

Rules:
- PD > 0.25 => increase_interest
- 0.20 <= PD <= 0.25 => reduce_disbursement
- PD < 0.15 => maintain

Respond with a single JSON object and nothing else.
"""

    # Use model.generate_content with single prompt string (compat for HF)
    try:
        model = genai.GenerativeModel("gemini-2.5-flash-lite")
        resp = model.generate_content(prompt, generation_config={"temperature": 0.05})
        raw = resp.text if hasattr(resp, "text") else str(resp)
        data = sanitize_parse_json(raw)

        # The reply is untrusted: check shape, keys and values before it is
        # shown to the user or written to the database.
        if not isinstance(data, dict):
            raise ValueError("Response is not a JSON object")
        for k in ("action", "rationale", "confidence"):
            if k not in data:
                raise ValueError(f"Missing key: {k}")
        if data["action"] not in allowed_actions:
            raise ValueError(f"Unknown action: {data['action']!r}")
        # float() raises on None / non-numeric text, which routes to the fallback.
        data["confidence"] = min(1.0, max(0.0, float(data["confidence"])))
        data["rationale"] = str(data["rationale"])
        return data
    except Exception as e:
        # Best-effort boundary: the UI must always receive a decision dict.
        # handle rate limits explicitly
        msg = str(e)
        if "429" in msg or "Resource exhausted" in msg:
            return {"action": "maintain", "rationale": "API quota exhausted - retry later", "confidence": 0.0}
        # fallback deterministic rule as final fallback
        if pd_val > 0.25:
            return {"action": "increase_interest", "rationale": "PD > 0.25 (deterministic fallback)", "confidence": 0.8}
        if 0.20 <= pd_val <= 0.25:
            return {"action": "reduce_disbursement", "rationale": "PD in 0.20-0.25 (deterministic fallback)", "confidence": 0.7}
        return {"action": "maintain", "rationale": "Fallback - parse or API error", "confidence": 0.0}
129
+
130
def save_report_to_db(row, decision, username):
    """Persist one segment decision and return the id of the new record.

    Args:
        row: Mapping-like segment record providing ``segment``, ``PD``,
            ``LGD``, ``EAD`` and ``ECL``.
        decision: Dict read with ``.get`` for ``action``, ``rationale`` and
            ``confidence``.
        username: Stored in ``generated_by``.

    Returns:
        The primary key of the inserted ``Report`` row.

    Raises:
        Exception: Database errors are re-raised after the transaction has
            been rolled back.
    """
    # A malformed confidence (None, non-numeric text) must not lose the report.
    try:
        confidence = float(decision.get("confidence", 0.0))
    except (TypeError, ValueError):
        confidence = 0.0

    session = SessionLocal()
    try:
        report = Report(
            segment=row["segment"],
            pd=float(row["PD"]),
            lgd=float(row["LGD"]),
            ead=float(row["EAD"]),
            ecl=float(row["ECL"]),
            action=decision.get("action"),
            rationale=decision.get("rationale"),
            confidence=confidence,
            generated_by=username,
            created_at=datetime.utcnow(),
        )
        session.add(report)
        session.commit()
        session.refresh(report)
        # Read the id while the session is still open.
        return report.id
    except Exception:
        session.rollback()
        raise
    finally:
        # Always release the connection, including on failure.
        session.close()
149
+
150
def load_reports_from_db(username, role):
    """Load stored reports, newest first, as a DataFrame.

    Args:
        username: Only reports with this ``generated_by`` value are returned,
            unless ``role`` is ``"cro"``.
        role: ``"cro"`` sees every report; any other role sees only its own.

    Returns:
        A DataFrame with columns ``id``, ``segment``, ``pd``, ``lgd``,
        ``ead``, ``ecl``, ``action``, ``rationale``, ``confidence``,
        ``generated_by``, ``created_at``. The columns are present even when
        there are no rows.
    """
    columns = [
        "id", "segment", "pd", "lgd", "ead", "ecl",
        "action", "rationale", "confidence", "generated_by", "created_at",
    ]
    session = SessionLocal()
    try:
        query = session.query(Report)
        if role != "cro":
            query = query.filter(Report.generated_by == username)
        rows = query.order_by(Report.created_at.desc()).all()
        # Copy plain values out while the session is still open.
        records = [{name: getattr(r, name) for name in columns} for r in rows]
    finally:
        # Always release the connection, including when the query fails.
        session.close()
    return pd.DataFrame(records, columns=columns)
171
+
172
# ========== UI - AUTH ==========
# Sidebar login gate: nothing below this section runs until a sign-in succeeds.
import hmac  # used only for the constant-time password comparison below

st.sidebar.title("Login")
username = st.sidebar.text_input("Username")
password = st.sidebar.text_input("Password", type="password")

if "auth_ok" not in st.session_state:
    st.session_state.auth_ok = False

if st.sidebar.button("Sign in"):
    user = USERS.get(username)
    # Compare in constant time, and run the comparison for unknown users too.
    # Bytes are used because compare_digest rejects non-ASCII str input.
    expected = user["password"] if user else ""
    password_ok = hmac.compare_digest(password.encode("utf-8"), expected.encode("utf-8"))
    if user and password_ok:
        st.session_state.auth_ok = True
        st.session_state.username = username
        st.session_state.role = user["role"]
        st.sidebar.success(f"Signed in as {username} ({user['role']})")
    else:
        st.sidebar.error("Invalid credentials")

if not st.session_state.auth_ok:
    st.stop()
189
+
190
# ========== MAIN ==========
# Page flow: upload a CSV, aggregate it per segment, plot, request one Gemini
# decision for a chosen segment, then list previously recorded reports.
st.header("ECL Decision Assistant")
st.write(f"Signed in as **{st.session_state.username}** ({st.session_state.role})")

# Upload CSV
uploaded = st.file_uploader("Upload loan CSV (must contain loan_intent, credit_score, loan_amnt, loan_status)", type=["csv"])

ecl_df = None
if uploaded:
    df = pd.read_csv(uploaded)
    st.write("Sample rows:")
    st.dataframe(df.head(), width='stretch')

    # Validate before aggregating so the user gets a message, not a traceback.
    missing_cols = [c for c in ("credit_score", "loan_amnt", "loan_status") if c not in df.columns]
    segment_options = [c for c in df.columns if df[c].dtype == object]
    if missing_cols:
        st.error(f"Uploaded CSV is missing required column(s): {', '.join(missing_cols)}")
    elif not segment_options:
        st.error("Uploaded CSV has no text column that can be used for segmentation.")
    else:
        # allow user to choose segmentation column
        seg_col = st.selectbox("Segment by column", options=segment_options, index=0)
        ecl_df = process_loan_data(df, segment_col=seg_col)
        if ecl_df.empty:
            st.warning("No complete rows left after dropping missing values; nothing to analyze.")
            ecl_df = None

if ecl_df is not None:
    st.subheader("Segment-level ECL Summary")
    st.dataframe(ecl_df, width='stretch')

    # Plots
    col1, col2 = st.columns(2)
    with col1:
        st.subheader("ECL by Segment")
        fig, ax = plt.subplots(figsize=(8, 3))
        ax.bar(ecl_df["segment"], ecl_df["ECL"])
        ax.set_xlabel("Segment")
        ax.set_ylabel("ECL")
        ax.tick_params(axis="x", rotation=45)
        st.pyplot(fig)
        # Close the figure so reruns do not accumulate open figures.
        plt.close(fig)
    with col2:
        st.subheader("PD by Segment")
        fig2, ax2 = plt.subplots(figsize=(8, 3))
        ax2.bar(ecl_df["segment"], ecl_df["PD"], color="gray")
        ax2.set_xlabel("Segment")
        ax2.set_ylabel("PD")
        ax2.tick_params(axis="x", rotation=45)
        st.pyplot(fig2)
        plt.close(fig2)

    # Select single segment for Gemini
    st.subheader("Analyze one segment (single API call)")
    selected = st.selectbox("Choose a segment to analyze", ecl_df["segment"].tolist())
    row = ecl_df[ecl_df["segment"] == selected].iloc[0]
    st.write(f"PD: {row.PD:.3f} | LGD: {row.LGD:.3f} | EAD: {row.EAD:,.0f} | ECL: {row.ECL:,.0f}")

    # Optionally show top segments only to reduce API usage
    n_segments = len(ecl_df)
    # The default must not exceed max_value when there are fewer than 5 segments.
    top_n = st.number_input(
        "Show top N segments by ECL (for reference)",
        min_value=1,
        max_value=n_segments,
        value=min(5, n_segments),
    )
    st.write(ecl_df.sort_values("ECL", ascending=False).head(int(top_n)))

    if st.button("Request Gemini decision for selected segment"):
        with st.spinner("Querying Gemini (single call)..."):
            decision = get_gemini_decision_single(row["segment"], row["PD"], row["LGD"], row["EAD"], row["ECL"])
        # save
        rec_id = save_report_to_db(row, decision, st.session_state.username)
        st.success("Decision recorded")
        st.json({"record_id": rec_id, "segment": row["segment"], "decision": decision})

# Historical reports section
st.subheader("Past Reports")
reports_df = load_reports_from_db(st.session_state.username, st.session_state.role)
if not reports_df.empty:
    # allow filtering by action
    action_filter = st.selectbox("Filter by action (All / increase_interest / reduce_disbursement / maintain)", ["All", "increase_interest", "reduce_disbursement", "maintain"])
    if action_filter == "All":
        visible_reports = reports_df
    else:
        visible_reports = reports_df[reports_df["action"] == action_filter]
    st.dataframe(visible_reports, width='stretch')
    # Render the download button directly: nested under st.button it would
    # disappear on the rerun triggered by the next interaction.
    st.download_button("Download reports CSV", reports_df.to_csv(index=False).encode("utf-8"), file_name="reports.csv", mime="text/csv")
else:
    st.info("No reports recorded yet (use 'Request Gemini decision' to create one).")