GenAICoder committed on
Commit
bd0729b
·
verified ·
1 Parent(s): 201b105

Update visualization/segmentation_ranking.py

Browse files
Files changed (1) hide show
  1. visualization/segmentation_ranking.py +391 -0
visualization/segmentation_ranking.py CHANGED
@@ -0,0 +1,391 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # visualization/segmentation_ranking.py
2
+
3
+ import plotly.graph_objects as go
4
+ import plotly.express as px
5
+ import pandas as pd
6
+ from metrics.metric_registry import METRIC_FUNCTIONS
7
+ from analytics.performance_analysis import generate_metric_view
8
+
9
+
10
def calculate_segment_risk_score(
    df,
    metric_name,
    category
):
    """
    Calculate risk scores for each segment in a category.

    The metric view is built by ``generate_metric_view`` grouped by
    ``category``. The first column whose name contains "rate"
    (case-insensitive) is used as the risk measure.

    Args:
        df: Master dataframe
        metric_name: Metric name for risk calculation
        category: Segmentation category (column the view is grouped by)

    Returns:
        DataFrame with one row per segment and the columns ``Segment``,
        ``Risk_Score`` (plain, unweighted mean of the rate column),
        ``total_accounts`` and ``total_balance`` (both summed).

    Raises:
        IndexError: If the metric view has no column containing "rate".
    """

    result = generate_metric_view(
        df=df,
        metric_name=metric_name,
        group_col=category
    )

    # str() keeps the search safe when a column label is not a string
    rate_col = next(
        (col for col in result.columns if "rate" in str(col).lower()),
        None
    )

    if rate_col is None:
        # Same exception type as the former bare ``[...][0]`` lookup,
        # but with enough context to diagnose the failing combination.
        raise IndexError(
            f"No rate column found for metric '{metric_name}' "
            f"grouped by '{category}'; available columns: "
            f"{list(result.columns)}"
        )

    # Collapse to one row per segment: average the rate, total the volumes
    segment_risk = (
        result.groupby(category)
        .agg({
            rate_col: "mean",
            "total_accounts": "sum",
            "total_balance": "sum"
        })
        .reset_index()
    )

    segment_risk = segment_risk.rename(
        columns={
            category: "Segment",
            rate_col: "Risk_Score"
        }
    )

    return segment_risk
57
+
58
+
59
def generate_segment_risk_heatmap(
    df,
    metrics=None,
    categories=None
):
    """
    Generate heatmap showing risk scores across segments and metrics.

    Each row is one segment of one category, each column is one metric.
    Rows are ordered by their mean score across metrics, highest first.
    Metric/category combinations that fail are reported with ``print`` and
    skipped; any resulting gaps are filled with 0.

    Args:
        df: Master dataframe
        metrics: List of metrics to evaluate
            (defaults to "30+@3", "30+@6", "60+@6", "Yr1 NCL")
        categories: List of categories to analyze
            (defaults to fico_band, sourcing_channel, city_tier,
            occupation_type)

    Returns:
        Plotly figure with heatmap
    """

    if metrics is None:
        metrics = ["30+@3", "30+@6", "60+@6", "Yr1 NCL"]

    if categories is None:
        categories = [
            "fico_band",
            "sourcing_channel",
            "city_tier",
            "occupation_type"
        ]

    # metric -> {segment_key -> risk score}
    heatmap_data = {}
    # segment_key -> human-readable axis label
    all_segments = {}

    for metric in metrics:

        metric_scores = {}

        for category in categories:

            try:
                segment_risk = calculate_segment_risk_score(
                    df=df,
                    metric_name=metric,
                    category=category
                )

                # Iterate the two columns directly: iterrows() does not
                # preserve dtypes, so integer segment labels would be
                # upcast to float and rendered as e.g. "1.0".
                for segment, score in zip(
                    segment_risk["Segment"],
                    segment_risk["Risk_Score"]
                ):
                    segment_key = f"{category}_{segment}"
                    metric_scores[segment_key] = score
                    all_segments[segment_key] = f"{category.replace('_', ' ').title()}: {segment}"

            except Exception as e:
                # Best-effort: one failing combination must not abort the chart
                print(f"Error processing {metric} x {category}: {e}")

        heatmap_data[metric] = metric_scores

    # Create DataFrame for heatmap (rows: segments, columns: metrics)
    heatmap_df = pd.DataFrame(heatmap_data)
    heatmap_df = heatmap_df.fillna(0)

    # Sort by average risk without adding a helper column that could
    # clash with a metric name
    row_order = heatmap_df.mean(axis=1).sort_values(ascending=False).index
    heatmap_df = heatmap_df.loc[row_order]

    # Create heatmap
    fig = go.Figure(
        data=go.Heatmap(
            z=heatmap_df.values,
            x=list(heatmap_df.columns),
            y=[all_segments.get(idx, idx) for idx in heatmap_df.index],
            colorscale="RdYlGn_r",
            hovertemplate=(
                "<b>Segment: %{y}</b><br>" +
                "<b>Metric: %{x}</b><br>" +
                "Risk Score: %{z:.2f}%<br>" +
                "<extra></extra>"
            ),
            text=[[f"{val:.2f}%" for val in row] for row in heatmap_df.values],
            texttemplate="%{text}",
            textfont={"size": 10},
            colorbar=dict(
                title="Risk Score<br>(%)"
            )
        )
    )

    fig.update_layout(
        title="Segment Risk Heatmap Across Delinquency Metrics",
        xaxis_title="Delinquency Metrics",
        yaxis_title="Segments",
        # 25px per row, never shorter than 400px
        height=max(400, len(heatmap_df) * 25),
        template="plotly_white",
        hovermode="closest"
    )

    return fig
155
+
156
+
157
def generate_segment_risk_ranking(
    df,
    metric_name,
    category,
    top_n=10
):
    """
    Generate bar chart ranking segments by risk within a category.

    Segments without a risk score (NaN) are excluded from the ranking.
    Errors raised by ``calculate_segment_risk_score`` propagate to the
    caller.

    Args:
        df: Master dataframe
        metric_name: Metric name for risk calculation
        category: Segmentation category
        top_n: Number of top risk segments to display

    Returns:
        Plotly bar chart figure
    """

    segment_risk = calculate_segment_risk_score(
        df=df,
        metric_name=metric_name,
        category=category
    )

    # NaN scores sort last by default, so tail() would otherwise pick them
    # up as the "highest risk" segments; drop them before ranking.
    segment_risk = segment_risk.dropna(subset=["Risk_Score"])

    # Sort ascending and keep the last top_n rows: the highest-risk
    # segments, with the riskiest one as the final bar.
    segment_risk = segment_risk.sort_values(
        "Risk_Score",
        ascending=True
    ).tail(top_n)

    # Color code by risk level: red above 10, orange above 5, green otherwise
    colors = [
        "#d62728" if score > 10 else "#ff7f0e" if score > 5 else "#2ca02c"
        for score in segment_risk["Risk_Score"]
    ]

    fig = go.Figure(
        data=go.Bar(
            y=segment_risk["Segment"],
            x=segment_risk["Risk_Score"],
            orientation="h",
            marker=dict(
                color=colors,
                line=dict(color="white", width=1)
            ),
            text=segment_risk["Risk_Score"],
            texttemplate="%{text:.2f}%",
            textposition="outside",
            hovertemplate=(
                "<b>Segment: %{y}</b><br>" +
                "Risk Score: %{x:.2f}%<br>" +
                "Accounts: %{customdata[0]}<br>" +
                "Balance: %{customdata[1]:,.0f}<br>" +
                "<extra></extra>"
            ),
            # Per-bar extras referenced by the hover template above
            customdata=segment_risk[["total_accounts", "total_balance"]].values
        )
    )

    fig.update_layout(
        title=f"Top {top_n} High-Risk Segments: {metric_name} by {category.replace('_', ' ').title()}",
        xaxis_title="Risk Score (%)",
        yaxis_title=category.replace('_', ' ').title(),
        height=400 + (top_n * 15),
        template="plotly_white",
        hovermode="closest"
    )

    fig.update_xaxes(
        showgrid=True,
        gridwidth=1,
        gridcolor="lightgray"
    )

    return fig
231
+
232
+
233
def generate_multi_category_risk_comparison(
    df,
    metric_name
):
    """
    Compare risk across all categories for a single metric.

    Draws a 2x2 grid of horizontal bar charts, one per category
    (fico_band, sourcing_channel, city_tier, occupation_type), each showing
    up to five segments with the highest risk score. Segments without a
    score (NaN) are excluded. A category that fails is reported with
    ``print`` and its subplot is left empty.

    Args:
        df: Master dataframe
        metric_name: Metric name for risk calculation

    Returns:
        Plotly figure with subplots (one per category)
    """

    categories = [
        "fico_band",
        "sourcing_channel",
        "city_tier",
        "occupation_type"
    ]

    # Create subplots
    from plotly.subplots import make_subplots

    fig = make_subplots(
        rows=2,
        cols=2,
        subplot_titles=[cat.replace('_', ' ').title() for cat in categories],
        specs=[
            [{"type": "bar"}, {"type": "bar"}],
            [{"type": "bar"}, {"type": "bar"}]
        ]
    )

    # Grid cell for each category, in the same order as ``categories``
    positions = [
        (1, 1),
        (1, 2),
        (2, 1),
        (2, 2)
    ]

    for category, (row, col) in zip(categories, positions):

        try:
            segment_risk = calculate_segment_risk_score(
                df=df,
                metric_name=metric_name,
                category=category
            )

            # NaN scores sort last by default and would be selected by
            # tail() as if they were the riskiest segments; drop them.
            segment_risk = segment_risk.dropna(subset=["Risk_Score"])

            # Sort ascending and take the last 5 rows (the top 5 by risk)
            segment_risk = segment_risk.sort_values(
                "Risk_Score",
                ascending=True
            ).tail(5)

            fig.add_trace(
                go.Bar(
                    y=segment_risk["Segment"],
                    x=segment_risk["Risk_Score"],
                    orientation="h",
                    name=category,
                    showlegend=False,
                    marker=dict(
                        color=segment_risk["Risk_Score"],
                        colorscale="Reds",
                        showscale=False
                    ),
                    text=segment_risk["Risk_Score"],
                    texttemplate="%{text:.2f}%",
                    textposition="outside",
                    hovertemplate=(
                        "<b>%{y}</b><br>" +
                        "Risk Score: %{x:.2f}%<br>" +
                        "<extra></extra>"
                    )
                ),
                row=row,
                col=col
            )

            fig.update_xaxes(
                title_text="Risk Score (%)",
                row=row,
                col=col
            )

        except Exception as e:
            # Best-effort: keep the remaining subplots when one category fails
            print(f"Error processing category {category}: {e}")

    fig.update_layout(
        title_text=f"High-Risk Segments Across Categories: {metric_name}",
        height=800,
        template="plotly_white",
        hovermode="closest"
    )

    return fig
336
+
337
+
338
def calculate_portfolio_risk_summary(
    df,
    metrics=None,
    categories=None
):
    """
    Calculate overall portfolio risk summary across metrics and categories.

    For every metric/category pair the segment risk scores are reduced to
    their mean, their maximum and the number of segments scoring above 10.
    Pairs that fail are reported with ``print`` and skipped.

    Args:
        df: Master dataframe
        metrics: List of metrics to evaluate
            (defaults to "30+@3", "30+@6", "60+@6", "Yr1 NCL")
        categories: List of categories to analyze
            (defaults to fico_band, sourcing_channel, city_tier,
            occupation_type)

    Returns:
        DataFrame with portfolio risk summary. The columns are always
        ``Metric``, ``Category``, ``Avg_Risk``, ``Max_Risk`` and
        ``High_Risk_Segments``, even when no pair could be computed.
    """

    if metrics is None:
        metrics = ["30+@3", "30+@6", "60+@6", "Yr1 NCL"]

    if categories is None:
        categories = [
            "fico_band",
            "sourcing_channel",
            "city_tier",
            "occupation_type"
        ]

    summary_columns = [
        "Metric",
        "Category",
        "Avg_Risk",
        "Max_Risk",
        "High_Risk_Segments"
    ]

    summary_data = []

    for metric in metrics:
        for category in categories:
            try:
                segment_risk = calculate_segment_risk_score(
                    df=df,
                    metric_name=metric,
                    category=category
                )

                scores = segment_risk["Risk_Score"]

                summary_data.append({
                    "Metric": metric,
                    "Category": category.replace('_', ' ').title(),
                    "Avg_Risk": scores.mean(),
                    "Max_Risk": scores.max(),
                    # Count of segments whose score exceeds 10
                    "High_Risk_Segments": int((scores > 10).sum())
                })

            except Exception as e:
                # Best-effort: one failing pair must not abort the summary
                print(f"Error calculating summary for {metric} x {category}: {e}")

    # Explicit columns keep the schema stable when summary_data is empty
    summary_df = pd.DataFrame(summary_data, columns=summary_columns)

    return summary_df