entropy25 committed on
Commit
5bacdfa
·
verified ·
1 Parent(s): 102b105

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +482 -400
app.py CHANGED
@@ -34,12 +34,13 @@ COLORS = {
34
  'indigo': '#6366f1'
35
  }
36
 
37
- plt.style.use('seaborn-v0_8-whitegrid')
38
  sns.set_palette("husl")
39
 
40
  class B2BCustomerAnalytics:
41
  def __init__(self):
42
  self.df = None
 
43
  self.model = None
44
  self.feature_importance = None
45
  self.predictions = None
@@ -50,188 +51,244 @@ class B2BCustomerAnalytics:
50
  if file is None:
51
  return "Please upload a CSV file", None, None, None
52
 
 
53
  self.df = pd.read_csv(file.name)
54
 
 
55
  required_columns = ['customer_id', 'order_date', 'amount']
56
- missing_cols = [col for col in required_columns if col not in self.df.columns]
57
- if missing_cols:
58
- return f"Missing required columns: {missing_cols}", None, None, None
59
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  self.df['order_date'] = pd.to_datetime(self.df['order_date'])
61
 
62
- if 'recency_days' not in self.df.columns or 'frequency' not in self.df.columns or 'monetary' not in self.df.columns:
63
- self.df = self.calculate_rfm_metrics(self.df)
64
 
65
- self.df = self.perform_customer_segmentation(self.df)
 
66
 
 
67
  summary_html, kpi_cards = self.generate_summary_dashboard()
68
 
69
- return "Data loaded successfully!", summary_html, self.df.head(20), kpi_cards
70
 
71
  except Exception as e:
72
  return f"Error loading data: {str(e)}", None, None, None
73
 
74
  def calculate_rfm_metrics(self, df):
75
  """Calculate RFM metrics from transaction data"""
76
- current_date = df['order_date'].max() + timedelta(days=1)
77
-
78
- customer_metrics = df.groupby('customer_id').agg({
79
- 'order_date': ['max', 'count'],
80
- 'amount': ['sum', 'mean']
81
- }).round(2)
82
-
83
- customer_metrics.columns = ['last_order_date', 'frequency', 'monetary', 'avg_order_value']
84
- customer_metrics['recency_days'] = (current_date - customer_metrics['last_order_date']).dt.days
85
-
86
- df_with_rfm = df.merge(customer_metrics[['recency_days', 'frequency', 'monetary']],
87
- left_on='customer_id', right_index=True, how='left')
88
-
89
- return df_with_rfm
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
  def perform_customer_segmentation(self, df):
92
  """Perform customer segmentation based on RFM analysis"""
93
- customer_df = df.groupby('customer_id').agg({
94
- 'recency_days': 'first',
95
- 'frequency': 'first',
96
- 'monetary': 'first'
97
- }).reset_index()
98
-
99
- customer_df['R_Score'] = pd.qcut(customer_df['recency_days'].rank(method='first'), 5, labels=[5,4,3,2,1])
100
- customer_df['F_Score'] = pd.qcut(customer_df['frequency'].rank(method='first'), 5, labels=[1,2,3,4,5])
101
- customer_df['M_Score'] = pd.qcut(customer_df['monetary'].rank(method='first'), 5, labels=[1,2,3,4,5])
102
-
103
- customer_df['R_Score'] = customer_df['R_Score'].astype(int)
104
- customer_df['F_Score'] = customer_df['F_Score'].astype(int)
105
- customer_df['M_Score'] = customer_df['M_Score'].astype(int)
106
-
107
- def segment_customers(row):
108
- if row['R_Score'] >= 4 and row['F_Score'] >= 4 and row['M_Score'] >= 4:
109
- return 'Champions'
110
- elif row['R_Score'] >= 3 and row['F_Score'] >= 3 and row['M_Score'] >= 3:
111
- return 'Loyal Customers'
112
- elif row['R_Score'] >= 3 and row['F_Score'] >= 2:
113
- return 'Potential Loyalists'
114
- elif row['R_Score'] >= 4 and row['F_Score'] <= 2:
115
- return 'New Customers'
116
- elif row['R_Score'] <= 2 and row['F_Score'] >= 3:
117
- return 'At Risk'
118
- elif row['R_Score'] <= 2 and row['F_Score'] <= 2 and row['M_Score'] >= 3:
119
- return 'Cannot Lose Them'
120
- elif row['R_Score'] <= 2 and row['F_Score'] <= 2 and row['M_Score'] <= 2:
121
- return 'Lost Customers'
122
- else:
123
- return 'Others'
124
-
125
- customer_df['Segment'] = customer_df.apply(segment_customers, axis=1)
126
-
127
- customer_df['Churn_Risk'] = customer_df.apply(lambda x:
128
- 'High' if x['Segment'] in ['Lost Customers', 'At Risk'] else
129
- 'Medium' if x['Segment'] in ['Others', 'Cannot Lose Them'] else 'Low', axis=1)
130
-
131
- segment_data = customer_df[['customer_id', 'Segment', 'Churn_Risk', 'R_Score', 'F_Score', 'M_Score']]
132
- df_with_segments = df.merge(segment_data, on='customer_id', how='left')
133
-
134
- return df_with_segments
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
 
136
  def generate_summary_dashboard(self):
137
  """Generate modern dashboard summary with KPI cards"""
138
- if self.df is None:
139
  return "No data loaded", ""
140
 
141
- total_customers = self.df['customer_id'].nunique()
142
- total_orders = len(self.df)
143
- total_revenue = self.df['amount'].sum()
144
- avg_order_value = self.df['amount'].mean()
145
-
146
- segment_dist = self.df.groupby('customer_id')['Segment'].first().value_counts()
147
- risk_dist = self.df.groupby('customer_id')['Churn_Risk'].first().value_counts()
148
-
149
- # Create modern horizontal dashboard
150
- summary_html = f"""
151
- <div style="background: linear-gradient(135deg, #6366f1 0%, #8b5cf6 100%); padding: 2rem; border-radius: 1rem; color: white; margin-bottom: 2rem; text-align: center;">
152
- <h1 style="font-size: 2.5rem; font-weight: bold; margin-bottom: 0.5rem;">
153
- B2B Customer Analytics Platform
154
- </h1>
155
- <p style="font-size: 1.2rem; opacity: 0.9;">
156
- Enterprise Customer Health Monitoring & Churn Prediction System
157
- </p>
158
- </div>
159
-
160
- <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 1.5rem; margin-bottom: 3rem;">
161
- <div style="background: white; padding: 1.5rem; border-radius: 1rem; box-shadow: 0 10px 25px -5px rgba(0, 0, 0, 0.1); border-left: 4px solid #3b82f6;">
162
- <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 1rem;">
163
- <div style="padding: 0.75rem; background: #dbeafe; border-radius: 0.5rem; color: #1d4ed8;">📊</div>
164
- <span style="font-size: 2rem; font-weight: bold; color: #3b82f6;">{total_customers:,}</span>
165
- </div>
166
- <h3 style="color: #1f2937; font-weight: 600; margin: 0;">Total Customers</h3>
167
- <p style="color: #6b7280; font-size: 0.875rem; margin: 0.25rem 0 0 0;">Active enterprise clients</p>
168
- </div>
169
 
170
- <div style="background: white; padding: 1.5rem; border-radius: 1rem; box-shadow: 0 10px 25px -5px rgba(0, 0, 0, 0.1); border-left: 4px solid #10b981;">
171
- <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 1rem;">
172
- <div style="padding: 0.75rem; background: #d1fae5; border-radius: 0.5rem; color: #047857;">💰</div>
173
- <span style="font-size: 2rem; font-weight: bold; color: #10b981;">${(total_revenue/1000000):.1f}M</span>
174
- </div>
175
- <h3 style="color: #1f2937; font-weight: 600; margin: 0;">Total Revenue</h3>
176
- <p style="color: #6b7280; font-size: 0.875rem; margin: 0.25rem 0 0 0;">Contract value sum</p>
177
- </div>
178
 
179
- <div style="background: white; padding: 1.5rem; border-radius: 1rem; box-shadow: 0 10px 25px -5px rgba(0, 0, 0, 0.1); border-left: 4px solid #8b5cf6;">
180
- <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 1rem;">
181
- <div style="padding: 0.75rem; background: #ede9fe; border-radius: 0.5rem; color: #7c3aed;">📈</div>
182
- <span style="font-size: 2rem; font-weight: bold; color: #8b5cf6;">${avg_order_value:.0f}</span>
183
- </div>
184
- <h3 style="color: #1f2937; font-weight: 600; margin: 0;">Avg Order Value</h3>
185
- <p style="color: #6b7280; font-size: 0.875rem; margin: 0.25rem 0 0 0;">Per order average</p>
 
 
186
  </div>
187
 
188
- <div style="background: white; padding: 1.5rem; border-radius: 1rem; box-shadow: 0 10px 25px -5px rgba(0, 0, 0, 0.1); border-left: 4px solid #ef4444;">
189
- <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 1rem;">
190
- <div style="padding: 0.75rem; background: #fee2e2; border-radius: 0.5rem; color: #dc2626;">🚨</div>
191
- <span style="font-size: 2rem; font-weight: bold; color: #ef4444;">{risk_dist.get('High', 0)}</span>
 
 
 
 
192
  </div>
193
- <h3 style="color: #1f2937; font-weight: 600; margin: 0;">High Risk Clients</h3>
194
- <p style="color: #6b7280; font-size: 0.875rem; margin: 0.25rem 0 0 0;">Need immediate attention</p>
195
- </div>
196
-
197
- <div style="background: white; padding: 1.5rem; border-radius: 1rem; box-shadow: 0 10px 25px -5px rgba(0, 0, 0, 0.1); border-left: 4px solid #f59e0b;">
198
- <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 1rem;">
199
- <div style="padding: 0.75rem; background: #fef3c7; border-radius: 0.5rem; color: #d97706;">🏆</div>
200
- <span style="font-size: 2rem; font-weight: bold; color: #f59e0b;">{segment_dist.get('Champions', 0)}</span>
201
  </div>
202
- <h3 style="color: #1f2937; font-weight: 600; margin: 0;">Champion Customers</h3>
203
- <p style="color: #6b7280; font-size: 0.875rem; margin: 0.25rem 0 0 0;">Top tier clients</p>
204
- </div>
205
-
206
- <div style="background: white; padding: 1.5rem; border-radius: 1rem; box-shadow: 0 10px 25px -5px rgba(0, 0, 0, 0.1); border-left: 4px solid #06b6d4;">
207
- <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 1rem;">
208
- <div style="padding: 0.75rem; background: #cffafe; border-radius: 0.5rem; color: #0891b2;"></div>
209
- <span style="font-size: 2rem; font-weight: bold; color: #06b6d4;">{risk_dist.get('Low', 0)}</span>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
  </div>
211
- <h3 style="color: #1f2937; font-weight: 600; margin: 0;">Healthy Customers</h3>
212
- <p style="color: #6b7280; font-size: 0.875rem; margin: 0.25rem 0 0 0;">Low churn risk</p>
213
  </div>
214
- </div>
215
- """
216
-
217
- kpi_data = [
218
- ["Total Customers", f"{total_customers:,}", "👥", "#3b82f6"],
219
- ["Total Revenue", f"${total_revenue/1000000:.1f}M", "💰", "#10b981"],
220
- ["Avg Order Value", f"${avg_order_value:.0f}", "📈", "#8b5cf6"],
221
- ["High Risk Customers", f"{risk_dist.get('High', 0)}", "🚨", "#ef4444"],
222
- ["Champion Customers", f"{segment_dist.get('Champions', 0)}", "🏆", "#f59e0b"],
223
- ["Healthy Customers", f"{risk_dist.get('Low', 0)}", "✅", "#06b6d4"]
224
- ]
225
-
226
- return summary_html, kpi_data
 
 
227
 
228
  def train_churn_model(self):
229
  """Train churn prediction model"""
230
- if self.df is None:
231
- return "No data available. Please upload a CSV file first.", None
232
 
233
  try:
234
- customer_features = self.df.groupby('customer_id').agg({
 
235
  'recency_days': 'first',
236
  'frequency': 'first',
237
  'monetary': 'first',
@@ -239,40 +296,46 @@ class B2BCustomerAnalytics:
239
  'order_date': ['min', 'max']
240
  }).reset_index()
241
 
 
242
  customer_features.columns = ['customer_id', 'recency_days', 'frequency', 'monetary',
243
  'avg_amount', 'std_amount', 'min_amount', 'max_amount',
244
  'first_order', 'last_order']
245
 
 
246
  customer_features['std_amount'].fillna(0, inplace=True)
247
 
 
248
  customer_features['customer_lifetime'] = (customer_features['last_order'] - customer_features['first_order']).dt.days
249
  customer_features['customer_lifetime'].fillna(0, inplace=True)
250
 
251
- if 'churn_label' not in self.df.columns:
252
- customer_features['churn_label'] = (customer_features['recency_days'] > 90).astype(int)
253
- else:
254
- churn_labels = self.df.groupby('customer_id')['churn_label'].first().reset_index()
255
- customer_features = customer_features.merge(churn_labels, on='customer_id')
256
 
 
257
  feature_cols = ['recency_days', 'frequency', 'monetary', 'avg_amount', 'std_amount',
258
  'min_amount', 'max_amount', 'customer_lifetime']
259
 
260
  X = customer_features[feature_cols]
261
  y = customer_features['churn_label']
262
 
 
263
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
264
 
 
265
  self.model = xgb.XGBClassifier(random_state=42, eval_metric='logloss')
266
  self.model.fit(X_train, y_train)
267
 
 
268
  y_pred = self.model.predict(X_test)
269
  y_pred_proba = self.model.predict_proba(X_test)[:, 1]
270
 
 
271
  self.feature_importance = pd.DataFrame({
272
  'feature': feature_cols,
273
  'importance': self.model.feature_importances_
274
  }).sort_values('importance', ascending=False)
275
 
 
276
  all_predictions = self.model.predict_proba(X)[:, 1]
277
  customer_features['churn_probability'] = all_predictions
278
  self.predictions = customer_features
@@ -334,194 +397,209 @@ class B2BCustomerAnalytics:
334
  if self.feature_importance is None:
335
  return None
336
 
337
- fig = px.bar(
338
- self.feature_importance.head(8),
339
- x='importance',
340
- y='feature',
341
- orientation='h',
342
- title='Feature Importance Analysis',
343
- labels={'importance': 'Importance Score', 'feature': 'Features'},
344
- color='importance',
345
- color_continuous_scale='viridis'
346
- )
347
-
348
- fig.update_layout(
349
- height=500,
350
- showlegend=False,
351
- plot_bgcolor='white',
352
- paper_bgcolor='white',
353
- title={
354
- 'text': '<b>Feature Importance Analysis</b>',
355
- 'x': 0.5,
356
- 'xanchor': 'center',
357
- 'font': {'size': 20, 'color': '#1f2937'}
358
- },
359
- font=dict(family="Inter, system-ui, sans-serif", size=12),
360
- yaxis={'categoryorder': 'total ascending'},
361
- xaxis=dict(gridcolor='#f1f5f9'),
362
- yaxis_title=dict(font_size=14),
363
- xaxis_title=dict(font_size=14)
364
- )
365
-
366
- return fig
 
 
 
 
 
367
 
368
  def create_visualizations(self):
369
  """Create comprehensive modern visualizations"""
370
- if self.df is None:
371
  return None, None, None, None
372
 
373
- # 1. Customer Segment Distribution
374
- segment_data = self.df.groupby('customer_id')['Segment'].first().value_counts().reset_index()
375
- segment_data.columns = ['Segment', 'Count']
376
-
377
- fig1 = px.pie(
378
- segment_data,
379
- values='Count',
380
- names='Segment',
381
- title='<b>Customer Segment Distribution</b>',
382
- hole=0.4,
383
- color_discrete_sequence=['#6366f1', '#10b981', '#f59e0b', '#ef4444', '#8b5cf6', '#ec4899']
384
- )
385
- fig1.update_traces(textposition='inside', textinfo='percent+label', textfont_size=13)
386
- fig1.update_layout(
387
- height=450,
388
- showlegend=True,
389
- title={'x': 0.5, 'xanchor': 'center', 'font': {'size': 20, 'color': '#1f2937'}},
390
- font=dict(family="Inter, system-ui, sans-serif", size=12),
391
- paper_bgcolor='white',
392
- plot_bgcolor='white'
393
- )
394
-
395
- # 2. RFM Analysis
396
- customer_rfm = self.df.groupby('customer_id').agg({
397
- 'recency_days': 'first',
398
- 'frequency': 'first',
399
- 'monetary': 'first',
400
- 'Segment': 'first'
401
- }).reset_index()
402
-
403
- fig2 = px.scatter(
404
- customer_rfm,
405
- x='recency_days',
406
- y='frequency',
407
- size='monetary',
408
- color='Segment',
409
- title='<b>RFM Customer Behavior Matrix</b>',
410
- labels={
411
- 'recency_days': 'Days Since Last Purchase',
412
- 'frequency': 'Purchase Frequency',
413
- 'monetary': 'Total Revenue'
414
- },
415
- color_discrete_sequence=['#6366f1', '#10b981', '#f59e0b', '#ef4444', '#8b5cf6'],
416
- size_max=60
417
- )
418
- fig2.update_layout(
419
- height=500,
420
- title={'x': 0.5, 'xanchor': 'center', 'font': {'size': 20, 'color': '#1f2937'}},
421
- font=dict(family="Inter, system-ui, sans-serif", size=12),
422
- paper_bgcolor='white',
423
- plot_bgcolor='white'
424
- )
425
-
426
- # 3. Churn Risk Analysis
427
- if self.predictions is not None:
428
- fig3 = px.histogram(
429
- self.predictions,
430
- x='churn_probability',
431
- nbins=20,
432
- title='<b>Churn Probability Distribution</b>',
433
- labels={'churn_probability': 'Churn Probability', 'count': 'Number of Customers'},
434
- color_discrete_sequence=[COLORS['primary']]
435
  )
436
- fig3.add_vline(x=0.5, line_dash="dash", line_color="#ef4444", line_width=2,
437
- annotation_text="High Risk Threshold", annotation_position="top")
438
- else:
439
- risk_data = self.df.groupby('customer_id')['Churn_Risk'].first().value_counts().reset_index()
440
- risk_data.columns = ['Risk_Level', 'Count']
441
- colors_map = {'High': '#ef4444', 'Medium': '#f59e0b', 'Low': '#10b981'}
442
- fig3 = px.bar(
443
- risk_data,
444
- x='Risk_Level',
445
- y='Count',
446
- title='<b>Customer Churn Risk Distribution</b>',
447
- color='Risk_Level',
448
- color_discrete_map=colors_map
449
  )
450
-
451
- fig3.update_layout(
452
- height=450,
453
- showlegend=False,
454
- title={'x': 0.5, 'xanchor': 'center', 'font': {'size': 20, 'color': '#1f2937'}},
455
- font=dict(family="Inter, system-ui, sans-serif", size=12),
456
- plot_bgcolor='white',
457
- paper_bgcolor='white'
458
- )
459
-
460
- # 4. Revenue Trends
461
- self.df['order_month'] = self.df['order_date'].dt.to_period('M')
462
- monthly_revenue = self.df.groupby('order_month')['amount'].sum().reset_index()
463
- monthly_revenue['order_month'] = monthly_revenue['order_month'].astype(str)
464
-
465
- fig4 = px.line(
466
- monthly_revenue,
467
- x='order_month',
468
- y='amount',
469
- title='<b>Monthly Revenue Trends</b>',
470
- labels={'amount': 'Revenue ($)', 'order_month': 'Month'},
471
- line_shape='spline'
472
- )
473
- fig4.update_traces(line_color=COLORS['primary'], line_width=4, mode='lines+markers')
474
- fig4.update_layout(
475
- height=450,
476
- title={'x': 0.5, 'xanchor': 'center', 'font': {'size': 20, 'color': '#1f2937'}},
477
- font=dict(family="Inter, system-ui, sans-serif", size=12),
478
- plot_bgcolor='white',
479
- paper_bgcolor='white',
480
- xaxis_tickangle=-45,
481
- xaxis=dict(gridcolor='#f1f5f9'),
482
- yaxis=dict(gridcolor='#f1f5f9')
483
- )
484
-
485
- return fig1, fig2, fig3, fig4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
486
 
487
  def create_customer_table(self):
488
  """Create modern customer segmentation table"""
489
- if self.df is None:
490
  return None
491
 
492
- customer_summary = self.df.groupby('customer_id').agg({
493
- 'Segment': 'first',
494
- 'Churn_Risk': 'first',
495
- 'recency_days': 'first',
496
- 'frequency': 'first',
497
- 'monetary': 'first',
498
- 'amount': 'mean'
499
- }).reset_index()
500
-
501
- if self.predictions is not None:
502
- customer_summary = customer_summary.merge(
503
- self.predictions[['customer_id', 'churn_probability']],
504
- on='customer_id',
505
- how='left'
506
- )
507
- customer_summary['churn_probability'] = customer_summary['churn_probability'].fillna(0)
508
- else:
509
- customer_summary['churn_probability'] = 0.5
510
-
511
- customer_summary['monetary'] = customer_summary['monetary'].round(2)
512
- customer_summary['amount'] = customer_summary['amount'].round(2)
513
- customer_summary['churn_probability'] = (customer_summary['churn_probability'] * 100).round(1)
514
-
515
- customer_summary.columns = [
516
- 'Customer ID', 'Segment', 'Risk Level', 'Recency (Days)',
517
- 'Frequency', 'Total Spent ($)', 'Avg Order ($)', 'Churn Probability (%)'
518
- ]
519
-
520
- return customer_summary.head(50)
 
 
 
 
 
521
 
522
  def generate_pdf_report(self):
523
  """Generate comprehensive PDF report"""
524
- if self.df is None:
525
  return None
526
 
527
  try:
@@ -546,10 +624,10 @@ class B2BCustomerAnalytics:
546
 
547
  story.append(Paragraph("Executive Summary", styles['Heading2']))
548
 
549
- total_customers = self.df['customer_id'].nunique()
550
- total_revenue = self.df['amount'].sum()
551
- avg_order_value = self.df['amount'].mean()
552
- high_risk_customers = len(self.df[self.df['Churn_Risk'] == 'High']['customer_id'].unique())
553
 
554
  summary_text = f"""
555
  This comprehensive analysis examines {total_customers} B2B customers with total revenue of ${total_revenue:,.2f}.
@@ -567,8 +645,8 @@ class B2BCustomerAnalytics:
567
 
568
  story.append(Paragraph("Key Performance Indicators", styles['Heading2']))
569
 
570
- segment_dist = self.df.groupby('customer_id')['Segment'].first().value_counts()
571
- risk_dist = self.df.groupby('customer_id')['Churn_Risk'].first().value_counts()
572
 
573
  metrics_data = [
574
  ['Metric', 'Value', 'Status'],
@@ -640,87 +718,91 @@ class B2BCustomerAnalytics:
640
 
641
  def get_customer_insights(self, customer_id):
642
  """Get detailed insights for a specific customer"""
643
- if self.df is None:
644
  return "No data available"
645
 
646
- customer_data = self.df[self.df['customer_id'] == customer_id]
647
- if customer_data.empty:
648
- return f"Customer {customer_id} not found"
649
-
650
- total_orders = len(customer_data)
651
- total_spent = customer_data['amount'].sum()
652
- avg_order_value = customer_data['amount'].mean()
653
- first_order = customer_data['order_date'].min()
654
- last_order = customer_data['order_date'].max()
655
- segment = customer_data['Segment'].iloc[0]
656
- risk_level = customer_data['Churn_Risk'].iloc[0]
657
- recency = customer_data['recency_days'].iloc[0]
658
-
659
- churn_prob = 0.5
660
- if self.predictions is not None:
661
- pred_data = self.predictions[self.predictions['customer_id'] == customer_id]
662
- if not pred_data.empty:
663
- churn_prob = pred_data['churn_probability'].iloc[0]
664
-
665
- insights_html = f"""
666
- <div style="background: white; padding: 2rem; border-radius: 1rem; box-shadow: 0 10px 25px -5px rgba(0, 0, 0, 0.1); margin-bottom: 2rem;">
667
- <div style="text-align: center; margin-bottom: 2rem;">
668
- <div style="display: inline-block; padding: 1.5rem; background: linear-gradient(135deg, #6366f1 0%, #8b5cf6 100%); border-radius: 50%; margin-bottom: 1rem;">
669
- <span style="font-size: 2rem; color: white;">📊</span>
670
- </div>
671
- <h3 style="color: #1f2937; font-size: 1.75rem; font-weight: bold; margin-bottom: 0.5rem;">
672
- Customer Profile: {customer_id}
673
- </h3>
674
- <p style="color: #6b7280; font-size: 1.1rem;">Comprehensive Customer Intelligence Report</p>
675
- </div>
676
 
677
- <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); gap: 1.5rem; margin-bottom: 2rem;">
678
- <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 1.5rem; border-radius: 1rem; color: white; text-align: center;">
679
- <h4 style="font-size: 0.9rem; opacity: 0.9; margin-bottom: 0.5rem; font-weight: 600;">CUSTOMER SEGMENT</h4>
680
- <div style="font-size: 1.5rem; font-weight: bold;">{segment}</div>
681
- </div>
682
- <div style="background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%); padding: 1.5rem; border-radius: 1rem; color: white; text-align: center;">
683
- <h4 style="font-size: 0.9rem; opacity: 0.9; margin-bottom: 0.5rem; font-weight: 600;">CHURN RISK</h4>
684
- <div style="font-size: 1.5rem; font-weight: bold;">{risk_level}</div>
685
- </div>
686
- <div style="background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%); padding: 1.5rem; border-radius: 1rem; color: white; text-align: center;">
687
- <h4 style="font-size: 0.9rem; opacity: 0.9; margin-bottom: 0.5rem; font-weight: 600;">CHURN PROBABILITY</h4>
688
- <div style="font-size: 1.5rem; font-weight: bold;">{churn_prob:.1%}</div>
689
- </div>
690
- </div>
691
 
692
- <div style="background: #f8fafc; padding: 2rem; border-radius: 1rem; margin-bottom: 2rem;">
693
- <h4 style="color: #374151; font-weight: 700; margin-bottom: 1.5rem; font-size: 1.3rem;">Transaction Analytics</h4>
694
- <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 2rem;">
695
- <div>
696
- <div style="font-size: 0.875rem; color: #6b7280; font-weight: 600; margin-bottom: 0.5rem;">Total Orders</div>
697
- <div style="font-size: 2rem; font-weight: bold; color: #1f2937;">{total_orders}</div>
 
 
 
 
 
698
  </div>
699
- <div>
700
- <div style="font-size: 0.875rem; color: #6b7280; font-weight: 600; margin-bottom: 0.5rem;">Total Spent</div>
701
- <div style="font-size: 2rem; font-weight: bold; color: #1f2937;">${total_spent:,.2f}</div>
 
 
 
 
 
 
 
702
  </div>
703
- <div>
704
- <div style="font-size: 0.875rem; color: #6b7280; font-weight: 600; margin-bottom: 0.5rem;">Avg Order Value</div>
705
- <div style="font-size: 2rem; font-weight: bold; color: #1f2937;">${avg_order_value:.2f}</div>
706
  </div>
707
- <div>
708
- <div style="font-size: 0.875rem; color: #6b7280; font-weight: 600; margin-bottom: 0.5rem;">Days Since Last Order</div>
709
- <div style="font-size: 2rem; font-weight: bold; color: #1f2937;">{recency}</div>
710
  </div>
711
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
712
  </div>
 
713
 
714
- <div style="background: linear-gradient(135deg, #f0f9ff, #e0f2fe); border-left: 4px solid #3b82f6; padding: 1.5rem; border-radius: 0.5rem;">
715
- <h4 style="color: #1e40af; font-weight: 700; margin-bottom: 1rem; font-size: 1.2rem;">Strategic Recommendations</h4>
716
- <p style="color: #1f2937; margin: 0; font-size: 1rem; line-height: 1.6;">
717
- {self._get_customer_recommendations(segment, risk_level, churn_prob, recency)}
718
- </p>
719
- </div>
720
- </div>
721
- """
722
-
723
- return insights_html
724
 
725
  def _get_customer_recommendations(self, segment, risk_level, churn_prob, recency):
726
  """Generate personalized recommendations based on customer profile"""
 
34
  'indigo': '#6366f1'
35
  }
36
 
37
+ plt.style.use('default') # Changed from seaborn-v0_8-whitegrid for compatibility
38
  sns.set_palette("husl")
39
 
40
  class B2BCustomerAnalytics:
41
  def __init__(self):
42
  self.df = None
43
+ self.processed_df = None
44
  self.model = None
45
  self.feature_importance = None
46
  self.predictions = None
 
51
  if file is None:
52
  return "Please upload a CSV file", None, None, None
53
 
54
+ # Load raw data
55
  self.df = pd.read_csv(file.name)
56
 
57
+ # Check for required columns - be flexible with column names
58
  required_columns = ['customer_id', 'order_date', 'amount']
59
+ df_columns_lower = [col.lower() for col in self.df.columns]
 
 
60
 
61
+ # Map common variations
62
+ column_mapping = {}
63
+ for req_col in required_columns:
64
+ found = False
65
+ for df_col in self.df.columns:
66
+ if req_col in df_col.lower() or df_col.lower() in req_col:
67
+ column_mapping[req_col] = df_col
68
+ found = True
69
+ break
70
+ if not found:
71
+ return f"Missing required column: {req_col}. Available columns: {list(self.df.columns)}", None, None, None
72
+
73
+ # Rename columns to standard names
74
+ self.df = self.df.rename(columns=column_mapping)
75
+
76
+ # Convert order_date to datetime
77
  self.df['order_date'] = pd.to_datetime(self.df['order_date'])
78
 
79
+ # Calculate RFM metrics
80
+ self.processed_df = self.calculate_rfm_metrics(self.df.copy())
81
 
82
+ # Perform customer segmentation
83
+ self.processed_df = self.perform_customer_segmentation(self.processed_df)
84
 
85
+ # Generate summary
86
  summary_html, kpi_cards = self.generate_summary_dashboard()
87
 
88
+ return "Data loaded successfully!", summary_html, self.processed_df.head(20), kpi_cards
89
 
90
  except Exception as e:
91
  return f"Error loading data: {str(e)}", None, None, None
92
 
93
  def calculate_rfm_metrics(self, df):
94
  """Calculate RFM metrics from transaction data"""
95
+ try:
96
+ current_date = df['order_date'].max() + timedelta(days=1)
97
+
98
+ # Calculate customer-level metrics
99
+ customer_metrics = df.groupby('customer_id').agg({
100
+ 'order_date': ['max', 'count'],
101
+ 'amount': ['sum', 'mean']
102
+ }).round(2)
103
+
104
+ # Flatten column names
105
+ customer_metrics.columns = ['last_order_date', 'frequency', 'monetary', 'avg_order_value']
106
+ customer_metrics['recency_days'] = (current_date - customer_metrics['last_order_date']).dt.days
107
+
108
+ # Merge back with original data
109
+ df_with_rfm = df.merge(
110
+ customer_metrics[['recency_days', 'frequency', 'monetary']],
111
+ left_on='customer_id',
112
+ right_index=True,
113
+ how='left'
114
+ )
115
+
116
+ return df_with_rfm
117
+
118
+ except Exception as e:
119
+ print(f"Error in calculate_rfm_metrics: {e}")
120
+ return df
121
 
122
  def perform_customer_segmentation(self, df):
123
  """Perform customer segmentation based on RFM analysis"""
124
+ try:
125
+ # Get unique customer data
126
+ customer_df = df.groupby('customer_id').agg({
127
+ 'recency_days': 'first',
128
+ 'frequency': 'first',
129
+ 'monetary': 'first'
130
+ }).reset_index()
131
+
132
+ # Calculate RFM scores using quantiles
133
+ try:
134
+ customer_df['R_Score'] = pd.qcut(customer_df['recency_days'].rank(method='first'), 5, labels=[5,4,3,2,1], duplicates='drop')
135
+ customer_df['F_Score'] = pd.qcut(customer_df['frequency'].rank(method='first'), 5, labels=[1,2,3,4,5], duplicates='drop')
136
+ customer_df['M_Score'] = pd.qcut(customer_df['monetary'].rank(method='first'), 5, labels=[1,2,3,4,5], duplicates='drop')
137
+ except ValueError:
138
+ # If qcut fails due to duplicate values, use simple binning
139
+ customer_df['R_Score'] = pd.cut(customer_df['recency_days'], 5, labels=[5,4,3,2,1])
140
+ customer_df['F_Score'] = pd.cut(customer_df['frequency'], 5, labels=[1,2,3,4,5])
141
+ customer_df['M_Score'] = pd.cut(customer_df['monetary'], 5, labels=[1,2,3,4,5])
142
+
143
+ customer_df['R_Score'] = customer_df['R_Score'].astype(int)
144
+ customer_df['F_Score'] = customer_df['F_Score'].astype(int)
145
+ customer_df['M_Score'] = customer_df['M_Score'].astype(int)
146
+
147
+ def segment_customers(row):
148
+ if row['R_Score'] >= 4 and row['F_Score'] >= 4 and row['M_Score'] >= 4:
149
+ return 'Champions'
150
+ elif row['R_Score'] >= 3 and row['F_Score'] >= 3 and row['M_Score'] >= 3:
151
+ return 'Loyal Customers'
152
+ elif row['R_Score'] >= 3 and row['F_Score'] >= 2:
153
+ return 'Potential Loyalists'
154
+ elif row['R_Score'] >= 4 and row['F_Score'] <= 2:
155
+ return 'New Customers'
156
+ elif row['R_Score'] <= 2 and row['F_Score'] >= 3:
157
+ return 'At Risk'
158
+ elif row['R_Score'] <= 2 and row['F_Score'] <= 2 and row['M_Score'] >= 3:
159
+ return 'Cannot Lose Them'
160
+ elif row['R_Score'] <= 2 and row['F_Score'] <= 2 and row['M_Score'] <= 2:
161
+ return 'Lost Customers'
162
+ else:
163
+ return 'Others'
164
+
165
+ customer_df['Segment'] = customer_df.apply(segment_customers, axis=1)
166
+
167
+ customer_df['Churn_Risk'] = customer_df.apply(lambda x:
168
+ 'High' if x['Segment'] in ['Lost Customers', 'At Risk'] else
169
+ 'Medium' if x['Segment'] in ['Others', 'Cannot Lose Them'] else 'Low', axis=1)
170
+
171
+ # Merge segmentation data back
172
+ segment_data = customer_df[['customer_id', 'Segment', 'Churn_Risk', 'R_Score', 'F_Score', 'M_Score']]
173
+ df_with_segments = df.merge(segment_data, on='customer_id', how='left')
174
+
175
+ return df_with_segments
176
+
177
+ except Exception as e:
178
+ print(f"Error in perform_customer_segmentation: {e}")
179
+ # Return original df with dummy segments if segmentation fails
180
+ df['Segment'] = 'Others'
181
+ df['Churn_Risk'] = 'Medium'
182
+ df['R_Score'] = 3
183
+ df['F_Score'] = 3
184
+ df['M_Score'] = 3
185
+ return df
186
 
187
  def generate_summary_dashboard(self):
188
  """Generate modern dashboard summary with KPI cards"""
189
+ if self.processed_df is None:
190
  return "No data loaded", ""
191
 
192
+ try:
193
+ total_customers = self.processed_df['customer_id'].nunique()
194
+ total_orders = len(self.processed_df)
195
+ total_revenue = self.processed_df['amount'].sum()
196
+ avg_order_value = self.processed_df['amount'].mean()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
 
198
+ # Get segment and risk distributions
199
+ segment_dist = self.processed_df.groupby('customer_id')['Segment'].first().value_counts()
200
+ risk_dist = self.processed_df.groupby('customer_id')['Churn_Risk'].first().value_counts()
 
 
 
 
 
201
 
202
+ # Create modern horizontal dashboard
203
+ summary_html = f"""
204
+ <div style="background: linear-gradient(135deg, #6366f1 0%, #8b5cf6 100%); padding: 2rem; border-radius: 1rem; color: white; margin-bottom: 2rem; text-align: center;">
205
+ <h1 style="font-size: 2.5rem; font-weight: bold; margin-bottom: 0.5rem;">
206
+ B2B Customer Analytics Platform
207
+ </h1>
208
+ <p style="font-size: 1.2rem; opacity: 0.9;">
209
+ Enterprise Customer Health Monitoring & Churn Prediction System
210
+ </p>
211
  </div>
212
 
213
+ <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 1.5rem; margin-bottom: 3rem;">
214
+ <div style="background: white; padding: 1.5rem; border-radius: 1rem; box-shadow: 0 10px 25px -5px rgba(0, 0, 0, 0.1); border-left: 4px solid #3b82f6;">
215
+ <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 1rem;">
216
+ <div style="padding: 0.75rem; background: #dbeafe; border-radius: 0.5rem; color: #1d4ed8;">📊</div>
217
+ <span style="font-size: 2rem; font-weight: bold; color: #3b82f6;">{total_customers:,}</span>
218
+ </div>
219
+ <h3 style="color: #1f2937; font-weight: 600; margin: 0;">Total Customers</h3>
220
+ <p style="color: #6b7280; font-size: 0.875rem; margin: 0.25rem 0 0 0;">Active enterprise clients</p>
221
  </div>
222
+
223
+ <div style="background: white; padding: 1.5rem; border-radius: 1rem; box-shadow: 0 10px 25px -5px rgba(0, 0, 0, 0.1); border-left: 4px solid #10b981;">
224
+ <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 1rem;">
225
+ <div style="padding: 0.75rem; background: #d1fae5; border-radius: 0.5rem; color: #047857;">💰</div>
226
+ <span style="font-size: 2rem; font-weight: bold; color: #10b981;">${(total_revenue/1000000):.1f}M</span>
227
+ </div>
228
+ <h3 style="color: #1f2937; font-weight: 600; margin: 0;">Total Revenue</h3>
229
+ <p style="color: #6b7280; font-size: 0.875rem; margin: 0.25rem 0 0 0;">Contract value sum</p>
230
  </div>
231
+
232
+ <div style="background: white; padding: 1.5rem; border-radius: 1rem; box-shadow: 0 10px 25px -5px rgba(0, 0, 0, 0.1); border-left: 4px solid #8b5cf6;">
233
+ <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 1rem;">
234
+ <div style="padding: 0.75rem; background: #ede9fe; border-radius: 0.5rem; color: #7c3aed;">📈</div>
235
+ <span style="font-size: 2rem; font-weight: bold; color: #8b5cf6;">${avg_order_value:.0f}</span>
236
+ </div>
237
+ <h3 style="color: #1f2937; font-weight: 600; margin: 0;">Avg Order Value</h3>
238
+ <p style="color: #6b7280; font-size: 0.875rem; margin: 0.25rem 0 0 0;">Per order average</p>
239
+ </div>
240
+
241
+ <div style="background: white; padding: 1.5rem; border-radius: 1rem; box-shadow: 0 10px 25px -5px rgba(0, 0, 0, 0.1); border-left: 4px solid #ef4444;">
242
+ <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 1rem;">
243
+ <div style="padding: 0.75rem; background: #fee2e2; border-radius: 0.5rem; color: #dc2626;">🚨</div>
244
+ <span style="font-size: 2rem; font-weight: bold; color: #ef4444;">{risk_dist.get('High', 0)}</span>
245
+ </div>
246
+ <h3 style="color: #1f2937; font-weight: 600; margin: 0;">High Risk Clients</h3>
247
+ <p style="color: #6b7280; font-size: 0.875rem; margin: 0.25rem 0 0 0;">Need immediate attention</p>
248
+ </div>
249
+
250
+ <div style="background: white; padding: 1.5rem; border-radius: 1rem; box-shadow: 0 10px 25px -5px rgba(0, 0, 0, 0.1); border-left: 4px solid #f59e0b;">
251
+ <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 1rem;">
252
+ <div style="padding: 0.75rem; background: #fef3c7; border-radius: 0.5rem; color: #d97706;">🏆</div>
253
+ <span style="font-size: 2rem; font-weight: bold; color: #f59e0b;">{segment_dist.get('Champions', 0)}</span>
254
+ </div>
255
+ <h3 style="color: #1f2937; font-weight: 600; margin: 0;">Champion Customers</h3>
256
+ <p style="color: #6b7280; font-size: 0.875rem; margin: 0.25rem 0 0 0;">Top tier clients</p>
257
+ </div>
258
+
259
+ <div style="background: white; padding: 1.5rem; border-radius: 1rem; box-shadow: 0 10px 25px -5px rgba(0, 0, 0, 0.1); border-left: 4px solid #06b6d4;">
260
+ <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 1rem;">
261
+ <div style="padding: 0.75rem; background: #cffafe; border-radius: 0.5rem; color: #0891b2;">✅</div>
262
+ <span style="font-size: 2rem; font-weight: bold; color: #06b6d4;">{risk_dist.get('Low', 0)}</span>
263
+ </div>
264
+ <h3 style="color: #1f2937; font-weight: 600; margin: 0;">Healthy Customers</h3>
265
+ <p style="color: #6b7280; font-size: 0.875rem; margin: 0.25rem 0 0 0;">Low churn risk</p>
266
  </div>
 
 
267
  </div>
268
+ """
269
+
270
+ kpi_data = [
271
+ ["Total Customers", f"{total_customers:,}", "👥", "#3b82f6"],
272
+ ["Total Revenue", f"${total_revenue/1000000:.1f}M", "💰", "#10b981"],
273
+ ["Avg Order Value", f"${avg_order_value:.0f}", "📈", "#8b5cf6"],
274
+ ["High Risk Customers", f"{risk_dist.get('High', 0)}", "🚨", "#ef4444"],
275
+ ["Champion Customers", f"{segment_dist.get('Champions', 0)}", "🏆", "#f59e0b"],
276
+ ["Healthy Customers", f"{risk_dist.get('Low', 0)}", "", "#06b6d4"]
277
+ ]
278
+
279
+ return summary_html, kpi_data
280
+
281
+ except Exception as e:
282
+ return f"Error generating dashboard: {str(e)}", []
283
 
284
  def train_churn_model(self):
285
  """Train churn prediction model"""
286
+ if self.processed_df is None:
287
+ return "No data available. Please upload and process a CSV file first.", None
288
 
289
  try:
290
+ # Prepare customer-level features
291
+ customer_features = self.processed_df.groupby('customer_id').agg({
292
  'recency_days': 'first',
293
  'frequency': 'first',
294
  'monetary': 'first',
 
296
  'order_date': ['min', 'max']
297
  }).reset_index()
298
 
299
+ # Flatten column names
300
  customer_features.columns = ['customer_id', 'recency_days', 'frequency', 'monetary',
301
  'avg_amount', 'std_amount', 'min_amount', 'max_amount',
302
  'first_order', 'last_order']
303
 
304
+ # Handle missing values
305
  customer_features['std_amount'].fillna(0, inplace=True)
306
 
307
+ # Calculate additional features
308
  customer_features['customer_lifetime'] = (customer_features['last_order'] - customer_features['first_order']).dt.days
309
  customer_features['customer_lifetime'].fillna(0, inplace=True)
310
 
311
+ # Create churn labels based on recency
312
+ customer_features['churn_label'] = (customer_features['recency_days'] > 90).astype(int)
 
 
 
313
 
314
+ # Select features for modeling
315
  feature_cols = ['recency_days', 'frequency', 'monetary', 'avg_amount', 'std_amount',
316
  'min_amount', 'max_amount', 'customer_lifetime']
317
 
318
  X = customer_features[feature_cols]
319
  y = customer_features['churn_label']
320
 
321
+ # Train-test split
322
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
323
 
324
+ # Train model
325
  self.model = xgb.XGBClassifier(random_state=42, eval_metric='logloss')
326
  self.model.fit(X_train, y_train)
327
 
328
+ # Make predictions
329
  y_pred = self.model.predict(X_test)
330
  y_pred_proba = self.model.predict_proba(X_test)[:, 1]
331
 
332
+ # Feature importance
333
  self.feature_importance = pd.DataFrame({
334
  'feature': feature_cols,
335
  'importance': self.model.feature_importances_
336
  }).sort_values('importance', ascending=False)
337
 
338
+ # Predict for all customers
339
  all_predictions = self.model.predict_proba(X)[:, 1]
340
  customer_features['churn_probability'] = all_predictions
341
  self.predictions = customer_features
 
397
  if self.feature_importance is None:
398
  return None
399
 
400
+ try:
401
+ fig = px.bar(
402
+ self.feature_importance.head(8),
403
+ x='importance',
404
+ y='feature',
405
+ orientation='h',
406
+ title='Feature Importance Analysis',
407
+ labels={'importance': 'Importance Score', 'feature': 'Features'},
408
+ color='importance',
409
+ color_continuous_scale='viridis'
410
+ )
411
+
412
+ fig.update_layout(
413
+ height=500,
414
+ showlegend=False,
415
+ plot_bgcolor='white',
416
+ paper_bgcolor='white',
417
+ title={
418
+ 'text': '<b>Feature Importance Analysis</b>',
419
+ 'x': 0.5,
420
+ 'xanchor': 'center',
421
+ 'font': {'size': 20, 'color': '#1f2937'}
422
+ },
423
+ font=dict(family="Inter, system-ui, sans-serif", size=12),
424
+ yaxis={'categoryorder': 'total ascending'},
425
+ xaxis=dict(gridcolor='#f1f5f9'),
426
+ yaxis_title=dict(font_size=14),
427
+ xaxis_title=dict(font_size=14)
428
+ )
429
+
430
+ return fig
431
+
432
+ except Exception as e:
433
+ print(f"Error creating performance chart: {e}")
434
+ return None
435
 
436
  def create_visualizations(self):
437
  """Create comprehensive modern visualizations"""
438
+ if self.processed_df is None:
439
  return None, None, None, None
440
 
441
+ try:
442
+ # 1. Customer Segment Distribution
443
+ segment_data = self.processed_df.groupby('customer_id')['Segment'].first().value_counts().reset_index()
444
+ segment_data.columns = ['Segment', 'Count']
445
+
446
+ fig1 = px.pie(
447
+ segment_data,
448
+ values='Count',
449
+ names='Segment',
450
+ title='<b>Customer Segment Distribution</b>',
451
+ hole=0.4,
452
+ color_discrete_sequence=['#6366f1', '#10b981', '#f59e0b', '#ef4444', '#8b5cf6', '#ec4899']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
453
  )
454
+ fig1.update_traces(textposition='inside', textinfo='percent+label', textfont_size=13)
455
+ fig1.update_layout(
456
+ height=450,
457
+ showlegend=True,
458
+ title={'x': 0.5, 'xanchor': 'center', 'font': {'size': 20, 'color': '#1f2937'}},
459
+ font=dict(family="Inter, system-ui, sans-serif", size=12),
460
+ paper_bgcolor='white',
461
+ plot_bgcolor='white'
 
 
 
 
 
462
  )
463
+
464
+ # 2. RFM Analysis
465
+ customer_rfm = self.processed_df.groupby('customer_id').agg({
466
+ 'recency_days': 'first',
467
+ 'frequency': 'first',
468
+ 'monetary': 'first',
469
+ 'Segment': 'first'
470
+ }).reset_index()
471
+
472
+ fig2 = px.scatter(
473
+ customer_rfm,
474
+ x='recency_days',
475
+ y='frequency',
476
+ size='monetary',
477
+ color='Segment',
478
+ title='<b>RFM Customer Behavior Matrix</b>',
479
+ labels={
480
+ 'recency_days': 'Days Since Last Purchase',
481
+ 'frequency': 'Purchase Frequency',
482
+ 'monetary': 'Total Revenue'
483
+ },
484
+ color_discrete_sequence=['#6366f1', '#10b981', '#f59e0b', '#ef4444', '#8b5cf6'],
485
+ size_max=60
486
+ )
487
+ fig2.update_layout(
488
+ height=500,
489
+ title={'x': 0.5, 'xanchor': 'center', 'font': {'size': 20, 'color': '#1f2937'}},
490
+ font=dict(family="Inter, system-ui, sans-serif", size=12),
491
+ paper_bgcolor='white',
492
+ plot_bgcolor='white'
493
+ )
494
+
495
+ # 3. Churn Risk Analysis
496
+ if self.predictions is not None:
497
+ fig3 = px.histogram(
498
+ self.predictions,
499
+ x='churn_probability',
500
+ nbins=20,
501
+ title='<b>Churn Probability Distribution</b>',
502
+ labels={'churn_probability': 'Churn Probability', 'count': 'Number of Customers'},
503
+ color_discrete_sequence=[COLORS['primary']]
504
+ )
505
+ fig3.add_vline(x=0.5, line_dash="dash", line_color="#ef4444", line_width=2,
506
+ annotation_text="High Risk Threshold", annotation_position="top")
507
+ else:
508
+ risk_data = self.processed_df.groupby('customer_id')['Churn_Risk'].first().value_counts().reset_index()
509
+ risk_data.columns = ['Risk_Level', 'Count']
510
+ colors_map = {'High': '#ef4444', 'Medium': '#f59e0b', 'Low': '#10b981'}
511
+ fig3 = px.bar(
512
+ risk_data,
513
+ x='Risk_Level',
514
+ y='Count',
515
+ title='<b>Customer Churn Risk Distribution</b>',
516
+ color='Risk_Level',
517
+ color_discrete_map=colors_map
518
+ )
519
+
520
+ fig3.update_layout(
521
+ height=450,
522
+ showlegend=False,
523
+ title={'x': 0.5, 'xanchor': 'center', 'font': {'size': 20, 'color': '#1f2937'}},
524
+ font=dict(family="Inter, system-ui, sans-serif", size=12),
525
+ plot_bgcolor='white',
526
+ paper_bgcolor='white'
527
+ )
528
+
529
+ # 4. Revenue Trends
530
+ self.processed_df['order_month'] = self.processed_df['order_date'].dt.to_period('M')
531
+ monthly_revenue = self.processed_df.groupby('order_month')['amount'].sum().reset_index()
532
+ monthly_revenue['order_month'] = monthly_revenue['order_month'].astype(str)
533
+
534
+ fig4 = px.line(
535
+ monthly_revenue,
536
+ x='order_month',
537
+ y='amount',
538
+ title='<b>Monthly Revenue Trends</b>',
539
+ labels={'amount': 'Revenue ($)', 'order_month': 'Month'},
540
+ line_shape='spline'
541
+ )
542
+ fig4.update_traces(line_color=COLORS['primary'], line_width=4, mode='lines+markers')
543
+ fig4.update_layout(
544
+ height=450,
545
+ title={'x': 0.5, 'xanchor': 'center', 'font': {'size': 20, 'color': '#1f2937'}},
546
+ font=dict(family="Inter, system-ui, sans-serif", size=12),
547
+ plot_bgcolor='white',
548
+ paper_bgcolor='white',
549
+ xaxis_tickangle=-45,
550
+ xaxis=dict(gridcolor='#f1f5f9'),
551
+ yaxis=dict(gridcolor='#f1f5f9')
552
+ )
553
+
554
+ return fig1, fig2, fig3, fig4
555
+
556
+ except Exception as e:
557
+ print(f"Error creating visualizations: {e}")
558
+ return None, None, None, None
559
 
560
  def create_customer_table(self):
561
  """Create modern customer segmentation table"""
562
+ if self.processed_df is None:
563
  return None
564
 
565
+ try:
566
+ customer_summary = self.processed_df.groupby('customer_id').agg({
567
+ 'Segment': 'first',
568
+ 'Churn_Risk': 'first',
569
+ 'recency_days': 'first',
570
+ 'frequency': 'first',
571
+ 'monetary': 'first',
572
+ 'amount': 'mean'
573
+ }).reset_index()
574
+
575
+ if self.predictions is not None:
576
+ customer_summary = customer_summary.merge(
577
+ self.predictions[['customer_id', 'churn_probability']],
578
+ on='customer_id',
579
+ how='left'
580
+ )
581
+ customer_summary['churn_probability'] = customer_summary['churn_probability'].fillna(0)
582
+ else:
583
+ customer_summary['churn_probability'] = 0.5
584
+
585
+ customer_summary['monetary'] = customer_summary['monetary'].round(2)
586
+ customer_summary['amount'] = customer_summary['amount'].round(2)
587
+ customer_summary['churn_probability'] = (customer_summary['churn_probability'] * 100).round(1)
588
+
589
+ customer_summary.columns = [
590
+ 'Customer ID', 'Segment', 'Risk Level', 'Recency (Days)',
591
+ 'Frequency', 'Total Spent ($)', 'Avg Order ($)', 'Churn Probability (%)'
592
+ ]
593
+
594
+ return customer_summary.head(50)
595
+
596
+ except Exception as e:
597
+ print(f"Error creating customer table: {e}")
598
+ return None
599
 
600
  def generate_pdf_report(self):
601
  """Generate comprehensive PDF report"""
602
+ if self.processed_df is None:
603
  return None
604
 
605
  try:
 
624
 
625
  story.append(Paragraph("Executive Summary", styles['Heading2']))
626
 
627
+ total_customers = self.processed_df['customer_id'].nunique()
628
+ total_revenue = self.processed_df['amount'].sum()
629
+ avg_order_value = self.processed_df['amount'].mean()
630
+ high_risk_customers = len(self.processed_df[self.processed_df['Churn_Risk'] == 'High']['customer_id'].unique())
631
 
632
  summary_text = f"""
633
  This comprehensive analysis examines {total_customers} B2B customers with total revenue of ${total_revenue:,.2f}.
 
645
 
646
  story.append(Paragraph("Key Performance Indicators", styles['Heading2']))
647
 
648
+ segment_dist = self.processed_df.groupby('customer_id')['Segment'].first().value_counts()
649
+ risk_dist = self.processed_df.groupby('customer_id')['Churn_Risk'].first().value_counts()
650
 
651
  metrics_data = [
652
  ['Metric', 'Value', 'Status'],
 
718
 
719
  def get_customer_insights(self, customer_id):
720
  """Get detailed insights for a specific customer"""
721
+ if self.processed_df is None:
722
  return "No data available"
723
 
724
+ try:
725
+ customer_data = self.processed_df[self.processed_df['customer_id'] == customer_id]
726
+ if customer_data.empty:
727
+ return f"Customer {customer_id} not found"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
728
 
729
+ total_orders = len(customer_data)
730
+ total_spent = customer_data['amount'].sum()
731
+ avg_order_value = customer_data['amount'].mean()
732
+ first_order = customer_data['order_date'].min()
733
+ last_order = customer_data['order_date'].max()
734
+ segment = customer_data['Segment'].iloc[0]
735
+ risk_level = customer_data['Churn_Risk'].iloc[0]
736
+ recency = customer_data['recency_days'].iloc[0]
 
 
 
 
 
 
737
 
738
+ churn_prob = 0.5
739
+ if self.predictions is not None:
740
+ pred_data = self.predictions[self.predictions['customer_id'] == customer_id]
741
+ if not pred_data.empty:
742
+ churn_prob = pred_data['churn_probability'].iloc[0]
743
+
744
+ insights_html = f"""
745
+ <div style="background: white; padding: 2rem; border-radius: 1rem; box-shadow: 0 10px 25px -5px rgba(0, 0, 0, 0.1); margin-bottom: 2rem;">
746
+ <div style="text-align: center; margin-bottom: 2rem;">
747
+ <div style="display: inline-block; padding: 1.5rem; background: linear-gradient(135deg, #6366f1 0%, #8b5cf6 100%); border-radius: 50%; margin-bottom: 1rem;">
748
+ <span style="font-size: 2rem; color: white;">📊</span>
749
  </div>
750
+ <h3 style="color: #1f2937; font-size: 1.75rem; font-weight: bold; margin-bottom: 0.5rem;">
751
+ Customer Profile: {customer_id}
752
+ </h3>
753
+ <p style="color: #6b7280; font-size: 1.1rem;">Comprehensive Customer Intelligence Report</p>
754
+ </div>
755
+
756
+ <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); gap: 1.5rem; margin-bottom: 2rem;">
757
+ <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 1.5rem; border-radius: 1rem; color: white; text-align: center;">
758
+ <h4 style="font-size: 0.9rem; opacity: 0.9; margin-bottom: 0.5rem; font-weight: 600;">CUSTOMER SEGMENT</h4>
759
+ <div style="font-size: 1.5rem; font-weight: bold;">{segment}</div>
760
  </div>
761
+ <div style="background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%); padding: 1.5rem; border-radius: 1rem; color: white; text-align: center;">
762
+ <h4 style="font-size: 0.9rem; opacity: 0.9; margin-bottom: 0.5rem; font-weight: 600;">CHURN RISK</h4>
763
+ <div style="font-size: 1.5rem; font-weight: bold;">{risk_level}</div>
764
  </div>
765
+ <div style="background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%); padding: 1.5rem; border-radius: 1rem; color: white; text-align: center;">
766
+ <h4 style="font-size: 0.9rem; opacity: 0.9; margin-bottom: 0.5rem; font-weight: 600;">CHURN PROBABILITY</h4>
767
+ <div style="font-size: 1.5rem; font-weight: bold;">{churn_prob:.1%}</div>
768
  </div>
769
  </div>
770
+
771
+ <div style="background: #f8fafc; padding: 2rem; border-radius: 1rem; margin-bottom: 2rem;">
772
+ <h4 style="color: #374151; font-weight: 700; margin-bottom: 1.5rem; font-size: 1.3rem;">Transaction Analytics</h4>
773
+ <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 2rem;">
774
+ <div>
775
+ <div style="font-size: 0.875rem; color: #6b7280; font-weight: 600; margin-bottom: 0.5rem;">Total Orders</div>
776
+ <div style="font-size: 2rem; font-weight: bold; color: #1f2937;">{total_orders}</div>
777
+ </div>
778
+ <div>
779
+ <div style="font-size: 0.875rem; color: #6b7280; font-weight: 600; margin-bottom: 0.5rem;">Total Spent</div>
780
+ <div style="font-size: 2rem; font-weight: bold; color: #1f2937;">${total_spent:,.2f}</div>
781
+ </div>
782
+ <div>
783
+ <div style="font-size: 0.875rem; color: #6b7280; font-weight: 600; margin-bottom: 0.5rem;">Avg Order Value</div>
784
+ <div style="font-size: 2rem; font-weight: bold; color: #1f2937;">${avg_order_value:.2f}</div>
785
+ </div>
786
+ <div>
787
+ <div style="font-size: 0.875rem; color: #6b7280; font-weight: 600; margin-bottom: 0.5rem;">Days Since Last Order</div>
788
+ <div style="font-size: 2rem; font-weight: bold; color: #1f2937;">{recency}</div>
789
+ </div>
790
+ </div>
791
+ </div>
792
+
793
+ <div style="background: linear-gradient(135deg, #f0f9ff, #e0f2fe); border-left: 4px solid #3b82f6; padding: 1.5rem; border-radius: 0.5rem;">
794
+ <h4 style="color: #1e40af; font-weight: 700; margin-bottom: 1rem; font-size: 1.2rem;">Strategic Recommendations</h4>
795
+ <p style="color: #1f2937; margin: 0; font-size: 1rem; line-height: 1.6;">
796
+ {self._get_customer_recommendations(segment, risk_level, churn_prob, recency)}
797
+ </p>
798
+ </div>
799
  </div>
800
+ """
801
 
802
+ return insights_html
803
+
804
+ except Exception as e:
805
+ return f"Error getting customer insights: {str(e)}"
 
 
 
 
 
 
806
 
807
  def _get_customer_recommendations(self, segment, risk_level, churn_prob, recency):
808
  """Generate personalized recommendations based on customer profile"""