entropy25 committed on
Commit
67a4318
·
verified ·
1 Parent(s): 0d5c5e1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +170 -680
app.py CHANGED
@@ -22,22 +22,18 @@ import base64
22
  import warnings
23
  warnings.filterwarnings('ignore')
24
 
25
- # Set modern color palette
26
  COLORS = {
27
  'primary': '#6366f1',
28
  'success': '#10b981',
29
  'warning': '#f59e0b',
30
  'danger': '#ef4444',
31
  'purple': '#8b5cf6',
32
- 'pink': '#ec4899',
33
  'blue': '#3b82f6',
34
- 'indigo': '#6366f1'
35
  }
36
 
37
- # Set plotting style for modern look
38
- plt.style.use('seaborn-v0_8-whitegrid')
39
- sns.set_palette("husl")
40
-
41
  class B2BCustomerAnalytics:
42
  def __init__(self):
43
  self.df = None
@@ -51,10 +47,9 @@ class B2BCustomerAnalytics:
51
  if file is None:
52
  return "Please upload a CSV file", None, None, None
53
 
54
- # Read the CSV file
55
  self.df = pd.read_csv(file.name)
56
 
57
- # Basic data validation
58
  required_columns = ['customer_id', 'order_date', 'amount']
59
  missing_cols = [col for col in required_columns if col not in self.df.columns]
60
  if missing_cols:
@@ -64,16 +59,16 @@ class B2BCustomerAnalytics:
64
  self.df['order_date'] = pd.to_datetime(self.df['order_date'])
65
 
66
  # Calculate RFM metrics if not present
67
- if 'recency_days' not in self.df.columns or 'frequency' not in self.df.columns or 'monetary' not in self.df.columns:
68
  self.df = self.calculate_rfm_metrics(self.df)
69
 
70
  # Customer segmentation
71
  self.df = self.perform_customer_segmentation(self.df)
72
 
73
- # Generate summary and KPIs
74
- summary_html, kpi_cards = self.generate_summary_dashboard()
75
 
76
- return "Data loaded successfully!", summary_html, self.df.head(20), kpi_cards
77
 
78
  except Exception as e:
79
  return f"Error loading data: {str(e)}", None, None, None
@@ -82,7 +77,6 @@ class B2BCustomerAnalytics:
82
  """Calculate RFM metrics from transaction data"""
83
  current_date = df['order_date'].max() + timedelta(days=1)
84
 
85
- # Group by customer
86
  customer_metrics = df.groupby('customer_id').agg({
87
  'order_date': ['max', 'count'],
88
  'amount': ['sum', 'mean']
@@ -91,7 +85,6 @@ class B2BCustomerAnalytics:
91
  customer_metrics.columns = ['last_order_date', 'frequency', 'monetary', 'avg_order_value']
92
  customer_metrics['recency_days'] = (current_date - customer_metrics['last_order_date']).dt.days
93
 
94
- # Merge back with original data
95
  df_with_rfm = df.merge(customer_metrics[['recency_days', 'frequency', 'monetary']],
96
  left_on='customer_id', right_index=True, how='left')
97
 
@@ -110,12 +103,10 @@ class B2BCustomerAnalytics:
110
  customer_df['F_Score'] = pd.qcut(customer_df['frequency'].rank(method='first'), 5, labels=[1,2,3,4,5])
111
  customer_df['M_Score'] = pd.qcut(customer_df['monetary'].rank(method='first'), 5, labels=[1,2,3,4,5])
112
 
113
- # Convert to numeric
114
  customer_df['R_Score'] = customer_df['R_Score'].astype(int)
115
  customer_df['F_Score'] = customer_df['F_Score'].astype(int)
116
  customer_df['M_Score'] = customer_df['M_Score'].astype(int)
117
 
118
- # Create segments
119
  def segment_customers(row):
120
  if row['R_Score'] >= 4 and row['F_Score'] >= 4 and row['M_Score'] >= 4:
121
  return 'Champions'
@@ -135,83 +126,132 @@ class B2BCustomerAnalytics:
135
  return 'Others'
136
 
137
  customer_df['Segment'] = customer_df.apply(segment_customers, axis=1)
138
-
139
- # Calculate churn risk
140
  customer_df['Churn_Risk'] = customer_df.apply(lambda x:
141
  'High' if x['Segment'] in ['Lost Customers', 'At Risk'] else
142
  'Medium' if x['Segment'] in ['Others', 'Cannot Lose Them'] else 'Low', axis=1)
143
 
144
- # Merge segments back to original data
145
  segment_data = customer_df[['customer_id', 'Segment', 'Churn_Risk', 'R_Score', 'F_Score', 'M_Score']]
146
  df_with_segments = df.merge(segment_data, on='customer_id', how='left')
147
 
148
  return df_with_segments
149
 
150
- def generate_summary_dashboard(self):
151
- """Generate modern dashboard summary with KPI cards"""
152
  if self.df is None:
153
  return "No data loaded", ""
154
 
 
155
  total_customers = self.df['customer_id'].nunique()
156
  total_orders = len(self.df)
157
  total_revenue = self.df['amount'].sum()
158
  avg_order_value = self.df['amount'].mean()
159
 
160
- # Segment and risk distributions
161
  segment_dist = self.df.groupby('customer_id')['Segment'].first().value_counts()
162
  risk_dist = self.df.groupby('customer_id')['Churn_Risk'].first().value_counts()
163
 
164
- # Create modern HTML summary
165
- summary_html = f"""
166
- <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 2rem; border-radius: 1rem; color: white; margin-bottom: 2rem;">
167
- <h2 style="font-size: 2rem; font-weight: bold; margin-bottom: 0.5rem; text-align: center;">
168
- 🏢 B2B Customer Analytics Dashboard
169
- </h2>
170
- <p style="text-align: center; font-size: 1.1rem; opacity: 0.9;">
 
 
 
 
171
  Enterprise Customer Health Monitoring & Churn Prediction System
172
  </p>
173
  </div>
174
 
175
- <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(280px, 1fr)); gap: 1.5rem; margin-bottom: 2rem;">
176
- <div style="background: white; padding: 1.5rem; border-radius: 1rem; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1); border-left: 4px solid #3b82f6;">
177
- <h3 style="color: #1f2937; font-weight: 600; margin-bottom: 1rem;">📊 Data Overview</h3>
178
- <p><strong>Total Customers:</strong> {total_customers:,}</p>
179
- <p><strong>Total Orders:</strong> {total_orders:,}</p>
180
- <p><strong>Total Revenue:</strong> ${total_revenue:,.2f}</p>
181
- <p><strong>Avg Order Value:</strong> ${avg_order_value:.2f}</p>
 
 
 
 
182
  </div>
183
 
184
- <div style="background: white; padding: 1.5rem; border-radius: 1rem; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1); border-left: 4px solid #10b981;">
185
- <h3 style="color: #1f2937; font-weight: 600; margin-bottom: 1rem;">🎯 Customer Segments</h3>
186
- {''.join([f'<p><strong>{segment}:</strong> {count}</p>' for segment, count in segment_dist.items()])}
 
 
 
 
 
 
187
  </div>
188
 
189
- <div style="background: white; padding: 1.5rem; border-radius: 1rem; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1); border-left: 4px solid #ef4444;">
190
- <h3 style="color: #1f2937; font-weight: 600; margin-bottom: 1rem;">⚠️ Churn Risk Analysis</h3>
191
- {''.join([f'<p><strong>{risk} Risk:</strong> {count} customers</p>' for risk, count in risk_dist.items()])}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  </div>
193
  </div>
194
  """
195
 
196
- # Create KPI cards data
197
- kpi_data = [
198
- ["Total Customers", f"{total_customers:,}", "👥", "#3b82f6"],
199
- ["Total Revenue", f"${total_revenue/1000000:.1f}M", "💰", "#10b981"],
200
- ["Avg Order Value", f"${avg_order_value:.0f}", "📈", "#8b5cf6"],
201
- ["High Risk Customers", f"{risk_dist.get('High', 0)}", "🚨", "#ef4444"],
202
- ["Champion Customers", f"{segment_dist.get('Champions', 0)}", "🏆", "#f59e0b"],
203
- ["Healthy Customers", f"{risk_dist.get('Low', 0)}", "✅", "#06b6d4"]
204
  ]
205
 
206
- return summary_html, kpi_data
207
 
208
  def train_churn_model(self):
209
- """Train churn prediction model"""
210
  if self.df is None:
211
  return "No data available. Please upload a CSV file first.", None
212
 
213
  try:
214
- # Prepare data for modeling
215
  customer_features = self.df.groupby('customer_id').agg({
216
  'recency_days': 'first',
217
  'frequency': 'first',
@@ -220,94 +260,84 @@ class B2BCustomerAnalytics:
220
  'order_date': ['min', 'max']
221
  }).reset_index()
222
 
223
- # Flatten column names
224
  customer_features.columns = ['customer_id', 'recency_days', 'frequency', 'monetary',
225
  'avg_amount', 'std_amount', 'min_amount', 'max_amount',
226
  'first_order', 'last_order']
227
 
228
- # Fill NaN values
229
  customer_features['std_amount'].fillna(0, inplace=True)
230
-
231
- # Calculate additional features
232
  customer_features['customer_lifetime'] = (customer_features['last_order'] - customer_features['first_order']).dt.days
233
  customer_features['customer_lifetime'].fillna(0, inplace=True)
234
 
235
- # Create churn labels (if not present)
236
- if 'churn_label' not in self.df.columns:
237
- customer_features['churn_label'] = (customer_features['recency_days'] > 90).astype(int)
238
- else:
239
- churn_labels = self.df.groupby('customer_id')['churn_label'].first().reset_index()
240
- customer_features = customer_features.merge(churn_labels, on='customer_id')
241
 
242
- # Select features for modeling
243
  feature_cols = ['recency_days', 'frequency', 'monetary', 'avg_amount', 'std_amount',
244
  'min_amount', 'max_amount', 'customer_lifetime']
245
 
246
  X = customer_features[feature_cols]
247
  y = customer_features['churn_label']
248
 
249
- # Split data
250
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
251
 
252
- # Train XGBoost model
253
  self.model = xgb.XGBClassifier(random_state=42, eval_metric='logloss')
254
  self.model.fit(X_train, y_train)
255
 
256
- # Make predictions
257
  y_pred = self.model.predict(X_test)
258
- y_pred_proba = self.model.predict_proba(X_test)[:, 1]
259
 
260
- # Calculate feature importance
261
  self.feature_importance = pd.DataFrame({
262
  'feature': feature_cols,
263
  'importance': self.model.feature_importances_
264
  }).sort_values('importance', ascending=False)
265
 
266
- # Generate predictions for all customers
267
  all_predictions = self.model.predict_proba(X)[:, 1]
268
  customer_features['churn_probability'] = all_predictions
269
  self.predictions = customer_features
270
 
271
- # Model performance
272
- accuracy = accuracy_score(y_test, y_pred)
273
-
274
- # Create modern results display
275
  results_html = f"""
276
- <div style="background: white; padding: 2rem; border-radius: 1rem; box-shadow: 0 10px 25px -5px rgba(0, 0, 0, 0.1); border: 1px solid #e5e7eb;">
277
  <div style="text-align: center; margin-bottom: 2rem;">
278
- <h3 style="font-size: 1.5rem; font-weight: bold; color: #1f2937; margin-bottom: 0.5rem;">
279
- 🤖 Model Training Completed
 
 
 
280
  </h3>
281
- <p style="color: #6b7280;">XGBoost Classifier with Advanced Feature Engineering</p>
282
  </div>
283
 
284
- <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 1rem; margin-bottom: 2rem;">
285
- <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 1rem; border-radius: 0.5rem; text-align: center; color: white;">
286
- <div style="font-size: 1.5rem; font-weight: bold;">{accuracy:.1%}</div>
287
- <div style="font-size: 0.9rem; opacity: 0.9;">Model Accuracy</div>
288
  </div>
289
- <div style="background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%); padding: 1rem; border-radius: 0.5rem; text-align: center; color: white;">
290
- <div style="font-size: 1.5rem; font-weight: bold;">{len(feature_cols)}</div>
291
- <div style="font-size: 0.9rem; opacity: 0.9;">Features Used</div>
292
  </div>
293
- <div style="background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%); padding: 1rem; border-radius: 0.5rem; text-align: center; color: white;">
294
- <div style="font-size: 1.5rem; font-weight: bold;">{len(X_train)}</div>
295
- <div style="font-size: 0.9rem; opacity: 0.9;">Training Samples</div>
296
  </div>
297
- <div style="background: linear-gradient(135deg, #43e97b 0%, #38f9d7 100%); padding: 1rem; border-radius: 0.5rem; text-align: center; color: white;">
298
- <div style="font-size: 1.5rem; font-weight: bold;">{len(X_test)}</div>
299
- <div style="font-size: 0.9rem; opacity: 0.9;">Test Samples</div>
300
  </div>
301
  </div>
302
 
303
- <div style="background: #f8fafc; padding: 1.5rem; border-radius: 0.5rem;">
304
- <h4 style="font-weight: 600; color: #374151; margin-bottom: 1rem;">🔍 Top Feature Importance</h4>
305
- <div style="space-y: 0.5rem;">
306
- {''.join([f'''<div style="display: flex; justify-content: space-between; align-items: center; padding: 0.5rem 0; border-bottom: 1px solid #e5e7eb;">
307
- <span style="font-weight: 500; color: #374151;">{row['feature'].replace('_', ' ').title()}</span>
308
- <span style="background: #3b82f6; color: white; padding: 0.25rem 0.75rem; border-radius: 9999px; font-size: 0.875rem;">
309
- {row['importance']:.3f}
310
- </span>
 
 
 
 
 
311
  </div>''' for _, row in self.feature_importance.head(5).iterrows()])}
312
  </div>
313
  </div>
@@ -320,7 +350,7 @@ class B2BCustomerAnalytics:
320
  return f"Error training model: {str(e)}", None
321
 
322
  def create_model_performance_chart(self):
323
- """Create model performance visualization"""
324
  if self.feature_importance is None:
325
  return None
326
 
@@ -329,34 +359,36 @@ class B2BCustomerAnalytics:
329
  x='importance',
330
  y='feature',
331
  orientation='h',
332
- title='Feature Importance - XGBoost Model',
333
  labels={'importance': 'Importance Score', 'feature': 'Features'},
334
  color='importance',
335
- color_continuous_scale='viridis'
336
  )
337
 
338
  fig.update_layout(
339
  height=400,
340
  showlegend=False,
341
  plot_bgcolor='white',
 
342
  title={
343
- 'text': 'Feature Importance - XGBoost Model',
344
  'x': 0.5,
345
  'xanchor': 'center',
346
- 'font': {'size': 18, 'color': '#1f2937'}
347
  },
348
- font=dict(family="Inter, sans-serif"),
349
- yaxis={'categoryorder': 'total ascending'}
 
350
  )
351
 
352
  return fig
353
 
354
  def create_visualizations(self):
355
- """Create comprehensive modern visualizations"""
356
  if self.df is None:
357
  return None, None, None, None
358
 
359
- # 1. Customer Segment Distribution (Donut Chart)
360
  segment_data = self.df.groupby('customer_id')['Segment'].first().value_counts().reset_index()
361
  segment_data.columns = ['Segment', 'Count']
362
 
@@ -368,15 +400,22 @@ class B2BCustomerAnalytics:
368
  hole=0.4,
369
  color_discrete_sequence=['#6366f1', '#10b981', '#f59e0b', '#ef4444', '#8b5cf6', '#ec4899']
370
  )
371
- fig1.update_traces(textposition='inside', textinfo='percent+label', textfont_size=12)
 
 
 
 
 
372
  fig1.update_layout(
373
  height=400,
374
  showlegend=True,
375
- title={'x': 0.5, 'xanchor': 'center', 'font': {'size': 18, 'color': '#1f2937'}},
376
- font=dict(family="Inter, sans-serif")
 
 
377
  )
378
 
379
- # 2. RFM Analysis (3D Scatter)
380
  customer_rfm = self.df.groupby('customer_id').agg({
381
  'recency_days': 'first',
382
  'frequency': 'first',
@@ -384,12 +423,12 @@ class B2BCustomerAnalytics:
384
  'Segment': 'first'
385
  }).reset_index()
386
 
387
- fig2 = px.scatter_3d(
388
  customer_rfm,
389
  x='recency_days',
390
  y='frequency',
391
- z='monetary',
392
- color='Segment',
393
  title='RFM Analysis - Customer Behavior Matrix',
394
  labels={
395
  'recency_days': 'Recency (Days)',
@@ -399,9 +438,11 @@ class B2BCustomerAnalytics:
399
  color_discrete_sequence=['#6366f1', '#10b981', '#f59e0b', '#ef4444', '#8b5cf6']
400
  )
401
  fig2.update_layout(
402
- height=500,
403
- title={'x': 0.5, 'xanchor': 'center', 'font': {'size': 18, 'color': '#1f2937'}},
404
- font=dict(family="Inter, sans-serif")
 
 
405
  )
406
 
407
  # 3. Churn Risk Analysis
@@ -412,9 +453,9 @@ class B2BCustomerAnalytics:
412
  nbins=20,
413
  title='Churn Probability Distribution',
414
  labels={'churn_probability': 'Churn Probability', 'count': 'Number of Customers'},
415
- color_discrete_sequence=[COLORS['primary']]
416
  )
417
- fig3.add_vline(x=0.5, line_dash="dash", line_color="red", annotation_text="High Risk Threshold")
418
  else:
419
  risk_data = self.df.groupby('customer_id')['Churn_Risk'].first().value_counts().reset_index()
420
  risk_data.columns = ['Risk_Level', 'Count']
@@ -431,8 +472,9 @@ class B2BCustomerAnalytics:
431
  fig3.update_layout(
432
  height=400,
433
  showlegend=False,
434
- title={'x': 0.5, 'xanchor': 'center', 'font': {'size': 18, 'color': '#1f2937'}},
435
- font=dict(family="Inter, sans-serif"),
 
436
  plot_bgcolor='white'
437
  )
438
 
@@ -449,566 +491,14 @@ class B2BCustomerAnalytics:
449
  labels={'amount': 'Revenue ($)', 'order_month': 'Month'},
450
  line_shape='spline'
451
  )
452
- fig4.update_traces(line_color=COLORS['primary'], line_width=3)
453
  fig4.update_layout(
454
  height=400,
455
- title={'x': 0.5, 'xanchor': 'center', 'font': {'size': 18, 'color': '#1f2937'}},
456
- font=dict(family="Inter, sans-serif"),
 
457
  plot_bgcolor='white',
458
  xaxis_tickangle=-45
459
  )
460
 
461
- return fig1, fig2, fig3, fig4
462
-
463
- def create_customer_table(self):
464
- """Create modern customer segmentation table"""
465
- if self.df is None:
466
- return None
467
-
468
- # Aggregate customer data for table
469
- customer_summary = self.df.groupby('customer_id').agg({
470
- 'Segment': 'first',
471
- 'Churn_Risk': 'first',
472
- 'recency_days': 'first',
473
- 'frequency': 'first',
474
- 'monetary': 'first',
475
- 'amount': 'mean'
476
- }).reset_index()
477
-
478
- # Add churn probability if available
479
- if self.predictions is not None:
480
- customer_summary = customer_summary.merge(
481
- self.predictions[['customer_id', 'churn_probability']],
482
- on='customer_id',
483
- how='left'
484
- )
485
- customer_summary['churn_probability'] = customer_summary['churn_probability'].fillna(0)
486
- else:
487
- customer_summary['churn_probability'] = 0.5 # Default value
488
-
489
- # Format for display
490
- customer_summary['monetary'] = customer_summary['monetary'].round(2)
491
- customer_summary['amount'] = customer_summary['amount'].round(2)
492
- customer_summary['churn_probability'] = (customer_summary['churn_probability'] * 100).round(1)
493
-
494
- # Rename columns for better display
495
- customer_summary.columns = [
496
- 'Customer ID', 'Segment', 'Risk Level', 'Recency (Days)',
497
- 'Frequency', 'Total Spent ($)', 'Avg Order ($)', 'Churn Probability (%)'
498
- ]
499
-
500
- return customer_summary.head(50) # Show top 50 customers
501
-
502
- def generate_pdf_report(self):
503
- """Generate comprehensive PDF report"""
504
- if self.df is None:
505
- return None
506
-
507
- try:
508
- buffer = io.BytesIO()
509
- doc = SimpleDocTemplate(buffer, pagesize=A4, rightMargin=72, leftMargin=72,
510
- topMargin=72, bottomMargin=18)
511
-
512
- styles = getSampleStyleSheet()
513
- title_style = ParagraphStyle(
514
- 'CustomTitle',
515
- parent=styles['Heading1'],
516
- fontSize=24,
517
- spaceAfter=30,
518
- textColor=colors.HexColor('#6366f1'),
519
- alignment=1
520
- )
521
-
522
- story = []
523
-
524
- # Title
525
- story.append(Paragraph("B2B Customer Analytics Report", title_style))
526
- story.append(Spacer(1, 20))
527
-
528
- # Executive Summary
529
- story.append(Paragraph("Executive Summary", styles['Heading2']))
530
-
531
- total_customers = self.df['customer_id'].nunique()
532
- total_revenue = self.df['amount'].sum()
533
- avg_order_value = self.df['amount'].mean()
534
- high_risk_customers = len(self.df[self.df['Churn_Risk'] == 'High']['customer_id'].unique())
535
-
536
- summary_text = f"""
537
- This comprehensive analysis examines {total_customers} B2B customers with total revenue of ${total_revenue:,.2f}.
538
- The average order value stands at ${avg_order_value:.2f}, indicating healthy transaction volumes.
539
-
540
- Critical findings reveal {high_risk_customers} customers at high risk of churning, representing significant revenue exposure.
541
- Our machine learning model achieved 78% accuracy in predicting customer churn, enabling proactive retention strategies.
542
-
543
- The customer segmentation analysis identifies distinct behavioral patterns, with Champions showing the highest lifetime value
544
- and lowest churn risk, while At Risk customers require immediate intervention to prevent revenue loss.
545
- """
546
-
547
- story.append(Paragraph(summary_text, styles['Normal']))
548
- story.append(Spacer(1, 20))
549
-
550
- # Key Metrics
551
- story.append(Paragraph("Key Performance Indicators", styles['Heading2']))
552
-
553
- segment_dist = self.df.groupby('customer_id')['Segment'].first().value_counts()
554
- risk_dist = self.df.groupby('customer_id')['Churn_Risk'].first().value_counts()
555
-
556
- metrics_data = [
557
- ['Metric', 'Value', 'Status'],
558
- ['Total Customers', f"{total_customers:,}", 'Baseline'],
559
- ['Total Revenue', f"${total_revenue:,.2f}", 'Strong'],
560
- ['Average Order Value', f"${avg_order_value:.2f}", 'Healthy'],
561
- ['Champions', f"{segment_dist.get('Champions', 0)}", 'Retain'],
562
- ['At Risk Customers', f"{segment_dist.get('At Risk', 0)}", 'Action Required'],
563
- ['High Risk Churn', f"{risk_dist.get('High', 0)}", 'Critical'],
564
- ['Low Risk Churn', f"{risk_dist.get('Low', 0)}", 'Stable']
565
- ]
566
-
567
-
568
- # Continue from where the code was cut off in generate_pdf_report method
569
-
570
- metrics_table.setStyle(TableStyle([
571
- ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#6366f1')),
572
- ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
573
- ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
574
- ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
575
- ('FONTSIZE', (0, 0), (-1, 0), 12),
576
- ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
577
- ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
578
- ('GRID', (0, 0), (-1, -1), 1, colors.black),
579
- ('FONTSIZE', (0, 1), (-1, -1), 10),
580
- ('VALIGN', (0, 0), (-1, -1), 'MIDDLE')
581
- ]))
582
-
583
- story.append(metrics_table)
584
- story.append(Spacer(1, 20))
585
-
586
- # Customer Segmentation Analysis
587
- story.append(Paragraph("Customer Segmentation Analysis", styles['Heading2']))
588
-
589
- segmentation_text = """
590
- Our RFM (Recency, Frequency, Monetary) analysis reveals distinct customer segments:
591
-
592
- • Champions: High-value, recent, and frequent customers - our most valuable segment
593
- • Loyal Customers: Consistent purchasers with good transaction history
594
- • Potential Loyalists: Recent customers with growth potential
595
- • At Risk: Previously good customers showing declining engagement
596
- • Cannot Lose Them: High-value customers with concerning recency patterns
597
- """
598
-
599
- story.append(Paragraph(segmentation_text, styles['Normal']))
600
- story.append(Spacer(1, 15))
601
-
602
- # Segment breakdown table
603
- segment_data = [['Segment', 'Count', 'Percentage', 'Avg Revenue', 'Strategy']]
604
- total_unique_customers = len(segment_dist)
605
-
606
- for segment, count in segment_dist.items():
607
- avg_revenue = self.df[self.df['Segment'] == segment]['amount'].mean()
608
- percentage = (count / total_unique_customers) * 100
609
-
610
- if segment == 'Champions':
611
- strategy = 'Reward & Retain'
612
- elif segment == 'Loyal Customers':
613
- strategy = 'Upsell & Cross-sell'
614
- elif segment == 'At Risk':
615
- strategy = 'Immediate Intervention'
616
- elif segment == 'Potential Loyalists':
617
- strategy = 'Nurture & Develop'
618
- else:
619
- strategy = 'Monitor & Engage'
620
-
621
- segment_data.append([
622
- segment,
623
- str(count),
624
- f"{percentage:.1f}%",
625
- f"${avg_revenue:.2f}",
626
- strategy
627
- ])
628
-
629
- segment_table = Table(segment_data, colWidths=[1.8*inch, 0.8*inch, 1*inch, 1*inch, 1.4*inch])
630
- segment_table.setStyle(TableStyle([
631
- ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#10b981')),
632
- ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
633
- ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
634
- ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
635
- ('FONTSIZE', (0, 0), (-1, 0), 10),
636
- ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
637
- ('BACKGROUND', (0, 1), (-1, -1), colors.lightblue),
638
- ('GRID', (0, 0), (-1, -1), 1, colors.black),
639
- ('FONTSIZE', (0, 1), (-1, -1), 9),
640
- ('VALIGN', (0, 0), (-1, -1), 'MIDDLE')
641
- ]))
642
-
643
- story.append(segment_table)
644
- story.append(PageBreak())
645
-
646
- # Churn Risk Analysis
647
- story.append(Paragraph("Churn Risk Assessment", styles['Heading2']))
648
-
649
- churn_text = f"""
650
- Machine Learning Model Performance:
651
- Our XGBoost classifier achieved high accuracy in predicting customer churn probability.
652
-
653
- Risk Distribution:
654
- • High Risk: {risk_dist.get('High', 0)} customers ({(risk_dist.get('High', 0)/total_unique_customers)*100:.1f}%)
655
- • Medium Risk: {risk_dist.get('Medium', 0)} customers ({(risk_dist.get('Medium', 0)/total_unique_customers)*100:.1f}%)
656
- • Low Risk: {risk_dist.get('Low', 0)} customers ({(risk_dist.get('Low', 0)/total_unique_customers)*100:.1f}%)
657
-
658
- Key Risk Factors:
659
- """
660
-
661
- story.append(Paragraph(churn_text, styles['Normal']))
662
-
663
- if self.feature_importance is not None:
664
- feature_text = "Top predictive features for churn:\n"
665
- for _, row in self.feature_importance.head(5).iterrows():
666
- feature_text += f"• {row['feature'].replace('_', ' ').title()}: {row['importance']:.3f}\n"
667
- story.append(Paragraph(feature_text, styles['Normal']))
668
-
669
- story.append(Spacer(1, 20))
670
-
671
- # Recommendations
672
- story.append(Paragraph("Strategic Recommendations", styles['Heading2']))
673
-
674
- recommendations_text = """
675
- Based on our comprehensive analysis, we recommend the following strategic actions:
676
-
677
- 1. IMMEDIATE ACTIONS (0-30 days):
678
- • Contact all high-risk customers personally
679
- • Offer retention incentives to at-risk segments
680
- • Implement automated early warning system
681
-
682
- 2. SHORT-TERM INITIATIVES (1-3 months):
683
- • Develop targeted marketing campaigns by segment
684
- • Launch loyalty program for Champions
685
- • Create win-back campaigns for lost customers
686
-
687
- 3. LONG-TERM STRATEGY (3-12 months):
688
- • Invest in customer success programs
689
- • Develop predictive analytics capabilities
690
- • Build comprehensive customer health scoring
691
- • Implement continuous model monitoring and improvement
692
-
693
- 4. TECHNOLOGY INVESTMENTS:
694
- • CRM integration for real-time scoring
695
- • Marketing automation platform
696
- • Customer success management tools
697
- • Advanced analytics infrastructure
698
- """
699
-
700
- story.append(Paragraph(recommendations_text, styles['Normal']))
701
- story.append(Spacer(1, 20))
702
-
703
- # Footer
704
- story.append(Paragraph(f"Report generated on {datetime.now().strftime('%B %d, %Y at %I:%M %p')}",
705
- styles['Normal']))
706
- story.append(Paragraph("B2B Customer Analytics Platform - Enterprise Edition",
707
- styles['Normal']))
708
-
709
- # Build PDF
710
- doc.build(story)
711
- pdf_bytes = buffer.getvalue()
712
- buffer.close()
713
-
714
- return pdf_bytes
715
-
716
- except Exception as e:
717
- print(f"Error generating PDF report: {str(e)}")
718
- return None
719
-
720
- def get_customer_insights(self, customer_id):
721
- """Get detailed insights for a specific customer"""
722
- if self.df is None:
723
- return "No data available"
724
-
725
- customer_data = self.df[self.df['customer_id'] == customer_id]
726
- if customer_data.empty:
727
- return f"Customer {customer_id} not found"
728
-
729
- # Get customer metrics
730
- total_orders = len(customer_data)
731
- total_spent = customer_data['amount'].sum()
732
- avg_order_value = customer_data['amount'].mean()
733
- first_order = customer_data['order_date'].min()
734
- last_order = customer_data['order_date'].max()
735
- segment = customer_data['Segment'].iloc[0]
736
- risk_level = customer_data['Churn_Risk'].iloc[0]
737
- recency = customer_data['recency_days'].iloc[0]
738
-
739
- # Get churn probability if available
740
- churn_prob = 0.5 # default
741
- if self.predictions is not None:
742
- pred_data = self.predictions[self.predictions['customer_id'] == customer_id]
743
- if not pred_data.empty:
744
- churn_prob = pred_data['churn_probability'].iloc[0]
745
-
746
- insights_html = f"""
747
- <div style="background: white; padding: 2rem; border-radius: 1rem; box-shadow: 0 10px 25px -5px rgba(0, 0, 0, 0.1);">
748
- <h3 style="color: #1f2937; font-size: 1.5rem; font-weight: bold; margin-bottom: 1.5rem; text-align: center;">
749
- 📊 Customer Profile: {customer_id}
750
- </h3>
751
-
752
- <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); gap: 1rem; margin-bottom: 2rem;">
753
- <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 1rem; border-radius: 0.5rem; color: white;">
754
- <h4 style="font-size: 0.9rem; opacity: 0.9; margin-bottom: 0.5rem;">SEGMENT</h4>
755
- <div style="font-size: 1.2rem; font-weight: bold;">{segment}</div>
756
- </div>
757
- <div style="background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%); padding: 1rem; border-radius: 0.5rem; color: white;">
758
- <h4 style="font-size: 0.9rem; opacity: 0.9; margin-bottom: 0.5rem;">CHURN RISK</h4>
759
- <div style="font-size: 1.2rem; font-weight: bold;">{risk_level}</div>
760
- </div>
761
- <div style="background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%); padding: 1rem; border-radius: 0.5rem; color: white;">
762
- <h4 style="font-size: 0.9rem; opacity: 0.9; margin-bottom: 0.5rem;">CHURN PROBABILITY</h4>
763
- <div style="font-size: 1.2rem; font-weight: bold;">{churn_prob:.1%}</div>
764
- </div>
765
- </div>
766
-
767
- <div style="background: #f8fafc; padding: 1.5rem; border-radius: 0.5rem;">
768
- <h4 style="color: #374151; font-weight: 600; margin-bottom: 1rem;">📈 Transaction Metrics</h4>
769
- <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 1rem;">
770
- <div>
771
- <div style="font-size: 0.875rem; color: #6b7280;">Total Orders</div>
772
- <div style="font-size: 1.25rem; font-weight: bold; color: #1f2937;">{total_orders}</div>
773
- </div>
774
- <div>
775
- <div style="font-size: 0.875rem; color: #6b7280;">Total Spent</div>
776
- <div style="font-size: 1.25rem; font-weight: bold; color: #1f2937;">${total_spent:,.2f}</div>
777
- </div>
778
- <div>
779
- <div style="font-size: 0.875rem; color: #6b7280;">Avg Order Value</div>
780
- <div style="font-size: 1.25rem; font-weight: bold; color: #1f2937;">${avg_order_value:.2f}</div>
781
- </div>
782
- <div>
783
- <div style="font-size: 0.875rem; color: #6b7280;">Days Since Last Order</div>
784
- <div style="font-size: 1.25rem; font-weight: bold; color: #1f2937;">{recency}</div>
785
- </div>
786
- </div>
787
- </div>
788
-
789
- <div style="background: #f0f9ff; border-left: 4px solid #3b82f6; padding: 1rem; margin-top: 1rem;">
790
- <h4 style="color: #1e40af; font-weight: 600; margin-bottom: 0.5rem;">💡 Recommendations</h4>
791
- <p style="color: #1f2937; margin: 0;">
792
- {self._get_customer_recommendations(segment, risk_level, churn_prob, recency)}
793
- </p>
794
- </div>
795
- </div>
796
- """
797
-
798
- return insights_html
799
-
800
def _get_customer_recommendations(self, segment, risk_level, churn_prob, recency):
    """Build a bullet-separated list of suggested actions for one customer.

    Args:
        segment: Customer segment label. 'Champions', 'At Risk' and
            'New Customers' each contribute segment-specific actions.
        risk_level: Churn-risk label. 'High' and 'Medium' contribute actions.
        churn_prob: Predicted churn probability, or None when unavailable.
            A value above 0.7 is handled the same way as a 'High' risk level.
        recency: Days since the last order, or None when unknown. More than
            60 days adds a win-back action.

    Returns:
        str: The selected actions joined with " • ", or a default monitoring
        message when no rule applies.
    """
    segment_actions = {
        'Champions': [
            "🏆 Invite to VIP program or advisory board",
            "🔄 Cross-sell premium services",
        ],
        'At Risk': [
            "⚠️ Proactive customer success intervention",
            "📊 Conduct health check survey",
        ],
        'New Customers': [
            "🎉 Deploy onboarding campaign",
            "📚 Provide educational resources",
        ],
    }

    # A missing probability/recency must not crash the comparison below;
    # treat None as "no signal" instead of raising TypeError.
    high_probability = churn_prob is not None and churn_prob > 0.7
    long_inactive = recency is not None and recency > 60

    recommendations = []

    if risk_level == 'High' or high_probability:
        recommendations += [
            "🚨 URGENT: Personal outreach required within 24 hours",
            "💰 Offer retention incentive (discount/upgrade)",
            "📞 Schedule executive-level call",
        ]
    elif risk_level == 'Medium':
        recommendations += [
            "📧 Send personalized re-engagement campaign",
            "🎯 Offer targeted product recommendations",
        ]

    recommendations += segment_actions.get(segment, [])

    if long_inactive:
        recommendations.append("🔄 Win-back campaign with special offer")

    if not recommendations:
        return "Continue monitoring customer engagement patterns."
    return " • ".join(recommendations)
826
-
827
-
828
- # Gradio Interface
829
def create_gradio_interface():
    """Create the Gradio interface for the B2B Customer Analytics platform.

    Builds a tabbed ``gr.Blocks`` app around a single ``B2BCustomerAnalytics``
    instance: data upload, segmentation charts, churn-model training, revenue
    chart, per-customer insights and PDF report download.

    Returns:
        gr.Blocks: The assembled (not yet launched) application.
    """
    import tempfile

    analytics = B2BCustomerAnalytics()

    def load_data(file):
        if file is None:
            return "Please upload a CSV file", None, None, None
        return analytics.load_and_process_data(file)

    def train_model():
        return analytics.train_churn_model()

    def create_charts():
        return analytics.create_visualizations()

    def get_customer_table():
        return analytics.create_customer_table()

    def generate_report():
        pdf_bytes = analytics.generate_pdf_report()
        if not pdf_bytes:
            return None
        if isinstance(pdf_bytes, (bytes, bytearray)):
            # A gr.File output expects a filesystem path, not raw bytes, so
            # persist the PDF to a temp file and hand back its location.
            with tempfile.NamedTemporaryFile(
                prefix="customer_analytics_report_", suffix=".pdf", delete=False
            ) as handle:
                handle.write(pdf_bytes)
            return handle.name
        # Anything else (e.g. an existing path) is passed through unchanged.
        return pdf_bytes

    def get_insights(customer_id):
        if not customer_id:
            return "Please enter a customer ID"
        return analytics.get_customer_insights(customer_id)

    with gr.Blocks(
        theme=gr.themes.Soft(primary_hue="blue"),
        title="B2B Customer Analytics Platform",
        css="""
        .gradio-container {
            font-family: 'Inter', sans-serif;
        }
        .main-header {
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            padding: 2rem;
            border-radius: 1rem;
            color: white;
            text-align: center;
            margin-bottom: 2rem;
        }
        .metric-card {
            background: white;
            padding: 1.5rem;
            border-radius: 1rem;
            box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
            border-left: 4px solid #3b82f6;
        }
        """
    ) as demo:

        gr.HTML("""
        <div class="main-header">
            <h1 style="font-size: 2.5rem; font-weight: bold; margin-bottom: 0.5rem;">
                🏢 B2B Customer Analytics Platform
            </h1>
            <p style="font-size: 1.2rem; opacity: 0.9;">
                Advanced Customer Segmentation & Churn Prediction System
            </p>
        </div>
        """)

        with gr.Tabs():
            # Data Upload Tab
            with gr.Tab("📊 Data Upload & Overview"):
                with gr.Row():
                    file_input = gr.File(label="Upload Customer Data CSV", file_types=[".csv"])

                with gr.Row():
                    load_btn = gr.Button("Load & Process Data", variant="primary", size="lg")

                load_status = gr.HTML()
                summary_display = gr.HTML()
                data_preview = gr.DataFrame(label="Data Preview")
                kpi_display = gr.HTML()

            # Analytics & Segmentation Tab
            with gr.Tab("🎯 Customer Segmentation"):
                with gr.Row():
                    segment_chart = gr.Plot(label="Customer Segments")
                    rfm_chart = gr.Plot(label="RFM Analysis")

                with gr.Row():
                    customer_table = gr.DataFrame(label="Customer Segmentation Table")

            # Churn Prediction Tab
            with gr.Tab("🤖 Churn Prediction"):
                with gr.Row():
                    train_btn = gr.Button("Train Churn Prediction Model", variant="primary", size="lg")

                model_results = gr.HTML()

                with gr.Row():
                    performance_chart = gr.Plot(label="Model Performance")
                    churn_chart = gr.Plot(label="Churn Risk Analysis")

            # Revenue Analytics Tab
            with gr.Tab("💰 Revenue Analytics"):
                with gr.Row():
                    revenue_chart = gr.Plot(label="Revenue Trends")

            # Customer Insights Tab
            with gr.Tab("🔍 Customer Insights"):
                with gr.Row():
                    customer_id_input = gr.Textbox(
                        label="Enter Customer ID",
                        placeholder="e.g., CUST001"
                    )
                    insights_btn = gr.Button("Get Customer Insights", variant="primary")

                customer_insights = gr.HTML()

            # Report Generation Tab
            with gr.Tab("📄 Reports"):
                with gr.Row():
                    report_btn = gr.Button("Generate PDF Report", variant="primary", size="lg")

                with gr.Row():
                    report_download = gr.File(label="Download Report")

                gr.HTML("""
                <div style="background: #f0f9ff; padding: 1.5rem; border-radius: 0.5rem; margin-top: 1rem;">
                    <h3 style="color: #1e40af; margin-bottom: 1rem;">📋 Report Contents</h3>
                    <ul style="color: #374151;">
                        <li>Executive Summary with Key Metrics</li>
                        <li>Customer Segmentation Analysis</li>
                        <li>Churn Risk Assessment</li>
                        <li>Revenue Trends and Patterns</li>
                        <li>Strategic Recommendations</li>
                        <li>Model Performance Metrics</li>
                    </ul>
                </div>
                """)

        # Event handlers
        load_event = load_btn.click(
            fn=load_data,
            inputs=[file_input],
            outputs=[load_status, summary_display, data_preview, kpi_display]
        )

        # The charts and the customer table read state that load_data writes,
        # so they must run after it finishes. Separate click listeners on the
        # same button carry no ordering guarantee; .then() does.
        load_event.then(
            fn=create_charts,
            outputs=[segment_chart, rfm_chart, churn_chart, revenue_chart]
        )
        load_event.then(
            fn=get_customer_table,
            outputs=[customer_table]
        )

        train_btn.click(
            fn=train_model,
            outputs=[model_results, performance_chart]
        )

        insights_btn.click(
            fn=get_insights,
            inputs=[customer_id_input],
            outputs=[customer_insights]
        )

        report_btn.click(
            fn=generate_report,
            outputs=[report_download]
        )

    return demo
1005
-
1006
def main():
    """Build the Gradio app and serve it on all interfaces at port 7860."""
    demo = create_gradio_interface()
    demo.launch(
        share=True,
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
    )


if __name__ == "__main__":
    # Launch the application
    main()
 
22
  import warnings
23
  warnings.filterwarnings('ignore')
24
 
25
# Named hex colors shared by the dashboard HTML and the Plotly charts.
# 'indigo' intentionally carries the same value as 'primary'.
COLORS = {
    'primary': '#6366f1',
    'success': '#10b981',
    'warning': '#f59e0b',
    'danger': '#ef4444',
    'purple': '#8b5cf6',
    'indigo': '#6366f1',
    'blue': '#3b82f6',
    'gray': '#6b7280',
}
36
 
 
 
 
 
37
  class B2BCustomerAnalytics:
38
  def __init__(self):
39
  self.df = None
 
47
  if file is None:
48
  return "Please upload a CSV file", None, None, None
49
 
 
50
  self.df = pd.read_csv(file.name)
51
 
52
+ # Basic validation
53
  required_columns = ['customer_id', 'order_date', 'amount']
54
  missing_cols = [col for col in required_columns if col not in self.df.columns]
55
  if missing_cols:
 
59
  self.df['order_date'] = pd.to_datetime(self.df['order_date'])
60
 
61
  # Calculate RFM metrics if not present
62
+ if 'recency_days' not in self.df.columns:
63
  self.df = self.calculate_rfm_metrics(self.df)
64
 
65
  # Customer segmentation
66
  self.df = self.perform_customer_segmentation(self.df)
67
 
68
+ # Generate modern dashboard
69
+ dashboard_html, metrics_cards = self.generate_modern_dashboard()
70
 
71
+ return "Data loaded successfully", dashboard_html, self.df.head(20), metrics_cards
72
 
73
  except Exception as e:
74
  return f"Error loading data: {str(e)}", None, None, None
 
77
  """Calculate RFM metrics from transaction data"""
78
  current_date = df['order_date'].max() + timedelta(days=1)
79
 
 
80
  customer_metrics = df.groupby('customer_id').agg({
81
  'order_date': ['max', 'count'],
82
  'amount': ['sum', 'mean']
 
85
  customer_metrics.columns = ['last_order_date', 'frequency', 'monetary', 'avg_order_value']
86
  customer_metrics['recency_days'] = (current_date - customer_metrics['last_order_date']).dt.days
87
 
 
88
  df_with_rfm = df.merge(customer_metrics[['recency_days', 'frequency', 'monetary']],
89
  left_on='customer_id', right_index=True, how='left')
90
 
 
103
  customer_df['F_Score'] = pd.qcut(customer_df['frequency'].rank(method='first'), 5, labels=[1,2,3,4,5])
104
  customer_df['M_Score'] = pd.qcut(customer_df['monetary'].rank(method='first'), 5, labels=[1,2,3,4,5])
105
 
 
106
  customer_df['R_Score'] = customer_df['R_Score'].astype(int)
107
  customer_df['F_Score'] = customer_df['F_Score'].astype(int)
108
  customer_df['M_Score'] = customer_df['M_Score'].astype(int)
109
 
 
110
  def segment_customers(row):
111
  if row['R_Score'] >= 4 and row['F_Score'] >= 4 and row['M_Score'] >= 4:
112
  return 'Champions'
 
126
  return 'Others'
127
 
128
  customer_df['Segment'] = customer_df.apply(segment_customers, axis=1)
 
 
129
  customer_df['Churn_Risk'] = customer_df.apply(lambda x:
130
  'High' if x['Segment'] in ['Lost Customers', 'At Risk'] else
131
  'Medium' if x['Segment'] in ['Others', 'Cannot Lose Them'] else 'Low', axis=1)
132
 
 
133
  segment_data = customer_df[['customer_id', 'Segment', 'Churn_Risk', 'R_Score', 'F_Score', 'M_Score']]
134
  df_with_segments = df.merge(segment_data, on='customer_id', how='left')
135
 
136
  return df_with_segments
137
 
138
def generate_modern_dashboard(self):
    """Build the KPI dashboard for the currently loaded dataset.

    Reads ``self.df`` and uses its ``customer_id``, ``amount``, ``Segment``
    and ``Churn_Risk`` columns.

    Returns:
        tuple: ``(dashboard_html, metrics_cards)``. ``dashboard_html`` is a
        banner followed by one styled card per KPI. ``metrics_cards`` is a
        list of ``[label, formatted_value, hex_color]`` rows for the same
        KPIs. When no data has been loaded the result is
        ``("No data loaded", "")``.
    """
    if self.df is None:
        return "No data loaded", ""

    orders = self.df

    # Headline KPIs
    total_customers = orders['customer_id'].nunique()
    total_revenue = orders['amount'].sum()
    avg_order_value = orders['amount'].mean()

    # Segment and Churn_Risk are repeated on every order row, so take one
    # value per customer before counting.
    by_customer = orders.groupby('customer_id')
    segment_dist = by_customer['Segment'].first().value_counts()
    risk_dist = by_customer['Churn_Risk'].first().value_counts()

    high_risk_customers = risk_dist.get('High', 0)
    champion_customers = segment_dist.get('Champions', 0)
    healthy_customers = risk_dist.get('Low', 0)

    # (card title, metrics label, formatted value, accent, icon tint, caption)
    kpis = [
        ("Total Customers", "Total Customers", f"{total_customers:,}",
         "#3b82f6", "#eff6ff", "Active enterprise clients"),
        ("Total Revenue", "Total Revenue", f"${total_revenue/1000000:.1f}M",
         "#10b981", "#f0fdf4", "Contract value sum"),
        # The value is the mean over order rows, hence "per order".
        ("Avg Order Value", "Avg Order Value", f"${avg_order_value/1000:.0f}K",
         "#8b5cf6", "#faf5ff", "Per order average"),
        ("High Risk Clients", "High Risk Customers", f"{high_risk_customers}",
         "#ef4444", "#fef2f2", "Require immediate attention"),
        ("Champion Clients", "Champion Customers", f"{champion_customers}",
         "#f59e0b", "#fffbeb", "Top tier customers"),
        ("Healthy Clients", "Healthy Customers", f"{healthy_customers}",
         "#06b6d4", "#f0fdfa", "Low churn risk"),
    ]

    def render_card(title, value, accent, tint, caption):
        # align-items / justify-content are the real flexbox properties;
        # "items-center: justify-between" is silently ignored by browsers.
        return f"""
        <div style="background: white; padding: 2rem; border-radius: 1rem; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1); border-left: 4px solid {accent}; transition: transform 0.2s;">
            <div style="display: flex; align-items: center; justify-content: space-between; margin-bottom: 1rem;">
                <div style="padding: 0.75rem; background: {tint}; border-radius: 0.5rem;">
                    <div style="width: 1.5rem; height: 1.5rem; background: {accent}; border-radius: 50%;"></div>
                </div>
                <span style="font-size: 2rem; font-weight: bold; color: {accent};">{value}</span>
            </div>
            <h3 style="color: #1f2937; font-weight: 600; margin-bottom: 0.25rem;">{title}</h3>
            <p style="color: #6b7280; font-size: 0.875rem;">{caption}</p>
        </div>
        """

    cards_html = "\n".join(
        render_card(title, value, accent, tint, caption)
        for title, _label, value, accent, tint, caption in kpis
    )

    dashboard_html = f"""
    <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 3rem; border-radius: 1rem; color: white; margin-bottom: 3rem; text-align: center;">
        <h1 style="font-size: 2.5rem; font-weight: bold; margin-bottom: 0.5rem; font-family: 'Inter', sans-serif;">
            B2B Customer Analytics Platform
        </h1>
        <p style="font-size: 1.2rem; opacity: 0.9;">
            Enterprise Customer Health Monitoring & Churn Prediction System
        </p>
    </div>

    <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); gap: 1.5rem; margin-bottom: 3rem;">
        {cards_html}
    </div>
    """

    metrics_cards = [
        [label, value, accent]
        for _title, label, value, accent, _tint, _caption in kpis
    ]

    return dashboard_html, metrics_cards
248
 
249
  def train_churn_model(self):
250
+ """Train churn prediction model with modern UI feedback"""
251
  if self.df is None:
252
  return "No data available. Please upload a CSV file first.", None
253
 
254
  try:
 
255
  customer_features = self.df.groupby('customer_id').agg({
256
  'recency_days': 'first',
257
  'frequency': 'first',
 
260
  'order_date': ['min', 'max']
261
  }).reset_index()
262
 
 
263
  customer_features.columns = ['customer_id', 'recency_days', 'frequency', 'monetary',
264
  'avg_amount', 'std_amount', 'min_amount', 'max_amount',
265
  'first_order', 'last_order']
266
 
 
267
  customer_features['std_amount'].fillna(0, inplace=True)
 
 
268
  customer_features['customer_lifetime'] = (customer_features['last_order'] - customer_features['first_order']).dt.days
269
  customer_features['customer_lifetime'].fillna(0, inplace=True)
270
 
271
+ customer_features['churn_label'] = (customer_features['recency_days'] > 90).astype(int)
 
 
 
 
 
272
 
 
273
  feature_cols = ['recency_days', 'frequency', 'monetary', 'avg_amount', 'std_amount',
274
  'min_amount', 'max_amount', 'customer_lifetime']
275
 
276
  X = customer_features[feature_cols]
277
  y = customer_features['churn_label']
278
 
 
279
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
280
 
 
281
  self.model = xgb.XGBClassifier(random_state=42, eval_metric='logloss')
282
  self.model.fit(X_train, y_train)
283
 
 
284
  y_pred = self.model.predict(X_test)
285
+ accuracy = accuracy_score(y_test, y_pred)
286
 
 
287
  self.feature_importance = pd.DataFrame({
288
  'feature': feature_cols,
289
  'importance': self.model.feature_importances_
290
  }).sort_values('importance', ascending=False)
291
 
 
292
  all_predictions = self.model.predict_proba(X)[:, 1]
293
  customer_features['churn_probability'] = all_predictions
294
  self.predictions = customer_features
295
 
296
+ # Modern results display
 
 
 
297
  results_html = f"""
298
+ <div style="background: white; padding: 2.5rem; border-radius: 1rem; box-shadow: 0 10px 25px -5px rgba(0, 0, 0, 0.1); border: 1px solid #e5e7eb; margin-top: 2rem;">
299
  <div style="text-align: center; margin-bottom: 2rem;">
300
+ <div style="display: inline-block; padding: 1rem; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 50%; margin-bottom: 1rem;">
301
+ <div style="width: 2rem; height: 2rem; background: white; border-radius: 50%; opacity: 0.3;"></div>
302
+ </div>
303
+ <h3 style="font-size: 1.75rem; font-weight: bold; color: #1f2937; margin-bottom: 0.5rem;">
304
+ Model Training Completed
305
  </h3>
306
+ <p style="color: #6b7280; font-size: 1.1rem;">XGBoost Classifier with Advanced Feature Engineering</p>
307
  </div>
308
 
309
+ <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 1.5rem; margin-bottom: 2rem;">
310
+ <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 1.5rem; border-radius: 1rem; text-align: center; color: white;">
311
+ <div style="font-size: 2rem; font-weight: bold; margin-bottom: 0.5rem;">{accuracy:.1%}</div>
312
+ <div style="font-size: 1rem; opacity: 0.9;">Model Accuracy</div>
313
  </div>
314
+ <div style="background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%); padding: 1.5rem; border-radius: 1rem; text-align: center; color: white;">
315
+ <div style="font-size: 2rem; font-weight: bold; margin-bottom: 0.5rem;">{len(feature_cols)}</div>
316
+ <div style="font-size: 1rem; opacity: 0.9;">Features Used</div>
317
  </div>
318
+ <div style="background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%); padding: 1.5rem; border-radius: 1rem; text-align: center; color: white;">
319
+ <div style="font-size: 2rem; font-weight: bold; margin-bottom: 0.5rem;">{len(X_train)}</div>
320
+ <div style="font-size: 1rem; opacity: 0.9;">Training Samples</div>
321
  </div>
322
+ <div style="background: linear-gradient(135deg, #43e97b 0%, #38f9d7 100%); padding: 1.5rem; border-radius: 1rem; text-align: center; color: white;">
323
+ <div style="font-size: 2rem; font-weight: bold; margin-bottom: 0.5rem;">{len(X_test)}</div>
324
+ <div style="font-size: 1rem; opacity: 0.9;">Test Samples</div>
325
  </div>
326
  </div>
327
 
328
+ <div style="background: #f8fafc; padding: 2rem; border-radius: 1rem; border: 1px solid #e2e8f0;">
329
+ <h4 style="font-weight: 600; color: #374151; margin-bottom: 1.5rem; font-size: 1.25rem;">Top Feature Importance</h4>
330
+ <div style="space-y: 1rem;">
331
+ {''.join([f'''<div style="display: flex; justify-content: space-between; align-items: center; padding: 1rem 0; border-bottom: 1px solid #e5e7eb;">
332
+ <span style="font-weight: 500; color: #374151; font-size: 1rem;">{row['feature'].replace('_', ' ').title()}</span>
333
+ <div style="display: flex; align-items: center;">
334
+ <div style="width: 100px; height: 8px; background: #e5e7eb; border-radius: 4px; margin-right: 1rem;">
335
+ <div style="height: 100%; background: #3b82f6; border-radius: 4px; width: {row['importance']*100:.1f}%;"></div>
336
+ </div>
337
+ <span style="background: #3b82f6; color: white; padding: 0.25rem 0.75rem; border-radius: 9999px; font-size: 0.875rem; font-weight: 500;">
338
+ {row['importance']:.3f}
339
+ </span>
340
+ </div>
341
  </div>''' for _, row in self.feature_importance.head(5).iterrows()])}
342
  </div>
343
  </div>
 
350
  return f"Error training model: {str(e)}", None
351
 
352
  def create_model_performance_chart(self):
353
+ """Create clean model performance visualization"""
354
  if self.feature_importance is None:
355
  return None
356
 
 
359
  x='importance',
360
  y='feature',
361
  orientation='h',
362
+ title='Feature Importance Analysis',
363
  labels={'importance': 'Importance Score', 'feature': 'Features'},
364
  color='importance',
365
+ color_continuous_scale=['#e0e7ff', '#6366f1']
366
  )
367
 
368
  fig.update_layout(
369
  height=400,
370
  showlegend=False,
371
  plot_bgcolor='white',
372
+ paper_bgcolor='white',
373
  title={
374
+ 'text': 'Feature Importance Analysis',
375
  'x': 0.5,
376
  'xanchor': 'center',
377
+ 'font': {'size': 18, 'color': '#1f2937', 'family': 'Inter, sans-serif'}
378
  },
379
+ font=dict(family="Inter, sans-serif", color='#374151'),
380
+ yaxis={'categoryorder': 'total ascending'},
381
+ margin=dict(l=20, r=20, t=60, b=20)
382
  )
383
 
384
  return fig
385
 
386
  def create_visualizations(self):
387
+ """Create modern, clean visualizations"""
388
  if self.df is None:
389
  return None, None, None, None
390
 
391
+ # 1. Customer Segment Distribution
392
  segment_data = self.df.groupby('customer_id')['Segment'].first().value_counts().reset_index()
393
  segment_data.columns = ['Segment', 'Count']
394
 
 
400
  hole=0.4,
401
  color_discrete_sequence=['#6366f1', '#10b981', '#f59e0b', '#ef4444', '#8b5cf6', '#ec4899']
402
  )
403
+ fig1.update_traces(
404
+ textposition='inside',
405
+ textinfo='percent+label',
406
+ textfont_size=12,
407
+ textfont_family='Inter'
408
+ )
409
  fig1.update_layout(
410
  height=400,
411
  showlegend=True,
412
+ title={'x': 0.5, 'xanchor': 'center', 'font': {'size': 18, 'color': '#1f2937', 'family': 'Inter'}},
413
+ font=dict(family="Inter, sans-serif", color='#374151'),
414
+ paper_bgcolor='white',
415
+ plot_bgcolor='white'
416
  )
417
 
418
+ # 2. RFM Analysis
419
  customer_rfm = self.df.groupby('customer_id').agg({
420
  'recency_days': 'first',
421
  'frequency': 'first',
 
423
  'Segment': 'first'
424
  }).reset_index()
425
 
426
+ fig2 = px.scatter(
427
  customer_rfm,
428
  x='recency_days',
429
  y='frequency',
430
+ size='monetary',
431
+ color='Segment',
432
  title='RFM Analysis - Customer Behavior Matrix',
433
  labels={
434
  'recency_days': 'Recency (Days)',
 
438
  color_discrete_sequence=['#6366f1', '#10b981', '#f59e0b', '#ef4444', '#8b5cf6']
439
  )
440
  fig2.update_layout(
441
+ height=400,
442
+ title={'x': 0.5, 'xanchor': 'center', 'font': {'size': 18, 'color': '#1f2937', 'family': 'Inter'}},
443
+ font=dict(family="Inter, sans-serif", color='#374151'),
444
+ paper_bgcolor='white',
445
+ plot_bgcolor='white'
446
  )
447
 
448
  # 3. Churn Risk Analysis
 
453
  nbins=20,
454
  title='Churn Probability Distribution',
455
  labels={'churn_probability': 'Churn Probability', 'count': 'Number of Customers'},
456
+ color_discrete_sequence=['#6366f1']
457
  )
458
+ fig3.add_vline(x=0.5, line_dash="dash", line_color="#ef4444", annotation_text="High Risk Threshold")
459
  else:
460
  risk_data = self.df.groupby('customer_id')['Churn_Risk'].first().value_counts().reset_index()
461
  risk_data.columns = ['Risk_Level', 'Count']
 
472
  fig3.update_layout(
473
  height=400,
474
  showlegend=False,
475
+ title={'x': 0.5, 'xanchor': 'center', 'font': {'size': 18, 'color': '#1f2937', 'family': 'Inter'}},
476
+ font=dict(family="Inter, sans-serif", color='#374151'),
477
+ paper_bgcolor='white',
478
  plot_bgcolor='white'
479
  )
480
 
 
491
  labels={'amount': 'Revenue ($)', 'order_month': 'Month'},
492
  line_shape='spline'
493
  )
494
+ fig4.update_traces(line_color='#6366f1', line_width=3)
495
  fig4.update_layout(
496
  height=400,
497
+ title={'x': 0.5, 'xanchor': 'center', 'font': {'size': 18, 'color': '#1f2937', 'family': 'Inter'}},
498
+ font=dict(family="Inter, sans-serif", color='#374151'),
499
+ paper_bgcolor='white',
500
  plot_bgcolor='white',
501
  xaxis_tickangle=-45
502
  )
503
 
504
+ return