entropy25 committed on
Commit
7ecef08
·
verified ·
1 Parent(s): 61d745b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +797 -443
app.py CHANGED
@@ -6,16 +6,19 @@ import matplotlib.pyplot as plt
6
  import seaborn as sns
7
  from sklearn.model_selection import train_test_split, cross_val_score
8
  from sklearn.ensemble import RandomForestClassifier
9
- from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_auc_score
10
  import plotly.express as px
11
  import plotly.graph_objects as go
 
 
12
  from datetime import datetime, timedelta
13
  import io
14
  import base64
15
  import warnings
 
16
  warnings.filterwarnings('ignore')
17
 
18
- # Optional imports with fallbacks
19
  try:
20
  import xgboost as xgb
21
  XGBOOST_AVAILABLE = True
@@ -28,157 +31,178 @@ try:
28
  from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
29
  from reportlab.lib.units import inch
30
  from reportlab.lib import colors
31
- from reportlab.graphics.shapes import Drawing
32
- from reportlab.graphics.charts.piecharts import Pie
33
- from reportlab.graphics.charts.barcharts import VerticalBarChart
34
- from reportlab.graphics import renderPDF
35
  REPORTLAB_AVAILABLE = True
36
  except ImportError:
37
  REPORTLAB_AVAILABLE = False
38
 
39
- # Configuration
40
- CONFIG = {
41
  'churn_threshold_days': 90,
42
  'high_risk_probability': 0.7,
43
  'rfm_quantiles': 5,
44
- 'min_customers_for_training': 10
45
  }
46
 
 
47
  COLORS = {
48
  'primary': '#6366f1',
49
- 'success': '#10b981',
50
  'warning': '#f59e0b',
51
  'danger': '#ef4444',
52
- 'purple': '#8b5cf6'
 
 
 
53
  }
54
 
55
  class DataProcessor:
56
- """Handles data loading, cleaning, and validation"""
57
 
58
  @staticmethod
59
- def load_and_validate(file_path):
60
  """Load and validate CSV file"""
61
- df = pd.read_csv(file_path)
 
62
 
63
- # Column mapping
64
- column_map = DataProcessor._map_columns(df.columns)
65
- df = df.rename(columns=column_map)
66
-
67
- # Data cleaning
68
- df = DataProcessor._clean_data(df)
69
-
70
- return df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
  @staticmethod
73
- def _map_columns(columns):
74
- """Map various column name formats to standard names"""
 
75
  mapping = {}
76
- columns_lower = [col.lower().strip() for col in columns]
77
 
78
- variations = {
79
- 'customer_id': ['customer', 'cust_id', 'id', 'customerid', 'client_id'],
80
- 'order_date': ['date', 'orderdate', 'purchase_date', 'transaction_date'],
81
- 'amount': ['revenue', 'value', 'price', 'total', 'sales', 'order_value']
82
  }
83
 
84
- for standard_name, variants in variations.items():
85
- for col, col_lower in zip(columns, columns_lower):
86
- if (standard_name in col_lower or
87
- any(variant in col_lower for variant in variants)):
88
- mapping[col] = standard_name
 
 
89
  break
 
 
90
 
91
  return mapping
92
 
93
  @staticmethod
94
- def _clean_data(df):
95
- """Clean and convert data types"""
96
- required_cols = ['customer_id', 'order_date', 'amount']
97
-
98
- # Check required columns
99
- missing_cols = [col for col in required_cols if col not in df.columns]
100
- if missing_cols:
101
- raise ValueError(f"Missing columns: {missing_cols}")
102
-
103
- # Convert data types
104
  df['customer_id'] = df['customer_id'].astype(str)
105
  df['order_date'] = pd.to_datetime(df['order_date'], errors='coerce')
106
  df['amount'] = pd.to_numeric(df['amount'], errors='coerce')
107
 
108
  # Remove invalid rows
109
- df = df.dropna(subset=required_cols)
110
- df = df[df['amount'] > 0] # Remove negative/zero amounts
111
 
112
  return df
113
 
114
- class FeatureEngineering:
115
- """Advanced feature engineering for customer analytics"""
116
 
117
  @staticmethod
118
- def calculate_rfm_features(df):
119
- """Calculate RFM and additional behavioral features"""
120
  current_date = df['order_date'].max() + timedelta(days=1)
121
 
122
- # Basic RFM
123
- customer_features = df.groupby('customer_id').agg({
124
- 'order_date': ['min', 'max', 'count'],
125
  'amount': ['sum', 'mean', 'std', 'min', 'max']
126
  })
127
 
128
- # Flatten columns
129
- customer_features.columns = [
130
- 'first_order', 'last_order', 'frequency',
131
- 'monetary', 'avg_amount', 'std_amount', 'min_amount', 'max_amount'
132
  ]
133
 
134
- # Calculate derived features
135
- customer_features['recency_days'] = (current_date - customer_features['last_order']).dt.days
136
- customer_features['customer_lifetime_days'] = (customer_features['last_order'] - customer_features['first_order']).dt.days
137
- customer_features['std_amount'] = customer_features['std_amount'].fillna(0)
138
-
139
- # Behavioral features
140
- customer_features['order_frequency'] = customer_features['frequency'] / (customer_features['customer_lifetime_days'] + 1)
141
- customer_features['amount_trend'] = customer_features['max_amount'] / customer_features['min_amount']
142
- customer_features['amount_consistency'] = 1 - (customer_features['std_amount'] / customer_features['avg_amount']).fillna(0)
143
 
144
- return customer_features.reset_index()
145
 
146
  class CustomerSegmenter:
147
- """Customer segmentation using RFM analysis"""
148
 
149
  @staticmethod
150
- def perform_segmentation(customer_features):
151
- """Segment customers based on RFM scores"""
152
- df = customer_features.copy()
153
 
154
  # Calculate RFM scores
155
- if len(df) >= CONFIG['rfm_quantiles']:
156
- df['r_score'] = pd.qcut(df['recency_days'], CONFIG['rfm_quantiles'],
157
- labels=[5,4,3,2,1], duplicates='drop')
158
- df['f_score'] = pd.qcut(df['frequency'], CONFIG['rfm_quantiles'],
159
- labels=[1,2,3,4,5], duplicates='drop')
160
- df['m_score'] = pd.qcut(df['monetary'], CONFIG['rfm_quantiles'],
161
- labels=[1,2,3,4,5], duplicates='drop')
 
 
 
 
 
 
 
 
 
162
  else:
163
- # Simple scoring for small datasets
164
- df['r_score'] = pd.cut(df['recency_days'], bins=3, labels=[3,2,1])
165
- df['f_score'] = pd.cut(df['frequency'], bins=3, labels=[1,2,3])
166
- df['m_score'] = pd.cut(df['monetary'], bins=3, labels=[1,2,3])
167
 
168
- # Convert to numeric
169
- for col in ['r_score', 'f_score', 'm_score']:
170
  df[col] = pd.to_numeric(df[col], errors='coerce').fillna(3).astype(int)
171
 
172
- # Segment assignment
173
- df['segment'] = df.apply(CustomerSegmenter._assign_segment, axis=1)
174
- df['churn_risk'] = df['segment'].map(CustomerSegmenter._get_risk_mapping())
175
 
176
  return df
177
 
178
  @staticmethod
179
- def _assign_segment(row):
180
  """Assign customer segment based on RFM scores"""
181
- r, f, m = row['r_score'], row['f_score'], row['m_score']
182
 
183
  if r >= 4 and f >= 4 and m >= 4:
184
  return 'Champions'
@@ -191,59 +215,77 @@ class CustomerSegmenter:
191
  elif r <= 2 and f >= 3:
192
  return 'At Risk'
193
  elif r <= 2 and f <= 2 and m >= 3:
194
- return 'Cannot Lose'
195
  elif r <= 2 and f <= 2 and m <= 2:
196
- return 'Lost'
197
  else:
198
  return 'Others'
199
 
200
  @staticmethod
201
- def _get_risk_mapping():
202
- """Map segments to risk levels"""
203
- return {
204
- 'Champions': 'Low',
205
- 'Loyal Customers': 'Low',
206
- 'Potential Loyalists': 'Medium',
207
- 'New Customers': 'Low',
208
- 'At Risk': 'High',
209
- 'Cannot Lose': 'High',
210
- 'Lost': 'High',
211
- 'Others': 'Medium'
212
- }
213
 
214
  class ChurnPredictor:
215
- """Machine learning model for churn prediction"""
216
 
217
  def __init__(self):
218
  self.model = None
219
  self.feature_importance = None
220
-
221
- def train(self, customer_features):
 
222
  """Train churn prediction model"""
223
- df = customer_features.copy()
 
224
 
225
- # Create target variable
226
- df['churn_label'] = (df['recency_days'] > CONFIG['churn_threshold_days']).astype(int)
 
 
 
227
 
228
- # Validate data
229
- if len(df) < CONFIG['min_customers_for_training']:
230
- raise ValueError(f"Insufficient data: need at least {CONFIG['min_customers_for_training']} customers")
231
 
232
- if df['churn_label'].nunique() < 2:
233
- raise ValueError("All customers have same churn status - cannot train model")
 
234
 
235
- # Select features
236
- feature_cols = [
237
- 'recency_days', 'frequency', 'monetary', 'avg_amount', 'std_amount',
238
- 'customer_lifetime_days', 'order_frequency', 'amount_trend', 'amount_consistency'
239
- ]
240
 
241
- X = df[feature_cols].fillna(0)
242
- y = df['churn_label']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
243
 
244
- # Train model
245
- self.model = self._get_best_model()
246
- self.model.fit(X, y)
 
 
247
 
248
  # Feature importance
249
  self.feature_importance = pd.DataFrame({
@@ -251,181 +293,201 @@ class ChurnPredictor:
251
  'importance': self.model.feature_importances_
252
  }).sort_values('importance', ascending=False)
253
 
254
- # Model evaluation
255
- cv_scores = cross_val_score(self.model, X, y, cv=5, scoring='roc_auc')
256
-
257
- # Predictions for all customers
258
- df['churn_probability'] = self.model.predict_proba(X)[:, 1]
259
-
260
- return {
261
- 'model_type': type(self.model).__name__,
262
- 'cv_auc_mean': cv_scores.mean(),
263
- 'cv_auc_std': cv_scores.std(),
264
- 'feature_importance': self.feature_importance,
265
- 'predictions': df
266
  }
 
 
267
 
268
- def _get_best_model(self):
269
- """Select best available model"""
270
- if XGBOOST_AVAILABLE:
271
- try:
272
- return xgb.XGBClassifier(random_state=42, eval_metric='logloss')
273
- except:
274
- pass
275
- return RandomForestClassifier(random_state=42, n_estimators=100)
 
 
 
 
 
 
 
 
 
 
276
 
277
- class Visualizer:
278
- """Create interactive visualizations"""
279
 
280
  @staticmethod
281
- def create_segment_chart(df):
282
- """Customer segment distribution"""
283
- segment_counts = df['segment'].value_counts()
 
284
 
285
  fig = px.pie(
286
- values=segment_counts.values,
287
- names=segment_counts.index,
 
288
  title='Customer Segment Distribution',
289
  hole=0.4,
290
- color_discrete_sequence=px.colors.qualitative.Set3
291
  )
292
- fig.update_layout(height=400, title_x=0.5)
 
293
  return fig
294
 
295
  @staticmethod
296
- def create_rfm_scatter(df):
297
- """RFM behavior matrix"""
298
  fig = px.scatter(
299
- df, x='recency_days', y='frequency', size='monetary',
300
- color='segment', title='Customer Behavior Matrix (RFM)',
301
- labels={'recency_days': 'Days Since Last Order', 'frequency': 'Order Count'}
 
 
 
 
 
 
 
 
 
302
  )
303
- fig.update_layout(height=400, title_x=0.5)
304
  return fig
305
 
306
  @staticmethod
307
- def create_churn_distribution(df):
308
- """Churn probability distribution"""
309
- if 'churn_probability' in df.columns:
310
  fig = px.histogram(
311
- df, x='churn_probability', nbins=20,
 
 
312
  title='Churn Probability Distribution',
313
- labels={'churn_probability': 'Churn Probability'}
 
314
  )
315
- fig.add_vline(x=CONFIG['high_risk_probability'], line_dash="dash",
316
- line_color="red", annotation_text="High Risk Threshold")
317
  else:
318
- risk_counts = df['churn_risk'].value_counts()
319
- colors = {'High': COLORS['danger'], 'Medium': COLORS['warning'], 'Low': COLORS['success']}
 
 
320
  fig = px.bar(
321
- x=risk_counts.index, y=risk_counts.values,
322
- title='Churn Risk Distribution',
323
- color=risk_counts.index, color_discrete_map=colors
 
 
 
324
  )
 
325
 
326
- fig.update_layout(height=400, title_x=0.5)
327
  return fig
328
 
329
  @staticmethod
330
- def create_feature_importance_chart(feature_importance):
331
- """Feature importance visualization"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
332
  fig = px.bar(
333
- feature_importance.head(8), x='importance', y='feature',
334
- orientation='h', title='Feature Importance Analysis',
335
- color='importance', color_continuous_scale='viridis'
 
 
 
 
 
 
 
 
 
 
 
 
 
336
  )
337
- fig.update_layout(height=500, title_x=0.5, yaxis={'categoryorder': 'total ascending'})
338
  return fig
339
 
340
  class ReportGenerator:
341
- """Generate dashboards and PDF reports"""
342
-
343
- @staticmethod
344
- def create_dashboard(df, model_results=None):
345
- """Generate HTML dashboard"""
346
- total_customers = len(df)
347
- total_revenue = df['monetary'].sum()
348
- avg_order_value = df['avg_amount'].mean()
349
- high_risk_count = len(df[df['churn_risk'] == 'High'])
350
-
351
- dashboard_html = f"""
352
- <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 1rem; margin-bottom: 2rem;">
353
- <div style="background: linear-gradient(135deg, {COLORS['primary']}, #4f46e5); padding: 1.5rem; border-radius: 12px; color: white; text-align: center;">
354
- <h3 style="margin: 0 0 0.5rem 0; font-size: 0.9rem; opacity: 0.9;">Total Customers</h3>
355
- <div style="font-size: 2.5rem; font-weight: bold;">{total_customers:,}</div>
356
- </div>
357
- <div style="background: linear-gradient(135deg, {COLORS['success']}, #047857); padding: 1.5rem; border-radius: 12px; color: white; text-align: center;">
358
- <h3 style="margin: 0 0 0.5rem 0; font-size: 0.9rem; opacity: 0.9;">Total Revenue</h3>
359
- <div style="font-size: 2.5rem; font-weight: bold;">${total_revenue/1000:.0f}K</div>
360
- </div>
361
- <div style="background: linear-gradient(135deg, {COLORS['purple']}, #6d28d9); padding: 1.5rem; border-radius: 12px; color: white; text-align: center;">
362
- <h3 style="margin: 0 0 0.5rem 0; font-size: 0.9rem; opacity: 0.9;">Avg Order Value</h3>
363
- <div style="font-size: 2.5rem; font-weight: bold;">${avg_order_value:.0f}</div>
364
- </div>
365
- <div style="background: linear-gradient(135deg, {COLORS['danger']}, #dc2626); padding: 1.5rem; border-radius: 12px; color: white; text-align: center;">
366
- <h3 style="margin: 0 0 0.5rem 0; font-size: 0.9rem; opacity: 0.9;">High Risk</h3>
367
- <div style="font-size: 2.5rem; font-weight: bold;">{high_risk_count}</div>
368
- </div>
369
- </div>
370
- """
371
-
372
- if model_results:
373
- dashboard_html += f"""
374
- <div style="background: #f8fafc; padding: 1.5rem; border-radius: 12px; border-left: 4px solid {COLORS['primary']}; margin-top: 1rem;">
375
- <h4 style="margin: 0 0 1rem 0; color: #374151;">Model Performance</h4>
376
- <p><strong>Model:</strong> {model_results['model_type']}</p>
377
- <p><strong>Cross-validation AUC:</strong> {model_results['cv_auc_mean']:.3f} ± {model_results['cv_auc_std']:.3f}</p>
378
- </div>
379
- """
380
-
381
- return dashboard_html
382
 
383
  @staticmethod
384
- def generate_pdf_report(df, model_results=None):
385
- """Generate comprehensive PDF report"""
386
  if not REPORTLAB_AVAILABLE:
387
- raise ImportError("ReportLab is required for PDF generation")
388
 
389
  buffer = io.BytesIO()
390
- doc = SimpleDocTemplate(buffer, pagesize=A4, rightMargin=72, leftMargin=72,
391
- topMargin=72, bottomMargin=18)
 
392
 
393
  styles = getSampleStyleSheet()
394
  story = []
395
 
396
  # Title
397
  title_style = ParagraphStyle('CustomTitle', parent=styles['Title'],
398
- fontSize=24, spaceAfter=30, alignment=1)
399
  story.append(Paragraph("B2B Customer Analytics Report", title_style))
400
  story.append(Spacer(1, 12))
401
 
402
- # Executive Summary
403
  story.append(Paragraph("Executive Summary", styles['Heading2']))
404
 
405
- total_customers = len(df)
406
- total_revenue = df['monetary'].sum()
407
- avg_revenue = df['monetary'].mean()
408
 
409
  summary_text = f"""
410
- <para>This comprehensive analysis covers <b>{total_customers:,}</b> customers with
411
- total revenue of <b>${total_revenue:,.0f}</b>. The average customer lifetime value
412
- is <b>${avg_revenue:.0f}</b>.</para>
413
- <para>Customers have been segmented using advanced RFM analysis, and machine learning
414
- models have been applied for churn prediction.</para>
415
  """
416
  story.append(Paragraph(summary_text, styles['Normal']))
417
- story.append(Spacer(1, 12))
418
 
419
- # Customer Segments
420
- story.append(Paragraph("Customer Segmentation", styles['Heading2']))
 
421
 
422
- segment_data = df['segment'].value_counts()
423
- segment_table_data = [['Segment', 'Count', 'Percentage']]
424
- for segment, count in segment_data.items():
425
- percentage = f"{count/len(df)*100:.1f}%"
426
- segment_table_data.append([segment, str(count), percentage])
427
 
428
- segment_table = Table(segment_table_data)
429
  segment_table.setStyle(TableStyle([
430
  ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
431
  ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
@@ -437,256 +499,548 @@ class ReportGenerator:
437
  ('GRID', (0, 0), (-1, -1), 1, colors.black)
438
  ]))
439
  story.append(segment_table)
440
- story.append(Spacer(1, 12))
441
 
442
- # Model Performance
443
- if model_results:
444
- story.append(Paragraph("Churn Prediction Model", styles['Heading2']))
445
  model_text = f"""
446
- <para><b>Model Type:</b> {model_results['model_type']}</para>
447
- <para><b>Cross-validation AUC:</b> {model_results['cv_auc_mean']:.3f} ± {model_results['cv_auc_std']:.3f}</para>
448
- <para>The model uses advanced feature engineering including behavioral patterns
449
- and customer lifecycle metrics for accurate churn prediction.</para>
 
 
450
  """
451
  story.append(Paragraph(model_text, styles['Normal']))
452
- story.append(Spacer(1, 12))
453
-
454
- # Top features
455
- if not model_results['feature_importance'].empty:
456
- story.append(Paragraph("Key Predictive Features", styles['Heading3']))
457
- feature_table_data = [['Feature', 'Importance']]
458
- for _, row in model_results['feature_importance'].head(5).iterrows():
459
- feature_table_data.append([row['feature'].replace('_', ' ').title(), f"{row['importance']:.3f}"])
460
-
461
- feature_table = Table(feature_table_data)
462
- feature_table.setStyle(TableStyle([
463
- ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
464
- ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
465
- ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
466
- ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
467
- ('GRID', (0, 0), (-1, -1), 1, colors.black)
468
- ]))
469
- story.append(feature_table)
470
-
471
- # Build PDF
472
  doc.build(story)
473
  pdf_bytes = buffer.getvalue()
474
  buffer.close()
475
-
476
  return pdf_bytes
477
 
478
- class B2BAnalyticsApp:
479
- """Main application orchestrator"""
480
 
481
  def __init__(self):
482
  self.raw_data = None
483
- self.customer_features = None
484
- self.segmented_data = None
485
- self.model_results = None
486
- self.predictor = ChurnPredictor()
487
 
488
- def load_data(self, file):
489
- """Load and process uploaded file"""
490
- try:
491
- if file is None:
492
- return "Please upload a CSV file", None, None
 
 
493
 
494
- # Load and process data
495
- self.raw_data = DataProcessor.load_and_validate(file.name)
496
- self.customer_features = FeatureEngineering.calculate_rfm_features(self.raw_data)
497
- self.segmented_data = CustomerSegmenter.perform_segmentation(self.customer_features)
498
 
499
  # Generate dashboard
500
- dashboard = ReportGenerator.create_dashboard(self.segmented_data)
501
- preview = self.segmented_data.head(20)
502
-
503
- status = f"Successfully processed {len(self.segmented_data)} customers from {len(self.raw_data)} transactions"
504
- return status, dashboard, preview
505
 
506
- except Exception as e:
507
- return f"Error: {str(e)}", None, None
 
508
 
509
- def train_churn_model(self):
510
  """Train churn prediction model"""
511
- try:
512
- if self.segmented_data is None:
513
- return "Please load data first", None
514
-
515
- self.model_results = self.predictor.train(self.segmented_data)
516
-
517
- # Update dashboard with model results
518
- dashboard = ReportGenerator.create_dashboard(self.segmented_data, self.model_results)
 
519
 
520
- # Create feature importance chart
521
- importance_chart = Visualizer.create_feature_importance_chart(
522
- self.model_results['feature_importance']
523
  )
524
-
525
- return dashboard, importance_chart
526
-
527
- except Exception as e:
528
- return f"Error: {str(e)}", None
529
 
530
- def create_visualizations(self):
531
- """Generate all visualization charts"""
532
- if self.segmented_data is None:
533
- return None, None, None
534
 
535
- try:
536
- # Use predictions if available, otherwise use segmented data
537
- data_for_viz = (self.model_results['predictions'] if self.model_results
538
- else self.segmented_data)
539
-
540
- segment_chart = Visualizer.create_segment_chart(data_for_viz)
541
- rfm_chart = Visualizer.create_rfm_scatter(data_for_viz)
542
- churn_chart = Visualizer.create_churn_distribution(data_for_viz)
543
-
544
- return segment_chart, rfm_chart, churn_chart
545
-
546
- except Exception as e:
547
- print(f"Visualization error: {e}")
548
- return None, None, None
549
 
550
- def get_customer_summary_table(self):
551
- """Generate customer summary table"""
552
- if self.segmented_data is None:
553
  return None
554
 
555
- try:
556
- display_data = self.segmented_data.copy()
557
-
558
- # Add predictions if available
559
- if self.model_results:
560
- pred_data = self.model_results['predictions']
561
- display_data = display_data.merge(
562
- pred_data[['customer_id', 'churn_probability']],
563
- on='customer_id', how='left'
564
- )
565
- display_data['churn_probability'] = (display_data['churn_probability'] * 100).round(1)
566
- else:
567
- display_data['churn_probability'] = 50.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
568
 
569
- # Select and format columns
570
- summary_table = display_data[[
571
- 'customer_id', 'segment', 'churn_risk', 'recency_days',
572
- 'frequency', 'monetary', 'avg_amount', 'churn_probability'
573
- ]].round(2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
574
 
575
- summary_table.columns = [
576
- 'Customer ID', 'Segment', 'Risk Level', 'Recency (Days)',
577
- 'Orders', 'Total Revenue ($)', 'Avg Order ($)', 'Churn Risk (%)'
578
- ]
 
 
 
 
 
 
 
 
 
 
579
 
580
- return summary_table.head(100)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
581
 
582
- except Exception as e:
583
- print(f"Table generation error: {e}")
584
- return None
 
 
 
585
 
586
- def generate_pdf_report(self):
587
- """Generate and return PDF report"""
588
- try:
589
- if self.segmented_data is None:
590
- return None
591
-
592
- pdf_bytes = ReportGenerator.generate_pdf_report(
593
- self.segmented_data, self.model_results
594
- )
595
-
596
- # Save to temporary file for download
597
- import tempfile
598
- with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
599
- tmp_file.write(pdf_bytes)
600
- return tmp_file.name
601
-
602
- except Exception as e:
603
- print(f"PDF generation error: {e}")
604
- return None
 
 
 
 
 
605
 
606
- def create_interface():
607
- """Create Gradio interface"""
608
 
609
- app = B2BAnalyticsApp()
 
 
 
 
 
 
 
 
 
 
610
 
611
- with gr.Blocks(theme=gr.themes.Soft(), title="B2B Customer Analytics") as demo:
 
 
 
612
 
613
  gr.HTML("""
614
- <div style="background: linear-gradient(135deg, #6366f1 0%, #8b5cf6 100%);
615
- padding: 2rem; border-radius: 1rem; color: white; text-align: center; margin-bottom: 2rem;">
616
- <h1 style="font-size: 2.5rem; font-weight: bold; margin-bottom: 0.5rem;">
617
- B2B Customer Analytics Platform
618
- </h1>
619
- <p style="font-size: 1.1rem; opacity: 0.9;">
620
- Advanced Customer Segmentation & Churn Prediction
621
- </p>
622
  </div>
623
  """)
624
 
625
  with gr.Tabs():
626
- # Data Upload Tab
627
- with gr.Tab("Data Upload & Dashboard"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
628
  with gr.Row():
629
- file_input = gr.File(label="Upload Customer Data CSV", file_types=[".csv"])
630
- load_btn = gr.Button("Load & Process Data", variant="primary", size="lg")
 
 
 
 
631
 
632
- load_status = gr.Textbox(label="Status", interactive=False)
633
- dashboard_display = gr.HTML()
634
- data_preview = gr.DataFrame(label="Data Preview")
 
 
 
 
 
 
 
 
635
 
636
- # Segmentation Tab
637
- with gr.Tab("Customer Segmentation"):
 
 
 
 
 
 
638
  with gr.Row():
639
- segment_chart = gr.Plot(label="Customer Segments")
640
- rfm_chart = gr.Plot(label="RFM Analysis")
 
 
641
 
642
- customer_table = gr.DataFrame(label="Customer Summary")
 
 
 
 
 
 
 
 
643
 
644
- # Churn Prediction Tab
645
- with gr.Tab("Churn Prediction"):
646
- train_btn = gr.Button("Train Churn Model", variant="primary", size="lg")
647
- model_dashboard = gr.HTML()
648
 
 
 
 
 
 
 
 
 
 
 
649
  with gr.Row():
650
- importance_chart = gr.Plot(label="Feature Importance")
651
- churn_dist_chart = gr.Plot(label="Churn Risk Distribution")
 
 
 
 
 
 
 
 
 
 
652
 
653
- # Reports Tab
654
- with gr.Tab("Reports"):
655
- report_btn = gr.Button("Generate PDF Report", variant="primary", size="lg")
656
- report_status = gr.Textbox(label="Status", interactive=False)
657
- report_file = gr.File(label="Download Report")
658
-
659
- # Event handlers
660
- def load_and_visualize(file):
661
- status, dashboard, preview = app.load_data(file)
662
- if "Successfully" in status:
663
- charts = app.create_visualizations()
664
- table = app.get_customer_summary_table()
665
- return status, dashboard, preview, charts[0], charts[1], table
666
- return status, dashboard, preview, None, None, None
667
-
668
- def train_and_update():
669
- dashboard, importance = app.train_churn_model()
670
- if "Error" not in dashboard:
671
- charts = app.create_visualizations()
672
- return dashboard, importance, charts[2]
673
- return dashboard, importance, None
674
-
675
- def generate_report():
676
- report_path = app.generate_pdf_report()
677
- if report_path:
678
- return "PDF report generated successfully", report_path
679
- return "Error generating PDF report", None
680
-
681
- # Connect events
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
682
  load_btn.click(
683
- fn=load_and_visualize,
684
- inputs=[file_input],
685
- outputs=[load_status, dashboard_display, data_preview,
686
- segment_chart, rfm_chart, customer_table]
687
  )
688
 
689
  train_btn.click(
690
- fn=train_and_update,
691
- outputs=[model_dashboard, importance_chart, churn_dist_chart]
692
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  import seaborn as sns
7
  from sklearn.model_selection import train_test_split, cross_val_score
8
  from sklearn.ensemble import RandomForestClassifier
9
+ from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_auc_score, precision_recall_curve
10
  import plotly.express as px
11
  import plotly.graph_objects as go
12
+ from plotly.subplots import make_subplots
13
+ import plotly.io as pio
14
  from datetime import datetime, timedelta
15
  import io
16
  import base64
17
  import warnings
18
+ from typing import Optional, Tuple, Dict, Any
19
  warnings.filterwarnings('ignore')
20
 
21
+ # Try importing optional dependencies
22
  try:
23
  import xgboost as xgb
24
  XGBOOST_AVAILABLE = True
 
31
  from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
32
  from reportlab.lib.units import inch
33
  from reportlab.lib import colors
 
 
 
 
34
  REPORTLAB_AVAILABLE = True
35
  except ImportError:
36
  REPORTLAB_AVAILABLE = False
37
 
38
# Business configuration: thresholds read by the analytics classes in this file.
BUSINESS_CONFIG = dict(
    churn_threshold_days=90,       # presumably days of inactivity that count as churn; confirm at use site
    high_risk_probability=0.7,     # presumably the churn-probability cut-off for "high risk"; confirm at use site
    rfm_quantiles=5,               # number of bins used when scoring R, F and M
    min_customers_for_model=10,    # presumably the minimum customer count for model training; confirm at use site
)

# UI color scheme: named hex colors shared by charts and HTML fragments.
COLORS = dict(
    primary='#6366f1',
    success='#10b981',
    warning='#f59e0b',
    danger='#ef4444',
    purple='#8b5cf6',
    pink='#ec4899',
    blue='#3b82f6',
    indigo='#6366f1',  # same hex value as 'primary'
)
57
 
58
  class DataProcessor:
59
+ """Handles data loading, validation, and preprocessing"""
60
 
61
  @staticmethod
62
+ def load_and_validate(file) -> Tuple[Optional[pd.DataFrame], str]:
63
  """Load and validate CSV file"""
64
+ if file is None:
65
+ return None, "Please upload a CSV file"
66
 
67
+ try:
68
+ df = pd.read_csv(file.name)
69
+
70
+ # Flexible column mapping
71
+ column_mapping = DataProcessor._map_columns(df.columns)
72
+ if not column_mapping:
73
+ return None, f"Required columns not found. Available: {list(df.columns)}"
74
+
75
+ df = df.rename(columns=column_mapping)
76
+
77
+ # Clean and validate data
78
+ initial_rows = len(df)
79
+ df = DataProcessor._clean_data(df)
80
+ final_rows = len(df)
81
+
82
+ if final_rows == 0:
83
+ return None, "No valid data after cleaning"
84
+
85
+ status = f"Data loaded successfully! {final_rows} records from {df['customer_id'].nunique()} customers"
86
+ if initial_rows != final_rows:
87
+ status += f" ({initial_rows - final_rows} invalid rows removed)"
88
+
89
+ return df, status
90
+
91
+ except Exception as e:
92
+ return None, f"Error loading data: {str(e)}"
93
 
94
  @staticmethod
95
+ def _map_columns(columns) -> Dict[str, str]:
96
+ """Map CSV columns to standard names"""
97
+ required = ['customer_id', 'order_date', 'amount']
98
  mapping = {}
 
99
 
100
+ column_variations = {
101
+ 'customer_id': ['customer', 'cust_id', 'id', 'customerid', 'client_id', 'customer_id'],
102
+ 'order_date': ['date', 'order_date', 'orderdate', 'purchase_date', 'transaction_date'],
103
+ 'amount': ['revenue', 'value', 'price', 'total', 'sales', 'order_value', 'amount']
104
  }
105
 
106
+ for req_col in required:
107
+ found = False
108
+ for col in columns:
109
+ col_lower = col.lower().strip()
110
+ if col_lower == req_col or any(var in col_lower for var in column_variations[req_col]):
111
+ mapping[col] = req_col
112
+ found = True
113
  break
114
+ if not found:
115
+ return {}
116
 
117
  return mapping
118
 
119
  @staticmethod
120
+ def _clean_data(df: pd.DataFrame) -> pd.DataFrame:
121
+ """Clean and prepare data"""
122
+ df = df.copy()
 
 
 
 
 
 
 
123
  df['customer_id'] = df['customer_id'].astype(str)
124
  df['order_date'] = pd.to_datetime(df['order_date'], errors='coerce')
125
  df['amount'] = pd.to_numeric(df['amount'], errors='coerce')
126
 
127
  # Remove invalid rows
128
+ df = df.dropna(subset=['customer_id', 'order_date', 'amount'])
129
+ df = df[df['amount'] > 0] # Remove negative amounts
130
 
131
  return df
132
 
133
+ class RFMAnalyzer:
134
+ """Handles RFM analysis and customer metrics calculation"""
135
 
136
  @staticmethod
137
+ def calculate_rfm_metrics(df: pd.DataFrame) -> pd.DataFrame:
138
+ """Calculate RFM metrics for customers"""
139
  current_date = df['order_date'].max() + timedelta(days=1)
140
 
141
+ customer_metrics = df.groupby('customer_id').agg({
142
+ 'order_date': ['max', 'count', 'min'],
 
143
  'amount': ['sum', 'mean', 'std', 'min', 'max']
144
  })
145
 
146
+ # Flatten column names
147
+ customer_metrics.columns = [
148
+ 'last_order_date', 'frequency', 'first_order_date',
149
+ 'monetary', 'avg_order_value', 'std_amount', 'min_amount', 'max_amount'
150
  ]
151
 
152
+ # Calculate additional features
153
+ customer_metrics['recency_days'] = (current_date - customer_metrics['last_order_date']).dt.days
154
+ customer_metrics['customer_lifetime_days'] = (
155
+ customer_metrics['last_order_date'] - customer_metrics['first_order_date']
156
+ ).dt.days
157
+ customer_metrics['std_amount'] = customer_metrics['std_amount'].fillna(0)
158
+ customer_metrics['customer_lifetime_days'] = customer_metrics['customer_lifetime_days'].fillna(0)
 
 
159
 
160
+ return customer_metrics.reset_index()
161
 
162
  class CustomerSegmenter:
163
+ """Handles customer segmentation based on RFM analysis"""
164
 
165
  @staticmethod
166
+ def perform_segmentation(customer_metrics: pd.DataFrame) -> pd.DataFrame:
167
+ """Segment customers using RFM scores"""
168
+ df = customer_metrics.copy()
169
 
170
  # Calculate RFM scores
171
+ if len(df) >= BUSINESS_CONFIG['rfm_quantiles']:
172
+ try:
173
+ df['R_Score'] = pd.qcut(df['recency_days'], BUSINESS_CONFIG['rfm_quantiles'],
174
+ labels=[5,4,3,2,1], duplicates='drop')
175
+ df['F_Score'] = pd.qcut(df['frequency'], BUSINESS_CONFIG['rfm_quantiles'],
176
+ labels=[1,2,3,4,5], duplicates='drop')
177
+ df['M_Score'] = pd.qcut(df['monetary'], BUSINESS_CONFIG['rfm_quantiles'],
178
+ labels=[1,2,3,4,5], duplicates='drop')
179
+ except ValueError:
180
+ # Fallback for small datasets
181
+ df['R_Score'] = pd.cut(df['recency_days'], bins=BUSINESS_CONFIG['rfm_quantiles'],
182
+ labels=[5,4,3,2,1], include_lowest=True)
183
+ df['F_Score'] = pd.cut(df['frequency'], bins=BUSINESS_CONFIG['rfm_quantiles'],
184
+ labels=[1,2,3,4,5], include_lowest=True)
185
+ df['M_Score'] = pd.cut(df['monetary'], bins=BUSINESS_CONFIG['rfm_quantiles'],
186
+ labels=[1,2,3,4,5], include_lowest=True)
187
  else:
188
+ df['R_Score'] = 3
189
+ df['F_Score'] = 3
190
+ df['M_Score'] = 3
 
191
 
192
+ # Convert to numeric and handle NaN
193
+ for col in ['R_Score', 'F_Score', 'M_Score']:
194
  df[col] = pd.to_numeric(df[col], errors='coerce').fillna(3).astype(int)
195
 
196
+ # Apply segmentation logic
197
+ df['Segment'] = df.apply(CustomerSegmenter._assign_segment, axis=1)
198
+ df['Churn_Risk'] = df.apply(CustomerSegmenter._assign_risk_level, axis=1)
199
 
200
  return df
201
 
202
  @staticmethod
203
+ def _assign_segment(row) -> str:
204
  """Assign customer segment based on RFM scores"""
205
+ r, f, m = row['R_Score'], row['F_Score'], row['M_Score']
206
 
207
  if r >= 4 and f >= 4 and m >= 4:
208
  return 'Champions'
 
215
  elif r <= 2 and f >= 3:
216
  return 'At Risk'
217
  elif r <= 2 and f <= 2 and m >= 3:
218
+ return 'Cannot Lose Them'
219
  elif r <= 2 and f <= 2 and m <= 2:
220
+ return 'Lost Customers'
221
  else:
222
  return 'Others'
223
 
224
  @staticmethod
225
+ def _assign_risk_level(row) -> str:
226
+ """Assign churn risk level"""
227
+ segment = CustomerSegmenter._assign_segment(row)
228
+ if segment in ['Lost Customers', 'At Risk']:
229
+ return 'High'
230
+ elif segment in ['Others', 'Cannot Lose Them']:
231
+ return 'Medium'
232
+ else:
233
+ return 'Low'
 
 
 
234
 
235
class ChurnPredictor:
    """Trains a churn classifier on per-customer metrics and scores customers.

    A customer is labelled as churned when ``recency_days`` exceeds
    ``BUSINESS_CONFIG['churn_threshold_days']``.

    NOTE(review): ``recency_days`` is also one of the model features, so the
    label is directly recoverable from the inputs and the reported accuracy,
    AUC and CV scores are likely optimistic — confirm whether this is intended.
    """

    # Single source of truth for the model inputs, so training and inference
    # always use the same columns in the same order.
    FEATURE_COLUMNS = [
        'recency_days', 'frequency', 'monetary', 'avg_order_value',
        'std_amount', 'min_amount', 'max_amount', 'customer_lifetime_days'
    ]

    def __init__(self):
        self.model = None               # fitted estimator, None until trained
        self.feature_importance = None  # DataFrame of feature / importance pairs
        self.model_metrics = {}         # metrics of the last successful training

    @staticmethod
    def _build_model():
        """Return ``(estimator, display_name)``, preferring XGBoost when usable."""
        if XGBOOST_AVAILABLE:
            try:
                return (
                    xgb.XGBClassifier(random_state=42, eval_metric='logloss'),
                    "XGBoost Classifier",
                )
            except Exception:
                # Best effort: any failure constructing the XGBoost estimator
                # falls through to the random forest below.
                pass
        return (
            RandomForestClassifier(random_state=42, n_estimators=100),
            "Random Forest Classifier",
        )

    def train_model(self, customer_metrics: pd.DataFrame) -> Tuple[bool, str, Dict]:
        """Train and evaluate the churn model.

        Args:
            customer_metrics: One row per customer containing every column in
                ``FEATURE_COLUMNS``.

        Returns:
            ``(success, message, metrics)``. ``metrics`` is empty when
            ``success`` is False; otherwise it holds ``accuracy``,
            ``auc_score``, ``cv_mean``, ``cv_std``, ``model_name``,
            ``n_features`` and ``n_samples``.
        """
        min_customers = BUSINESS_CONFIG['min_customers_for_model']
        if len(customer_metrics) < min_customers:
            return False, f"Insufficient data for training (minimum {min_customers} customers required)", {}

        feature_cols = list(self.FEATURE_COLUMNS)
        X = customer_metrics[feature_cols]
        y = (customer_metrics['recency_days'] > BUSINESS_CONFIG['churn_threshold_days']).astype(int)

        if y.nunique() < 2:
            return False, "Cannot train model: all customers have the same churn status", {}

        # A stratified split needs at least two members of every class;
        # report that as a training failure instead of letting it raise.
        smallest_class = int(y.value_counts().min())
        if smallest_class < 2:
            return False, "Cannot train model: each churn class needs at least 2 customers", {}

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42, stratify=y
        )

        model, model_name = self._build_model()
        model.fit(X_train, y_train)
        # Publish the estimator only once it has been fitted.
        self.model = model

        y_pred = model.predict(X_test)
        y_pred_proba = model.predict_proba(X_test)[:, 1]

        accuracy = accuracy_score(y_test, y_pred)
        try:
            auc_score = roc_auc_score(y_test, y_pred_proba)
        except ValueError:
            # AUC is undefined when the test fold holds a single class.
            auc_score = float('nan')

        # Never ask for more folds than the rarest class can populate.
        n_folds = min(5, smallest_class)
        cv_scores = cross_val_score(model, X, y, cv=n_folds, scoring='roc_auc')

        # Feature importance
        self.feature_importance = pd.DataFrame({
            'feature': feature_cols,
            'importance': model.feature_importances_
        }).sort_values('importance', ascending=False)

        self.model_metrics = {
            'accuracy': accuracy,
            'auc_score': auc_score,
            'cv_mean': cv_scores.mean(),
            'cv_std': cv_scores.std(),
            'model_name': model_name,
            'n_features': len(feature_cols),
            'n_samples': len(X_train)
        }

        return True, "Model trained successfully", self.model_metrics

    def predict(self, customer_metrics: pd.DataFrame) -> pd.DataFrame:
        """Add churn predictions to the customer metrics.

        Returns:
            ``customer_metrics`` itself when no model has been trained.
            Otherwise a copy with ``churn_probability`` (probability of the
            churn class) and ``predicted_churn`` (1 when the probability is
            above ``BUSINESS_CONFIG['high_risk_probability']``).
        """
        if self.model is None:
            return customer_metrics

        X = customer_metrics[list(self.FEATURE_COLUMNS)]
        predictions = self.model.predict_proba(X)[:, 1]

        result = customer_metrics.copy()
        result['churn_probability'] = predictions
        result['predicted_churn'] = (predictions > BUSINESS_CONFIG['high_risk_probability']).astype(int)

        return result
326
 
327
class VisualizationEngine:
    """Builds the Plotly figures shown in the dashboard."""

    @staticmethod
    def create_segment_chart(customer_data: pd.DataFrame):
        """Return a donut chart of how many customers fall in each segment."""
        counts = customer_data['Segment'].value_counts().reset_index()
        counts.columns = ['Segment', 'Count']

        palette = list(COLORS.values())
        donut = px.pie(
            counts,
            values='Count',
            names='Segment',
            title='Customer Segment Distribution',
            hole=0.4,
            color_discrete_sequence=palette,
        )
        donut.update_traces(textposition='inside', textinfo='percent+label')
        donut.update_layout(height=400, title={'x': 0.5, 'xanchor': 'center'})
        return donut

    @staticmethod
    def create_rfm_scatter(customer_data: pd.DataFrame):
        """Return a recency-vs-frequency scatter sized by revenue, coloured by segment."""
        axis_labels = {
            'recency_days': 'Days Since Last Purchase',
            'frequency': 'Purchase Frequency',
            'monetary': 'Total Revenue',
        }
        scatter = px.scatter(
            customer_data,
            x='recency_days',
            y='frequency',
            size='monetary',
            color='Segment',
            title='RFM Customer Behavior Matrix',
            labels=axis_labels,
            color_discrete_sequence=list(COLORS.values()),
        )
        scatter.update_layout(height=400, title={'x': 0.5, 'xanchor': 'center'})
        return scatter

    @staticmethod
    def create_churn_chart(customer_data: pd.DataFrame, has_predictions: bool = False):
        """Return the churn view for the current state.

        With model predictions available this is a histogram of
        ``churn_probability`` with the high-risk threshold marked; otherwise
        it is a bar chart of the rule-based ``Churn_Risk`` levels.
        """
        show_probabilities = (
            has_predictions and 'churn_probability' in customer_data.columns
        )

        if not show_probabilities:
            risk_counts = customer_data['Churn_Risk'].value_counts().reset_index()
            risk_counts.columns = ['Risk_Level', 'Count']

            colors_map = {
                'High': COLORS['danger'],
                'Medium': COLORS['warning'],
                'Low': COLORS['success'],
            }
            fig = px.bar(
                risk_counts,
                x='Risk_Level',
                y='Count',
                title='Customer Churn Risk Distribution',
                color='Risk_Level',
                color_discrete_map=colors_map,
            )
            fig.update_layout(showlegend=False)
        else:
            fig = px.histogram(
                customer_data,
                x='churn_probability',
                nbins=20,
                title='Churn Probability Distribution',
                labels={'churn_probability': 'Churn Probability', 'count': 'Number of Customers'},
                color_discrete_sequence=[COLORS['primary']],
            )
            fig.add_vline(
                x=BUSINESS_CONFIG['high_risk_probability'],
                line_dash="dash",
                line_color=COLORS['danger'],
                annotation_text="High Risk Threshold",
            )

        fig.update_layout(height=400, title={'x': 0.5, 'xanchor': 'center'})
        return fig

    @staticmethod
    def create_revenue_trend(df: pd.DataFrame):
        """Return a line chart of total order amount per calendar month."""
        # assign() works on a copy, so the caller's frame is left untouched.
        with_month = df.assign(order_month=df['order_date'].dt.to_period('M'))
        monthly_revenue = with_month.groupby('order_month')['amount'].sum().reset_index()
        # Periods are converted to strings before being handed to Plotly.
        monthly_revenue['order_month'] = monthly_revenue['order_month'].astype(str)

        trend = px.line(
            monthly_revenue,
            x='order_month',
            y='amount',
            title='Monthly Revenue Trends',
            labels={'amount': 'Revenue ($)', 'order_month': 'Month'},
        )
        trend.update_traces(line_color=COLORS['primary'], line_width=3)
        trend.update_layout(height=400, title={'x': 0.5, 'xanchor': 'center'})
        return trend

    @staticmethod
    def create_feature_importance_chart(feature_importance: pd.DataFrame):
        """Return a horizontal bar chart of the first eight feature importances."""
        top_features = feature_importance.head(8)
        bars = px.bar(
            top_features,
            x='importance',
            y='feature',
            orientation='h',
            title='Feature Importance Analysis',
            labels={'importance': 'Importance Score', 'feature': 'Features'},
            color='importance',
            color_continuous_scale='viridis',
        )
        layout_options = {
            'height': 500,
            'showlegend': False,
            'plot_bgcolor': 'white',
            'paper_bgcolor': 'white',
            'title': {'x': 0.5, 'xanchor': 'center'},
            'yaxis': {'categoryorder': 'total ascending'},
        }
        bars.update_layout(**layout_options)
        return bars
441
 
442
  class ReportGenerator:
443
+ """Handles report generation"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
444
 
445
  @staticmethod
446
+ def generate_pdf_report(customer_data: pd.DataFrame, model_metrics: Dict) -> bytes:
447
+ """Generate PDF report"""
448
  if not REPORTLAB_AVAILABLE:
449
+ raise ImportError("PDF generation requires ReportLab library")
450
 
451
  buffer = io.BytesIO()
452
+ doc = SimpleDocTemplate(buffer, pagesize=A4,
453
+ rightMargin=72, leftMargin=72,
454
+ topMargin=72, bottomMargin=18)
455
 
456
  styles = getSampleStyleSheet()
457
  story = []
458
 
459
  # Title
460
  title_style = ParagraphStyle('CustomTitle', parent=styles['Title'],
461
+ fontSize=24, spaceAfter=30, alignment=1)
462
  story.append(Paragraph("B2B Customer Analytics Report", title_style))
463
  story.append(Spacer(1, 12))
464
 
465
+ # Executive summary
466
  story.append(Paragraph("Executive Summary", styles['Heading2']))
467
 
468
+ total_customers = len(customer_data)
469
+ total_revenue = customer_data['monetary'].sum()
470
+ avg_revenue = customer_data['monetary'].mean()
471
 
472
  summary_text = f"""
473
+ This comprehensive analysis covers {total_customers:,} customers with combined revenue of ${total_revenue:,.2f}.
474
+ The average customer value is ${avg_revenue:,.2f}. Customer segmentation and churn risk assessment
475
+ have been performed using advanced RFM analysis and machine learning techniques.
 
 
476
  """
477
  story.append(Paragraph(summary_text, styles['Normal']))
478
+ story.append(Spacer(1, 20))
479
 
480
+ # Segment distribution
481
+ story.append(Paragraph("Customer Segmentation Overview", styles['Heading2']))
482
+ segment_dist = customer_data['Segment'].value_counts()
483
 
484
+ segment_data = []
485
+ segment_data.append(['Segment', 'Count', 'Percentage'])
486
+ for segment, count in segment_dist.items():
487
+ percentage = (count / total_customers) * 100
488
+ segment_data.append([segment, str(count), f"{percentage:.1f}%"])
489
 
490
+ segment_table = Table(segment_data)
491
  segment_table.setStyle(TableStyle([
492
  ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
493
  ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
 
499
  ('GRID', (0, 0), (-1, -1), 1, colors.black)
500
  ]))
501
  story.append(segment_table)
502
+ story.append(Spacer(1, 20))
503
 
504
+ # Model performance (if available)
505
+ if model_metrics:
506
+ story.append(Paragraph("Churn Prediction Model Performance", styles['Heading2']))
507
  model_text = f"""
508
+ Model Type: {model_metrics['model_name']}<br/>
509
+ Accuracy: {model_metrics['accuracy']:.1%}<br/>
510
+ AUC Score: {model_metrics['auc_score']:.3f}<br/>
511
+ Cross-validation Score: {model_metrics['cv_mean']:.3f} ± {model_metrics['cv_std']:.3f}<br/>
512
+ Features Used: {model_metrics['n_features']}<br/>
513
+ Training Samples: {model_metrics['n_samples']}
514
  """
515
  story.append(Paragraph(model_text, styles['Normal']))
516
+
517
+ # Build and return PDF
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
518
  doc.build(story)
519
  pdf_bytes = buffer.getvalue()
520
  buffer.close()
 
521
  return pdf_bytes
522
 
523
+ class B2BCustomerAnalytics:
524
+ """Main analytics orchestrator"""
525
 
526
  def __init__(self):
527
  self.raw_data = None
528
+ self.customer_metrics = None
529
+ self.churn_predictor = ChurnPredictor()
530
+ self.has_trained_model = False
 
531
 
532
+ def load_data(self, file) -> Tuple[str, str, Optional[pd.DataFrame]]:
533
+ """Load and process data"""
534
+ self.raw_data, status = DataProcessor.load_and_validate(file)
535
+
536
+ if self.raw_data is not None:
537
+ # Calculate RFM metrics
538
+ self.customer_metrics = RFMAnalyzer.calculate_rfm_metrics(self.raw_data)
539
 
540
+ # Perform segmentation
541
+ self.customer_metrics = CustomerSegmenter.perform_segmentation(self.customer_metrics)
 
 
542
 
543
  # Generate dashboard
544
+ dashboard_html = self._generate_dashboard()
545
+ preview_data = self._prepare_preview_data()
 
 
 
546
 
547
+ return status, dashboard_html, preview_data
548
+
549
+ return status, "", None
550
 
551
+ def train_churn_model(self) -> Tuple[str, Optional[Any]]:
552
  """Train churn prediction model"""
553
+ if self.customer_metrics is None:
554
+ return "No data available. Please upload data first.", None
555
+
556
+ success, message, metrics = self.churn_predictor.train_model(self.customer_metrics)
557
+
558
+ if success:
559
+ self.has_trained_model = True
560
+ # Update predictions
561
+ self.customer_metrics = self.churn_predictor.predict(self.customer_metrics)
562
 
563
+ results_html = self._format_model_results(metrics)
564
+ chart = VisualizationEngine.create_feature_importance_chart(
565
+ self.churn_predictor.feature_importance
566
  )
567
+ return results_html, chart
568
+
569
+ return f"Model training failed: {message}", None
 
 
570
 
571
+ def get_visualizations(self) -> Tuple[Any, Any, Any, Any]:
572
+ """Get all visualizations"""
573
+ if self.customer_metrics is None:
574
+ return None, None, None, None
575
 
576
+ segment_chart = VisualizationEngine.create_segment_chart(self.customer_metrics)
577
+ rfm_chart = VisualizationEngine.create_rfm_scatter(self.customer_metrics)
578
+ churn_chart = VisualizationEngine.create_churn_chart(
579
+ self.customer_metrics, self.has_trained_model
580
+ )
581
+ revenue_chart = VisualizationEngine.create_revenue_trend(self.raw_data)
582
+
583
+ return segment_chart, rfm_chart, churn_chart, revenue_chart
 
 
 
 
 
 
584
 
585
+ def get_customer_table(self) -> Optional[pd.DataFrame]:
586
+ """Get formatted customer table"""
587
+ if self.customer_metrics is None:
588
  return None
589
 
590
+ columns = ['customer_id', 'Segment', 'Churn_Risk', 'recency_days',
591
+ 'frequency', 'monetary', 'avg_order_value']
592
+
593
+ if 'churn_probability' in self.customer_metrics.columns:
594
+ columns.append('churn_probability')
595
+ self.customer_metrics['churn_probability'] = (
596
+ self.customer_metrics['churn_probability'] * 100
597
+ ).round(1)
598
+
599
+ table_data = self.customer_metrics[columns].copy()
600
+ table_data['monetary'] = table_data['monetary'].round(2)
601
+ table_data['avg_order_value'] = table_data['avg_order_value'].round(2)
602
+
603
+ # Rename columns for display
604
+ display_names = {
605
+ 'customer_id': 'Customer ID',
606
+ 'Segment': 'Segment',
607
+ 'Churn_Risk': 'Risk Level',
608
+ 'recency_days': 'Recency (Days)',
609
+ 'frequency': 'Frequency',
610
+ 'monetary': 'Total Spent ($)',
611
+ 'avg_order_value': 'Avg Order ($)',
612
+ 'churn_probability': 'Churn Probability (%)'
613
+ }
614
+
615
+ table_data = table_data.rename(columns=display_names)
616
+ return table_data.head(50)
617
+
618
+ def get_customer_insights(self, customer_id: str) -> str:
619
+ """Get detailed customer insights"""
620
+ if self.customer_metrics is None or not customer_id:
621
+ return "Please enter a valid customer ID"
622
+
623
+ customer_data = self.customer_metrics[
624
+ self.customer_metrics['customer_id'] == customer_id
625
+ ]
626
+
627
+ if customer_data.empty:
628
+ return f"Customer {customer_id} not found"
629
+
630
+ customer = customer_data.iloc[0]
631
+ return self._format_customer_profile(customer)
632
+
633
+ def generate_report(self) -> bytes:
634
+ """Generate PDF report"""
635
+ if self.customer_metrics is None:
636
+ raise ValueError("No data available for report generation")
637
+
638
+ return ReportGenerator.generate_pdf_report(
639
+ self.customer_metrics,
640
+ self.churn_predictor.model_metrics
641
+ )
642
+
643
+ def _generate_dashboard(self) -> str:
644
+ """Generate dashboard HTML"""
645
+ total_customers = len(self.customer_metrics)
646
+ total_revenue = self.customer_metrics['monetary'].sum()
647
+ avg_order_value = self.customer_metrics['avg_order_value'].mean()
648
+ high_risk_customers = (self.customer_metrics['Churn_Risk'] == 'High').sum()
649
+
650
+ segment_dist = self.customer_metrics['Segment'].value_counts()
651
+
652
+ return f"""
653
+ <div style="display: flex; flex-wrap: wrap; gap: 1rem; margin-bottom: 2rem;">
654
+ <div style="flex: 1; min-width: 200px; background: linear-gradient(135deg, #3b82f6, #1d4ed8); padding: 1.5rem; border-radius: 12px; color: white; text-align: center;">
655
+ <h3 style="margin: 0 0 0.5rem 0; font-size: 0.9rem; opacity: 0.9;">Total Customers</h3>
656
+ <div style="font-size: 2.5rem; font-weight: bold;">{total_customers:,}</div>
657
+ </div>
658
+ <div style="flex: 1; min-width: 200px; background: linear-gradient(135deg, #10b981, #047857); padding: 1.5rem; border-radius: 12px; color: white; text-align: center;">
659
+ <h3 style="margin: 0 0 0.5rem 0; font-size: 0.9rem; opacity: 0.9;">Total Revenue</h3>
660
+ <div style="font-size: 2.5rem; font-weight: bold;">${total_revenue/1000000:.1f}M</div>
661
+ </div>
662
+ <div style="flex: 1; min-width: 200px; background: linear-gradient(135deg, #8b5cf6, #6d28d9); padding: 1.5rem; border-radius: 12px; color: white; text-align: center;">
663
+ <h3 style="margin: 0 0 0.5rem 0; font-size: 0.9rem; opacity: 0.9;">Avg Order Value</h3>
664
+ <div style="font-size: 2.5rem; font-weight: bold;">${avg_order_value:.0f}</div>
665
+ </div>
666
+ <div style="flex: 1; min-width: 200px; background: linear-gradient(135deg, #ef4444, #dc2626); padding: 1.5rem; border-radius: 12px; color: white; text-align: center;">
667
+ <h3 style="margin: 0 0 0.5rem 0; font-size: 0.9rem; opacity: 0.9;">High Risk Customers</h3>
668
+ <div style="font-size: 2.5rem; font-weight: bold;">{high_risk_customers}</div>
669
+ </div>
670
+ </div>
671
+ <div style="background: #f8fafc; padding: 1.5rem; border-radius: 12px; border-left: 4px solid #6366f1;">
672
+ <h4 style="margin: 0 0 1rem 0; color: #374151;">Customer Segments Overview</h4>
673
+ <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)); gap: 1rem;">
674
+ {' '.join([f'<div><strong>{segment}:</strong> {count}</div>' for segment, count in segment_dist.items()])}
675
+ </div>
676
+ </div>
677
+ """
678
+
679
+ def _prepare_preview_data(self) -> pd.DataFrame:
680
+ """Prepare data preview"""
681
+ if self.raw_data is None:
682
+ return pd.DataFrame()
683
+
684
+ preview = self.raw_data.merge(
685
+ self.customer_metrics[['customer_id', 'Segment', 'Churn_Risk']],
686
+ on='customer_id',
687
+ how='left'
688
+ )
689
+ return preview.head(20)
690
+
691
+ def _format_model_results(self, metrics: Dict) -> str:
692
+ """Format model training results"""
693
+ return f"""
694
+ <div style="background: white; padding: 2rem; border-radius: 1rem; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); margin-bottom: 2rem;">
695
+ <div style="text-align: center; margin-bottom: 2rem;">
696
+ <h3 style="color: #1f2937; font-size: 1.5rem; font-weight: bold; margin-bottom: 0.5rem;">
697
+ Model Training Completed Successfully
698
+ </h3>
699
+ <p style="color: #6b7280;">{metrics['model_name']} with Advanced Feature Engineering</p>
700
+ </div>
701
 
702
+ <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)); gap: 1rem; margin-bottom: 2rem;">
703
+ <div style="background: linear-gradient(135deg, #6366f1, #4f46e5); padding: 1rem; border-radius: 8px; text-align: center; color: white;">
704
+ <div style="font-size: 2rem; font-weight: bold;">{metrics['accuracy']:.1%}</div>
705
+ <div style="font-size: 0.9rem;">Accuracy</div>
706
+ </div>
707
+ <div style="background: linear-gradient(135deg, #10b981, #059669); padding: 1rem; border-radius: 8px; text-align: center; color: white;">
708
+ <div style="font-size: 2rem; font-weight: bold;">{metrics['auc_score']:.3f}</div>
709
+ <div style="font-size: 0.9rem;">AUC Score</div>
710
+ </div>
711
+ <div style="background: linear-gradient(135deg, #f59e0b, #d97706); padding: 1rem; border-radius: 8px; text-align: center; color: white;">
712
+ <div style="font-size: 2rem; font-weight: bold;">{metrics['n_features']}</div>
713
+ <div style="font-size: 0.9rem;">Features Used</div>
714
+ </div>
715
+ <div style="background: linear-gradient(135deg, #8b5cf6, #7c3aed); padding: 1rem; border-radius: 8px; text-align: center; color: white;">
716
+ <div style="font-size: 2rem; font-weight: bold;">{metrics['cv_mean']:.3f}</div>
717
+ <div style="font-size: 0.9rem;">CV Score</div>
718
+ </div>
719
+ </div>
720
+ </div>
721
+ """
722
+
723
+ def _format_customer_profile(self, customer) -> str:
724
+ """Format individual customer profile"""
725
+ churn_prob = customer.get('churn_probability', 0.5)
726
+ recommendations = self._get_customer_recommendations(
727
+ customer['Segment'], customer['Churn_Risk'], churn_prob, customer['recency_days']
728
+ )
729
+
730
+ return f"""
731
+ <div style="background: white; padding: 2rem; border-radius: 1rem; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); margin-bottom: 1rem;">
732
+ <h3 style="text-align: center; color: #1f2937; margin-bottom: 1.5rem;">Customer Profile: {customer['customer_id']}</h3>
733
 
734
+ <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 1rem; margin-bottom: 2rem;">
735
+ <div style="background: linear-gradient(135deg, #6366f1, #4f46e5); padding: 1rem; border-radius: 8px; color: white; text-align: center;">
736
+ <h4 style="margin: 0 0 0.5rem 0; font-size: 0.9rem; opacity: 0.9;">Segment</h4>
737
+ <div style="font-size: 1.2rem; font-weight: bold;">{customer['Segment']}</div>
738
+ </div>
739
+ <div style="background: linear-gradient(135deg, #ef4444, #dc2626); padding: 1rem; border-radius: 8px; color: white; text-align: center;">
740
+ <h4 style="margin: 0 0 0.5rem 0; font-size: 0.9rem; opacity: 0.9;">Churn Risk</h4>
741
+ <div style="font-size: 1.2rem; font-weight: bold;">{customer['Churn_Risk']}</div>
742
+ </div>
743
+ <div style="background: linear-gradient(135deg, #8b5cf6, #6d28d9); padding: 1rem; border-radius: 8px; color: white; text-align: center;">
744
+ <h4 style="margin: 0 0 0.5rem 0; font-size: 0.9rem; opacity: 0.9;">Churn Probability</h4>
745
+ <div style="font-size: 1.2rem; font-weight: bold;">{churn_prob:.1%}</div>
746
+ </div>
747
+ </div>
748
 
749
+ <div style="background: #f8fafc; padding: 1.5rem; border-radius: 8px; margin-bottom: 1rem;">
750
+ <h4 style="color: #374151; margin-bottom: 1rem;">Transaction Analytics</h4>
751
+ <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)); gap: 1rem;">
752
+ <div>
753
+ <div style="font-size: 0.8rem; color: #6b7280; margin-bottom: 0.2rem;">Purchase Frequency</div>
754
+ <div style="font-size: 1.5rem; font-weight: bold; color: #1f2937;">{customer['frequency']}</div>
755
+ </div>
756
+ <div>
757
+ <div style="font-size: 0.8rem; color: #6b7280; margin-bottom: 0.2rem;">Total Spent</div>
758
+ <div style="font-size: 1.5rem; font-weight: bold; color: #1f2937;">${customer['monetary']:,.0f}</div>
759
+ </div>
760
+ <div>
761
+ <div style="font-size: 0.8rem; color: #6b7280; margin-bottom: 0.2rem;">Avg Order Value</div>
762
+ <div style="font-size: 1.5rem; font-weight: bold; color: #1f2937;">${customer['avg_order_value']:.0f}</div>
763
+ </div>
764
+ <div>
765
+ <div style="font-size: 0.8rem; color: #6b7280; margin-bottom: 0.2rem;">Days Since Last Order</div>
766
+ <div style="font-size: 1.5rem; font-weight: bold; color: #1f2937;">{customer['recency_days']}</div>
767
+ </div>
768
+ </div>
769
+ </div>
770
 
771
+ <div style="background: linear-gradient(135deg, #f0f9ff, #e0f2fe); border-left: 4px solid #3b82f6; padding: 1rem; border-radius: 4px;">
772
+ <h4 style="color: #1e40af; margin-bottom: 0.5rem;">Recommendations</h4>
773
+ <p style="color: #1f2937; margin: 0;">{recommendations}</p>
774
+ </div>
775
+ </div>
776
+ """
777
 
778
+ def _get_customer_recommendations(self, segment: str, risk_level: str,
779
+ churn_prob: float, recency: int) -> str:
780
+ """Generate personalized recommendations"""
781
+ recommendations = []
782
+
783
+ if risk_level == 'High' or churn_prob > BUSINESS_CONFIG['high_risk_probability']:
784
+ recommendations.append("URGENT: Personal outreach required within 24 hours")
785
+ recommendations.append("Offer retention incentive or loyalty program")
786
+ elif risk_level == 'Medium':
787
+ recommendations.append("Send personalized re-engagement campaign")
788
+
789
+ if segment == 'Champions':
790
+ recommendations.append("Invite to VIP program or advisory board")
791
+ elif segment == 'At Risk':
792
+ recommendations.append("Proactive customer success intervention needed")
793
+ elif segment == 'New Customers':
794
+ recommendations.append("Deploy onboarding campaign sequence")
795
+ elif segment == 'Lost Customers':
796
+ recommendations.append("Win-back campaign with deep discount offer")
797
+
798
+ if recency > 60:
799
+ recommendations.append("Re-engagement campaign with special offer recommended")
800
+
801
+ return " • ".join(recommendations) if recommendations else "Continue monitoring customer engagement patterns."
802
 
803
+ def create_gradio_interface():
804
+ """Create the enhanced Gradio interface"""
805
 
806
+ # Custom CSS for modern styling
807
+ custom_css = """
808
+ .gradio-container {
809
+ font-family: 'Inter', system-ui, sans-serif !important;
810
+ max-width: 1200px !important;
811
+ }
812
+ .tab-nav {
813
+ background: #f8fafc !important;
814
+ border-radius: 8px !important;
815
+ }
816
+ """
817
 
818
+ with gr.Blocks(theme=gr.themes.Soft(), title="B2B Customer Analytics", css=custom_css) as demo:
819
+
820
+ # Initialize analytics instance per session
821
+ analytics = gr.State(B2BCustomerAnalytics())
822
 
823
  gr.HTML("""
824
+ <div style="background: linear-gradient(135deg, #6366f1 0%, #8b5cf6 100%); padding: 2rem; border-radius: 1rem; color: white; text-align: center; margin-bottom: 2rem;">
825
+ <h1 style="font-size: 2.5rem; font-weight: bold; margin-bottom: 0.5rem;">B2B Customer Analytics Platform</h1>
826
+ <p style="font-size: 1.1rem; opacity: 0.9;">Advanced Customer Segmentation & Churn Prediction</p>
827
+ <div style="font-size: 0.9rem; opacity: 0.8; margin-top: 1rem;">
828
+ Upload your customer data CSV with columns: customer_id, order_date, amount (or similar)
829
+ </div>
 
 
830
  </div>
831
  """)
832
 
833
  with gr.Tabs():
834
+
835
+ with gr.Tab("📊 Data Upload & Dashboard"):
836
+ with gr.Row():
837
+ with gr.Column():
838
+ file_input = gr.File(
839
+ label="Upload Customer Data CSV",
840
+ file_types=[".csv"],
841
+ type="filepath"
842
+ )
843
+ load_btn = gr.Button(
844
+ "Load & Process Data",
845
+ variant="primary",
846
+ size="lg"
847
+ )
848
+ load_status = gr.Textbox(
849
+ label="Status",
850
+ interactive=False,
851
+ max_lines=2
852
+ )
853
+
854
+ summary_display = gr.HTML()
855
+ data_preview = gr.DataFrame(label="Data Preview (First 20 Rows)")
856
+
857
+ with gr.Tab("🎯 Customer Segmentation"):
858
  with gr.Row():
859
+ with gr.Column():
860
+ segment_chart = gr.Plot(label="Customer Segments Distribution")
861
+ with gr.Column():
862
+ rfm_chart = gr.Plot(label="RFM Behavior Analysis")
863
+
864
+ customer_table = gr.DataFrame(label="Customer Segmentation Details")
865
 
866
+ gr.HTML("""
867
+ <div style="background: #f0f9ff; padding: 1rem; border-radius: 8px; border-left: 4px solid #3b82f6; margin-top: 1rem;">
868
+ <h4 style="color: #1e40af; margin: 0 0 0.5rem 0;">Segment Definitions</h4>
869
+ <p style="margin: 0; color: #1f2937; font-size: 0.9rem;">
870
+ <strong>Champions:</strong> High value, frequent customers •
871
+ <strong>Loyal Customers:</strong> Regular, valuable customers •
872
+ <strong>At Risk:</strong> Previously valuable but declining activity •
873
+ <strong>Lost Customers:</strong> Haven't purchased recently
874
+ </p>
875
+ </div>
876
+ """)
877
 
878
+ with gr.Tab("🤖 Churn Prediction"):
879
+ train_btn = gr.Button(
880
+ "Train Churn Prediction Model",
881
+ variant="primary",
882
+ size="lg"
883
+ )
884
+ model_results = gr.HTML()
885
+
886
  with gr.Row():
887
+ with gr.Column():
888
+ feature_importance_chart = gr.Plot(label="Feature Importance Analysis")
889
+ with gr.Column():
890
+ churn_distribution_chart = gr.Plot(label="Churn Risk Distribution")
891
 
892
+ gr.HTML("""
893
+ <div style="background: #fef3c7; padding: 1rem; border-radius: 8px; border-left: 4px solid #f59e0b; margin-top: 1rem;">
894
+ <h4 style="color: #92400e; margin: 0 0 0.5rem 0;">Model Information</h4>
895
+ <p style="margin: 0; color: #1f2937; font-size: 0.9rem;">
896
+ The model uses advanced features including customer lifetime, purchase patterns, and RFM metrics.
897
+ Customers with >90 days since last purchase are considered churned for training purposes.
898
+ </p>
899
+ </div>
900
+ """)
901
 
902
+ with gr.Tab("📈 Revenue Analytics"):
903
+ revenue_chart = gr.Plot(label="Monthly Revenue Trends")
 
 
904
 
905
+ gr.HTML("""
906
+ <div style="background: #ecfdf5; padding: 1rem; border-radius: 8px; border-left: 4px solid #10b981; margin-top: 1rem;">
907
+ <h4 style="color: #065f46; margin: 0 0 0.5rem 0;">Revenue Insights</h4>
908
+ <p style="margin: 0; color: #1f2937; font-size: 0.9rem;">
909
+ Track revenue trends over time to identify seasonal patterns, growth trajectories, and potential business impact of customer segments.
910
+ </p>
911
+ </div>
912
+ """)
913
+
914
+ with gr.Tab("👤 Customer Insights"):
915
  with gr.Row():
916
+ customer_id_input = gr.Textbox(
917
+ label="Customer ID",
918
+ placeholder="Enter customer ID for detailed analysis",
919
+ scale=3
920
+ )
921
+ insights_btn = gr.Button(
922
+ "Get Customer Profile",
923
+ variant="primary",
924
+ scale=1
925
+ )
926
+
927
+ customer_insights = gr.HTML()
928
 
929
+ with gr.Tab("📋 Reports"):
930
+ with gr.Row():
931
+ with gr.Column():
932
+ gr.HTML("""
933
+ <div style="background: white; padding: 2rem; border-radius: 1rem; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
934
+ <h3 style="color: #1f2937; margin-bottom: 1rem;">Generate Comprehensive Report</h3>
935
+ <p style="color: #6b7280; margin-bottom: 1.5rem;">
936
+ Create a detailed PDF report including customer segmentation analysis,
937
+ churn predictions, and actionable business insights.
938
+ </p>
939
+ </div>
940
+ """)
941
+ report_btn = gr.Button(
942
+ "Generate PDF Report",
943
+ variant="primary",
944
+ size="lg"
945
+ )
946
+ with gr.Column():
947
+ report_file = gr.File(
948
+ label="Download Report",
949
+ interactive=False
950
+ )
951
+
952
+ # Event handlers with proper error handling
953
def safe_load_data(analytics_instance, file):
    """Load the uploaded CSV and build the values for the load-button outputs.

    Args:
        analytics_instance: Object providing ``load_data``,
            ``get_visualizations`` and ``get_customer_table``.
        file: The uploaded file value, or ``None`` when nothing was uploaded.

    Returns:
        A tuple starting with ``analytics_instance`` and a status message.
        On success it continues with the dashboard, the preview, every item
        returned by ``get_visualizations()`` (unpacked) and the customer
        table. On any failure it continues with ``""`` and six ``None``
        placeholders, nine slots in total.
    """
    def failure(message):
        # Every non-success outcome shares the same placeholder layout.
        return (analytics_instance, message, "") + (None,) * 6

    if file is None:
        return failure("Please upload a CSV file")

    try:
        status, dashboard, preview = analytics_instance.load_data(file)

        # The status text is the only success signal load_data exposes here.
        if "successfully" not in status:
            return failure(status)

        charts = analytics_instance.get_visualizations()
        table = analytics_instance.get_customer_table()
        return (analytics_instance, status, dashboard, preview, *charts, table)
    except Exception as exc:
        return failure(f"Error loading data: {str(exc)}")
970
+
971
def safe_train_model(analytics_instance):
    """Train the churn model and return values for the train-button outputs.

    Args:
        analytics_instance: Object providing ``train_churn_model`` and
            ``get_visualizations``.

    Returns:
        ``(analytics_instance, result_html, chart, churn_chart)`` on success,
        or ``(analytics_instance, error_message, None, None)`` if anything
        raises.
    """
    try:
        result_html, chart = analytics_instance.train_churn_model()
        # Re-fetch the charts after training; item 2 is the one routed to
        # churn_distribution_chart by the click wiring.
        churn_chart = analytics_instance.get_visualizations()[2]
    except Exception as exc:
        return analytics_instance, f"Error training model: {str(exc)}", None, None

    return analytics_instance, result_html, chart, churn_chart
980
+
981
def safe_get_insights(analytics_instance, customer_id):
    """Return the customer insights for ``customer_id`` or an error string.

    Delegates to ``analytics_instance.get_customer_insights``; any exception
    is turned into an ``"Error getting insights: ..."`` message instead of
    propagating.
    """
    try:
        profile = analytics_instance.get_customer_insights(customer_id)
    except Exception as exc:
        profile = f"Error getting insights: {str(exc)}"
    return profile
986
+
987
def safe_generate_report(analytics_instance):
    """Generate the PDF report and return the path of a temp file holding it.

    Args:
        analytics_instance: Object exposing ``customer_metrics`` and
            ``generate_report()``; the latter is expected to return the PDF
            as a bytes-like object.

    Returns:
        Path of a ``.pdf`` temporary file on success, otherwise ``None``.
        Every ``None`` outcome is accompanied by a ``gr.Warning`` so the
        click never fails silently.
    """
    # Local imports keep this handler self-contained.
    import os
    import tempfile

    try:
        if analytics_instance.customer_metrics is None:
            # Previously a silent no-op; tell the user what is missing.
            gr.Warning("Please load customer data before generating a report")
            return None

        pdf_bytes = analytics_instance.generate_report()

        # delete=False: the file must outlive this function so it can be
        # handed to the download component by path.
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix='.pdf')
        try:
            with tmp:
                tmp.write(pdf_bytes)
        except Exception:
            # Do not leave an empty/partial file behind when the write fails
            # (e.g. generate_report() returned something that is not bytes).
            os.unlink(tmp.name)
            raise
        return tmp.name

    except Exception as e:
        gr.Warning(f"Error generating report: {str(e)}")
        return None
1003
+
1004
+ # Wire up events
1005
  load_btn.click(
1006
+ fn=safe_load_data,
1007
+ inputs=[analytics, file_input],
1008
+ outputs=[analytics, load_status, summary_display, data_preview,
1009
+ segment_chart, rfm_chart, churn_distribution_chart, revenue_chart, customer_table]
1010
  )
1011
 
1012
  train_btn.click(
1013
+ fn=safe_train_model,
1014
+ inputs=[analytics],
1015
+ outputs=[analytics, model_results, feature_importance_chart, churn_distribution_chart]
1016
+ )
1017
+
1018
+ insights_btn.click(
1019
+ fn=safe_get_insights,
1020
+ inputs=[analytics, customer_id_input],
1021
+ outputs=[customer_insights]
1022
+ )
1023
+
1024
+ report_btn.click(
1025
+ fn=safe_generate_report,
1026
+ inputs=[analytics],
1027
+ outputs=[report_file]
1028
+ )
1029
+
1030
+ # Auto-update customer insights on Enter key
1031
+ customer_id_input.submit(
1032
+ fn=safe_get_insights,
1033
+ inputs=[analytics, customer_id_input],
1034
+ outputs=[customer_insights]
1035
+ )
1036
+
1037
+ return demo
1038
+
1039
if __name__ == "__main__":
    # Script entry point: build the interface, then start the server.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
        "show_error": True,
    }
    demo = create_gradio_interface()
    demo.launch(**launch_options)