Update app.py
Browse files
app.py
CHANGED
|
@@ -34,12 +34,13 @@ COLORS = {
|
|
| 34 |
'indigo': '#6366f1'
|
| 35 |
}
|
| 36 |
|
| 37 |
-
plt.style.use('seaborn-v0_8-whitegrid
|
| 38 |
sns.set_palette("husl")
|
| 39 |
|
| 40 |
class B2BCustomerAnalytics:
|
| 41 |
def __init__(self):
|
| 42 |
self.df = None
|
|
|
|
| 43 |
self.model = None
|
| 44 |
self.feature_importance = None
|
| 45 |
self.predictions = None
|
|
@@ -50,188 +51,244 @@ class B2BCustomerAnalytics:
|
|
| 50 |
if file is None:
|
| 51 |
return "Please upload a CSV file", None, None, None
|
| 52 |
|
|
|
|
| 53 |
self.df = pd.read_csv(file.name)
|
| 54 |
|
|
|
|
| 55 |
required_columns = ['customer_id', 'order_date', 'amount']
|
| 56 |
-
|
| 57 |
-
if missing_cols:
|
| 58 |
-
return f"Missing required columns: {missing_cols}", None, None, None
|
| 59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
self.df['order_date'] = pd.to_datetime(self.df['order_date'])
|
| 61 |
|
| 62 |
-
|
| 63 |
-
|
| 64 |
|
| 65 |
-
|
|
|
|
| 66 |
|
|
|
|
| 67 |
summary_html, kpi_cards = self.generate_summary_dashboard()
|
| 68 |
|
| 69 |
-
return "Data loaded successfully!", summary_html, self.
|
| 70 |
|
| 71 |
except Exception as e:
|
| 72 |
return f"Error loading data: {str(e)}", None, None, None
|
| 73 |
|
| 74 |
def calculate_rfm_metrics(self, df):
|
| 75 |
"""Calculate RFM metrics from transaction data"""
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
|
| 91 |
def perform_customer_segmentation(self, df):
|
| 92 |
"""Perform customer segmentation based on RFM analysis"""
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
|
| 136 |
def generate_summary_dashboard(self):
|
| 137 |
"""Generate modern dashboard summary with KPI cards"""
|
| 138 |
-
if self.
|
| 139 |
return "No data loaded", ""
|
| 140 |
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
segment_dist = self.df.groupby('customer_id')['Segment'].first().value_counts()
|
| 147 |
-
risk_dist = self.df.groupby('customer_id')['Churn_Risk'].first().value_counts()
|
| 148 |
-
|
| 149 |
-
# Create modern horizontal dashboard
|
| 150 |
-
summary_html = f"""
|
| 151 |
-
<div style="background: linear-gradient(135deg, #6366f1 0%, #8b5cf6 100%); padding: 2rem; border-radius: 1rem; color: white; margin-bottom: 2rem; text-align: center;">
|
| 152 |
-
<h1 style="font-size: 2.5rem; font-weight: bold; margin-bottom: 0.5rem;">
|
| 153 |
-
B2B Customer Analytics Platform
|
| 154 |
-
</h1>
|
| 155 |
-
<p style="font-size: 1.2rem; opacity: 0.9;">
|
| 156 |
-
Enterprise Customer Health Monitoring & Churn Prediction System
|
| 157 |
-
</p>
|
| 158 |
-
</div>
|
| 159 |
-
|
| 160 |
-
<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 1.5rem; margin-bottom: 3rem;">
|
| 161 |
-
<div style="background: white; padding: 1.5rem; border-radius: 1rem; box-shadow: 0 10px 25px -5px rgba(0, 0, 0, 0.1); border-left: 4px solid #3b82f6;">
|
| 162 |
-
<div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 1rem;">
|
| 163 |
-
<div style="padding: 0.75rem; background: #dbeafe; border-radius: 0.5rem; color: #1d4ed8;">📊</div>
|
| 164 |
-
<span style="font-size: 2rem; font-weight: bold; color: #3b82f6;">{total_customers:,}</span>
|
| 165 |
-
</div>
|
| 166 |
-
<h3 style="color: #1f2937; font-weight: 600; margin: 0;">Total Customers</h3>
|
| 167 |
-
<p style="color: #6b7280; font-size: 0.875rem; margin: 0.25rem 0 0 0;">Active enterprise clients</p>
|
| 168 |
-
</div>
|
| 169 |
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
<span style="font-size: 2rem; font-weight: bold; color: #10b981;">${(total_revenue/1000000):.1f}M</span>
|
| 174 |
-
</div>
|
| 175 |
-
<h3 style="color: #1f2937; font-weight: 600; margin: 0;">Total Revenue</h3>
|
| 176 |
-
<p style="color: #6b7280; font-size: 0.875rem; margin: 0.25rem 0 0 0;">Contract value sum</p>
|
| 177 |
-
</div>
|
| 178 |
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
<
|
| 185 |
-
<p style="
|
|
|
|
|
|
|
| 186 |
</div>
|
| 187 |
|
| 188 |
-
<div style="
|
| 189 |
-
<div style="
|
| 190 |
-
<div style="
|
| 191 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
</div>
|
| 193 |
-
|
| 194 |
-
<
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
<
|
| 200 |
-
<
|
| 201 |
</div>
|
| 202 |
-
|
| 203 |
-
<
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
<
|
| 209 |
-
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
</div>
|
| 211 |
-
<h3 style="color: #1f2937; font-weight: 600; margin: 0;">Healthy Customers</h3>
|
| 212 |
-
<p style="color: #6b7280; font-size: 0.875rem; margin: 0.25rem 0 0 0;">Low churn risk</p>
|
| 213 |
</div>
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
|
|
|
|
|
|
| 227 |
|
| 228 |
def train_churn_model(self):
|
| 229 |
"""Train churn prediction model"""
|
| 230 |
-
if self.
|
| 231 |
-
return "No data available. Please upload a CSV file first.", None
|
| 232 |
|
| 233 |
try:
|
| 234 |
-
|
|
|
|
| 235 |
'recency_days': 'first',
|
| 236 |
'frequency': 'first',
|
| 237 |
'monetary': 'first',
|
|
@@ -239,40 +296,46 @@ class B2BCustomerAnalytics:
|
|
| 239 |
'order_date': ['min', 'max']
|
| 240 |
}).reset_index()
|
| 241 |
|
|
|
|
| 242 |
customer_features.columns = ['customer_id', 'recency_days', 'frequency', 'monetary',
|
| 243 |
'avg_amount', 'std_amount', 'min_amount', 'max_amount',
|
| 244 |
'first_order', 'last_order']
|
| 245 |
|
|
|
|
| 246 |
customer_features['std_amount'].fillna(0, inplace=True)
|
| 247 |
|
|
|
|
| 248 |
customer_features['customer_lifetime'] = (customer_features['last_order'] - customer_features['first_order']).dt.days
|
| 249 |
customer_features['customer_lifetime'].fillna(0, inplace=True)
|
| 250 |
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
else:
|
| 254 |
-
churn_labels = self.df.groupby('customer_id')['churn_label'].first().reset_index()
|
| 255 |
-
customer_features = customer_features.merge(churn_labels, on='customer_id')
|
| 256 |
|
|
|
|
| 257 |
feature_cols = ['recency_days', 'frequency', 'monetary', 'avg_amount', 'std_amount',
|
| 258 |
'min_amount', 'max_amount', 'customer_lifetime']
|
| 259 |
|
| 260 |
X = customer_features[feature_cols]
|
| 261 |
y = customer_features['churn_label']
|
| 262 |
|
|
|
|
| 263 |
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
|
| 264 |
|
|
|
|
| 265 |
self.model = xgb.XGBClassifier(random_state=42, eval_metric='logloss')
|
| 266 |
self.model.fit(X_train, y_train)
|
| 267 |
|
|
|
|
| 268 |
y_pred = self.model.predict(X_test)
|
| 269 |
y_pred_proba = self.model.predict_proba(X_test)[:, 1]
|
| 270 |
|
|
|
|
| 271 |
self.feature_importance = pd.DataFrame({
|
| 272 |
'feature': feature_cols,
|
| 273 |
'importance': self.model.feature_importances_
|
| 274 |
}).sort_values('importance', ascending=False)
|
| 275 |
|
|
|
|
| 276 |
all_predictions = self.model.predict_proba(X)[:, 1]
|
| 277 |
customer_features['churn_probability'] = all_predictions
|
| 278 |
self.predictions = customer_features
|
|
@@ -334,194 +397,209 @@ class B2BCustomerAnalytics:
|
|
| 334 |
if self.feature_importance is None:
|
| 335 |
return None
|
| 336 |
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 367 |
|
| 368 |
def create_visualizations(self):
|
| 369 |
"""Create comprehensive modern visualizations"""
|
| 370 |
-
if self.
|
| 371 |
return None, None, None, None
|
| 372 |
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
fig1.update_traces(textposition='inside', textinfo='percent+label', textfont_size=13)
|
| 386 |
-
fig1.update_layout(
|
| 387 |
-
height=450,
|
| 388 |
-
showlegend=True,
|
| 389 |
-
title={'x': 0.5, 'xanchor': 'center', 'font': {'size': 20, 'color': '#1f2937'}},
|
| 390 |
-
font=dict(family="Inter, system-ui, sans-serif", size=12),
|
| 391 |
-
paper_bgcolor='white',
|
| 392 |
-
plot_bgcolor='white'
|
| 393 |
-
)
|
| 394 |
-
|
| 395 |
-
# 2. RFM Analysis
|
| 396 |
-
customer_rfm = self.df.groupby('customer_id').agg({
|
| 397 |
-
'recency_days': 'first',
|
| 398 |
-
'frequency': 'first',
|
| 399 |
-
'monetary': 'first',
|
| 400 |
-
'Segment': 'first'
|
| 401 |
-
}).reset_index()
|
| 402 |
-
|
| 403 |
-
fig2 = px.scatter(
|
| 404 |
-
customer_rfm,
|
| 405 |
-
x='recency_days',
|
| 406 |
-
y='frequency',
|
| 407 |
-
size='monetary',
|
| 408 |
-
color='Segment',
|
| 409 |
-
title='<b>RFM Customer Behavior Matrix</b>',
|
| 410 |
-
labels={
|
| 411 |
-
'recency_days': 'Days Since Last Purchase',
|
| 412 |
-
'frequency': 'Purchase Frequency',
|
| 413 |
-
'monetary': 'Total Revenue'
|
| 414 |
-
},
|
| 415 |
-
color_discrete_sequence=['#6366f1', '#10b981', '#f59e0b', '#ef4444', '#8b5cf6'],
|
| 416 |
-
size_max=60
|
| 417 |
-
)
|
| 418 |
-
fig2.update_layout(
|
| 419 |
-
height=500,
|
| 420 |
-
title={'x': 0.5, 'xanchor': 'center', 'font': {'size': 20, 'color': '#1f2937'}},
|
| 421 |
-
font=dict(family="Inter, system-ui, sans-serif", size=12),
|
| 422 |
-
paper_bgcolor='white',
|
| 423 |
-
plot_bgcolor='white'
|
| 424 |
-
)
|
| 425 |
-
|
| 426 |
-
# 3. Churn Risk Analysis
|
| 427 |
-
if self.predictions is not None:
|
| 428 |
-
fig3 = px.histogram(
|
| 429 |
-
self.predictions,
|
| 430 |
-
x='churn_probability',
|
| 431 |
-
nbins=20,
|
| 432 |
-
title='<b>Churn Probability Distribution</b>',
|
| 433 |
-
labels={'churn_probability': 'Churn Probability', 'count': 'Number of Customers'},
|
| 434 |
-
color_discrete_sequence=[COLORS['primary']]
|
| 435 |
)
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
x='Risk_Level',
|
| 445 |
-
y='Count',
|
| 446 |
-
title='<b>Customer Churn Risk Distribution</b>',
|
| 447 |
-
color='Risk_Level',
|
| 448 |
-
color_discrete_map=colors_map
|
| 449 |
)
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 486 |
|
| 487 |
def create_customer_table(self):
|
| 488 |
"""Create modern customer segmentation table"""
|
| 489 |
-
if self.
|
| 490 |
return None
|
| 491 |
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
|
| 502 |
-
|
| 503 |
-
|
| 504 |
-
|
| 505 |
-
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
|
| 515 |
-
|
| 516 |
-
|
| 517 |
-
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 521 |
|
| 522 |
def generate_pdf_report(self):
|
| 523 |
"""Generate comprehensive PDF report"""
|
| 524 |
-
if self.
|
| 525 |
return None
|
| 526 |
|
| 527 |
try:
|
|
@@ -546,10 +624,10 @@ class B2BCustomerAnalytics:
|
|
| 546 |
|
| 547 |
story.append(Paragraph("Executive Summary", styles['Heading2']))
|
| 548 |
|
| 549 |
-
total_customers = self.
|
| 550 |
-
total_revenue = self.
|
| 551 |
-
avg_order_value = self.
|
| 552 |
-
high_risk_customers = len(self.
|
| 553 |
|
| 554 |
summary_text = f"""
|
| 555 |
This comprehensive analysis examines {total_customers} B2B customers with total revenue of ${total_revenue:,.2f}.
|
|
@@ -567,8 +645,8 @@ class B2BCustomerAnalytics:
|
|
| 567 |
|
| 568 |
story.append(Paragraph("Key Performance Indicators", styles['Heading2']))
|
| 569 |
|
| 570 |
-
segment_dist = self.
|
| 571 |
-
risk_dist = self.
|
| 572 |
|
| 573 |
metrics_data = [
|
| 574 |
['Metric', 'Value', 'Status'],
|
|
@@ -640,87 +718,91 @@ class B2BCustomerAnalytics:
|
|
| 640 |
|
| 641 |
def get_customer_insights(self, customer_id):
|
| 642 |
"""Get detailed insights for a specific customer"""
|
| 643 |
-
if self.
|
| 644 |
return "No data available"
|
| 645 |
|
| 646 |
-
|
| 647 |
-
|
| 648 |
-
|
| 649 |
-
|
| 650 |
-
total_orders = len(customer_data)
|
| 651 |
-
total_spent = customer_data['amount'].sum()
|
| 652 |
-
avg_order_value = customer_data['amount'].mean()
|
| 653 |
-
first_order = customer_data['order_date'].min()
|
| 654 |
-
last_order = customer_data['order_date'].max()
|
| 655 |
-
segment = customer_data['Segment'].iloc[0]
|
| 656 |
-
risk_level = customer_data['Churn_Risk'].iloc[0]
|
| 657 |
-
recency = customer_data['recency_days'].iloc[0]
|
| 658 |
-
|
| 659 |
-
churn_prob = 0.5
|
| 660 |
-
if self.predictions is not None:
|
| 661 |
-
pred_data = self.predictions[self.predictions['customer_id'] == customer_id]
|
| 662 |
-
if not pred_data.empty:
|
| 663 |
-
churn_prob = pred_data['churn_probability'].iloc[0]
|
| 664 |
-
|
| 665 |
-
insights_html = f"""
|
| 666 |
-
<div style="background: white; padding: 2rem; border-radius: 1rem; box-shadow: 0 10px 25px -5px rgba(0, 0, 0, 0.1); margin-bottom: 2rem;">
|
| 667 |
-
<div style="text-align: center; margin-bottom: 2rem;">
|
| 668 |
-
<div style="display: inline-block; padding: 1.5rem; background: linear-gradient(135deg, #6366f1 0%, #8b5cf6 100%); border-radius: 50%; margin-bottom: 1rem;">
|
| 669 |
-
<span style="font-size: 2rem; color: white;">📊</span>
|
| 670 |
-
</div>
|
| 671 |
-
<h3 style="color: #1f2937; font-size: 1.75rem; font-weight: bold; margin-bottom: 0.5rem;">
|
| 672 |
-
Customer Profile: {customer_id}
|
| 673 |
-
</h3>
|
| 674 |
-
<p style="color: #6b7280; font-size: 1.1rem;">Comprehensive Customer Intelligence Report</p>
|
| 675 |
-
</div>
|
| 676 |
|
| 677 |
-
|
| 678 |
-
|
| 679 |
-
|
| 680 |
-
|
| 681 |
-
|
| 682 |
-
|
| 683 |
-
|
| 684 |
-
|
| 685 |
-
</div>
|
| 686 |
-
<div style="background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%); padding: 1.5rem; border-radius: 1rem; color: white; text-align: center;">
|
| 687 |
-
<h4 style="font-size: 0.9rem; opacity: 0.9; margin-bottom: 0.5rem; font-weight: 600;">CHURN PROBABILITY</h4>
|
| 688 |
-
<div style="font-size: 1.5rem; font-weight: bold;">{churn_prob:.1%}</div>
|
| 689 |
-
</div>
|
| 690 |
-
</div>
|
| 691 |
|
| 692 |
-
|
| 693 |
-
|
| 694 |
-
|
| 695 |
-
|
| 696 |
-
|
| 697 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 698 |
</div>
|
| 699 |
-
<
|
| 700 |
-
|
| 701 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 702 |
</div>
|
| 703 |
-
<div>
|
| 704 |
-
<
|
| 705 |
-
<div style="font-size:
|
| 706 |
</div>
|
| 707 |
-
<div>
|
| 708 |
-
<
|
| 709 |
-
<div style="font-size:
|
| 710 |
</div>
|
| 711 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 712 |
</div>
|
|
|
|
| 713 |
|
| 714 |
-
|
| 715 |
-
|
| 716 |
-
|
| 717 |
-
|
| 718 |
-
</p>
|
| 719 |
-
</div>
|
| 720 |
-
</div>
|
| 721 |
-
"""
|
| 722 |
-
|
| 723 |
-
return insights_html
|
| 724 |
|
| 725 |
def _get_customer_recommendations(self, segment, risk_level, churn_prob, recency):
|
| 726 |
"""Generate personalized recommendations based on customer profile"""
|
|
|
|
| 34 |
'indigo': '#6366f1'
|
| 35 |
}
|
| 36 |
|
| 37 |
+
plt.style.use('default') # Changed from seaborn-v0_8-whitegrid for compatibility
|
| 38 |
sns.set_palette("husl")
|
| 39 |
|
| 40 |
class B2BCustomerAnalytics:
|
| 41 |
def __init__(self):
|
| 42 |
self.df = None
|
| 43 |
+
self.processed_df = None
|
| 44 |
self.model = None
|
| 45 |
self.feature_importance = None
|
| 46 |
self.predictions = None
|
|
|
|
| 51 |
if file is None:
|
| 52 |
return "Please upload a CSV file", None, None, None
|
| 53 |
|
| 54 |
+
# Load raw data
|
| 55 |
self.df = pd.read_csv(file.name)
|
| 56 |
|
| 57 |
+
# Check for required columns - be flexible with column names
|
| 58 |
required_columns = ['customer_id', 'order_date', 'amount']
|
| 59 |
+
df_columns_lower = [col.lower() for col in self.df.columns]
|
|
|
|
|
|
|
| 60 |
|
| 61 |
+
# Map common variations
|
| 62 |
+
column_mapping = {}
|
| 63 |
+
for req_col in required_columns:
|
| 64 |
+
found = False
|
| 65 |
+
for df_col in self.df.columns:
|
| 66 |
+
if req_col in df_col.lower() or df_col.lower() in req_col:
|
| 67 |
+
column_mapping[req_col] = df_col
|
| 68 |
+
found = True
|
| 69 |
+
break
|
| 70 |
+
if not found:
|
| 71 |
+
return f"Missing required column: {req_col}. Available columns: {list(self.df.columns)}", None, None, None
|
| 72 |
+
|
| 73 |
+
# Rename columns to standard names
|
| 74 |
+
self.df = self.df.rename(columns=column_mapping)
|
| 75 |
+
|
| 76 |
+
# Convert order_date to datetime
|
| 77 |
self.df['order_date'] = pd.to_datetime(self.df['order_date'])
|
| 78 |
|
| 79 |
+
# Calculate RFM metrics
|
| 80 |
+
self.processed_df = self.calculate_rfm_metrics(self.df.copy())
|
| 81 |
|
| 82 |
+
# Perform customer segmentation
|
| 83 |
+
self.processed_df = self.perform_customer_segmentation(self.processed_df)
|
| 84 |
|
| 85 |
+
# Generate summary
|
| 86 |
summary_html, kpi_cards = self.generate_summary_dashboard()
|
| 87 |
|
| 88 |
+
return "Data loaded successfully!", summary_html, self.processed_df.head(20), kpi_cards
|
| 89 |
|
| 90 |
except Exception as e:
|
| 91 |
return f"Error loading data: {str(e)}", None, None, None
|
| 92 |
|
| 93 |
def calculate_rfm_metrics(self, df):
    """Attach per-customer recency, frequency and monetary values to each row.

    Args:
        df: Transaction-level DataFrame with ``customer_id``, ``order_date``
            (datetime) and ``amount`` columns.

    Returns:
        A new DataFrame holding the input rows plus ``recency_days``,
        ``frequency`` and ``monetary`` columns looked up by ``customer_id``.
        If the computation raises, the error is printed and ``df`` itself is
        returned unchanged (best-effort behaviour kept from the original).
    """
    rfm_columns = ['recency_days', 'frequency', 'monetary']

    try:
        # Discard RFM columns that already exist in the input; otherwise the
        # merge below would produce ``*_x`` / ``*_y`` suffixed duplicates and
        # later lookups of e.g. ``recency_days`` would fail.
        base = df.drop(columns=rfm_columns, errors='ignore')

        # Recency is measured from the day after the latest order in the data,
        # so the most recent customer gets a recency of 1 rather than 0.
        current_date = base['order_date'].max() + timedelta(days=1)

        customer_metrics = base.groupby('customer_id').agg(
            last_order_date=('order_date', 'max'),
            frequency=('order_date', 'count'),
            monetary=('amount', 'sum'),
        )
        customer_metrics['monetary'] = customer_metrics['monetary'].round(2)
        customer_metrics['recency_days'] = (
            current_date - customer_metrics['last_order_date']
        ).dt.days

        # Broadcast the customer-level values back onto every transaction row.
        return base.merge(
            customer_metrics[rfm_columns],
            left_on='customer_id',
            right_index=True,
            how='left',
        )

    except Exception as e:
        print(f"Error in calculate_rfm_metrics: {e}")
        return df
|
| 121 |
|
| 122 |
def perform_customer_segmentation(self, df):
    """Score customers on R/F/M and label each row with a segment and churn risk.

    Args:
        df: Transaction-level DataFrame that carries ``customer_id`` and the
            per-customer ``recency_days``, ``frequency`` and ``monetary``
            columns.

    Returns:
        A new DataFrame with the input rows plus ``Segment``, ``Churn_Risk``,
        ``R_Score``, ``F_Score`` and ``M_Score``. If scoring raises, the error
        is printed and every row receives the neutral defaults
        (``'Others'``, ``'Medium'``, scores of 3). The input frame is never
        modified.
    """
    segment_columns = ['Segment', 'Churn_Risk', 'R_Score', 'F_Score', 'M_Score']

    # (source column, score column, labels from lowest bin to highest bin).
    # Recency labels are reversed: a small recency earns the high score.
    score_specs = [
        ('recency_days', 'R_Score', [5, 4, 3, 2, 1]),
        ('frequency', 'F_Score', [1, 2, 3, 4, 5]),
        ('monetary', 'M_Score', [1, 2, 3, 4, 5]),
    ]

    # Segments absent from this table are treated as low risk.
    risk_by_segment = {
        'Lost Customers': 'High',
        'At Risk': 'High',
        'Others': 'Medium',
        'Cannot Lose Them': 'Medium',
    }

    def classify(r, f, m):
        """Map one customer's R/F/M scores to a segment name (first match wins)."""
        if r >= 4 and f >= 4 and m >= 4:
            return 'Champions'
        if r >= 3 and f >= 3 and m >= 3:
            return 'Loyal Customers'
        if r >= 3 and f >= 2:
            return 'Potential Loyalists'
        if r >= 4 and f <= 2:
            return 'New Customers'
        if r <= 2 and f >= 3:
            return 'At Risk'
        if r <= 2 and f <= 2 and m >= 3:
            return 'Cannot Lose Them'
        if r <= 2 and f <= 2 and m <= 2:
            return 'Lost Customers'
        return 'Others'

    # Drop segmentation columns that already exist in the input so the merge
    # below cannot create ``*_x`` / ``*_y`` duplicates; ``drop`` also returns
    # a new frame, which keeps the caller's DataFrame untouched.
    base = df.drop(columns=segment_columns, errors='ignore')

    try:
        # One row per customer.
        customer_df = base.groupby('customer_id').agg({
            'recency_days': 'first',
            'frequency': 'first',
            'monetary': 'first',
        }).reset_index()

        try:
            # Ranking with method='first' breaks ties so quantile edges stay unique.
            scores = {
                target: pd.qcut(
                    customer_df[source].rank(method='first'),
                    5,
                    labels=labels,
                    duplicates='drop',
                )
                for source, target, labels in score_specs
            }
        except ValueError:
            # Quantile binning is not possible (e.g. too few customers);
            # fall back to equal-width bins for all three scores.
            scores = {
                target: pd.cut(customer_df[source], 5, labels=labels)
                for source, target, labels in score_specs
            }

        for target, values in scores.items():
            customer_df[target] = values.astype(int)

        customer_df['Segment'] = [
            classify(r, f, m)
            for r, f, m in zip(
                customer_df['R_Score'],
                customer_df['F_Score'],
                customer_df['M_Score'],
            )
        ]
        customer_df['Churn_Risk'] = [
            risk_by_segment.get(segment, 'Low')
            for segment in customer_df['Segment']
        ]

        # Broadcast the customer-level labels back onto every transaction row.
        return base.merge(
            customer_df[['customer_id'] + segment_columns],
            on='customer_id',
            how='left',
        )

    except Exception as e:
        print(f"Error in perform_customer_segmentation: {e}")
        # Best-effort fallback: neutral labels on a copy of the input.
        fallback = base.copy()
        fallback['Segment'] = 'Others'
        fallback['Churn_Risk'] = 'Medium'
        fallback['R_Score'] = 3
        fallback['F_Score'] = 3
        fallback['M_Score'] = 3
        return fallback
|
| 186 |
|
| 187 |
def generate_summary_dashboard(self):
    """Build the dashboard banner, KPI card grid and the matching KPI table.

    Reads ``self.processed_df`` and uses its ``customer_id``, ``amount``,
    ``Segment`` and ``Churn_Risk`` columns.

    Returns:
        A ``(summary_html, kpi_data)`` tuple. ``kpi_data`` is a list of
        ``[label, value, icon, colour]`` rows. When no data is loaded the
        result is ``("No data loaded", "")``; when building the dashboard
        raises, it is ``("Error generating dashboard: ...", [])``.
    """
    if self.processed_df is None:
        return "No data loaded", ""

    def format_revenue(value):
        """Render a currency total with a unit that fits its magnitude."""
        magnitude = abs(value)
        # Totals were previously always shown in millions, so anything below
        # ~$50K appeared as "$0.0M". Thresholds sit half a display step below
        # the unit so a value never renders as "1000.0K".
        if magnitude >= 999_950:
            return f"${value / 1_000_000:.1f}M"
        if magnitude >= 999.5:
            return f"${value / 1_000:.1f}K"
        return f"${value:.0f}"

    def render_card(title, subtitle, value, icon, accent, icon_bg, icon_fg):
        """Return the HTML for one KPI card."""
        return f"""
            <div style="background: white; padding: 1.5rem; border-radius: 1rem; box-shadow: 0 10px 25px -5px rgba(0, 0, 0, 0.1); border-left: 4px solid {accent};">
                <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 1rem;">
                    <div style="padding: 0.75rem; background: {icon_bg}; border-radius: 0.5rem; color: {icon_fg};">{icon}</div>
                    <span style="font-size: 2rem; font-weight: bold; color: {accent};">{value}</span>
                </div>
                <h3 style="color: #1f2937; font-weight: 600; margin: 0;">{title}</h3>
                <p style="color: #6b7280; font-size: 0.875rem; margin: 0.25rem 0 0 0;">{subtitle}</p>
            </div>
        """

    try:
        data = self.processed_df
        total_customers = data['customer_id'].nunique()
        total_revenue = data['amount'].sum()
        avg_order_value = data['amount'].mean()

        # Segment and risk are customer-level attributes repeated on each
        # transaction row, so take one value per customer before counting.
        per_customer = data.groupby('customer_id')
        segment_dist = per_customer['Segment'].first().value_counts()
        risk_dist = per_customer['Churn_Risk'].first().value_counts()

        # Format each figure once so the HTML cards and the KPI table agree.
        customers_text = f"{total_customers:,}"
        revenue_text = format_revenue(total_revenue)
        avg_order_text = f"${avg_order_value:.0f}"
        high_risk_text = f"{risk_dist.get('High', 0)}"
        champions_text = f"{segment_dist.get('Champions', 0)}"
        healthy_text = f"{risk_dist.get('Low', 0)}"

        # (title, subtitle, value, icon, accent, icon background, icon colour)
        cards = [
            ("Total Customers", "Active enterprise clients", customers_text,
             "📊", "#3b82f6", "#dbeafe", "#1d4ed8"),
            ("Total Revenue", "Contract value sum", revenue_text,
             "💰", "#10b981", "#d1fae5", "#047857"),
            ("Avg Order Value", "Per order average", avg_order_text,
             "📈", "#8b5cf6", "#ede9fe", "#7c3aed"),
            ("High Risk Clients", "Need immediate attention", high_risk_text,
             "🚨", "#ef4444", "#fee2e2", "#dc2626"),
            ("Champion Customers", "Top tier clients", champions_text,
             "🏆", "#f59e0b", "#fef3c7", "#d97706"),
            ("Healthy Customers", "Low churn risk", healthy_text,
             "✅", "#06b6d4", "#cffafe", "#0891b2"),
        ]
        cards_html = "".join(render_card(*card) for card in cards)

        # Create modern horizontal dashboard
        summary_html = f"""
        <div style="background: linear-gradient(135deg, #6366f1 0%, #8b5cf6 100%); padding: 2rem; border-radius: 1rem; color: white; margin-bottom: 2rem; text-align: center;">
            <h1 style="font-size: 2.5rem; font-weight: bold; margin-bottom: 0.5rem;">
                B2B Customer Analytics Platform
            </h1>
            <p style="font-size: 1.2rem; opacity: 0.9;">
                Enterprise Customer Health Monitoring & Churn Prediction System
            </p>
        </div>

        <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 1.5rem; margin-bottom: 3rem;">
            {cards_html}
        </div>
        """

        kpi_data = [
            ["Total Customers", customers_text, "👥", "#3b82f6"],
            ["Total Revenue", revenue_text, "💰", "#10b981"],
            ["Avg Order Value", avg_order_text, "📈", "#8b5cf6"],
            ["High Risk Customers", high_risk_text, "🚨", "#ef4444"],
            ["Champion Customers", champions_text, "🏆", "#f59e0b"],
            ["Healthy Customers", healthy_text, "✅", "#06b6d4"],
        ]

        return summary_html, kpi_data

    except Exception as e:
        return f"Error generating dashboard: {str(e)}", []
|
| 283 |
|
| 284 |
def train_churn_model(self):
|
| 285 |
"""Train churn prediction model"""
|
| 286 |
+
if self.processed_df is None:
|
| 287 |
+
return "No data available. Please upload and process a CSV file first.", None
|
| 288 |
|
| 289 |
try:
|
| 290 |
+
# Prepare customer-level features
|
| 291 |
+
customer_features = self.processed_df.groupby('customer_id').agg({
|
| 292 |
'recency_days': 'first',
|
| 293 |
'frequency': 'first',
|
| 294 |
'monetary': 'first',
|
|
|
|
| 296 |
'order_date': ['min', 'max']
|
| 297 |
}).reset_index()
|
| 298 |
|
| 299 |
+
# Flatten column names
|
| 300 |
customer_features.columns = ['customer_id', 'recency_days', 'frequency', 'monetary',
|
| 301 |
'avg_amount', 'std_amount', 'min_amount', 'max_amount',
|
| 302 |
'first_order', 'last_order']
|
| 303 |
|
| 304 |
+
# Handle missing values
|
| 305 |
customer_features['std_amount'].fillna(0, inplace=True)
|
| 306 |
|
| 307 |
+
# Calculate additional features
|
| 308 |
customer_features['customer_lifetime'] = (customer_features['last_order'] - customer_features['first_order']).dt.days
|
| 309 |
customer_features['customer_lifetime'].fillna(0, inplace=True)
|
| 310 |
|
| 311 |
+
# Create churn labels based on recency
|
| 312 |
+
customer_features['churn_label'] = (customer_features['recency_days'] > 90).astype(int)
|
|
|
|
|
|
|
|
|
|
| 313 |
|
| 314 |
+
# Select features for modeling
|
| 315 |
feature_cols = ['recency_days', 'frequency', 'monetary', 'avg_amount', 'std_amount',
|
| 316 |
'min_amount', 'max_amount', 'customer_lifetime']
|
| 317 |
|
| 318 |
X = customer_features[feature_cols]
|
| 319 |
y = customer_features['churn_label']
|
| 320 |
|
| 321 |
+
# Train-test split
|
| 322 |
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
|
| 323 |
|
| 324 |
+
# Train model
|
| 325 |
self.model = xgb.XGBClassifier(random_state=42, eval_metric='logloss')
|
| 326 |
self.model.fit(X_train, y_train)
|
| 327 |
|
| 328 |
+
# Make predictions
|
| 329 |
y_pred = self.model.predict(X_test)
|
| 330 |
y_pred_proba = self.model.predict_proba(X_test)[:, 1]
|
| 331 |
|
| 332 |
+
# Feature importance
|
| 333 |
self.feature_importance = pd.DataFrame({
|
| 334 |
'feature': feature_cols,
|
| 335 |
'importance': self.model.feature_importances_
|
| 336 |
}).sort_values('importance', ascending=False)
|
| 337 |
|
| 338 |
+
# Predict for all customers
|
| 339 |
all_predictions = self.model.predict_proba(X)[:, 1]
|
| 340 |
customer_features['churn_probability'] = all_predictions
|
| 341 |
self.predictions = customer_features
|
|
|
|
| 397 |
if self.feature_importance is None:
|
| 398 |
return None
|
| 399 |
|
| 400 |
+
try:
|
| 401 |
+
fig = px.bar(
|
| 402 |
+
self.feature_importance.head(8),
|
| 403 |
+
x='importance',
|
| 404 |
+
y='feature',
|
| 405 |
+
orientation='h',
|
| 406 |
+
title='Feature Importance Analysis',
|
| 407 |
+
labels={'importance': 'Importance Score', 'feature': 'Features'},
|
| 408 |
+
color='importance',
|
| 409 |
+
color_continuous_scale='viridis'
|
| 410 |
+
)
|
| 411 |
+
|
| 412 |
+
fig.update_layout(
|
| 413 |
+
height=500,
|
| 414 |
+
showlegend=False,
|
| 415 |
+
plot_bgcolor='white',
|
| 416 |
+
paper_bgcolor='white',
|
| 417 |
+
title={
|
| 418 |
+
'text': '<b>Feature Importance Analysis</b>',
|
| 419 |
+
'x': 0.5,
|
| 420 |
+
'xanchor': 'center',
|
| 421 |
+
'font': {'size': 20, 'color': '#1f2937'}
|
| 422 |
+
},
|
| 423 |
+
font=dict(family="Inter, system-ui, sans-serif", size=12),
|
| 424 |
+
yaxis={'categoryorder': 'total ascending'},
|
| 425 |
+
xaxis=dict(gridcolor='#f1f5f9'),
|
| 426 |
+
yaxis_title=dict(font_size=14),
|
| 427 |
+
xaxis_title=dict(font_size=14)
|
| 428 |
+
)
|
| 429 |
+
|
| 430 |
+
return fig
|
| 431 |
+
|
| 432 |
+
except Exception as e:
|
| 433 |
+
print(f"Error creating performance chart: {e}")
|
| 434 |
+
return None
|
| 435 |
|
| 436 |
def create_visualizations(self):
    """Create comprehensive modern visualizations.

    Builds four Plotly Express figures from ``self.processed_df``:

    1. a donut chart of the number of customers per ``Segment``;
    2. an RFM scatter (``recency_days`` vs ``frequency``, bubble size
       ``monetary``, colored by ``Segment``);
    3. a churn view: a histogram of ``churn_probability`` when
       ``self.predictions`` is set, otherwise a bar chart of the
       rule-based ``Churn_Risk`` levels;
    4. a line chart of ``amount`` summed per calendar month of
       ``order_date``.

    Returns:
        tuple: ``(fig1, fig2, fig3, fig4)``. Four ``None`` values are
        returned when no processed data is loaded or when any chart fails
        to build (the error is printed, not raised).
    """
    if self.processed_df is None:
        return None, None, None, None

    # Layout fragments shared by every chart.
    centered_title = {'x': 0.5, 'xanchor': 'center', 'font': {'size': 20, 'color': '#1f2937'}}
    base_font = dict(family="Inter, system-ui, sans-serif", size=12)

    try:
        # 1. Customer Segment Distribution
        # One row per customer so each customer is counted once, not once per order.
        segment_data = (
            self.processed_df.groupby('customer_id')['Segment']
            .first()
            .value_counts()
            .reset_index()
        )
        segment_data.columns = ['Segment', 'Count']

        fig1 = px.pie(
            segment_data,
            values='Count',
            names='Segment',
            title='<b>Customer Segment Distribution</b>',
            hole=0.4,
            color_discrete_sequence=['#6366f1', '#10b981', '#f59e0b', '#ef4444', '#8b5cf6', '#ec4899']
        )
        fig1.update_traces(textposition='inside', textinfo='percent+label', textfont_size=13)
        fig1.update_layout(
            height=450,
            showlegend=True,
            title=centered_title,
            font=base_font,
            paper_bgcolor='white',
            plot_bgcolor='white'
        )

        # 2. RFM Analysis
        customer_rfm = self.processed_df.groupby('customer_id').agg({
            'recency_days': 'first',
            'frequency': 'first',
            'monetary': 'first',
            'Segment': 'first'
        }).reset_index()

        fig2 = px.scatter(
            customer_rfm,
            x='recency_days',
            y='frequency',
            size='monetary',
            color='Segment',
            title='<b>RFM Customer Behavior Matrix</b>',
            labels={
                'recency_days': 'Days Since Last Purchase',
                'frequency': 'Purchase Frequency',
                'monetary': 'Total Revenue'
            },
            color_discrete_sequence=['#6366f1', '#10b981', '#f59e0b', '#ef4444', '#8b5cf6'],
            size_max=60
        )
        fig2.update_layout(
            height=500,
            title=centered_title,
            font=base_font,
            paper_bgcolor='white',
            plot_bgcolor='white'
        )

        # 3. Churn Risk Analysis
        if self.predictions is not None:
            # Model scores are available: show their distribution.
            fig3 = px.histogram(
                self.predictions,
                x='churn_probability',
                nbins=20,
                title='<b>Churn Probability Distribution</b>',
                labels={'churn_probability': 'Churn Probability', 'count': 'Number of Customers'},
                color_discrete_sequence=[COLORS['primary']]
            )
            fig3.add_vline(x=0.5, line_dash="dash", line_color="#ef4444", line_width=2,
                           annotation_text="High Risk Threshold", annotation_position="top")
        else:
            # No model yet: fall back to the Churn_Risk labels in the data.
            risk_data = (
                self.processed_df.groupby('customer_id')['Churn_Risk']
                .first()
                .value_counts()
                .reset_index()
            )
            risk_data.columns = ['Risk_Level', 'Count']
            colors_map = {'High': '#ef4444', 'Medium': '#f59e0b', 'Low': '#10b981'}
            fig3 = px.bar(
                risk_data,
                x='Risk_Level',
                y='Count',
                title='<b>Customer Churn Risk Distribution</b>',
                color='Risk_Level',
                color_discrete_map=colors_map
            )

        fig3.update_layout(
            height=450,
            showlegend=False,
            title=centered_title,
            font=base_font,
            plot_bgcolor='white',
            paper_bgcolor='white'
        )

        # 4. Revenue Trends
        # Group by a derived Series instead of writing an 'order_month'
        # column into self.processed_df: drawing charts must not alter the
        # shared frame that other methods read.
        order_month = self.processed_df['order_date'].dt.to_period('M').rename('order_month')
        monthly_revenue = self.processed_df.groupby(order_month)['amount'].sum().reset_index()
        # Periods are converted to plain strings for the x axis.
        monthly_revenue['order_month'] = monthly_revenue['order_month'].astype(str)

        fig4 = px.line(
            monthly_revenue,
            x='order_month',
            y='amount',
            title='<b>Monthly Revenue Trends</b>',
            labels={'amount': 'Revenue ($)', 'order_month': 'Month'},
            line_shape='spline'
        )
        fig4.update_traces(line_color=COLORS['primary'], line_width=4, mode='lines+markers')
        fig4.update_layout(
            height=450,
            title=centered_title,
            font=base_font,
            plot_bgcolor='white',
            paper_bgcolor='white',
            xaxis_tickangle=-45,
            xaxis=dict(gridcolor='#f1f5f9'),
            yaxis=dict(gridcolor='#f1f5f9')
        )

        return fig1, fig2, fig3, fig4

    except Exception as e:
        # Best-effort UI helper: report the problem and let the caller
        # render empty plots instead of crashing the app.
        print(f"Error creating visualizations: {e}")
        return None, None, None, None
|
| 559 |
|
| 560 |
def create_customer_table(self):
    """Create modern customer segmentation table.

    Collapses ``self.processed_df`` to one row per ``customer_id`` (first
    value of the segment, risk and RFM columns, mean of ``amount``) and
    attaches a churn probability expressed as a percentage.

    Customers without a model score get a neutral 50%: this applies both
    when no model has been trained (``self.predictions`` is ``None``) and
    when a customer is missing from ``self.predictions``.

    Returns:
        pandas.DataFrame | None: the first 50 customers with display-ready
        column names, or ``None`` when no data is loaded or the table
        cannot be built (the error is printed, not raised).
    """
    if self.processed_df is None:
        return None

    # Neutral value used whenever a customer has no model score.
    unknown_churn_probability = 0.5

    try:
        customer_summary = self.processed_df.groupby('customer_id').agg({
            'Segment': 'first',
            'Churn_Risk': 'first',
            'recency_days': 'first',
            'frequency': 'first',
            'monetary': 'first',
            'amount': 'mean'
        }).reset_index()

        if self.predictions is not None:
            customer_summary = customer_summary.merge(
                self.predictions[['customer_id', 'churn_probability']],
                on='customer_id',
                how='left'
            )
            # A left merge leaves NaN for customers the model never scored.
            # Use the same neutral default as the no-model branch rather
            # than 0, which would present them as having no churn risk.
            customer_summary['churn_probability'] = (
                customer_summary['churn_probability'].fillna(unknown_churn_probability)
            )
        else:
            customer_summary['churn_probability'] = unknown_churn_probability

        customer_summary['monetary'] = customer_summary['monetary'].round(2)
        customer_summary['amount'] = customer_summary['amount'].round(2)
        customer_summary['churn_probability'] = (customer_summary['churn_probability'] * 100).round(1)

        # Rename by key so the labels cannot drift if column order changes.
        customer_summary = customer_summary.rename(columns={
            'customer_id': 'Customer ID',
            'Segment': 'Segment',
            'Churn_Risk': 'Risk Level',
            'recency_days': 'Recency (Days)',
            'frequency': 'Frequency',
            'monetary': 'Total Spent ($)',
            'amount': 'Avg Order ($)',
            'churn_probability': 'Churn Probability (%)'
        })

        return customer_summary.head(50)

    except Exception as e:
        # Best-effort UI helper: report the problem and return no table.
        print(f"Error creating customer table: {e}")
        return None
|
| 599 |
|
| 600 |
def generate_pdf_report(self):
|
| 601 |
"""Generate comprehensive PDF report"""
|
| 602 |
+
if self.processed_df is None:
|
| 603 |
return None
|
| 604 |
|
| 605 |
try:
|
|
|
|
| 624 |
|
| 625 |
story.append(Paragraph("Executive Summary", styles['Heading2']))
|
| 626 |
|
| 627 |
+
total_customers = self.processed_df['customer_id'].nunique()
|
| 628 |
+
total_revenue = self.processed_df['amount'].sum()
|
| 629 |
+
avg_order_value = self.processed_df['amount'].mean()
|
| 630 |
+
high_risk_customers = len(self.processed_df[self.processed_df['Churn_Risk'] == 'High']['customer_id'].unique())
|
| 631 |
|
| 632 |
summary_text = f"""
|
| 633 |
This comprehensive analysis examines {total_customers} B2B customers with total revenue of ${total_revenue:,.2f}.
|
|
|
|
| 645 |
|
| 646 |
story.append(Paragraph("Key Performance Indicators", styles['Heading2']))
|
| 647 |
|
| 648 |
+
segment_dist = self.processed_df.groupby('customer_id')['Segment'].first().value_counts()
|
| 649 |
+
risk_dist = self.processed_df.groupby('customer_id')['Churn_Risk'].first().value_counts()
|
| 650 |
|
| 651 |
metrics_data = [
|
| 652 |
['Metric', 'Value', 'Status'],
|
|
|
|
| 718 |
|
| 719 |
def get_customer_insights(self, customer_id):
    """Get detailed insights for a specific customer.

    Looks the customer up in ``self.processed_df`` and renders an HTML
    profile card with segment, churn risk, churn probability, order
    statistics and the text from ``self._get_customer_recommendations``.

    Args:
        customer_id: Identifier to look up. An exact match against the
            ``customer_id`` column is tried first; if nothing matches, the
            column and the stripped argument are compared as strings, so a
            text value such as ``"101"`` finds a numeric ID ``101``.

    Returns:
        str: The HTML card, or a short message when no data is loaded, the
        customer is not found, or an error occurs. The customer ID, segment
        and risk level are HTML-escaped before being embedded.
    """
    import html  # local import: this block cannot edit the file's import header

    if self.processed_df is None:
        return "No data available"

    try:
        def rows_for(frame):
            """Return the rows of *frame* belonging to the requested customer."""
            exact = frame[frame['customer_id'] == customer_id]
            if not exact.empty:
                return exact
            # Fallback for a type mismatch between the column and the argument.
            wanted = str(customer_id).strip()
            return frame[frame['customer_id'].astype(str) == wanted]

        customer_data = rows_for(self.processed_df)
        if customer_data.empty:
            # The ID is caller-supplied; escape it before echoing it back.
            return f"Customer {html.escape(str(customer_id))} not found"

        total_orders = len(customer_data)
        total_spent = customer_data['amount'].sum()
        avg_order_value = customer_data['amount'].mean()
        segment = customer_data['Segment'].iloc[0]
        risk_level = customer_data['Churn_Risk'].iloc[0]
        recency = customer_data['recency_days'].iloc[0]

        # Neutral probability until a model score is found for this customer.
        churn_prob = 0.5
        if self.predictions is not None:
            pred_data = rows_for(self.predictions)
            if not pred_data.empty:
                churn_prob = pred_data['churn_probability'].iloc[0]

        # Display values are escaped; the raw values are still passed to
        # the recommendation helper.
        shown_id = html.escape(str(customer_data['customer_id'].iloc[0]))
        shown_segment = html.escape(str(segment))
        shown_risk = html.escape(str(risk_level))
        recommendations = self._get_customer_recommendations(segment, risk_level, churn_prob, recency)

        insights_html = f"""
        <div style="background: white; padding: 2rem; border-radius: 1rem; box-shadow: 0 10px 25px -5px rgba(0, 0, 0, 0.1); margin-bottom: 2rem;">
            <div style="text-align: center; margin-bottom: 2rem;">
                <div style="display: inline-block; padding: 1.5rem; background: linear-gradient(135deg, #6366f1 0%, #8b5cf6 100%); border-radius: 50%; margin-bottom: 1rem;">
                    <span style="font-size: 2rem; color: white;">📊</span>
                </div>
                <h3 style="color: #1f2937; font-size: 1.75rem; font-weight: bold; margin-bottom: 0.5rem;">
                    Customer Profile: {shown_id}
                </h3>
                <p style="color: #6b7280; font-size: 1.1rem;">Comprehensive Customer Intelligence Report</p>
            </div>

            <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); gap: 1.5rem; margin-bottom: 2rem;">
                <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 1.5rem; border-radius: 1rem; color: white; text-align: center;">
                    <h4 style="font-size: 0.9rem; opacity: 0.9; margin-bottom: 0.5rem; font-weight: 600;">CUSTOMER SEGMENT</h4>
                    <div style="font-size: 1.5rem; font-weight: bold;">{shown_segment}</div>
                </div>
                <div style="background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%); padding: 1.5rem; border-radius: 1rem; color: white; text-align: center;">
                    <h4 style="font-size: 0.9rem; opacity: 0.9; margin-bottom: 0.5rem; font-weight: 600;">CHURN RISK</h4>
                    <div style="font-size: 1.5rem; font-weight: bold;">{shown_risk}</div>
                </div>
                <div style="background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%); padding: 1.5rem; border-radius: 1rem; color: white; text-align: center;">
                    <h4 style="font-size: 0.9rem; opacity: 0.9; margin-bottom: 0.5rem; font-weight: 600;">CHURN PROBABILITY</h4>
                    <div style="font-size: 1.5rem; font-weight: bold;">{churn_prob:.1%}</div>
                </div>
            </div>

            <div style="background: #f8fafc; padding: 2rem; border-radius: 1rem; margin-bottom: 2rem;">
                <h4 style="color: #374151; font-weight: 700; margin-bottom: 1.5rem; font-size: 1.3rem;">Transaction Analytics</h4>
                <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 2rem;">
                    <div>
                        <div style="font-size: 0.875rem; color: #6b7280; font-weight: 600; margin-bottom: 0.5rem;">Total Orders</div>
                        <div style="font-size: 2rem; font-weight: bold; color: #1f2937;">{total_orders}</div>
                    </div>
                    <div>
                        <div style="font-size: 0.875rem; color: #6b7280; font-weight: 600; margin-bottom: 0.5rem;">Total Spent</div>
                        <div style="font-size: 2rem; font-weight: bold; color: #1f2937;">${total_spent:,.2f}</div>
                    </div>
                    <div>
                        <div style="font-size: 0.875rem; color: #6b7280; font-weight: 600; margin-bottom: 0.5rem;">Avg Order Value</div>
                        <div style="font-size: 2rem; font-weight: bold; color: #1f2937;">${avg_order_value:.2f}</div>
                    </div>
                    <div>
                        <div style="font-size: 0.875rem; color: #6b7280; font-weight: 600; margin-bottom: 0.5rem;">Days Since Last Order</div>
                        <div style="font-size: 2rem; font-weight: bold; color: #1f2937;">{recency}</div>
                    </div>
                </div>
            </div>

            <div style="background: linear-gradient(135deg, #f0f9ff, #e0f2fe); border-left: 4px solid #3b82f6; padding: 1.5rem; border-radius: 0.5rem;">
                <h4 style="color: #1e40af; font-weight: 700; margin-bottom: 1rem; font-size: 1.2rem;">Strategic Recommendations</h4>
                <p style="color: #1f2937; margin: 0; font-size: 1rem; line-height: 1.6;">
                    {recommendations}
                </p>
            </div>
        </div>
        """

        return insights_html

    except Exception as e:
        # Best-effort UI helper: surface the error as text instead of raising.
        return f"Error getting customer insights: {str(e)}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 806 |
|
| 807 |
def _get_customer_recommendations(self, segment, risk_level, churn_prob, recency):
|
| 808 |
"""Generate personalized recommendations based on customer profile"""
|