entropy25 committed on
Commit
45a90de
·
verified ·
1 Parent(s): 0a4c6d8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +544 -0
app.py ADDED
@@ -0,0 +1,544 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+ from sklearn.model_selection import train_test_split
7
+ from sklearn.ensemble import RandomForestClassifier
8
+ from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
9
+ import xgboost as xgb
10
+ from datetime import datetime, timedelta
11
+ import plotly.express as px
12
+ import plotly.graph_objects as go
13
+ from plotly.subplots import make_subplots
14
+ import plotly.io as pio
15
+ from reportlab.lib.pagesizes import letter, A4
16
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image, Table, TableStyle, PageBreak
17
+ from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
18
+ from reportlab.lib.units import inch
19
+ from reportlab.lib import colors
20
+ import io
21
+ import base64
22
+ import warnings
23
# Suppress every warning category for the whole process (no category or
# module filter is given, so this also hides deprecation notices).
warnings.filterwarnings('ignore')

# Set plotting style
plt.style.use('default')
sns.set_palette("husl")
28
+
29
class B2BCustomerAnalytics:
    """RFM scoring, segmentation, churn modelling, charts and PDF reporting.

    State lives on the instance so that separate UI callbacks can share it:

    * ``df`` - order rows enriched with RFM, score, segment and risk columns
    * ``model`` - the fitted churn classifier (``None`` until trained)
    * ``feature_importance`` - DataFrame of feature/importance pairs
    * ``predictions`` - per-customer features plus ``churn_probability``
    """

    # Columns every uploaded CSV must contain.
    REQUIRED_COLUMNS = ('customer_id', 'order_date', 'amount')
    # Per-customer metrics that are computed when the CSV does not supply all of them.
    RFM_COLUMNS = ('recency_days', 'frequency', 'monetary')
    # Columns produced by perform_customer_segmentation().
    SEGMENT_COLUMNS = ('Segment', 'Churn_Risk', 'R_Score', 'F_Score', 'M_Score')
    # Customers idle for more than this many days are labelled as churned
    # when the data carries no explicit ``churn_label`` column.
    CHURN_RECENCY_DAYS = 90

    def __init__(self):
        self.df = None
        self.model = None
        self.feature_importance = None
        self.predictions = None

    def load_and_process_data(self, file):
        """Load an uploaded CSV, enrich it with RFM/segment columns and summarise it.

        Args:
            file: ``None``, a filesystem path, or an object exposing the path
                as ``.name`` (both shapes are handed out by ``gr.File``
                depending on its version/``type`` setting).

        Returns:
            ``(status, summary, preview)`` where ``summary`` is a Markdown
            string and ``preview`` the first 10 enriched rows; the last two
            are ``None`` whenever loading failed.
        """
        try:
            if file is None:
                return "Please upload a CSV file", None, None

            # A new upload invalidates everything derived from the previous
            # one; otherwise charts and the PDF could show stale results.
            self.df = None
            self.model = None
            self.feature_importance = None
            self.predictions = None

            csv_path = getattr(file, 'name', file)
            df = pd.read_csv(csv_path)

            # Basic data validation
            missing_cols = [col for col in self.REQUIRED_COLUMNS if col not in df.columns]
            if missing_cols:
                return f"Missing required columns: {missing_cols}", None, None

            df['order_date'] = pd.to_datetime(df['order_date'])

            # Calculate RFM metrics unless the file already supplies all three
            if any(col not in df.columns for col in self.RFM_COLUMNS):
                df = self.calculate_rfm_metrics(df)

            self.df = self.perform_customer_segmentation(df)
            summary = self.generate_data_summary()

            return "Data loaded successfully!", summary, self.df.head(10)

        except Exception as e:
            # Do not leave a half-processed frame behind for later callbacks.
            self.df = None
            return f"Error loading data: {str(e)}", None, None

    def calculate_rfm_metrics(self, df):
        """Return ``df`` with per-customer ``recency_days``, ``frequency``, ``monetary`` added.

        Recency is measured in days from the day after the newest order in
        the data set to each customer's last order; frequency is the order
        count; monetary is the summed ``amount`` rounded to 2 decimals.
        Any of the three columns already present in ``df`` is replaced, so
        the merge never produces ``_x``/``_y`` suffixed duplicates.
        """
        current_date = df['order_date'].max() + timedelta(days=1)

        per_customer = df.groupby('customer_id').agg(
            last_order_date=('order_date', 'max'),
            frequency=('order_date', 'count'),
            monetary=('amount', 'sum'),
        )
        per_customer['monetary'] = per_customer['monetary'].round(2)
        per_customer['recency_days'] = (current_date - per_customer['last_order_date']).dt.days

        rfm_cols = list(self.RFM_COLUMNS)
        base = df.drop(columns=rfm_cols, errors='ignore')
        return base.merge(per_customer[rfm_cols],
                          left_on='customer_id', right_index=True, how='left')

    @staticmethod
    def _quintile_scores(values, labels):
        """Score ``values`` 1-5 by quintile of their rank, using ``labels`` low-to-high.

        Ranking with ``method='first'`` breaks ties so the quantile edges are
        distinct. With fewer than two customers there is no distribution to
        rank against (``pd.qcut`` would raise on duplicate edges), so the
        neutral midpoint score 3 is assigned instead.
        """
        if len(values) < 2:
            return pd.Series(3, index=values.index, dtype=int)
        ranks = values.rank(method='first')
        return pd.qcut(ranks, 5, labels=labels).astype(int)

    def perform_customer_segmentation(self, df):
        """Attach R/F/M scores, a named segment and a churn-risk level to every row.

        Scores are computed once per customer and merged back on
        ``customer_id``. Pre-existing segment columns in ``df`` are replaced.
        """
        customer_df = df.groupby('customer_id')[list(self.RFM_COLUMNS)].first().reset_index()

        # Create RFM scores (1-5 scale); low recency is good, hence reversed labels
        customer_df['R_Score'] = self._quintile_scores(customer_df['recency_days'], [5, 4, 3, 2, 1])
        customer_df['F_Score'] = self._quintile_scores(customer_df['frequency'], [1, 2, 3, 4, 5])
        customer_df['M_Score'] = self._quintile_scores(customer_df['monetary'], [1, 2, 3, 4, 5])

        r = customer_df['R_Score']
        f = customer_df['F_Score']
        m = customer_df['M_Score']

        # Rules are evaluated top to bottom; np.select keeps the first match,
        # which mirrors an if/elif chain.
        segment_rules = [
            ('Champions', (r >= 4) & (f >= 4) & (m >= 4)),
            ('Loyal Customers', (r >= 3) & (f >= 3) & (m >= 3)),
            ('Potential Loyalists', (r >= 3) & (f >= 2)),
            ('New Customers', (r >= 4) & (f <= 2)),
            ('At Risk', (r <= 2) & (f >= 3)),
            ('Cannot Lose Them', (r <= 2) & (f <= 2) & (m >= 3)),
            ('Lost Customers', (r <= 2) & (f <= 2) & (m <= 2)),
        ]
        customer_df['Segment'] = np.select(
            [rule for _, rule in segment_rules],
            [name for name, _ in segment_rules],
            default='Others',
        )

        # Calculate churn risk; every segment not listed here is low risk
        risk_by_segment = {
            'Lost Customers': 'High',
            'At Risk': 'High',
            'Others': 'Medium',
            'Cannot Lose Them': 'Medium',
        }
        customer_df['Churn_Risk'] = customer_df['Segment'].map(risk_by_segment).fillna('Low')

        # Merge segments back to original data
        segment_cols = list(self.SEGMENT_COLUMNS)
        segment_data = customer_df[['customer_id'] + segment_cols]
        base = df.drop(columns=segment_cols, errors='ignore')
        return base.merge(segment_data, on='customer_id', how='left')

    def generate_data_summary(self):
        """Return a Markdown overview of volume, segments and churn risk for ``self.df``."""
        if self.df is None:
            return "No data loaded"

        total_customers = self.df['customer_id'].nunique()
        total_orders = len(self.df)
        total_revenue = self.df['amount'].sum()
        avg_order_value = self.df['amount'].mean()

        # One row per customer, so counts are customers rather than orders
        per_customer = self.df.groupby('customer_id')[['Segment', 'Churn_Risk']].first()
        segment_dist = per_customer['Segment'].value_counts()
        risk_dist = per_customer['Churn_Risk'].value_counts()

        # Lines are emitted flush-left so Markdown does not treat them as an
        # indented code block.
        lines = [
            "",
            "📊 **DATA OVERVIEW**",
            f"• Total Customers: {total_customers:,}",
            f"• Total Orders: {total_orders:,}",
            f"• Total Revenue: ${total_revenue:,.2f}",
            f"• Average Order Value: ${avg_order_value:.2f}",
            "",
            "🎯 **CUSTOMER SEGMENTS**",
            segment_dist.to_string(),
            "",
            "⚠️ **CHURN ANALYSIS**",
            f"• High Risk: {risk_dist.get('High', 0)} customers",
            f"• Medium Risk: {risk_dist.get('Medium', 0)} customers",
            f"• Low Risk: {risk_dist.get('Low', 0)} customers",
            "",
        ]
        return "\n".join(lines)

    def train_churn_model(self):
        """Fit an XGBoost churn classifier on per-customer features.

        Uses the ``churn_label`` column when the data has one; otherwise the
        label is derived as ``recency_days > CHURN_RECENCY_DAYS``. On success
        ``model``, ``feature_importance`` and ``predictions`` are populated.

        Returns:
            A Markdown report string, or a human-readable error message.
        """
        if self.df is None:
            return "No data available. Please upload a CSV file first."

        try:
            # Prepare data for modeling: one row per customer
            customer_features = self.df.groupby('customer_id').agg(
                recency_days=('recency_days', 'first'),
                frequency=('frequency', 'first'),
                monetary=('monetary', 'first'),
                avg_amount=('amount', 'mean'),
                std_amount=('amount', 'std'),
                min_amount=('amount', 'min'),
                max_amount=('amount', 'max'),
                first_order=('order_date', 'min'),
                last_order=('order_date', 'max'),
            ).reset_index()

            # std is NaN for single-order customers. Assign the result back:
            # an inplace fillna on a selected column does not reach the frame
            # under pandas Copy-on-Write.
            customer_features['std_amount'] = customer_features['std_amount'].fillna(0)

            # Calculate additional features
            lifetime = (customer_features['last_order'] - customer_features['first_order']).dt.days
            customer_features['customer_lifetime'] = lifetime.fillna(0)

            # Create churn labels (if not present)
            label_is_derived = 'churn_label' not in self.df.columns
            if label_is_derived:
                customer_features['churn_label'] = (
                    customer_features['recency_days'] > self.CHURN_RECENCY_DAYS
                ).astype(int)
            else:
                churn_labels = self.df.groupby('customer_id')['churn_label'].first().reset_index()
                customer_features = customer_features.merge(churn_labels, on='customer_id')

            # Select features for modeling
            feature_cols = ['recency_days', 'frequency', 'monetary', 'avg_amount', 'std_amount',
                            'min_amount', 'max_amount', 'customer_lifetime']
            if label_is_derived:
                # The derived label is a pure function of recency_days; keeping
                # it as a feature leaks the target and yields a meaningless
                # perfect score.
                feature_cols.remove('recency_days')

            X = customer_features[feature_cols]
            y = customer_features['churn_label']

            class_counts = y.value_counts()
            if len(class_counts) < 2:
                return "Cannot train churn model: all customers fall into a single churn class."

            # Stratification needs at least two members per class and a test
            # split large enough to hold every class; fall back to a plain
            # random split otherwise instead of failing.
            n_test = int(np.ceil(0.2 * len(y)))
            can_stratify = class_counts.min() >= 2 and n_test >= len(class_counts)

            # Split data
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=0.2, random_state=42,
                stratify=y if can_stratify else None)

            # Train XGBoost model
            model = xgb.XGBClassifier(random_state=42, eval_metric='logloss')
            model.fit(X_train, y_train)

            # Model performance on the held-out split
            accuracy = accuracy_score(y_test, model.predict(X_test))

            # Calculate feature importance
            feature_importance = pd.DataFrame({
                'feature': feature_cols,
                'importance': model.feature_importances_
            }).sort_values('importance', ascending=False)

            # Generate predictions for all customers
            customer_features['churn_probability'] = model.predict_proba(X)[:, 1]

            # Publish results only once every step has succeeded
            self.model = model
            self.feature_importance = feature_importance
            self.predictions = customer_features

            lines = [
                "",
                "🤖 **MODEL TRAINING COMPLETED**",
                "• Model: XGBoost Classifier",
                f"• Accuracy: {accuracy:.3f}",
                f"• Features Used: {len(feature_cols)}",
                f"• Training Samples: {len(X_train)}",
                f"• Test Samples: {len(X_test)}",
                "",
                "🔍 **TOP FEATURES**",
                self.feature_importance.head().to_string(index=False),
                "",
            ]
            return "\n".join(lines)

        except Exception as e:
            return f"Error training model: {str(e)}"

    def create_visualizations(self):
        """Return the four dashboard figures (segments, RFM, churn, revenue).

        Returns four ``None`` values when no data has been loaded.
        """
        if self.df is None:
            return None, None, None, None

        return (
            self.create_segment_analysis(),
            self.create_rfm_analysis(),
            self.create_churn_analysis(),
            self.create_revenue_trends(),
        )

    def create_segment_analysis(self):
        """Create a pie chart of how many customers fall into each segment."""
        segment_data = self.df.groupby('customer_id')['Segment'].first().value_counts().reset_index()
        segment_data.columns = ['Segment', 'Count']

        fig = px.pie(segment_data, values='Count', names='Segment',
                     title='Customer Segment Distribution',
                     color_discrete_sequence=px.colors.qualitative.Set3)

        fig.update_traces(textposition='inside', textinfo='percent+label')
        fig.update_layout(height=400, showlegend=True)

        return fig

    def create_rfm_analysis(self):
        """Create a 3D scatter of customers over recency, frequency and monetary value."""
        customer_rfm = self.df.groupby('customer_id').agg({
            'recency_days': 'first',
            'frequency': 'first',
            'monetary': 'first',
            'Segment': 'first'
        }).reset_index()

        fig = px.scatter_3d(customer_rfm, x='recency_days', y='frequency', z='monetary',
                            color='Segment', title='RFM Analysis - 3D Customer Mapping',
                            labels={'recency_days': 'Recency (Days)',
                                    'frequency': 'Frequency (Orders)',
                                    'monetary': 'Monetary (Revenue)'})

        fig.update_layout(height=500)
        return fig

    def create_churn_analysis(self):
        """Create the churn chart.

        Shows a histogram of model churn probabilities when a model has been
        trained, otherwise a bar chart of the rule-based risk levels.
        """
        if self.predictions is not None:
            fig = px.histogram(self.predictions, x='churn_probability', nbins=20,
                               title='Churn Probability Distribution',
                               labels={'churn_probability': 'Churn Probability',
                                       'count': 'Number of Customers'})

            fig.add_vline(x=0.5, line_dash="dash", line_color="red",
                          annotation_text="High Risk Threshold")
            fig.update_layout(height=400)
            return fig

        # Fallback to risk level distribution
        risk_data = self.df.groupby('customer_id')['Churn_Risk'].first().value_counts().reset_index()
        risk_data.columns = ['Risk_Level', 'Count']

        colors_map = {'High': 'red', 'Medium': 'orange', 'Low': 'green'}
        fig = px.bar(risk_data, x='Risk_Level', y='Count',
                     title='Customer Churn Risk Distribution',
                     color='Risk_Level', color_discrete_map=colors_map)
        fig.update_layout(height=400, showlegend=False)
        return fig

    def create_revenue_trends(self):
        """Create a line chart of total revenue per calendar month.

        The month key is computed on a detached Series so that ``self.df`` is
        not altered by drawing a chart.
        """
        order_month = self.df['order_date'].dt.to_period('M').rename('order_month')
        monthly_revenue = self.df.groupby(order_month)['amount'].sum().reset_index()
        monthly_revenue['order_month'] = monthly_revenue['order_month'].astype(str)

        fig = px.line(monthly_revenue, x='order_month', y='amount',
                      title='Monthly Revenue Trends',
                      labels={'amount': 'Revenue ($)', 'order_month': 'Month'})

        fig.update_layout(height=400, xaxis_tickangle=-45)
        return fig

    @staticmethod
    def _append_paragraphs(story, lines, style):
        """Append each entry of ``lines`` to ``story`` as its own Paragraph.

        A single Paragraph would merge bullet and numbered lines into one
        run of text, so every line becomes a separate flowable.
        """
        for line in lines:
            story.append(Paragraph(line, style))

    def generate_pdf_report(self):
        """Build the PDF report for the loaded data.

        Returns:
            The PDF as ``bytes``, or ``None`` when no data is loaded or the
            document could not be built (the error is printed).
        """
        if self.df is None:
            return None

        try:
            buffer = io.BytesIO()
            doc = SimpleDocTemplate(buffer, pagesize=A4, rightMargin=72, leftMargin=72,
                                    topMargin=72, bottomMargin=18)

            styles = getSampleStyleSheet()
            title_style = ParagraphStyle(
                'CustomTitle',
                parent=styles['Heading1'],
                fontSize=24,
                spaceAfter=30,
                textColor=colors.darkblue,
                alignment=1  # Center alignment
            )
            normal = styles['Normal']
            heading = styles['Heading2']

            total_customers = self.df['customer_id'].nunique()
            total_revenue = self.df['amount'].sum()
            avg_order_value = self.df['amount'].mean()

            per_customer = self.df.groupby('customer_id')[['Segment', 'Churn_Risk']].first()
            segment_dist = per_customer['Segment'].value_counts()
            risk_dist = per_customer['Churn_Risk'].value_counts()
            high_risk_customers = risk_dist.get('High', 0)

            story = []

            # Title
            story.append(Paragraph("B2B Customer Analytics Report", title_style))
            story.append(Spacer(1, 20))

            # Executive Summary
            story.append(Paragraph("Executive Summary", heading))
            self._append_paragraphs(story, [
                f"This report provides a comprehensive analysis of {total_customers} B2B customers based on their "
                f"transaction history and behavioral patterns. The analysis reveals total revenue of ${total_revenue:,.2f} "
                f"with an average order value of ${avg_order_value:.2f}.",
                f"Key findings indicate {high_risk_customers} customers are at high risk of churning, requiring "
                "immediate attention to prevent revenue loss. The customer segmentation analysis identifies "
                "opportunities for targeted marketing and retention strategies.",
            ], normal)
            story.append(Spacer(1, 20))

            # Key Metrics Table
            story.append(Paragraph("Key Performance Metrics", heading))

            metrics_data = [
                ['Metric', 'Value'],
                ['Total Customers', f"{total_customers:,}"],
                ['Total Revenue', f"${total_revenue:,.2f}"],
                ['Average Order Value', f"${avg_order_value:.2f}"],
                ['Champions', f"{segment_dist.get('Champions', 0)}"],
                ['At Risk Customers', f"{segment_dist.get('At Risk', 0)}"],
                ['High Risk Churn', f"{risk_dist.get('High', 0)}"],
                ['Low Risk Churn', f"{risk_dist.get('Low', 0)}"]
            ]

            metrics_table = Table(metrics_data, colWidths=[3*inch, 2*inch])
            metrics_table.setStyle(TableStyle([
                ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
                ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
                ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
                ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
                ('FONTSIZE', (0, 0), (-1, 0), 14),
                ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
                ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
                ('GRID', (0, 0), (-1, -1), 1, colors.black)
            ]))

            story.append(metrics_table)
            story.append(Spacer(1, 20))

            # Customer Segments Analysis
            story.append(Paragraph("Customer Segmentation Analysis", heading))
            self._append_paragraphs(story, [
                "Customer segmentation based on RFM (Recency, Frequency, Monetary) analysis reveals distinct "
                "customer groups with different behavioral patterns and value propositions:",
                "• Champions: High-value customers who buy frequently and recently",
                "• Loyal Customers: Consistent buyers with good purchase history",
                "• At Risk: Previously good customers showing declining engagement",
                "• Lost Customers: Haven't purchased recently, need win-back campaigns",
            ], normal)
            story.append(Spacer(1, 20))

            # Recommendations
            story.append(Paragraph("Strategic Recommendations", heading))
            self._append_paragraphs(story, [
                "Based on the analysis, we recommend the following actions:",
                "1. Immediate Attention: Contact high-risk customers within 48 hours to prevent churn",
                "2. Retention Programs: Develop targeted campaigns for 'At Risk' segment customers",
                "3. Loyalty Rewards: Enhance programs for Champions and Loyal Customers to maintain engagement",
                "4. Win-back Campaigns: Create special offers for Lost Customers to reactivate them",
                "5. Predictive Monitoring: Implement real-time churn prediction alerts",
            ], normal)

            # Build PDF
            doc.build(story)
            buffer.seek(0)

            return buffer.getvalue()

        except Exception as e:
            print(f"Error generating PDF: {str(e)}")
            return None


# Initialize the analytics engine (shared by all UI callbacks)
analytics = B2BCustomerAnalytics()
456
+
457
def process_file(file):
    """Run the full pipeline for an uploaded CSV and return values for the 8 UI outputs.

    Returns:
        ``(status, summary, preview, model_results, fig1, fig2, fig3, fig4)``.
        The tuple always has eight entries, one per output component wired to
        the analyze button; entries that could not be produced are
        ``None``/empty.
    """
    if file is None:
        return "Please upload a CSV file", "", None, "", None, None, None, None

    # Load and process data
    status, summary, preview = analytics.load_and_process_data(file)

    # A preview is only returned on success, so it is a more robust signal
    # than searching the status message for a substring.
    if preview is None:
        return status, summary, preview, "", None, None, None, None

    # Train model
    model_results = analytics.train_churn_model()

    # Create visualizations
    fig1, fig2, fig3, fig4 = analytics.create_visualizations()

    return status, summary, preview, model_results, fig1, fig2, fig3, fig4
475
+
476
def download_report():
    """Generate the PDF report and hand it to the ``gr.File`` output.

    The report bytes are written to a temporary ``.pdf`` file because a file
    output is populated from a filesystem path, not from raw bytes. The
    returned update also makes the output component visible so the user can
    actually download the result.

    Returns:
        A component update carrying the file path, or ``None`` when no report
        could be generated (e.g. no data has been loaded yet).
    """
    import os
    import tempfile

    pdf_data = analytics.generate_pdf_report()
    if not pdf_data:
        return None

    fd, pdf_path = tempfile.mkstemp(prefix="b2b_customer_report_", suffix=".pdf")
    with os.fdopen(fd, "wb") as handle:
        handle.write(pdf_data)

    return gr.update(value=pdf_path, visible=True)
483
+
484
# Create Gradio Interface: upload + actions on top, then status/summary,
# data preview, model report and a 2x2 grid of charts.
with gr.Blocks(title="B2B Customer Analytics", theme=gr.themes.Soft()) as app:
    # The column list below mirrors what the loader actually validates:
    # only customer_id, order_date and amount are mandatory.
    gr.Markdown("""
    # 🏢 B2B Customer Analytics Platform

    Upload your customer transaction data (CSV format) to get comprehensive insights including:
    - **Customer Segmentation** (RFM Analysis)
    - **Churn Prediction** (ML-powered)
    - **Revenue Analysis** & Trends
    - **Strategic Recommendations**
    - **Downloadable PDF Report**

    ### Required CSV Format:
    `customer_id, order_date, amount` (minimum required columns)

    Optional columns: `order_id, recency_days, frequency, monetary, churn_label`
    """)

    with gr.Row():
        with gr.Column():
            file_input = gr.File(label="Upload Customer Data (CSV)", file_types=[".csv"])
            analyze_btn = gr.Button("🔍 Analyze Customer Data", variant="primary", size="lg")

        with gr.Column():
            download_btn = gr.Button("📄 Download PDF Report", variant="secondary", size="lg")
            pdf_output = gr.File(label="PDF Report", visible=False)

    # Status and Summary
    with gr.Row():
        status_output = gr.Textbox(label="Status", interactive=False)
        summary_output = gr.Markdown(label="Data Summary")

    # Data Preview
    data_preview = gr.Dataframe(label="Data Preview", interactive=False)

    # Model Results
    model_output = gr.Markdown(label="Model Training Results")

    # Visualizations
    with gr.Row():
        with gr.Column():
            plot1 = gr.Plot(label="Customer Segments")
            plot3 = gr.Plot(label="Churn Analysis")
        with gr.Column():
            plot2 = gr.Plot(label="RFM Analysis")
            plot4 = gr.Plot(label="Revenue Trends")

    # Event handlers; the outputs list order must match process_file's return order
    analyze_btn.click(
        fn=process_file,
        inputs=[file_input],
        outputs=[status_output, summary_output, data_preview, model_output, plot1, plot2, plot3, plot4]
    )

    download_btn.click(
        fn=download_report,
        outputs=[pdf_output]
    )

if __name__ == "__main__":
    app.launch()