Spaces:

entropy25
/

customer

Sleeping

App Files Files Community

entropy25 committed on Aug 29, 2025

Commit

7ecef08

verified ·

1 Parent(s): 61d745b

Update app.py

Browse files

Files changed (1) hide show

app.py +797 -443

app.py CHANGED Viewed

@@ -6,16 +6,19 @@ import matplotlib.pyplot as plt
 import seaborn as sns
 from sklearn.model_selection import train_test_split, cross_val_score
 from sklearn.ensemble import RandomForestClassifier
-from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_auc_score
 import plotly.express as px
 import plotly.graph_objects as go
 from datetime import datetime, timedelta
 import io
 import base64
 import warnings
 warnings.filterwarnings('ignore')
-# Optional imports with fallbacks
 try:
     import xgboost as xgb
     XGBOOST_AVAILABLE = True
@@ -28,157 +31,178 @@ try:
     from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
     from reportlab.lib.units import inch
     from reportlab.lib import colors
-    from reportlab.graphics.shapes import Drawing
-    from reportlab.graphics.charts.piecharts import Pie
-    from reportlab.graphics.charts.barcharts import VerticalBarChart
-    from reportlab.graphics import renderPDF
     REPORTLAB_AVAILABLE = True
 except ImportError:
     REPORTLAB_AVAILABLE = False
-# Configuration
-CONFIG = {
     'churn_threshold_days': 90,
     'high_risk_probability': 0.7,
     'rfm_quantiles': 5,
-    'min_customers_for_training': 10
 }
 COLORS = {
     'primary': '#6366f1',
-    'success': '#10b981',
     'warning': '#f59e0b',
     'danger': '#ef4444',
-    'purple': '#8b5cf6'
 }
 class DataProcessor:
-    """Handles data loading, cleaning, and validation"""
     @staticmethod
-    def load_and_validate(file_path):
         """Load and validate CSV file"""
-        df = pd.read_csv(file_path)
-        # Column mapping
-        column_map = DataProcessor._map_columns(df.columns)
-        df = df.rename(columns=column_map)
-        # Data cleaning
-        df = DataProcessor._clean_data(df)
-        return df
     @staticmethod
-    def _map_columns(columns):
-        """Map various column name formats to standard names"""
         mapping = {}
-        columns_lower = [col.lower().strip() for col in columns]
-        variations = {
-            'customer_id': ['customer', 'cust_id', 'id', 'customerid', 'client_id'],
-            'order_date': ['date', 'orderdate', 'purchase_date', 'transaction_date'],
-            'amount': ['revenue', 'value', 'price', 'total', 'sales', 'order_value']
         }
-        for standard_name, variants in variations.items():
-            for col, col_lower in zip(columns, columns_lower):
-                if (standard_name in col_lower or
-                    any(variant in col_lower for variant in variants)):
-                    mapping[col] = standard_name
                     break
         return mapping
     @staticmethod
-    def _clean_data(df):
-        """Clean and convert data types"""
-        required_cols = ['customer_id', 'order_date', 'amount']
-        # Check required columns
-        missing_cols = [col for col in required_cols if col not in df.columns]
-        if missing_cols:
-            raise ValueError(f"Missing columns: {missing_cols}")
-        # Convert data types
         df['customer_id'] = df['customer_id'].astype(str)
         df['order_date'] = pd.to_datetime(df['order_date'], errors='coerce')
         df['amount'] = pd.to_numeric(df['amount'], errors='coerce')
         # Remove invalid rows
-        df = df.dropna(subset=required_cols)
-        df = df[df['amount'] > 0]  # Remove negative/zero amounts
         return df
-class FeatureEngineering:
-    """Advanced feature engineering for customer analytics"""
     @staticmethod
-    def calculate_rfm_features(df):
-        """Calculate RFM and additional behavioral features"""
         current_date = df['order_date'].max() + timedelta(days=1)
-        # Basic RFM
-        customer_features = df.groupby('customer_id').agg({
-            'order_date': ['min', 'max', 'count'],
             'amount': ['sum', 'mean', 'std', 'min', 'max']
         })
-        # Flatten columns
-        customer_features.columns = [
-            'first_order', 'last_order', 'frequency',
-            'monetary', 'avg_amount', 'std_amount', 'min_amount', 'max_amount'
         ]
-        # Calculate derived features
-        customer_features['recency_days'] = (current_date - customer_features['last_order']).dt.days
-        customer_features['customer_lifetime_days'] = (customer_features['last_order'] - customer_features['first_order']).dt.days
-        customer_features['std_amount'] = customer_features['std_amount'].fillna(0)
-        # Behavioral features
-        customer_features['order_frequency'] = customer_features['frequency'] / (customer_features['customer_lifetime_days'] + 1)
-        customer_features['amount_trend'] = customer_features['max_amount'] / customer_features['min_amount']
-        customer_features['amount_consistency'] = 1 - (customer_features['std_amount'] / customer_features['avg_amount']).fillna(0)
-        return customer_features.reset_index()
 class CustomerSegmenter:
-    """Customer segmentation using RFM analysis"""
     @staticmethod
-    def perform_segmentation(customer_features):
-        """Segment customers based on RFM scores"""
-        df = customer_features.copy()
         # Calculate RFM scores
-        if len(df) >= CONFIG['rfm_quantiles']:
-            df['r_score'] = pd.qcut(df['recency_days'], CONFIG['rfm_quantiles'],
-                                   labels=[5,4,3,2,1], duplicates='drop')
-            df['f_score'] = pd.qcut(df['frequency'], CONFIG['rfm_quantiles'],
-                                   labels=[1,2,3,4,5], duplicates='drop')
-            df['m_score'] = pd.qcut(df['monetary'], CONFIG['rfm_quantiles'],
-                                   labels=[1,2,3,4,5], duplicates='drop')
         else:
-            # Simple scoring for small datasets
-            df['r_score'] = pd.cut(df['recency_days'], bins=3, labels=[3,2,1])
-            df['f_score'] = pd.cut(df['frequency'], bins=3, labels=[1,2,3])
-            df['m_score'] = pd.cut(df['monetary'], bins=3, labels=[1,2,3])
-        # Convert to numeric
-        for col in ['r_score', 'f_score', 'm_score']:
             df[col] = pd.to_numeric(df[col], errors='coerce').fillna(3).astype(int)
-        # Segment assignment
-        df['segment'] = df.apply(CustomerSegmenter._assign_segment, axis=1)
-        df['churn_risk'] = df['segment'].map(CustomerSegmenter._get_risk_mapping())
         return df
     @staticmethod
-    def _assign_segment(row):
         """Assign customer segment based on RFM scores"""
-        r, f, m = row['r_score'], row['f_score'], row['m_score']
         if r >= 4 and f >= 4 and m >= 4:
             return 'Champions'
@@ -191,59 +215,77 @@ class CustomerSegmenter:
         elif r <= 2 and f >= 3:
             return 'At Risk'
         elif r <= 2 and f <= 2 and m >= 3:
-            return 'Cannot Lose'
         elif r <= 2 and f <= 2 and m <= 2:
-            return 'Lost'
         else:
             return 'Others'
     @staticmethod
-    def _get_risk_mapping():
-        """Map segments to risk levels"""
-        return {
-            'Champions': 'Low',
-            'Loyal Customers': 'Low',
-            'Potential Loyalists': 'Medium',
-            'New Customers': 'Low',
-            'At Risk': 'High',
-            'Cannot Lose': 'High',
-            'Lost': 'High',
-            'Others': 'Medium'
-        }
 class ChurnPredictor:
-    """Machine learning model for churn prediction"""
     def __init__(self):
         self.model = None
         self.feature_importance = None
-    def train(self, customer_features):
         """Train churn prediction model"""
-        df = customer_features.copy()
-        # Create target variable
-        df['churn_label'] = (df['recency_days'] > CONFIG['churn_threshold_days']).astype(int)
-        # Validate data
-        if len(df) < CONFIG['min_customers_for_training']:
-            raise ValueError(f"Insufficient data: need at least {CONFIG['min_customers_for_training']} customers")
-        if df['churn_label'].nunique() < 2:
-            raise ValueError("All customers have same churn status - cannot train model")
-        # Select features
-        feature_cols = [
-            'recency_days', 'frequency', 'monetary', 'avg_amount', 'std_amount',
-            'customer_lifetime_days', 'order_frequency', 'amount_trend', 'amount_consistency'
-        ]
-        X = df[feature_cols].fillna(0)
-        y = df['churn_label']
-        # Train model
-        self.model = self._get_best_model()
-        self.model.fit(X, y)
         # Feature importance
         self.feature_importance = pd.DataFrame({
@@ -251,181 +293,201 @@ class ChurnPredictor:
             'importance': self.model.feature_importances_
         }).sort_values('importance', ascending=False)
-        # Model evaluation
-        cv_scores = cross_val_score(self.model, X, y, cv=5, scoring='roc_auc')
-        # Predictions for all customers
-        df['churn_probability'] = self.model.predict_proba(X)[:, 1]
-        return {
-            'model_type': type(self.model).__name__,
-            'cv_auc_mean': cv_scores.mean(),
-            'cv_auc_std': cv_scores.std(),
-            'feature_importance': self.feature_importance,
-            'predictions': df
         }
-    def _get_best_model(self):
-        """Select best available model"""
-        if XGBOOST_AVAILABLE:
-            try:
-                return xgb.XGBClassifier(random_state=42, eval_metric='logloss')
-            except:
-                pass
-        return RandomForestClassifier(random_state=42, n_estimators=100)
-class Visualizer:
-    """Create interactive visualizations"""
     @staticmethod
-    def create_segment_chart(df):
-        """Customer segment distribution"""
-        segment_counts = df['segment'].value_counts()
         fig = px.pie(
-            values=segment_counts.values,
-            names=segment_counts.index,
             title='Customer Segment Distribution',
             hole=0.4,
-            color_discrete_sequence=px.colors.qualitative.Set3
         )
-        fig.update_layout(height=400, title_x=0.5)
         return fig
     @staticmethod
-    def create_rfm_scatter(df):
-        """RFM behavior matrix"""
         fig = px.scatter(
-            df, x='recency_days', y='frequency', size='monetary',
-            color='segment', title='Customer Behavior Matrix (RFM)',
-            labels={'recency_days': 'Days Since Last Order', 'frequency': 'Order Count'}
         )
-        fig.update_layout(height=400, title_x=0.5)
         return fig
     @staticmethod
-    def create_churn_distribution(df):
-        """Churn probability distribution"""
-        if 'churn_probability' in df.columns:
             fig = px.histogram(
-                df, x='churn_probability', nbins=20,
                 title='Churn Probability Distribution',
-                labels={'churn_probability': 'Churn Probability'}
             )
-            fig.add_vline(x=CONFIG['high_risk_probability'], line_dash="dash",
-                         line_color="red", annotation_text="High Risk Threshold")
         else:
-            risk_counts = df['churn_risk'].value_counts()
-            colors = {'High': COLORS['danger'], 'Medium': COLORS['warning'], 'Low': COLORS['success']}
             fig = px.bar(
-                x=risk_counts.index, y=risk_counts.values,
-                title='Churn Risk Distribution',
-                color=risk_counts.index, color_discrete_map=colors
             )
-        fig.update_layout(height=400, title_x=0.5)
         return fig
     @staticmethod
-    def create_feature_importance_chart(feature_importance):
-        """Feature importance visualization"""
         fig = px.bar(
-            feature_importance.head(8), x='importance', y='feature',
-            orientation='h', title='Feature Importance Analysis',
-            color='importance', color_continuous_scale='viridis'
         )
-        fig.update_layout(height=500, title_x=0.5, yaxis={'categoryorder': 'total ascending'})
         return fig
 class ReportGenerator:
-    """Generate dashboards and PDF reports"""
-    @staticmethod
-    def create_dashboard(df, model_results=None):
-        """Generate HTML dashboard"""
-        total_customers = len(df)
-        total_revenue = df['monetary'].sum()
-        avg_order_value = df['avg_amount'].mean()
-        high_risk_count = len(df[df['churn_risk'] == 'High'])
-        dashboard_html = f"""
-        <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 1rem; margin-bottom: 2rem;">
-            <div style="background: linear-gradient(135deg, {COLORS['primary']}, #4f46e5); padding: 1.5rem; border-radius: 12px; color: white; text-align: center;">
-                <h3 style="margin: 0 0 0.5rem 0; font-size: 0.9rem; opacity: 0.9;">Total Customers</h3>
-                <div style="font-size: 2.5rem; font-weight: bold;">{total_customers:,}</div>
-            </div>
-            <div style="background: linear-gradient(135deg, {COLORS['success']}, #047857); padding: 1.5rem; border-radius: 12px; color: white; text-align: center;">
-                <h3 style="margin: 0 0 0.5rem 0; font-size: 0.9rem; opacity: 0.9;">Total Revenue</h3>
-                <div style="font-size: 2.5rem; font-weight: bold;">${total_revenue/1000:.0f}K</div>
-            </div>
-            <div style="background: linear-gradient(135deg, {COLORS['purple']}, #6d28d9); padding: 1.5rem; border-radius: 12px; color: white; text-align: center;">
-                <h3 style="margin: 0 0 0.5rem 0; font-size: 0.9rem; opacity: 0.9;">Avg Order Value</h3>
-                <div style="font-size: 2.5rem; font-weight: bold;">${avg_order_value:.0f}</div>
-            </div>
-            <div style="background: linear-gradient(135deg, {COLORS['danger']}, #dc2626); padding: 1.5rem; border-radius: 12px; color: white; text-align: center;">
-                <h3 style="margin: 0 0 0.5rem 0; font-size: 0.9rem; opacity: 0.9;">High Risk</h3>
-                <div style="font-size: 2.5rem; font-weight: bold;">{high_risk_count}</div>
-            </div>
-        </div>
-        """
-        if model_results:
-            dashboard_html += f"""
-            <div style="background: #f8fafc; padding: 1.5rem; border-radius: 12px; border-left: 4px solid {COLORS['primary']}; margin-top: 1rem;">
-                <h4 style="margin: 0 0 1rem 0; color: #374151;">Model Performance</h4>
-                <p><strong>Model:</strong> {model_results['model_type']}</p>
-                <p><strong>Cross-validation AUC:</strong> {model_results['cv_auc_mean']:.3f} ± {model_results['cv_auc_std']:.3f}</p>
-            </div>
-            """
-        return dashboard_html
     @staticmethod
-    def generate_pdf_report(df, model_results=None):
-        """Generate comprehensive PDF report"""
         if not REPORTLAB_AVAILABLE:
-            raise ImportError("ReportLab is required for PDF generation")
         buffer = io.BytesIO()
-        doc = SimpleDocTemplate(buffer, pagesize=A4, rightMargin=72, leftMargin=72,
-                               topMargin=72, bottomMargin=18)
         styles = getSampleStyleSheet()
         story = []
         # Title
         title_style = ParagraphStyle('CustomTitle', parent=styles['Title'],
-                                    fontSize=24, spaceAfter=30, alignment=1)
         story.append(Paragraph("B2B Customer Analytics Report", title_style))
         story.append(Spacer(1, 12))
-        # Executive Summary
         story.append(Paragraph("Executive Summary", styles['Heading2']))
-        total_customers = len(df)
-        total_revenue = df['monetary'].sum()
-        avg_revenue = df['monetary'].mean()
         summary_text = f"""
-        <para>This comprehensive analysis covers <b>{total_customers:,}</b> customers with
-        total revenue of <b>${total_revenue:,.0f}</b>. The average customer lifetime value
-        is <b>${avg_revenue:.0f}</b>.</para>
-        <para>Customers have been segmented using advanced RFM analysis, and machine learning
-        models have been applied for churn prediction.</para>
         """
         story.append(Paragraph(summary_text, styles['Normal']))
-        story.append(Spacer(1, 12))
-        # Customer Segments
-        story.append(Paragraph("Customer Segmentation", styles['Heading2']))
-        segment_data = df['segment'].value_counts()
-        segment_table_data = [['Segment', 'Count', 'Percentage']]
-        for segment, count in segment_data.items():
-            percentage = f"{count/len(df)*100:.1f}%"
-            segment_table_data.append([segment, str(count), percentage])
-        segment_table = Table(segment_table_data)
         segment_table.setStyle(TableStyle([
             ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
             ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
@@ -437,256 +499,548 @@ class ReportGenerator:
             ('GRID', (0, 0), (-1, -1), 1, colors.black)
         ]))
         story.append(segment_table)
-        story.append(Spacer(1, 12))
-        # Model Performance
-        if model_results:
-            story.append(Paragraph("Churn Prediction Model", styles['Heading2']))
             model_text = f"""
-            <para><b>Model Type:</b> {model_results['model_type']}</para>
-            <para><b>Cross-validation AUC:</b> {model_results['cv_auc_mean']:.3f} ± {model_results['cv_auc_std']:.3f}</para>
-            <para>The model uses advanced feature engineering including behavioral patterns
-            and customer lifecycle metrics for accurate churn prediction.</para>
             """
             story.append(Paragraph(model_text, styles['Normal']))
-            story.append(Spacer(1, 12))
-            # Top features
-            if not model_results['feature_importance'].empty:
-                story.append(Paragraph("Key Predictive Features", styles['Heading3']))
-                feature_table_data = [['Feature', 'Importance']]
-                for _, row in model_results['feature_importance'].head(5).iterrows():
-                    feature_table_data.append([row['feature'].replace('_', ' ').title(), f"{row['importance']:.3f}"])
-                feature_table = Table(feature_table_data)
-                feature_table.setStyle(TableStyle([
-                    ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
-                    ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
-                    ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
-                    ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
-                    ('GRID', (0, 0), (-1, -1), 1, colors.black)
-                ]))
-                story.append(feature_table)
-        # Build PDF
         doc.build(story)
         pdf_bytes = buffer.getvalue()
         buffer.close()
         return pdf_bytes
-class B2BAnalyticsApp:
-    """Main application orchestrator"""
     def __init__(self):
         self.raw_data = None
-        self.customer_features = None
-        self.segmented_data = None
-        self.model_results = None
-        self.predictor = ChurnPredictor()
-    def load_data(self, file):
-        """Load and process uploaded file"""
-        try:
-            if file is None:
-                return "Please upload a CSV file", None, None
-            # Load and process data
-            self.raw_data = DataProcessor.load_and_validate(file.name)
-            self.customer_features = FeatureEngineering.calculate_rfm_features(self.raw_data)
-            self.segmented_data = CustomerSegmenter.perform_segmentation(self.customer_features)
             # Generate dashboard
-            dashboard = ReportGenerator.create_dashboard(self.segmented_data)
-            preview = self.segmented_data.head(20)
-            status = f"Successfully processed {len(self.segmented_data)} customers from {len(self.raw_data)} transactions"
-            return status, dashboard, preview
-        except Exception as e:
-            return f"Error: {str(e)}", None, None
-    def train_churn_model(self):
         """Train churn prediction model"""
-        try:
-            if self.segmented_data is None:
-                return "Please load data first", None
-            self.model_results = self.predictor.train(self.segmented_data)
-            # Update dashboard with model results
-            dashboard = ReportGenerator.create_dashboard(self.segmented_data, self.model_results)
-            # Create feature importance chart
-            importance_chart = Visualizer.create_feature_importance_chart(
-                self.model_results['feature_importance']
             )
-            return dashboard, importance_chart
-        except Exception as e:
-            return f"Error: {str(e)}", None
-    def create_visualizations(self):
-        """Generate all visualization charts"""
-        if self.segmented_data is None:
-            return None, None, None
-        try:
-            # Use predictions if available, otherwise use segmented data
-            data_for_viz = (self.model_results['predictions'] if self.model_results
-                           else self.segmented_data)
-            segment_chart = Visualizer.create_segment_chart(data_for_viz)
-            rfm_chart = Visualizer.create_rfm_scatter(data_for_viz)
-            churn_chart = Visualizer.create_churn_distribution(data_for_viz)
-            return segment_chart, rfm_chart, churn_chart
-        except Exception as e:
-            print(f"Visualization error: {e}")
-            return None, None, None
-    def get_customer_summary_table(self):
-        """Generate customer summary table"""
-        if self.segmented_data is None:
             return None
-        try:
-            display_data = self.segmented_data.copy()
-            # Add predictions if available
-            if self.model_results:
-                pred_data = self.model_results['predictions']
-                display_data = display_data.merge(
-                    pred_data[['customer_id', 'churn_probability']],
-                    on='customer_id', how='left'
-                )
-                display_data['churn_probability'] = (display_data['churn_probability'] * 100).round(1)
-            else:
-                display_data['churn_probability'] = 50.0
-            # Select and format columns
-            summary_table = display_data[[
-                'customer_id', 'segment', 'churn_risk', 'recency_days',
-                'frequency', 'monetary', 'avg_amount', 'churn_probability'
-            ]].round(2)
-            summary_table.columns = [
-                'Customer ID', 'Segment', 'Risk Level', 'Recency (Days)',
-                'Orders', 'Total Revenue ($)', 'Avg Order ($)', 'Churn Risk (%)'
-            ]
-            return summary_table.head(100)
-        except Exception as e:
-            print(f"Table generation error: {e}")
-            return None
-    def generate_pdf_report(self):
-        """Generate and return PDF report"""
-        try:
-            if self.segmented_data is None:
-                return None
-            pdf_bytes = ReportGenerator.generate_pdf_report(
-                self.segmented_data, self.model_results
-            )
-            # Save to temporary file for download
-            import tempfile
-            with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
-                tmp_file.write(pdf_bytes)
-                return tmp_file.name
-        except Exception as e:
-            print(f"PDF generation error: {e}")
-            return None
-def create_interface():
-    """Create Gradio interface"""
-    app = B2BAnalyticsApp()
-    with gr.Blocks(theme=gr.themes.Soft(), title="B2B Customer Analytics") as demo:
         gr.HTML("""
-        <div style="background: linear-gradient(135deg, #6366f1 0%, #8b5cf6 100%);
-                    padding: 2rem; border-radius: 1rem; color: white; text-align: center; margin-bottom: 2rem;">
-            <h1 style="font-size: 2.5rem; font-weight: bold; margin-bottom: 0.5rem;">
-                B2B Customer Analytics Platform
-            </h1>
-            <p style="font-size: 1.1rem; opacity: 0.9;">
-                Advanced Customer Segmentation & Churn Prediction
-            </p>
         </div>
         """)
         with gr.Tabs():
-            # Data Upload Tab
-            with gr.Tab("Data Upload & Dashboard"):
                 with gr.Row():
-                    file_input = gr.File(label="Upload Customer Data CSV", file_types=[".csv"])
-                    load_btn = gr.Button("Load & Process Data", variant="primary", size="lg")
-                load_status = gr.Textbox(label="Status", interactive=False)
-                dashboard_display = gr.HTML()
-                data_preview = gr.DataFrame(label="Data Preview")
-            # Segmentation Tab
-            with gr.Tab("Customer Segmentation"):
                 with gr.Row():
-                    segment_chart = gr.Plot(label="Customer Segments")
-                    rfm_chart = gr.Plot(label="RFM Analysis")
-                customer_table = gr.DataFrame(label="Customer Summary")
-            # Churn Prediction Tab
-            with gr.Tab("Churn Prediction"):
-                train_btn = gr.Button("Train Churn Model", variant="primary", size="lg")
-                model_dashboard = gr.HTML()
                 with gr.Row():
-                    importance_chart = gr.Plot(label="Feature Importance")
-                    churn_dist_chart = gr.Plot(label="Churn Risk Distribution")
-            # Reports Tab
-            with gr.Tab("Reports"):
-                report_btn = gr.Button("Generate PDF Report", variant="primary", size="lg")
-                report_status = gr.Textbox(label="Status", interactive=False)
-                report_file = gr.File(label="Download Report")
-        # Event handlers
-        def load_and_visualize(file):
-            status, dashboard, preview = app.load_data(file)
-            if "Successfully" in status:
-                charts = app.create_visualizations()
-                table = app.get_customer_summary_table()
-                return status, dashboard, preview, charts[0], charts[1], table
-            return status, dashboard, preview, None, None, None
-        def train_and_update():
-            dashboard, importance = app.train_churn_model()
-            if "Error" not in dashboard:
-                charts = app.create_visualizations()
-                return dashboard, importance, charts[2]
-            return dashboard, importance, None
-        def generate_report():
-            report_path = app.generate_pdf_report()
-            if report_path:
-                return "PDF report generated successfully", report_path
-            return "Error generating PDF report", None
-        # Connect events
         load_btn.click(
-            fn=load_and_visualize,
-            inputs=[file_input],
-            outputs=[load_status, dashboard_display, data_preview,
-                    segment_chart, rfm_chart, customer_table]
         )
         train_btn.click(
-            fn=train_and_update,
-            outputs=[model_dashboard, importance_chart, churn_dist_chart]
-        )

 import seaborn as sns
 from sklearn.model_selection import train_test_split, cross_val_score
 from sklearn.ensemble import RandomForestClassifier
+from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_auc_score, precision_recall_curve
 import plotly.express as px
 import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+import plotly.io as pio
 from datetime import datetime, timedelta
 import io
 import base64
 import warnings
+from typing import Optional, Tuple, Dict, Any
 warnings.filterwarnings('ignore')
+# Try importing optional dependencies
 try:
     import xgboost as xgb
     XGBOOST_AVAILABLE = True
     from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
     from reportlab.lib.units import inch
     from reportlab.lib import colors
     REPORTLAB_AVAILABLE = True
 except ImportError:
     REPORTLAB_AVAILABLE = False
+# Business configuration
+BUSINESS_CONFIG = {
     'churn_threshold_days': 90,
     'high_risk_probability': 0.7,
     'rfm_quantiles': 5,
+    'min_customers_for_model': 10
 }
+# UI color scheme
 COLORS = {
     'primary': '#6366f1',
+    'success': '#10b981',
     'warning': '#f59e0b',
     'danger': '#ef4444',
+    'purple': '#8b5cf6',
+    'pink': '#ec4899',
+    'blue': '#3b82f6',
+    'indigo': '#6366f1'
 }
 class DataProcessor:
+    """Handles data loading, validation, and preprocessing"""
     @staticmethod
+    def load_and_validate(file) -> Tuple[Optional[pd.DataFrame], str]:
         """Load and validate CSV file"""
+        if file is None:
+            return None, "Please upload a CSV file"
+        try:
+            df = pd.read_csv(file.name)
+            # Flexible column mapping
+            column_mapping = DataProcessor._map_columns(df.columns)
+            if not column_mapping:
+                return None, f"Required columns not found. Available: {list(df.columns)}"
+            df = df.rename(columns=column_mapping)
+            # Clean and validate data
+            initial_rows = len(df)
+            df = DataProcessor._clean_data(df)
+            final_rows = len(df)
+            if final_rows == 0:
+                return None, "No valid data after cleaning"
+            status = f"Data loaded successfully! {final_rows} records from {df['customer_id'].nunique()} customers"
+            if initial_rows != final_rows:
+                status += f" ({initial_rows - final_rows} invalid rows removed)"
+            return df, status
+        except Exception as e:
+            return None, f"Error loading data: {str(e)}"
     @staticmethod
+    def _map_columns(columns) -> Dict[str, str]:
+        """Map CSV columns to standard names"""
+        required = ['customer_id', 'order_date', 'amount']
         mapping = {}
+        column_variations = {
+            'customer_id': ['customer', 'cust_id', 'id', 'customerid', 'client_id', 'customer_id'],
+            'order_date': ['date', 'order_date', 'orderdate', 'purchase_date', 'transaction_date'],
+            'amount': ['revenue', 'value', 'price', 'total', 'sales', 'order_value', 'amount']
         }
+        for req_col in required:
+            found = False
+            for col in columns:
+                col_lower = col.lower().strip()
+                if col_lower == req_col or any(var in col_lower for var in column_variations[req_col]):
+                    mapping[col] = req_col
+                    found = True
                     break
+            if not found:
+                return {}
         return mapping
     @staticmethod
+    def _clean_data(df: pd.DataFrame) -> pd.DataFrame:
+        """Clean and prepare data"""
+        df = df.copy()
         df['customer_id'] = df['customer_id'].astype(str)
         df['order_date'] = pd.to_datetime(df['order_date'], errors='coerce')
         df['amount'] = pd.to_numeric(df['amount'], errors='coerce')
         # Remove invalid rows
+        df = df.dropna(subset=['customer_id', 'order_date', 'amount'])
+        df = df[df['amount'] > 0]  # Remove zero and negative amounts
         return df
class RFMAnalyzer:
    """Handles RFM analysis and customer metrics calculation"""

    @staticmethod
    def calculate_rfm_metrics(df: pd.DataFrame,
                              reference_date: Optional[pd.Timestamp] = None) -> pd.DataFrame:
        """Aggregate order rows into one row of RFM metrics per customer.

        Args:
            df: Cleaned orders with ``customer_id``, ``order_date`` (datetime)
                and ``amount`` (numeric) columns.
            reference_date: Date that recency is measured against. Defaults to
                the day after the latest order in ``df``, so the most recent
                customer has a recency of one day.

        Returns:
            Frame with ``customer_id`` plus last/first order date, frequency
            (order count), monetary (total spend), average/std/min/max order
            amount, ``recency_days`` and ``customer_lifetime_days``.
        """
        if reference_date is None:
            current_date = df['order_date'].max() + timedelta(days=1)
        else:
            current_date = pd.Timestamp(reference_date)

        # Named aggregation yields flat, self-describing column names directly.
        customer_metrics = df.groupby('customer_id').agg(
            last_order_date=('order_date', 'max'),
            frequency=('order_date', 'count'),
            first_order_date=('order_date', 'min'),
            monetary=('amount', 'sum'),
            avg_order_value=('amount', 'mean'),
            std_amount=('amount', 'std'),
            min_amount=('amount', 'min'),
            max_amount=('amount', 'max'),
        )

        # Calculate additional features
        customer_metrics['recency_days'] = (
            current_date - customer_metrics['last_order_date']
        ).dt.days
        customer_metrics['customer_lifetime_days'] = (
            customer_metrics['last_order_date'] - customer_metrics['first_order_date']
        ).dt.days
        # Sample std is NaN for single-order customers; treat that as no spread.
        customer_metrics['std_amount'] = customer_metrics['std_amount'].fillna(0)
        customer_metrics['customer_lifetime_days'] = (
            customer_metrics['customer_lifetime_days'].fillna(0)
        )
        return customer_metrics.reset_index()
 class CustomerSegmenter:
     """Handles customer segmentation based on RFM analysis"""

     # Churn risk implied by each segment; any segment not listed is low risk.
     _RISK_BY_SEGMENT = {
         'Lost Customers': 'High',
         'At Risk': 'High',
         'Others': 'Medium',
         'Cannot Lose Them': 'Medium',
     }

     @staticmethod
     def perform_segmentation(customer_metrics: pd.DataFrame) -> pd.DataFrame:
         """Add RFM scores, a segment label and a churn-risk level per customer.

         Args:
             customer_metrics: Per-customer frame with ``recency_days``,
                 ``frequency`` and ``monetary`` columns.

         Returns:
             Copy of the input with integer ``R_Score``/``F_Score``/``M_Score``
             columns plus ``Segment`` and ``Churn_Risk``. With fewer customers
             than ``BUSINESS_CONFIG['rfm_quantiles']`` every score is the
             neutral value 3.
         """
         df = customer_metrics.copy()
         n_bins = BUSINESS_CONFIG['rfm_quantiles']
         # Low recency is good, so recency labels run in the opposite direction.
         score_specs = (
             ('R_Score', 'recency_days', [5, 4, 3, 2, 1]),
             ('F_Score', 'frequency', [1, 2, 3, 4, 5]),
             ('M_Score', 'monetary', [1, 2, 3, 4, 5]),
         )
         enough_customers = len(df) >= n_bins
         for score_col, metric_col, labels in score_specs:
             if enough_customers:
                 # Each metric is scored on its own, so ties in one metric
                 # no longer change how the other two are binned.
                 df[score_col] = CustomerSegmenter._quantile_score(df[metric_col], n_bins, labels)
             else:
                 df[score_col] = 3
             # Convert to numeric and handle NaN
             df[score_col] = pd.to_numeric(df[score_col], errors='coerce').fillna(3).astype(int)

         # Row dicts instead of DataFrame.apply(axis=1): apply returns a frame,
         # not a series, when there are no rows, which breaks the assignment.
         score_rows = df[['R_Score', 'F_Score', 'M_Score']].to_dict('records')
         df['Segment'] = [CustomerSegmenter._assign_segment(row) for row in score_rows]
         # Risk is a pure function of the segment, so derive it from the column
         # instead of re-running the segment rules for every row.
         df['Churn_Risk'] = df['Segment'].map(CustomerSegmenter._RISK_BY_SEGMENT).fillna('Low')
         return df

     @staticmethod
     def _quantile_score(values: pd.Series, n_bins: int, labels: list) -> pd.Series:
         """Bucket ``values`` into ``n_bins`` quantile bins labelled with ``labels``.

         Heavily tied data (e.g. many customers with a single order) collapses
         quantile edges and makes ``pd.qcut`` raise. In that case the values are
         ranked with ``method='first'`` so every edge is distinct; tied values
         are then split by order of appearance.

         Returns:
             Float series of labels aligned with ``values`` (NaN where the input
             was NaN).
         """
         try:
             binned = pd.qcut(values, n_bins, labels=labels)
         except ValueError:
             binned = pd.qcut(values.rank(method='first'), n_bins, labels=labels)
         return binned.astype(float)

     @staticmethod
     def _assign_segment(row) -> str:
         """Assign customer segment based on RFM scores.

         ``row`` is any mapping-like object exposing ``R_Score``, ``F_Score``
         and ``M_Score``; rules are evaluated top to bottom, first match wins.
         """
         r, f, m = row['R_Score'], row['F_Score'], row['M_Score']
         if r >= 4 and f >= 4 and m >= 4:
             return 'Champions'
         elif r <= 2 and f >= 3:
             return 'At Risk'
         elif r <= 2 and f <= 2 and m >= 3:
             return 'Cannot Lose Them'
         elif r <= 2 and f <= 2 and m <= 2:
             return 'Lost Customers'
         else:
             return 'Others'

     @staticmethod
     def _assign_risk_level(row) -> str:
         """Return 'High', 'Medium' or 'Low' churn risk for a row of RFM scores."""
         segment = CustomerSegmenter._assign_segment(row)
         return CustomerSegmenter._RISK_BY_SEGMENT.get(segment, 'Low')
 class ChurnPredictor:
     """Handles churn prediction model training and inference"""

     # Model inputs, shared by training and inference so the two cannot drift apart.
     FEATURE_COLUMNS = [
         'recency_days', 'frequency', 'monetary', 'avg_order_value',
         'std_amount', 'min_amount', 'max_amount', 'customer_lifetime_days'
     ]

     def __init__(self):
         self.model = None
         self.feature_importance = None
         self.model_metrics = {}

     def train_model(self, customer_metrics: pd.DataFrame) -> Tuple[bool, str, Dict]:
         """Train churn prediction model.

         A customer is labelled churned when ``recency_days`` exceeds
         ``BUSINESS_CONFIG['churn_threshold_days']``. XGBoost is used when it is
         available, otherwise a random forest.

         Args:
             customer_metrics: Per-customer frame containing ``FEATURE_COLUMNS``.

         Returns:
             ``(success, message, metrics)``. On failure ``metrics`` is empty and
             ``message`` explains why; data problems are reported this way
             rather than raised.
         """
         min_customers = BUSINESS_CONFIG['min_customers_for_model']
         if len(customer_metrics) < min_customers:
             return False, f"Insufficient data for training (minimum {min_customers} customers required)", {}

         # Prepare features
         feature_cols = list(self.FEATURE_COLUMNS)
         X = customer_metrics[feature_cols]
         # NOTE(review): the label is a threshold on recency_days, which is also
         # a model feature, so the classifier can reproduce the label exactly and
         # the reported metrics are optimistic. Confirm whether that is intended.
         y = (customer_metrics['recency_days'] > BUSINESS_CONFIG['churn_threshold_days']).astype(int)

         # Check for sufficient class diversity
         if y.nunique() < 2:
             return False, "Cannot train model: all customers have the same churn status", {}
         # A stratified split needs at least two members of every class.
         smallest_class = int(y.value_counts().min())
         if smallest_class < 2:
             return False, "Cannot train model: each churn class needs at least 2 customers", {}

         # Train-test split
         try:
             X_train, X_test, y_train, y_test = train_test_split(
                 X, y, test_size=0.2, random_state=42, stratify=y
             )
         except ValueError as exc:
             # Raised when the data set is too small to stratify at this test size.
             return False, f"Cannot train model: {exc}", {}

         # Select and train model
         model = None
         model_name = "Random Forest Classifier"
         if XGBOOST_AVAILABLE:
             try:
                 model = xgb.XGBClassifier(random_state=42, eval_metric='logloss')
                 model_name = "XGBoost Classifier"
             except Exception:
                 # Best effort: fall back to the random forest below.
                 model = None
         if model is None:
             model = RandomForestClassifier(random_state=42, n_estimators=100)
         self.model = model
         self.model.fit(X_train, y_train)

         # Evaluate model
         y_pred = self.model.predict(X_test)
         y_pred_proba = self.model.predict_proba(X_test)[:, 1]
         accuracy = accuracy_score(y_test, y_pred)
         # AUC is undefined when the held-out fold contains a single class.
         if y_test.nunique() > 1:
             auc_score = roc_auc_score(y_test, y_pred_proba)
         else:
             auc_score = float('nan')

         # Cross-validation; never more folds than members of the smallest class
         n_folds = min(5, smallest_class)
         cv_scores = cross_val_score(self.model, X, y, cv=n_folds, scoring='roc_auc')

         # Feature importance; the 'feature' column is what the importance chart plots
         self.feature_importance = pd.DataFrame({
             'feature': feature_cols,
             'importance': self.model.feature_importances_
         }).sort_values('importance', ascending=False)

         self.model_metrics = {
             'accuracy': accuracy,
             'auc_score': auc_score,
             'cv_mean': cv_scores.mean(),
             'cv_std': cv_scores.std(),
             'model_name': model_name,
             'n_features': len(feature_cols),
             'n_samples': len(X_train)
         }
         return True, "Model trained successfully", self.model_metrics

     def predict(self, customer_metrics: pd.DataFrame) -> pd.DataFrame:
         """Make churn predictions.

         Returns the input unchanged when no model has been trained. Otherwise
         returns a copy with ``churn_probability`` (0-1) and ``predicted_churn``
         (1 when the probability exceeds
         ``BUSINESS_CONFIG['high_risk_probability']``).
         """
         if self.model is None:
             return customer_metrics
         X = customer_metrics[list(self.FEATURE_COLUMNS)]
         predictions = self.model.predict_proba(X)[:, 1]
         result = customer_metrics.copy()
         result['churn_probability'] = predictions
         result['predicted_churn'] = (predictions > BUSINESS_CONFIG['high_risk_probability']).astype(int)
         return result
class VisualizationEngine:
    """Factory for the Plotly figures displayed by the app."""

    @staticmethod
    def create_segment_chart(customer_data: pd.DataFrame):
        """Build a donut chart of how many customers fall in each segment."""
        counts = customer_data['Segment'].value_counts().reset_index()
        counts.columns = ['Segment', 'Count']
        palette = list(COLORS.values())
        chart = px.pie(
            counts,
            names='Segment',
            values='Count',
            hole=0.4,
            title='Customer Segment Distribution',
            color_discrete_sequence=palette,
        )
        chart.update_traces(textinfo='percent+label', textposition='inside')
        chart.update_layout(height=400, title={'x': 0.5, 'xanchor': 'center'})
        return chart

    @staticmethod
    def create_rfm_scatter(customer_data: pd.DataFrame):
        """Build a recency-vs-frequency scatter sized by spend and colored by segment."""
        axis_labels = {
            'recency_days': 'Days Since Last Purchase',
            'frequency': 'Purchase Frequency',
            'monetary': 'Total Revenue',
        }
        palette = list(COLORS.values())
        chart = px.scatter(
            customer_data,
            x='recency_days',
            y='frequency',
            size='monetary',
            color='Segment',
            title='RFM Customer Behavior Matrix',
            labels=axis_labels,
            color_discrete_sequence=palette,
        )
        chart.update_layout(height=400, title={'x': 0.5, 'xanchor': 'center'})
        return chart

    @staticmethod
    def create_churn_chart(customer_data: pd.DataFrame, has_predictions: bool = False):
        """Build the churn view.

        Shows a histogram of model probabilities when ``has_predictions`` is set
        and a ``churn_probability`` column exists; otherwise a bar chart of the
        rule-based ``Churn_Risk`` levels.
        """
        show_probabilities = has_predictions and 'churn_probability' in customer_data.columns
        if show_probabilities:
            chart = px.histogram(
                customer_data,
                x='churn_probability',
                nbins=20,
                title='Churn Probability Distribution',
                labels={'churn_probability': 'Churn Probability', 'count': 'Number of Customers'},
                color_discrete_sequence=[COLORS['primary']],
            )
            # Mark the probability above which a customer counts as high risk.
            chart.add_vline(
                x=BUSINESS_CONFIG['high_risk_probability'],
                line_dash="dash",
                line_color=COLORS['danger'],
                annotation_text="High Risk Threshold",
            )
        else:
            level_counts = customer_data['Churn_Risk'].value_counts().reset_index()
            level_counts.columns = ['Risk_Level', 'Count']
            level_colors = {
                'High': COLORS['danger'],
                'Medium': COLORS['warning'],
                'Low': COLORS['success'],
            }
            chart = px.bar(
                level_counts,
                x='Risk_Level',
                y='Count',
                title='Customer Churn Risk Distribution',
                color='Risk_Level',
                color_discrete_map=level_colors,
            )
            chart.update_layout(showlegend=False)
        chart.update_layout(height=400, title={'x': 0.5, 'xanchor': 'center'})
        return chart

    @staticmethod
    def create_revenue_trend(df: pd.DataFrame):
        """Build a line chart of total order amount per calendar month."""
        with_month = df.assign(order_month=df['order_date'].dt.to_period('M'))
        monthly = with_month.groupby('order_month')['amount'].sum().reset_index()
        # Periods are turned into strings so they can be used as axis values.
        monthly['order_month'] = monthly['order_month'].astype(str)
        chart = px.line(
            monthly,
            x='order_month',
            y='amount',
            title='Monthly Revenue Trends',
            labels={'amount': 'Revenue ($)', 'order_month': 'Month'},
        )
        chart.update_traces(line_color=COLORS['primary'], line_width=3)
        chart.update_layout(height=400, title={'x': 0.5, 'xanchor': 'center'})
        return chart

    @staticmethod
    def create_feature_importance_chart(feature_importance: pd.DataFrame):
        """Build a horizontal bar chart of the first eight rows of ``feature_importance``.

        Expects ``feature`` and ``importance`` columns.
        """
        top_rows = feature_importance.head(8)
        chart = px.bar(
            top_rows,
            x='importance',
            y='feature',
            orientation='h',
            title='Feature Importance Analysis',
            labels={'importance': 'Importance Score', 'feature': 'Features'},
            color='importance',
            color_continuous_scale='viridis',
        )
        layout_options = {
            'height': 500,
            'showlegend': False,
            'plot_bgcolor': 'white',
            'paper_bgcolor': 'white',
            'title': {'x': 0.5, 'xanchor': 'center'},
            'yaxis': {'categoryorder': 'total ascending'},
        }
        chart.update_layout(**layout_options)
        return chart
 class ReportGenerator:
     """Builds the PDF summary of the customer analysis."""

     @staticmethod
     def generate_pdf_report(customer_data: pd.DataFrame, model_metrics: Dict) -> bytes:
         """Render the analytics report and return it as PDF bytes.

         Args:
             customer_data: Per-customer frame with ``monetary`` and ``Segment``.
             model_metrics: Metrics dict from model training; when empty the
                 model-performance section is left out.

         Raises:
             ImportError: If ReportLab is not installed.
         """
         if not REPORTLAB_AVAILABLE:
             raise ImportError("PDF generation requires ReportLab library")
         pdf_buffer = io.BytesIO()
         document = SimpleDocTemplate(
             pdf_buffer,
             pagesize=A4,
             rightMargin=72,
             leftMargin=72,
             topMargin=72,
             bottomMargin=18,
         )
         sheet = getSampleStyleSheet()
         # Title
         centered_title = ParagraphStyle(
             'CustomTitle', parent=sheet['Title'], fontSize=24, spaceAfter=30, alignment=1
         )
         story = [
             Paragraph("B2B Customer Analytics Report", centered_title),
             Spacer(1, 12),
         ]
         # Executive summary
         total_customers = len(customer_data)
         total_revenue = customer_data['monetary'].sum()
         avg_revenue = customer_data['monetary'].mean()
         summary_text = f"""
         This comprehensive analysis covers {total_customers:,} customers with combined revenue of ${total_revenue:,.2f}.
         The average customer value is ${avg_revenue:,.2f}. Customer segmentation and churn risk assessment
         have been performed using advanced RFM analysis and machine learning techniques.
         """
         story += [
             Paragraph("Executive Summary", sheet['Heading2']),
             Paragraph(summary_text, sheet['Normal']),
             Spacer(1, 20),
         ]
         # Segment distribution: header row followed by one row per segment
         table_rows = [['Segment', 'Count', 'Percentage']]
         for segment, count in customer_data['Segment'].value_counts().items():
             share = (count / total_customers) * 100
             table_rows.append([segment, str(count), f"{share:.1f}%"])
         segment_table = Table(table_rows)
         segment_table.setStyle(TableStyle([
             ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
             ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
             ('GRID', (0, 0), (-1, -1), 1, colors.black)
         ]))
         story += [
             Paragraph("Customer Segmentation Overview", sheet['Heading2']),
             segment_table,
             Spacer(1, 20),
         ]
         # Model performance (if available)
         if model_metrics:
             model_text = f"""
             Model Type: {model_metrics['model_name']}<br/>
             Accuracy: {model_metrics['accuracy']:.1%}<br/>
             AUC Score: {model_metrics['auc_score']:.3f}<br/>
             Cross-validation Score: {model_metrics['cv_mean']:.3f} ± {model_metrics['cv_std']:.3f}<br/>
             Features Used: {model_metrics['n_features']}<br/>
             Training Samples: {model_metrics['n_samples']}
             """
             story += [
                 Paragraph("Churn Prediction Model Performance", sheet['Heading2']),
                 Paragraph(model_text, sheet['Normal']),
             ]
         # Build and return PDF
         document.build(story)
         pdf_bytes = pdf_buffer.getvalue()
         pdf_buffer.close()
         return pdf_bytes
class B2BCustomerAnalytics:
    """Main analytics orchestrator.

    Holds the uploaded orders (``raw_data``), the per-customer metrics frame
    (``customer_metrics``) and the churn predictor, and turns them into the
    HTML snippets, tables, charts and PDF shown by the UI.
    """

    def __init__(self):
        self.raw_data = None
        self.customer_metrics = None
        self.churn_predictor = ChurnPredictor()
        self.has_trained_model = False

    def load_data(self, file) -> Tuple[str, str, Optional[pd.DataFrame]]:
        """Load and process data.

        Returns:
            ``(status, dashboard_html, preview)``. When loading fails the
            dashboard is an empty string and the preview is ``None``.
        """
        self.raw_data, status = DataProcessor.load_and_validate(file)
        if self.raw_data is None:
            return status, "", None
        # A model trained on a previous upload says nothing about this data set.
        self.churn_predictor = ChurnPredictor()
        self.has_trained_model = False
        # Calculate RFM metrics
        self.customer_metrics = RFMAnalyzer.calculate_rfm_metrics(self.raw_data)
        # Perform segmentation
        self.customer_metrics = CustomerSegmenter.perform_segmentation(self.customer_metrics)
        # Generate dashboard
        dashboard_html = self._generate_dashboard()
        preview_data = self._prepare_preview_data()
        return status, dashboard_html, preview_data

    def train_churn_model(self) -> Tuple[str, Optional[Any]]:
        """Train churn prediction model.

        Returns:
            ``(results_html, feature_importance_chart)`` on success, otherwise
            ``(error_message, None)``.
        """
        if self.customer_metrics is None:
            return "No data available. Please upload data first.", None
        success, message, metrics = self.churn_predictor.train_model(self.customer_metrics)
        if not success:
            return f"Model training failed: {message}", None
        self.has_trained_model = True
        # Update predictions
        self.customer_metrics = self.churn_predictor.predict(self.customer_metrics)
        results_html = self._format_model_results(metrics)
        chart = VisualizationEngine.create_feature_importance_chart(
            self.churn_predictor.feature_importance
        )
        return results_html, chart

    def get_visualizations(self) -> Tuple[Any, Any, Any, Any]:
        """Return the segment, RFM, churn and revenue charts (all ``None`` without data)."""
        if self.customer_metrics is None:
            return None, None, None, None
        segment_chart = VisualizationEngine.create_segment_chart(self.customer_metrics)
        rfm_chart = VisualizationEngine.create_rfm_scatter(self.customer_metrics)
        churn_chart = VisualizationEngine.create_churn_chart(
            self.customer_metrics, self.has_trained_model
        )
        revenue_chart = VisualizationEngine.create_revenue_trend(self.raw_data)
        return segment_chart, rfm_chart, churn_chart, revenue_chart

    def get_customer_table(self) -> Optional[pd.DataFrame]:
        """Return the first 50 customers formatted for display, or ``None`` without data.

        The stored metrics are left untouched: the probability is converted to
        a percentage on the returned copy only, so repeated calls give the same
        values and other views keep working with 0-1 probabilities.
        """
        if self.customer_metrics is None:
            return None
        columns = ['customer_id', 'Segment', 'Churn_Risk', 'recency_days',
                   'frequency', 'monetary', 'avg_order_value']
        has_probability = 'churn_probability' in self.customer_metrics.columns
        if has_probability:
            columns.append('churn_probability')
        table_data = self.customer_metrics[columns].copy()
        if has_probability:
            table_data['churn_probability'] = (table_data['churn_probability'] * 100).round(1)
        table_data['monetary'] = table_data['monetary'].round(2)
        table_data['avg_order_value'] = table_data['avg_order_value'].round(2)
        # Rename columns for display
        display_names = {
            'customer_id': 'Customer ID',
            'Segment': 'Segment',
            'Churn_Risk': 'Risk Level',
            'recency_days': 'Recency (Days)',
            'frequency': 'Frequency',
            'monetary': 'Total Spent ($)',
            'avg_order_value': 'Avg Order ($)',
            'churn_probability': 'Churn Probability (%)'
        }
        table_data = table_data.rename(columns=display_names)
        return table_data.head(50)

    def get_customer_insights(self, customer_id: str) -> str:
        """Return the HTML profile for one customer, or a plain message if unavailable."""
        from html import escape

        # IDs are stored as trimmed strings, so normalize the lookup key the same way.
        lookup_id = "" if customer_id is None else str(customer_id).strip()
        if self.customer_metrics is None or not lookup_id:
            return "Please enter a valid customer ID"
        customer_data = self.customer_metrics[
            self.customer_metrics['customer_id'] == lookup_id
        ]
        if customer_data.empty:
            # The ID is user input and may be rendered as HTML.
            return f"Customer {escape(lookup_id)} not found"
        customer = customer_data.iloc[0]
        return self._format_customer_profile(customer)

    def generate_report(self) -> bytes:
        """Generate PDF report.

        Raises:
            ValueError: If no data has been loaded.
        """
        if self.customer_metrics is None:
            raise ValueError("No data available for report generation")
        return ReportGenerator.generate_pdf_report(
            self.customer_metrics,
            self.churn_predictor.model_metrics
        )

    def _generate_dashboard(self) -> str:
        """Generate dashboard HTML with headline KPIs and per-segment counts."""
        total_customers = len(self.customer_metrics)
        total_revenue = self.customer_metrics['monetary'].sum()
        avg_order_value = self.customer_metrics['avg_order_value'].mean()
        high_risk_customers = (self.customer_metrics['Churn_Risk'] == 'High').sum()
        segment_dist = self.customer_metrics['Segment'].value_counts()
        segment_items = ' '.join(
            f'<div><strong>{segment}:</strong> {count}</div>'
            for segment, count in segment_dist.items()
        )
        return f"""
        <div style="display: flex; flex-wrap: wrap; gap: 1rem; margin-bottom: 2rem;">
            <div style="flex: 1; min-width: 200px; background: linear-gradient(135deg, #3b82f6, #1d4ed8); padding: 1.5rem; border-radius: 12px; color: white; text-align: center;">
                <h3 style="margin: 0 0 0.5rem 0; font-size: 0.9rem; opacity: 0.9;">Total Customers</h3>
                <div style="font-size: 2.5rem; font-weight: bold;">{total_customers:,}</div>
            </div>
            <div style="flex: 1; min-width: 200px; background: linear-gradient(135deg, #10b981, #047857); padding: 1.5rem; border-radius: 12px; color: white; text-align: center;">
                <h3 style="margin: 0 0 0.5rem 0; font-size: 0.9rem; opacity: 0.9;">Total Revenue</h3>
                <div style="font-size: 2.5rem; font-weight: bold;">${total_revenue/1000000:.1f}M</div>
            </div>
            <div style="flex: 1; min-width: 200px; background: linear-gradient(135deg, #8b5cf6, #6d28d9); padding: 1.5rem; border-radius: 12px; color: white; text-align: center;">
                <h3 style="margin: 0 0 0.5rem 0; font-size: 0.9rem; opacity: 0.9;">Avg Order Value</h3>
                <div style="font-size: 2.5rem; font-weight: bold;">${avg_order_value:.0f}</div>
            </div>
            <div style="flex: 1; min-width: 200px; background: linear-gradient(135deg, #ef4444, #dc2626); padding: 1.5rem; border-radius: 12px; color: white; text-align: center;">
                <h3 style="margin: 0 0 0.5rem 0; font-size: 0.9rem; opacity: 0.9;">High Risk Customers</h3>
                <div style="font-size: 2.5rem; font-weight: bold;">{high_risk_customers}</div>
            </div>
        </div>
        <div style="background: #f8fafc; padding: 1.5rem; border-radius: 12px; border-left: 4px solid #6366f1;">
            <h4 style="margin: 0 0 1rem 0; color: #374151;">Customer Segments Overview</h4>
            <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)); gap: 1rem;">
                {segment_items}
            </div>
        </div>
        """

    def _prepare_preview_data(self) -> pd.DataFrame:
        """Return the first 20 order rows joined with each customer's segment and risk."""
        if self.raw_data is None:
            return pd.DataFrame()
        preview = self.raw_data.merge(
            self.customer_metrics[['customer_id', 'Segment', 'Churn_Risk']],
            on='customer_id',
            how='left'
        )
        return preview.head(20)

    def _format_model_results(self, metrics: Dict) -> str:
        """Format model training results as an HTML card."""
        return f"""
        <div style="background: white; padding: 2rem; border-radius: 1rem; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); margin-bottom: 2rem;">
            <div style="text-align: center; margin-bottom: 2rem;">
                <h3 style="color: #1f2937; font-size: 1.5rem; font-weight: bold; margin-bottom: 0.5rem;">
                    Model Training Completed Successfully
                </h3>
                <p style="color: #6b7280;">{metrics['model_name']} with Advanced Feature Engineering</p>
            </div>
            <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)); gap: 1rem; margin-bottom: 2rem;">
                <div style="background: linear-gradient(135deg, #6366f1, #4f46e5); padding: 1rem; border-radius: 8px; text-align: center; color: white;">
                    <div style="font-size: 2rem; font-weight: bold;">{metrics['accuracy']:.1%}</div>
                    <div style="font-size: 0.9rem;">Accuracy</div>
                </div>
                <div style="background: linear-gradient(135deg, #10b981, #059669); padding: 1rem; border-radius: 8px; text-align: center; color: white;">
                    <div style="font-size: 2rem; font-weight: bold;">{metrics['auc_score']:.3f}</div>
                    <div style="font-size: 0.9rem;">AUC Score</div>
                </div>
                <div style="background: linear-gradient(135deg, #f59e0b, #d97706); padding: 1rem; border-radius: 8px; text-align: center; color: white;">
                    <div style="font-size: 2rem; font-weight: bold;">{metrics['n_features']}</div>
                    <div style="font-size: 0.9rem;">Features Used</div>
                </div>
                <div style="background: linear-gradient(135deg, #8b5cf6, #7c3aed); padding: 1rem; border-radius: 8px; text-align: center; color: white;">
                    <div style="font-size: 2rem; font-weight: bold;">{metrics['cv_mean']:.3f}</div>
                    <div style="font-size: 0.9rem;">CV Score</div>
                </div>
            </div>
        </div>
        """

    def _format_customer_profile(self, customer) -> str:
        """Format individual customer profile as an HTML card.

        ``customer`` is one row of ``customer_metrics``. When no model
        probability exists the card shows "N/A" instead of a made-up figure;
        the recommendation logic still receives the neutral value 0.5.
        """
        from html import escape

        raw_prob = customer.get('churn_probability')
        has_probability = raw_prob is not None and not pd.isna(raw_prob)
        churn_prob = raw_prob if has_probability else 0.5
        churn_display = f"{churn_prob:.1%}" if has_probability else "N/A"
        # The ID comes straight from the uploaded CSV; escape it before embedding in HTML.
        safe_id = escape(str(customer['customer_id']))
        recommendations = self._get_customer_recommendations(
            customer['Segment'], customer['Churn_Risk'], churn_prob, customer['recency_days']
        )
        return f"""
        <div style="background: white; padding: 2rem; border-radius: 1rem; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); margin-bottom: 1rem;">
            <h3 style="text-align: center; color: #1f2937; margin-bottom: 1.5rem;">Customer Profile: {safe_id}</h3>
            <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 1rem; margin-bottom: 2rem;">
                <div style="background: linear-gradient(135deg, #6366f1, #4f46e5); padding: 1rem; border-radius: 8px; color: white; text-align: center;">
                    <h4 style="margin: 0 0 0.5rem 0; font-size: 0.9rem; opacity: 0.9;">Segment</h4>
                    <div style="font-size: 1.2rem; font-weight: bold;">{customer['Segment']}</div>
                </div>
                <div style="background: linear-gradient(135deg, #ef4444, #dc2626); padding: 1rem; border-radius: 8px; color: white; text-align: center;">
                    <h4 style="margin: 0 0 0.5rem 0; font-size: 0.9rem; opacity: 0.9;">Churn Risk</h4>
                    <div style="font-size: 1.2rem; font-weight: bold;">{customer['Churn_Risk']}</div>
                </div>
                <div style="background: linear-gradient(135deg, #8b5cf6, #6d28d9); padding: 1rem; border-radius: 8px; color: white; text-align: center;">
                    <h4 style="margin: 0 0 0.5rem 0; font-size: 0.9rem; opacity: 0.9;">Churn Probability</h4>
                    <div style="font-size: 1.2rem; font-weight: bold;">{churn_display}</div>
                </div>
            </div>
            <div style="background: #f8fafc; padding: 1.5rem; border-radius: 8px; margin-bottom: 1rem;">
                <h4 style="color: #374151; margin-bottom: 1rem;">Transaction Analytics</h4>
                <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)); gap: 1rem;">
                    <div>
                        <div style="font-size: 0.8rem; color: #6b7280; margin-bottom: 0.2rem;">Purchase Frequency</div>
                        <div style="font-size: 1.5rem; font-weight: bold; color: #1f2937;">{customer['frequency']}</div>
                    </div>
                    <div>
                        <div style="font-size: 0.8rem; color: #6b7280; margin-bottom: 0.2rem;">Total Spent</div>
                        <div style="font-size: 1.5rem; font-weight: bold; color: #1f2937;">${customer['monetary']:,.0f}</div>
                    </div>
                    <div>
                        <div style="font-size: 0.8rem; color: #6b7280; margin-bottom: 0.2rem;">Avg Order Value</div>
                        <div style="font-size: 1.5rem; font-weight: bold; color: #1f2937;">${customer['avg_order_value']:.0f}</div>
                    </div>
                    <div>
                        <div style="font-size: 0.8rem; color: #6b7280; margin-bottom: 0.2rem;">Days Since Last Order</div>
                        <div style="font-size: 1.5rem; font-weight: bold; color: #1f2937;">{customer['recency_days']}</div>
                    </div>
                </div>
            </div>
            <div style="background: linear-gradient(135deg, #f0f9ff, #e0f2fe); border-left: 4px solid #3b82f6; padding: 1rem; border-radius: 4px;">
                <h4 style="color: #1e40af; margin-bottom: 0.5rem;">Recommendations</h4>
                <p style="color: #1f2937; margin: 0;">{recommendations}</p>
            </div>
        </div>
        """

    def _get_customer_recommendations(self, segment: str, risk_level: str,
                                    churn_prob: float, recency: int) -> str:
        """Generate personalized recommendations.

        Combines advice driven by risk level or probability, by segment, and by
        recency (more than 60 days) into one bullet-separated string.
        """
        recommendations = []
        if risk_level == 'High' or churn_prob > BUSINESS_CONFIG['high_risk_probability']:
            recommendations.append("URGENT: Personal outreach required within 24 hours")
            recommendations.append("Offer retention incentive or loyalty program")
        elif risk_level == 'Medium':
            recommendations.append("Send personalized re-engagement campaign")

        segment_advice = {
            'Champions': "Invite to VIP program or advisory board",
            'At Risk': "Proactive customer success intervention needed",
            'New Customers': "Deploy onboarding campaign sequence",
            'Lost Customers': "Win-back campaign with deep discount offer",
        }
        if segment in segment_advice:
            recommendations.append(segment_advice[segment])

        if recency > 60:
            recommendations.append("Re-engagement campaign with special offer recommended")
        return " • ".join(recommendations) if recommendations else "Continue monitoring customer engagement patterns."
def create_gradio_interface():
    """Build the Gradio Blocks UI for the B2B customer analytics app.

    The layout has six tabs (data upload/dashboard, segmentation, churn
    prediction, revenue analytics, customer insights, reports). Each button
    is wired to a small wrapper that catches exceptions from the analytics
    object and turns them into user-visible messages instead of crashing
    the event handler.

    Returns:
        The constructed ``gr.Blocks`` app (not yet launched).
    """
    # Function-scope imports keep this block self-contained.
    import html
    import tempfile

    # Custom CSS for modern styling
    custom_css = """
    .gradio-container {
        font-family: 'Inter', system-ui, sans-serif !important;
        max-width: 1200px !important;
    }
    .tab-nav {
        background: #f8fafc !important;
        border-radius: 8px !important;
    }
    """

    with gr.Blocks(theme=gr.themes.Soft(), title="B2B Customer Analytics", css=custom_css) as demo:
        # The analytics object travels through gr.State so handlers receive
        # it as an input and hand it back as an output.
        analytics = gr.State(B2BCustomerAnalytics())

        gr.HTML("""
        <div style="background: linear-gradient(135deg, #6366f1 0%, #8b5cf6 100%); padding: 2rem; border-radius: 1rem; color: white; text-align: center; margin-bottom: 2rem;">
            <h1 style="font-size: 2.5rem; font-weight: bold; margin-bottom: 0.5rem;">B2B Customer Analytics Platform</h1>
            <p style="font-size: 1.1rem; opacity: 0.9;">Advanced Customer Segmentation & Churn Prediction</p>
            <div style="font-size: 0.9rem; opacity: 0.8; margin-top: 1rem;">
                Upload your customer data CSV with columns: customer_id, order_date, amount (or similar)
            </div>
        </div>
        """)

        with gr.Tabs():
            with gr.Tab("📊 Data Upload & Dashboard"):
                with gr.Row():
                    with gr.Column():
                        file_input = gr.File(
                            label="Upload Customer Data CSV",
                            file_types=[".csv"],
                            type="filepath"
                        )
                        load_btn = gr.Button(
                            "Load & Process Data",
                            variant="primary",
                            size="lg"
                        )
                        load_status = gr.Textbox(
                            label="Status",
                            interactive=False,
                            max_lines=2
                        )
                summary_display = gr.HTML()
                data_preview = gr.DataFrame(label="Data Preview (First 20 Rows)")

            with gr.Tab("🎯 Customer Segmentation"):
                with gr.Row():
                    with gr.Column():
                        segment_chart = gr.Plot(label="Customer Segments Distribution")
                    with gr.Column():
                        rfm_chart = gr.Plot(label="RFM Behavior Analysis")
                customer_table = gr.DataFrame(label="Customer Segmentation Details")
                gr.HTML("""
                <div style="background: #f0f9ff; padding: 1rem; border-radius: 8px; border-left: 4px solid #3b82f6; margin-top: 1rem;">
                    <h4 style="color: #1e40af; margin: 0 0 0.5rem 0;">Segment Definitions</h4>
                    <p style="margin: 0; color: #1f2937; font-size: 0.9rem;">
                        <strong>Champions:</strong> High value, frequent customers •
                        <strong>Loyal Customers:</strong> Regular, valuable customers •
                        <strong>At Risk:</strong> Previously valuable but declining activity •
                        <strong>Lost Customers:</strong> Haven't purchased recently
                    </p>
                </div>
                """)

            with gr.Tab("🤖 Churn Prediction"):
                train_btn = gr.Button(
                    "Train Churn Prediction Model",
                    variant="primary",
                    size="lg"
                )
                model_results = gr.HTML()
                with gr.Row():
                    with gr.Column():
                        feature_importance_chart = gr.Plot(label="Feature Importance Analysis")
                    with gr.Column():
                        churn_distribution_chart = gr.Plot(label="Churn Risk Distribution")
                gr.HTML("""
                <div style="background: #fef3c7; padding: 1rem; border-radius: 8px; border-left: 4px solid #f59e0b; margin-top: 1rem;">
                    <h4 style="color: #92400e; margin: 0 0 0.5rem 0;">Model Information</h4>
                    <p style="margin: 0; color: #1f2937; font-size: 0.9rem;">
                        The model uses advanced features including customer lifetime, purchase patterns, and RFM metrics.
                        Customers with >90 days since last purchase are considered churned for training purposes.
                    </p>
                </div>
                """)

            with gr.Tab("📈 Revenue Analytics"):
                revenue_chart = gr.Plot(label="Monthly Revenue Trends")
                gr.HTML("""
                <div style="background: #ecfdf5; padding: 1rem; border-radius: 8px; border-left: 4px solid #10b981; margin-top: 1rem;">
                    <h4 style="color: #065f46; margin: 0 0 0.5rem 0;">Revenue Insights</h4>
                    <p style="margin: 0; color: #1f2937; font-size: 0.9rem;">
                        Track revenue trends over time to identify seasonal patterns, growth trajectories, and potential business impact of customer segments.
                    </p>
                </div>
                """)

            with gr.Tab("👤 Customer Insights"):
                with gr.Row():
                    customer_id_input = gr.Textbox(
                        label="Customer ID",
                        placeholder="Enter customer ID for detailed analysis",
                        scale=3
                    )
                    insights_btn = gr.Button(
                        "Get Customer Profile",
                        variant="primary",
                        scale=1
                    )
                customer_insights = gr.HTML()

            with gr.Tab("📋 Reports"):
                with gr.Row():
                    with gr.Column():
                        gr.HTML("""
                        <div style="background: white; padding: 2rem; border-radius: 1rem; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
                            <h3 style="color: #1f2937; margin-bottom: 1rem;">Generate Comprehensive Report</h3>
                            <p style="color: #6b7280; margin-bottom: 1.5rem;">
                                Create a detailed PDF report including customer segmentation analysis,
                                churn predictions, and actionable business insights.
                            </p>
                        </div>
                        """)
                        report_btn = gr.Button(
                            "Generate PDF Report",
                            variant="primary",
                            size="lg"
                        )
                    with gr.Column():
                        report_file = gr.File(
                            label="Download Report",
                            interactive=False
                        )

        # ------------------------------------------------------------------
        # Event handlers. Each one catches every exception on purpose: this
        # is the UI boundary, and a failure should surface as a message in
        # the page rather than as a broken event.
        # ------------------------------------------------------------------

        def _empty_load_result(analytics_instance, message):
            """Return the 9-slot load output carrying only a status message.

            Slots are: state, status text, blank dashboard HTML, then None
            for the preview, the four charts and the customer table.
            """
            return (analytics_instance, message, "", *([None] * 6))

        def safe_load_data(analytics_instance, file):
            """Load the uploaded CSV and populate dashboard, charts and table."""
            try:
                if file is None:
                    return _empty_load_result(analytics_instance, "Please upload a CSV file")

                status, dashboard, preview = analytics_instance.load_data(file)
                # load_data signals success through its status text.
                if "successfully" not in status:
                    return _empty_load_result(analytics_instance, status)

                charts = analytics_instance.get_visualizations()
                table = analytics_instance.get_customer_table()
                return (analytics_instance, status, dashboard, preview, *charts, table)
            except Exception as e:
                return _empty_load_result(analytics_instance, f"Error loading data: {str(e)}")

        def safe_train_model(analytics_instance):
            """Train the churn model and refresh the churn distribution chart."""
            try:
                result_html, chart = analytics_instance.train_churn_model()
                # Index 2 is the visualization wired to churn_distribution_chart
                # in the load handler's outputs.
                updated_charts = analytics_instance.get_visualizations()
                return analytics_instance, result_html, chart, updated_charts[2]
            except Exception as e:
                # The message is rendered by a gr.HTML component, so escape it.
                error_msg = f"Error training model: {html.escape(str(e))}"
                # gr.update() leaves the churn chart from the load step in
                # place instead of blanking it when training fails.
                return analytics_instance, error_msg, None, gr.update()

        def safe_get_insights(analytics_instance, customer_id):
            """Return the HTML profile for one customer, or an error message."""
            try:
                return analytics_instance.get_customer_insights(customer_id)
            except Exception as e:
                # Rendered by a gr.HTML component, so escape the exception text.
                return f"Error getting insights: {html.escape(str(e))}"

        def safe_generate_report(analytics_instance):
            """Write the PDF report to a temp file and return its path (or None)."""
            try:
                if analytics_instance.customer_metrics is None:
                    # Tell the user why nothing was produced instead of
                    # silently returning an empty download slot.
                    gr.Warning("Please load customer data before generating a report.")
                    return None

                pdf_bytes = analytics_instance.generate_report()
                # delete=False: the file must outlive this handler so the
                # File component can serve it.
                with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp:
                    tmp.write(pdf_bytes)
                return tmp.name
            except Exception as e:
                gr.Warning(f"Error generating report: {str(e)}")
                return None

        # Wire up events
        load_btn.click(
            fn=safe_load_data,
            inputs=[analytics, file_input],
            outputs=[analytics, load_status, summary_display, data_preview,
                     segment_chart, rfm_chart, churn_distribution_chart, revenue_chart, customer_table]
        )
        train_btn.click(
            fn=safe_train_model,
            inputs=[analytics],
            outputs=[analytics, model_results, feature_importance_chart, churn_distribution_chart]
        )
        insights_btn.click(
            fn=safe_get_insights,
            inputs=[analytics, customer_id_input],
            outputs=[customer_insights]
        )
        report_btn.click(
            fn=safe_generate_report,
            inputs=[analytics],
            outputs=[report_file]
        )
        # Pressing Enter in the customer ID box runs the same lookup as the button.
        customer_id_input.submit(
            fn=safe_get_insights,
            inputs=[analytics, customer_id_input],
            outputs=[customer_insights]
        )

    return demo
if __name__ == "__main__":
    # Script entry point: build the UI, then serve it on all interfaces at
    # port 7860 with a share link requested and errors shown in the UI.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
        "show_error": True,
    }
    demo = create_gradio_interface()
    demo.launch(**launch_options)