Spaces:

entropy25
/

customer

Sleeping

App Files Community

entropy25 committed on Aug 29, 2025

Commit

61d745b

verified ·

1 Parent(s): 3dbb5ae

Update app.py

Browse files

Files changed (1) hide show

app.py +566 -767

app.py CHANGED Viewed

@@ -4,26 +4,23 @@ import pandas as pd
 import numpy as np
 import matplotlib.pyplot as plt
 import seaborn as sns
-from sklearn.model_selection import train_test_split
 from sklearn.ensemble import RandomForestClassifier
-from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
 import plotly.express as px
 import plotly.graph_objects as go
-from plotly.subplots import make_subplots
-import plotly.io as pio
 from datetime import datetime, timedelta
 import io
 import base64
 import warnings
 warnings.filterwarnings('ignore')
-# Try importing xgboost and reportlab, use fallbacks if not available
 try:
     import xgboost as xgb
     XGBOOST_AVAILABLE = True
 except ImportError:
     XGBOOST_AVAILABLE = False
-    print("XGBoost not available, using RandomForest only")
 try:
     from reportlab.lib.pagesizes import letter, A4
@@ -31,863 +28,665 @@ try:
     from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
     from reportlab.lib.units import inch
     from reportlab.lib import colors
     REPORTLAB_AVAILABLE = True
 except ImportError:
     REPORTLAB_AVAILABLE = False
-    print("ReportLab not available, PDF generation disabled")
-# Modern color palette
 COLORS = {
     'primary': '#6366f1',
-    'success': '#10b981',
     'warning': '#f59e0b',
     'danger': '#ef4444',
-    'purple': '#8b5cf6',
-    'pink': '#ec4899',
-    'blue': '#3b82f6',
-    'indigo': '#6366f1'
 }
-# Global analytics instance
-analytics = None
-class B2BCustomerAnalytics:
     def __init__(self):
-        self.df = None
-        self.processed_df = None
         self.model = None
         self.feature_importance = None
-        self.predictions = None
-    def load_and_process_data(self, file):
-        """Load and process the uploaded CSV file"""
-        try:
-            if file is None:
-                return "Please upload a CSV file", None, None
-            # Load raw data
-            self.df = pd.read_csv(file.name)
-            # Check for required columns - be flexible with column names
-            required_columns = ['customer_id', 'order_date', 'amount']
-            df_columns_lower = [col.lower().strip() for col in self.df.columns]
-            # Map common variations
-            column_mapping = {}
-            for req_col in required_columns:
-                found = False
-                for df_col in self.df.columns:
-                    if req_col in df_col.lower() or df_col.lower().strip() in req_col:
-                        column_mapping[req_col] = df_col
-                        found = True
-                        break
-                    # Check for common variations
-                    variations = {
-                        'customer_id': ['customer', 'cust_id', 'id', 'customerid', 'client_id'],
-                        'order_date': ['date', 'order_date', 'orderdate', 'purchase_date', 'transaction_date'],
-                        'amount': ['revenue', 'value', 'price', 'total', 'sales', 'order_value']
-                    }
-                    if req_col in variations:
-                        for var in variations[req_col]:
-                            if var in df_col.lower():
-                                column_mapping[req_col] = df_col
-                                found = True
-                                break
-                if not found:
-                    return f"Missing required column: {req_col}. Available columns: {list(self.df.columns)}", None, None
-            # Rename columns to standard names
-            self.df = self.df.rename(columns=column_mapping)
-            # Clean and convert data types
-            self.df['customer_id'] = self.df['customer_id'].astype(str)
-            self.df['order_date'] = pd.to_datetime(self.df['order_date'], errors='coerce')
-            self.df['amount'] = pd.to_numeric(self.df['amount'], errors='coerce')
-            # Remove rows with invalid data
-            initial_rows = len(self.df)
-            self.df = self.df.dropna(subset=['customer_id', 'order_date', 'amount'])
-            final_rows = len(self.df)
-            if final_rows == 0:
-                return "No valid data rows found after cleaning", None, None
-            # Calculate RFM metrics
-            self.processed_df = self.calculate_rfm_metrics(self.df.copy())
-            # Perform customer segmentation
-            self.processed_df = self.perform_customer_segmentation(self.processed_df)
-            # Generate summary
-            summary_html = self.generate_summary_dashboard()
-            status_msg = f"✅ Data loaded successfully! Processed {final_rows} records from {self.df['customer_id'].nunique()} customers."
-            if initial_rows != final_rows:
-                status_msg += f" ({initial_rows - final_rows} invalid rows removed)"
-            return status_msg, summary_html, self.processed_df.head(20)
-        except Exception as e:
-            return f"❌ Error loading data: {str(e)}", None, None
-    def calculate_rfm_metrics(self, df):
-        """Calculate RFM metrics from transaction data"""
-        try:
-            current_date = df['order_date'].max() + timedelta(days=1)
-            # Calculate customer-level metrics
-            customer_metrics = df.groupby('customer_id').agg({
-                'order_date': ['max', 'count'],
-                'amount': ['sum', 'mean']
-            }).round(2)
-            # Flatten column names
-            customer_metrics.columns = ['last_order_date', 'frequency', 'monetary', 'avg_order_value']
-            customer_metrics['recency_days'] = (current_date - customer_metrics['last_order_date']).dt.days
-            # Merge back with original data
-            df_with_rfm = df.merge(
-                customer_metrics[['recency_days', 'frequency', 'monetary']],
-                left_on='customer_id',
-                right_index=True,
-                how='left'
             )
-            return df_with_rfm
-        except Exception as e:
-            print(f"Error in calculate_rfm_metrics: {e}")
-            # Add default RFM values
-            df['recency_days'] = 30
-            df['frequency'] = 1
-            df['monetary'] = df['amount']
-            return df
-    def perform_customer_segmentation(self, df):
-        """Perform customer segmentation based on RFM analysis"""
-        try:
-            # Get unique customer data
-            customer_df = df.groupby('customer_id').agg({
-                'recency_days': 'first',
-                'frequency': 'first',
-                'monetary': 'first'
-            }).reset_index()
-            # Calculate RFM scores using quantiles with duplicates handling
-            if len(customer_df) >= 5:
-                try:
-                    customer_df['R_Score'] = pd.qcut(customer_df['recency_days'], 5, labels=[5,4,3,2,1], duplicates='drop')
-                    customer_df['F_Score'] = pd.qcut(customer_df['frequency'], 5, labels=[1,2,3,4,5], duplicates='drop')
-                    customer_df['M_Score'] = pd.qcut(customer_df['monetary'], 5, labels=[1,2,3,4,5], duplicates='drop')
-                except (ValueError, TypeError):
-                    # Fallback to percentile-based scoring
-                    customer_df['R_Score'] = pd.cut(customer_df['recency_days'], bins=5, labels=[5,4,3,2,1], include_lowest=True)
-                    customer_df['F_Score'] = pd.cut(customer_df['frequency'], bins=5, labels=[1,2,3,4,5], include_lowest=True)
-                    customer_df['M_Score'] = pd.cut(customer_df['monetary'], bins=5, labels=[1,2,3,4,5], include_lowest=True)
-            else:
-                # Simple scoring for small datasets
-                customer_df['R_Score'] = 3
-                customer_df['F_Score'] = 3
-                customer_df['M_Score'] = 3
-            # Convert to int, handle NaN values
-            customer_df['R_Score'] = pd.to_numeric(customer_df['R_Score'], errors='coerce').fillna(3).astype(int)
-            customer_df['F_Score'] = pd.to_numeric(customer_df['F_Score'], errors='coerce').fillna(3).astype(int)
-            customer_df['M_Score'] = pd.to_numeric(customer_df['M_Score'], errors='coerce').fillna(3).astype(int)
-            def segment_customers(row):
-                if row['R_Score'] >= 4 and row['F_Score'] >= 4 and row['M_Score'] >= 4:
-                    return 'Champions'
-                elif row['R_Score'] >= 3 and row['F_Score'] >= 3 and row['M_Score'] >= 3:
-                    return 'Loyal Customers'
-                elif row['R_Score'] >= 3 and row['F_Score'] >= 2:
-                    return 'Potential Loyalists'
-                elif row['R_Score'] >= 4 and row['F_Score'] <= 2:
-                    return 'New Customers'
-                elif row['R_Score'] <= 2 and row['F_Score'] >= 3:
-                    return 'At Risk'
-                elif row['R_Score'] <= 2 and row['F_Score'] <= 2 and row['M_Score'] >= 3:
-                    return 'Cannot Lose Them'
-                elif row['R_Score'] <= 2 and row['F_Score'] <= 2 and row['M_Score'] <= 2:
-                    return 'Lost Customers'
-                else:
-                    return 'Others'
-            customer_df['Segment'] = customer_df.apply(segment_customers, axis=1)
-            customer_df['Churn_Risk'] = customer_df.apply(lambda x:
-                'High' if x['Segment'] in ['Lost Customers', 'At Risk'] else
-                'Medium' if x['Segment'] in ['Others', 'Cannot Lose Them'] else 'Low', axis=1)
-            # Merge segmentation data back
-            segment_data = customer_df[['customer_id', 'Segment', 'Churn_Risk', 'R_Score', 'F_Score', 'M_Score']]
-            df_with_segments = df.merge(segment_data, on='customer_id', how='left')
-            return df_with_segments
-        except Exception as e:
-            print(f"Error in perform_customer_segmentation: {e}")
-            # Return original df with dummy segments if segmentation fails
-            df['Segment'] = 'Others'
-            df['Churn_Risk'] = 'Medium'
-            df['R_Score'] = 3
-            df['F_Score'] = 3
-            df['M_Score'] = 3
-            return df
-    def generate_summary_dashboard(self):
-        """Generate modern dashboard summary with KPI cards"""
-        if self.processed_df is None:
-            return "No data loaded"
-        try:
-            total_customers = self.processed_df['customer_id'].nunique()
-            total_orders = len(self.processed_df)
-            total_revenue = self.processed_df['amount'].sum()
-            avg_order_value = self.processed_df['amount'].mean()
-            # Get segment and risk distributions
-            segment_dist = self.processed_df.groupby('customer_id')['Segment'].first().value_counts()
-            risk_dist = self.processed_df.groupby('customer_id')['Churn_Risk'].first().value_counts()
-            # Create modern dashboard
-            summary_html = f"""
-            <div style="display: flex; flex-wrap: wrap; gap: 1rem; margin-bottom: 2rem;">
-                <div style="flex: 1; min-width: 200px; background: linear-gradient(135deg, #3b82f6, #1d4ed8); padding: 1.5rem; border-radius: 12px; color: white; text-align: center;">
-                    <h3 style="margin: 0 0 0.5rem 0; font-size: 0.9rem; opacity: 0.9;">Total Customers</h3>
-                    <div style="font-size: 2.5rem; font-weight: bold;">{total_customers:,}</div>
-                </div>
-                <div style="flex: 1; min-width: 200px; background: linear-gradient(135deg, #10b981, #047857); padding: 1.5rem; border-radius: 12px; color: white; text-align: center;">
-                    <h3 style="margin: 0 0 0.5rem 0; font-size: 0.9rem; opacity: 0.9;">Total Revenue</h3>
-                    <div style="font-size: 2.5rem; font-weight: bold;">${total_revenue/1000000:.1f}M</div>
-                </div>
-                <div style="flex: 1; min-width: 200px; background: linear-gradient(135deg, #8b5cf6, #6d28d9); padding: 1.5rem; border-radius: 12px; color: white; text-align: center;">
-                    <h3 style="margin: 0 0 0.5rem 0; font-size: 0.9rem; opacity: 0.9;">Avg Order Value</h3>
-                    <div style="font-size: 2.5rem; font-weight: bold;">${avg_order_value:.0f}</div>
-                </div>
-                <div style="flex: 1; min-width: 200px; background: linear-gradient(135deg, #ef4444, #dc2626); padding: 1.5rem; border-radius: 12px; color: white; text-align: center;">
-                    <h3 style="margin: 0 0 0.5rem 0; font-size: 0.9rem; opacity: 0.9;">High Risk Customers</h3>
-                    <div style="font-size: 2.5rem; font-weight: bold;">{risk_dist.get('High', 0)}</div>
-                </div>
             </div>
-            <div style="background: #f8fafc; padding: 1.5rem; border-radius: 12px; border-left: 4px solid #6366f1;">
-                <h4 style="margin: 0 0 1rem 0; color: #374151;">Customer Segments Overview</h4>
-                <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)); gap: 1rem;">
-                    {' '.join([f'<div><strong>{segment}:</strong> {count}</div>' for segment, count in segment_dist.items()])}
-                </div>
             </div>
             """
-            return summary_html
-        except Exception as e:
-            return f"Error generating dashboard: {str(e)}"
-    def train_churn_model(self):
-        """Train churn prediction model"""
-        if self.processed_df is None:
-            return "❌ No data available. Please upload and process a CSV file first.", None
         try:
-            # Prepare customer-level features
-            customer_features = self.processed_df.groupby('customer_id').agg({
-                'recency_days': 'first',
-                'frequency': 'first',
-                'monetary': 'first',
-                'amount': ['mean', 'std', 'min', 'max'],
-                'order_date': ['min', 'max']
-            }).reset_index()
-            # Flatten column names
-            customer_features.columns = ['customer_id', 'recency_days', 'frequency', 'monetary',
-                                       'avg_amount', 'std_amount', 'min_amount', 'max_amount',
-                                       'first_order', 'last_order']
-            # Handle missing values
-            customer_features['std_amount'].fillna(0, inplace=True)
-            # Calculate additional features
-            customer_features['customer_lifetime'] = (customer_features['last_order'] - customer_features['first_order']).dt.days
-            customer_features['customer_lifetime'].fillna(0, inplace=True)
-            # Create churn labels based on recency (customers who haven't ordered in 90 days are churned)
-            customer_features['churn_label'] = (customer_features['recency_days'] > 90).astype(int)
-            # Check if we have enough data for training
-            if len(customer_features) < 10:
-                return "❌ Not enough data for model training (minimum 10 customers required).", None
-            # Check if we have both classes
-            if customer_features['churn_label'].nunique() < 2:
-                return "❌ Cannot train model: all customers have the same churn status.", None
-            # Select features for modeling
-            feature_cols = ['recency_days', 'frequency', 'monetary', 'avg_amount', 'std_amount',
-                           'min_amount', 'max_amount', 'customer_lifetime']
-            X = customer_features[feature_cols]
-            y = customer_features['churn_label']
-            # Train-test split
-            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
-            # Train model
-            if XGBOOST_AVAILABLE:
-                try:
-                    self.model = xgb.XGBClassifier(random_state=42, eval_metric='logloss')
-                    self.model.fit(X_train, y_train)
-                    model_name = "XGBoost Classifier"
-                except:
-                    self.model = RandomForestClassifier(random_state=42, n_estimators=100)
-                    self.model.fit(X_train, y_train)
-                    model_name = "Random Forest Classifier"
-            else:
-                self.model = RandomForestClassifier(random_state=42, n_estimators=100)
-                self.model.fit(X_train, y_train)
-                model_name = "Random Forest Classifier"
-            # Make predictions
-            y_pred = self.model.predict(X_test)
-            accuracy = accuracy_score(y_test, y_pred)
-            # Feature importance
-            self.feature_importance = pd.DataFrame({
-                'feature': feature_cols,
-                'importance': self.model.feature_importances_
-            }).sort_values('importance', ascending=False)
-            # Predict for all customers
-            all_predictions = self.model.predict_proba(X)[:, 1]
-            customer_features['churn_probability'] = all_predictions
-            self.predictions = customer_features
-            results_html = f"""
-            <div style="background: white; padding: 2rem; border-radius: 1rem; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); margin-bottom: 2rem;">
-                <div style="text-align: center; margin-bottom: 2rem;">
-                    <h3 style="color: #1f2937; font-size: 1.5rem; font-weight: bold; margin-bottom: 0.5rem;">
-                        ✅ Model Training Completed
-                    </h3>
-                    <p style="color: #6b7280;">{model_name} with Advanced Feature Engineering</p>
-                </div>
-                <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)); gap: 1rem; margin-bottom: 2rem;">
-                    <div style="background: linear-gradient(135deg, #6366f1, #4f46e5); padding: 1rem; border-radius: 8px; text-align: center; color: white;">
-                        <div style="font-size: 2rem; font-weight: bold;">{accuracy:.1%}</div>
-                        <div style="font-size: 0.9rem;">Model Accuracy</div>
-                    </div>
-                    <div style="background: linear-gradient(135deg, #10b981, #059669); padding: 1rem; border-radius: 8px; text-align: center; color: white;">
-                        <div style="font-size: 2rem; font-weight: bold;">{len(feature_cols)}</div>
-                        <div style="font-size: 0.9rem;">Features Used</div>
-                    </div>
-                    <div style="background: linear-gradient(135deg, #f59e0b, #d97706); padding: 1rem; border-radius: 8px; text-align: center; color: white;">
-                        <div style="font-size: 2rem; font-weight: bold;">{len(X_train)}</div>
-                        <div style="font-size: 0.9rem;">Training Samples</div>
-                    </div>
-                </div>
-                <div style="background: #f8fafc; padding: 1rem; border-radius: 8px;">
-                    <h4 style="color: #374151; margin-bottom: 1rem;">Top Feature Importance</h4>
-                    {''.join([f'<div style="display: flex; justify-content: space-between; padding: 0.5rem 0; border-bottom: 1px solid #e5e7eb;"><span>{row["feature"].replace("_", " ").title()}</span><span style="font-weight: bold;">{row["importance"]:.3f}</span></div>' for _, row in self.feature_importance.head(5).iterrows()])}
-                </div>
-            </div>
-            """
-            return results_html, self.create_feature_importance_chart()
         except Exception as e:
-            return f"❌ Error training model: {str(e)}", None
-    def create_feature_importance_chart(self):
-        """Create feature importance visualization"""
-        if self.feature_importance is None:
-            return None
         try:
-            fig = px.bar(
-                self.feature_importance.head(8),
-                x='importance',
-                y='feature',
-                orientation='h',
-                title='Feature Importance Analysis',
-                labels={'importance': 'Importance Score', 'feature': 'Features'},
-                color='importance',
-                color_continuous_scale='viridis'
-            )
-            fig.update_layout(
-                height=500,
-                showlegend=False,
-                plot_bgcolor='white',
-                paper_bgcolor='white',
-                title={'x': 0.5, 'xanchor': 'center'},
-                yaxis={'categoryorder': 'total ascending'}
             )
-            return fig
         except Exception as e:
-            print(f"Error creating feature importance chart: {e}")
-            return None
     def create_visualizations(self):
-        """Create comprehensive visualizations"""
-        if self.processed_df is None or len(self.processed_df) == 0:
-            print("No processed data available for visualization")
-            return None, None, None, None
         try:
-            print(f"Creating visualizations with {len(self.processed_df)} rows")
-            # 1. Customer Segment Distribution
-            segment_data = self.processed_df.groupby('customer_id')['Segment'].first().value_counts().reset_index()
-            segment_data.columns = ['Segment', 'Count']
-            print(f"Segment data: {segment_data}")
-            if len(segment_data) == 0:
-                print("No segment data found")
-                fig1 = None
-            else:
-                fig1 = px.pie(
-                    segment_data,
-                    values='Count',
-                    names='Segment',
-                    title='Customer Segment Distribution',
-                    hole=0.4,
-                    color_discrete_sequence=['#6366f1', '#10b981', '#f59e0b', '#ef4444', '#8b5cf6', '#ec4899']
-                )
-                fig1.update_traces(textposition='inside', textinfo='percent+label')
-                fig1.update_layout(height=400, title={'x': 0.5, 'xanchor': 'center'})
-            # 2. RFM Analysis
-            customer_rfm = self.processed_df.groupby('customer_id').agg({
-                'recency_days': 'first',
-                'frequency': 'first',
-                'monetary': 'first',
-                'Segment': 'first'
-            }).reset_index()
-            print(f"RFM data shape: {customer_rfm.shape}")
-            if len(customer_rfm) == 0:
-                print("No RFM data found")
-                fig2 = None
-            else:
-                fig2 = px.scatter(
-                    customer_rfm,
-                    x='recency_days',
-                    y='frequency',
-                    size='monetary',
-                    color='Segment',
-                    title='RFM Customer Behavior Matrix',
-                    labels={
-                        'recency_days': 'Days Since Last Purchase',
-                        'frequency': 'Purchase Frequency',
-                        'monetary': 'Total Revenue'
-                    }
-                )
-                fig2.update_layout(height=400, title={'x': 0.5, 'xanchor': 'center'})
-            # 3. Churn Risk Distribution
-            if self.predictions is not None and len(self.predictions) > 0:
-                print(f"Using predictions data with {len(self.predictions)} rows")
-                fig3 = px.histogram(
-                    self.predictions,
-                    x='churn_probability',
-                    nbins=20,
-                    title='Churn Probability Distribution',
-                    labels={'churn_probability': 'Churn Probability', 'count': 'Number of Customers'},
-                    color_discrete_sequence=['#6366f1']
-                )
-                fig3.add_vline(x=0.5, line_dash="dash", line_color="red",
-                              annotation_text="High Risk Threshold")
-            else:
-                risk_data = self.processed_df.groupby('customer_id')['Churn_Risk'].first().value_counts().reset_index()
-                risk_data.columns = ['Risk_Level', 'Count']
-                print(f"Risk data: {risk_data}")
-                if len(risk_data) == 0:
-                    print("No risk data found")
-                    fig3 = None
-                else:
-                    colors_map = {'High': '#ef4444', 'Medium': '#f59e0b', 'Low': '#10b981'}
-                    fig3 = px.bar(
-                        risk_data,
-                        x='Risk_Level',
-                        y='Count',
-                        title='Customer Churn Risk Distribution',
-                        color='Risk_Level',
-                        color_discrete_map=colors_map
-                    )
-                    fig3.update_layout(height=400, title={'x': 0.5, 'xanchor': 'center'}, showlegend=False)
-            # 4. Revenue Trends
-            try:
-                self.processed_df['order_month'] = self.processed_df['order_date'].dt.to_period('M')
-                monthly_revenue = self.processed_df.groupby('order_month')['amount'].sum().reset_index()
-                monthly_revenue['order_month'] = monthly_revenue['order_month'].astype(str)
-                print(f"Monthly revenue data: {monthly_revenue.head()}")
-                if len(monthly_revenue) == 0:
-                    fig4 = None
-                else:
-                    fig4 = px.line(
-                        monthly_revenue,
-                        x='order_month',
-                        y='amount',
-                        title='Monthly Revenue Trends',
-                        labels={'amount': 'Revenue ($)', 'order_month': 'Month'}
-                    )
-                    fig4.update_traces(line_color='#6366f1', line_width=3)
-                    fig4.update_layout(height=400, title={'x': 0.5, 'xanchor': 'center'})
-            except Exception as e:
-                print(f"Error creating revenue chart: {e}")
-                fig4 = None
-            return fig1, fig2, fig3, fig4
         except Exception as e:
-            print(f"Error creating visualizations: {e}")
-            import traceback
-            traceback.print_exc()
-            return None, None, None, None
-    def create_customer_table(self):
-        """Create customer segmentation table"""
-        if self.processed_df is None:
             return None
         try:
-            customer_summary = self.processed_df.groupby('customer_id').agg({
-                'Segment': 'first',
-                'Churn_Risk': 'first',
-                'recency_days': 'first',
-                'frequency': 'first',
-                'monetary': 'first',
-                'amount': 'mean'
-            }).reset_index()
-            if self.predictions is not None:
-                customer_summary = customer_summary.merge(
-                    self.predictions[['customer_id', 'churn_probability']],
-                    on='customer_id',
-                    how='left'
                 )
-                customer_summary['churn_probability'] = customer_summary['churn_probability'].fillna(0)
             else:
-                customer_summary['churn_probability'] = 0.5
-            customer_summary['monetary'] = customer_summary['monetary'].round(2)
-            customer_summary['amount'] = customer_summary['amount'].round(2)
-            customer_summary['churn_probability'] = (customer_summary['churn_probability'] * 100).round(1)
-            customer_summary.columns = [
-                'Customer ID', 'Segment', 'Risk Level', 'Recency (Days)',
-                'Frequency', 'Total Spent ($)', 'Avg Order ($)', 'Churn Probability (%)'
             ]
-            return customer_summary.head(50)
         except Exception as e:
-            print(f"Error creating customer table: {e}")
             return None
-    def get_customer_insights(self, customer_id):
-        """Get detailed insights for a specific customer"""
-        if self.processed_df is None:
-            return "❌ No data available"
-        if not customer_id:
-            return "Please enter a customer ID"
-        try:
-            customer_data = self.processed_df[self.processed_df['customer_id'] == customer_id]
-            if customer_data.empty:
-                return f"❌ Customer {customer_id} not found"
-            total_orders = len(customer_data)
-            total_spent = customer_data['amount'].sum()
-            avg_order_value = customer_data['amount'].mean()
-            segment = customer_data['Segment'].iloc[0]
-            risk_level = customer_data['Churn_Risk'].iloc[0]
-            recency = customer_data['recency_days'].iloc[0]
-            churn_prob = 0.5
-            if self.predictions is not None:
-                pred_data = self.predictions[self.predictions['customer_id'] == customer_id]
-                if not pred_data.empty:
-                    churn_prob = pred_data['churn_probability'].iloc[0]
-            insights_html = f"""
-            <div style="background: white; padding: 2rem; border-radius: 1rem; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); margin-bottom: 1rem;">
-                <h3 style="text-align: center; color: #1f2937; margin-bottom: 1.5rem;">Customer Profile: {customer_id}</h3>
-                <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 1rem; margin-bottom: 2rem;">
-                    <div style="background: linear-gradient(135deg, #6366f1, #4f46e5); padding: 1rem; border-radius: 8px; color: white; text-align: center;">
-                        <h4 style="margin: 0 0 0.5rem 0; font-size: 0.9rem; opacity: 0.9;">Segment</h4>
-                        <div style="font-size: 1.2rem; font-weight: bold;">{segment}</div>
-                    </div>
-                    <div style="background: linear-gradient(135deg, #ef4444, #dc2626); padding: 1rem; border-radius: 8px; color: white; text-align: center;">
-                        <h4 style="margin: 0 0 0.5rem 0; font-size: 0.9rem; opacity: 0.9;">Churn Risk</h4>
-                        <div style="font-size: 1.2rem; font-weight: bold;">{risk_level}</div>
-                    </div>
-                    <div style="background: linear-gradient(135deg, #8b5cf6, #6d28d9); padding: 1rem; border-radius: 8px; color: white; text-align: center;">
-                        <h4 style="margin: 0 0 0.5rem 0; font-size: 0.9rem; opacity: 0.9;">Churn Probability</h4>
-                        <div style="font-size: 1.2rem; font-weight: bold;">{churn_prob:.1%}</div>
-                    </div>
-                </div>
-                <div style="background: #f8fafc; padding: 1.5rem; border-radius: 8px; margin-bottom: 1rem;">
-                    <h4 style="color: #374151; margin-bottom: 1rem;">Transaction Analytics</h4>
-                    <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)); gap: 1rem;">
-                        <div>
-                            <div style="font-size: 0.8rem; color: #6b7280; margin-bottom: 0.2rem;">Total Orders</div>
-                            <div style="font-size: 1.5rem; font-weight: bold; color: #1f2937;">{total_orders}</div>
-                        </div>
-                        <div>
-                            <div style="font-size: 0.8rem; color: #6b7280; margin-bottom: 0.2rem;">Total Spent</div>
-                            <div style="font-size: 1.5rem; font-weight: bold; color: #1f2937;">${total_spent:,.0f}</div>
-                        </div>
-                        <div>
-                            <div style="font-size: 0.8rem; color: #6b7280; margin-bottom: 0.2rem;">Avg Order Value</div>
-                            <div style="font-size: 1.5rem; font-weight: bold; color: #1f2937;">${avg_order_value:.0f}</div>
-                        </div>
-                        <div>
-                            <div style="font-size: 0.8rem; color: #6b7280; margin-bottom: 0.2rem;">Days Since Last Order</div>
-                            <div style="font-size: 1.5rem; font-weight: bold; color: #1f2937;">{recency}</div>
-                        </div>
-                    </div>
-                </div>
-                <div style="background: linear-gradient(135deg, #f0f9ff, #e0f2fe); border-left: 4px solid #3b82f6; padding: 1rem; border-radius: 4px;">
-                    <h4 style="color: #1e40af; margin-bottom: 0.5rem;">Recommendations</h4>
-                    <p style="color: #1f2937; margin: 0;">{self._get_customer_recommendations(segment, risk_level, churn_prob, recency)}</p>
-                </div>
-            </div>
-            """
-            return insights_html
-        except Exception as e:
-            return f"Error getting customer insights: {str(e)}"
-    def _get_customer_recommendations(self, segment, risk_level, churn_prob, recency):
-        """Generate personalized recommendations based on customer profile"""
-        recommendations = []
-        if risk_level == 'High' or churn_prob > 0.7:
-            recommendations.append("URGENT: Personal outreach required within 24 hours")
-            recommendations.append("Offer retention incentive or loyalty program")
-        elif risk_level == 'Medium':
-            recommendations.append("Send personalized re-engagement campaign")
-        if segment == 'Champions':
-            recommendations.append("Invite to VIP program or advisory board")
-        elif segment == 'At Risk':
-            recommendations.append("Proactive customer success intervention needed")
-        elif segment == 'New Customers':
-            recommendations.append("Deploy onboarding campaign sequence")
-        if recency > 60:
-            recommendations.append("Win-back campaign with special offer")
-        return " • ".join(recommendations) if recommendations else "Continue monitoring customer engagement patterns."
     def generate_pdf_report(self):
-        """Generate PDF report (simplified version if ReportLab not available)"""
-        if not REPORTLAB_AVAILABLE:
-            return "PDF generation requires ReportLab library. Please install: pip install reportlab"
-        if self.processed_df is None:
-            return "No data available for report generation"
         try:
-            buffer = io.BytesIO()
-            doc = SimpleDocTemplate(buffer, pagesize=A4)
-            styles = getSampleStyleSheet()
-            story = []
-            # Title
-            title_style = ParagraphStyle('Title', parent=styles['Title'], fontSize=24, spaceAfter=30)
-            story.append(Paragraph("B2B Customer Analytics Report", title_style))
-            # Summary stats
-            total_customers = self.processed_df['customer_id'].nunique()
-            total_revenue = self.processed_df['amount'].sum()
-            story.append(Paragraph("Executive Summary", styles['Heading2']))
-            summary_text = f"""
-            This analysis covers {total_customers} customers with total revenue of ${total_revenue:,.2f}.
-            The data has been processed for customer segmentation and churn risk assessment.
-            """
-            story.append(Paragraph(summary_text, styles['Normal']))
-            # Build PDF
-            doc.build(story)
-            pdf_bytes = buffer.getvalue()
-            buffer.close()
-            return pdf_bytes
         except Exception as e:
-            return f"Error generating PDF: {str(e)}"
-def create_gradio_interface():
-    """Create the Gradio interface"""
-    # Initialize analytics instance
-    analytics = B2BCustomerAnalytics()
-    # Define interface functions
-    def load_data(file):
-        return analytics.load_and_process_data(file)
-    def train_model():
-        return analytics.train_churn_model()
-    def create_charts():
-        charts = analytics.create_visualizations()
-        return charts if charts[0] is not None else [None, None, None, None]
-    def get_customer_table():
-        return analytics.create_customer_table()
-    def get_insights(customer_id):
-        return analytics.get_customer_insights(customer_id)
-    def generate_report():
-        return analytics.generate_pdf_report()
-    # Custom CSS
-    custom_css = """
-    .gradio-container {
-        font-family: 'Inter', system-ui, sans-serif !important;
-        max-width: 1200px !important;
-    }
-    """
-    # Create interface
-    with gr.Blocks(theme=gr.themes.Soft(), title="B2B Customer Analytics", css=custom_css) as demo:
         gr.HTML("""
-        <div style="background: linear-gradient(135deg, #6366f1 0%, #8b5cf6 100%); padding: 2rem; border-radius: 1rem; color: white; text-align: center; margin-bottom: 2rem;">
-            <h1 style="font-size: 2.5rem; font-weight: bold; margin-bottom: 0.5rem;">B2B Customer Analytics Platform</h1>
-            <p style="font-size: 1.1rem; opacity: 0.9;">Advanced Customer Segmentation & Churn Prediction</p>
         </div>
         """)
         with gr.Tabs():
             with gr.Tab("Data Upload & Dashboard"):
                 with gr.Row():
-                    with gr.Column():
-                        file_input = gr.File(label="Upload Customer Data CSV", file_types=[".csv"])
-                        load_btn = gr.Button("Load & Process Data", variant="primary", size="lg")
-                        load_status = gr.HTML()
-                summary_display = gr.HTML()
                 data_preview = gr.DataFrame(label="Data Preview")
             with gr.Tab("Customer Segmentation"):
                 with gr.Row():
-                    with gr.Column():
-                        segment_chart = gr.Plot(label="Customer Segments")
-                    with gr.Column():
-                        rfm_chart = gr.Plot(label="RFM Analysis")
-                customer_table = gr.DataFrame(label="Customer Details")
             with gr.Tab("Churn Prediction"):
                 train_btn = gr.Button("Train Churn Model", variant="primary", size="lg")
-                model_results = gr.HTML()
                 with gr.Row():
-                    with gr.Column():
-                        performance_chart = gr.Plot(label="Feature Importance")
-                    with gr.Column():
-                        churn_chart = gr.Plot(label="Churn Risk")
-            with gr.Tab("Revenue Analytics"):
-                revenue_chart = gr.Plot(label="Monthly Revenue Trends")
-            with gr.Tab("Customer Insights"):
-                with gr.Row():
-                    customer_id_input = gr.Textbox(label="Customer ID", placeholder="Enter customer ID")
-                    insights_btn = gr.Button("Get Profile", variant="primary")
-                customer_insights = gr.HTML()
             with gr.Tab("Reports"):
                 report_btn = gr.Button("Generate PDF Report", variant="primary", size="lg")
                 report_file = gr.File(label="Download Report")
-        # Event handlers with better error handling
-        def safe_load_data(file):
-            try:
-                return load_data(file)
-            except Exception as e:
-                return f"Error: {str(e)}", None, None
-        def safe_create_charts():
-            try:
-                return create_charts()
-            except Exception as e:
-                return None, None, None, None
-        def safe_train_model():
-            try:
-                return train_model()
-            except Exception as e:
-                return f"Error: {str(e)}", None
-        def safe_get_table():
-            try:
-                return get_customer_table()
-            except Exception as e:
-                return None
-        def safe_get_insights(customer_id):
-            try:
-                return get_insights(customer_id)
-            except Exception as e:
-                return f"Error: {str(e)}"
-        # Connect events - fix the chart loading issue
-        def load_and_update_all(file):
-            # Load data first
-            status, summary, preview = safe_load_data(file)
-            # Then create charts if data loaded successfully
-            if "successfully" in str(status):
-                charts = safe_create_charts()
-                table = safe_get_table()
-                return status, summary, preview, charts[0], charts[1], charts[2], charts[3], table
-            else:
-                return status, summary, preview, None, None, None, None, None
         load_btn.click(
-            fn=load_and_update_all,
             inputs=[file_input],
-            outputs=[load_status, summary_display, data_preview, segment_chart, rfm_chart, churn_chart, revenue_chart, customer_table]
         )
         train_btn.click(
-            fn=safe_train_model,
-            outputs=[model_results, performance_chart]
-        )
-        insights_btn.click(
-            fn=safe_get_insights,
-            inputs=[customer_id_input],
-            outputs=[customer_insights]
-        )
-        report_btn.click(
-            fn=generate_report,
-            outputs=[report_file]
-        )
-    return demo
-if __name__ == "__main__":
-    demo = create_gradio_interface()
-    demo.launch(share=True, server_name="0.0.0.0", server_port=7860)

 import numpy as np
 import matplotlib.pyplot as plt
 import seaborn as sns
+from sklearn.model_selection import train_test_split, cross_val_score
 from sklearn.ensemble import RandomForestClassifier
+from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_auc_score
 import plotly.express as px
 import plotly.graph_objects as go
 from datetime import datetime, timedelta
 import io
 import base64
 import warnings
 warnings.filterwarnings('ignore')
+# Optional imports with fallbacks
 try:
     import xgboost as xgb
     XGBOOST_AVAILABLE = True
 except ImportError:
     XGBOOST_AVAILABLE = False
 try:
     from reportlab.lib.pagesizes import letter, A4
     from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
     from reportlab.lib.units import inch
     from reportlab.lib import colors
+    from reportlab.graphics.shapes import Drawing
+    from reportlab.graphics.charts.piecharts import Pie
+    from reportlab.graphics.charts.barcharts import VerticalBarChart
+    from reportlab.graphics import renderPDF
     REPORTLAB_AVAILABLE = True
 except ImportError:
     REPORTLAB_AVAILABLE = False
# Configuration
# Tunable thresholds shared by the segmentation and churn-model code below.
CONFIG = {
    # ChurnPredictor.train labels a customer as churned when
    # recency_days exceeds this many days.
    'churn_threshold_days': 90,
    # Drawn as the dashed "High Risk Threshold" line on the churn
    # probability histogram.
    'high_risk_probability': 0.7,
    # Number of quantile bins for the R/F/M scores; also the minimum number
    # of customers needed before quantile scoring is used at all.
    'rfm_quantiles': 5,
    # ChurnPredictor.train raises ValueError below this many customers.
    'min_customers_for_training': 10
}
# Hex colours used by the HTML dashboard cards and the churn-risk bar chart.
COLORS = {
    'primary': '#6366f1',
    'success': '#10b981',
    'warning': '#f59e0b',
    'danger': '#ef4444',
    'purple': '#8b5cf6'
}
class DataProcessor:
    """Load a transactions CSV and normalise it to the standard schema.

    The standard schema has three columns: ``customer_id`` (str),
    ``order_date`` (datetime) and ``amount`` (strictly positive number).
    """

    # Accepted aliases for each standard column name, matched case-insensitively.
    _COLUMN_VARIANTS = {
        'customer_id': ['customer', 'cust_id', 'id', 'customerid', 'client_id'],
        'order_date': ['date', 'orderdate', 'purchase_date', 'transaction_date'],
        'amount': ['revenue', 'value', 'price', 'total', 'sales', 'order_value']
    }

    @staticmethod
    def load_and_validate(file_path):
        """Read a CSV, rename its columns to the standard names and clean it.

        Args:
            file_path: Path or file-like object accepted by ``pd.read_csv``.

        Returns:
            A cleaned DataFrame containing at least the three standard columns.

        Raises:
            ValueError: If a required column cannot be identified.
        """
        df = pd.read_csv(file_path)
        column_map = DataProcessor._map_columns(df.columns)
        df = df.rename(columns=column_map)
        return DataProcessor._clean_data(df)

    @staticmethod
    def _match_rank(name, standard_name, variants):
        """Return how well a lower-cased column name matches (0 = best), or None."""
        if name == standard_name:
            return 0
        if name in variants:
            return 1
        if standard_name in name:
            return 2
        if any(variant in name for variant in variants):
            return 3
        return None

    @staticmethod
    def _map_columns(columns):
        """Map various column name formats to standard names.

        Each standard name is assigned to the best-matching unclaimed column:
        exact matches beat alias matches, which beat substring matches.  This
        keeps a stray ``order_id`` column (which merely contains ``id``) from
        being picked as ``customer_id`` when a real ``customer_id`` exists, and
        stops one column from being claimed by two standard names.

        Args:
            columns: Iterable of original column labels.

        Returns:
            Dict of ``{original_label: standard_name}`` suitable for
            ``DataFrame.rename(columns=...)``.
        """
        candidates = [(col, str(col).lower().strip()) for col in columns]
        mapping = {}
        for standard_name, variants in DataProcessor._COLUMN_VARIANTS.items():
            best_rank, best_col = None, None
            for col, name in candidates:
                if col in mapping:
                    continue  # already claimed by an earlier standard name
                rank = DataProcessor._match_rank(name, standard_name, variants)
                # Strict '<' keeps the left-most column on ties.
                if rank is not None and (best_rank is None or rank < best_rank):
                    best_rank, best_col = rank, col
            if best_col is not None:
                mapping[best_col] = standard_name
        return mapping

    @staticmethod
    def _clean_data(df):
        """Clean and convert data types without mutating the caller's frame.

        Rows are dropped when the customer id is missing, the date or amount
        cannot be parsed, or the amount is not strictly positive.

        Raises:
            ValueError: If any of the three standard columns is absent.
        """
        required_cols = ['customer_id', 'order_date', 'amount']
        missing_cols = [col for col in required_cols if col not in df.columns]
        if missing_cols:
            raise ValueError(f"Missing columns: {missing_cols}")

        # Drop missing ids BEFORE the str cast: astype(str) would turn NaN
        # into the literal 'nan', which dropna can no longer detect.
        df = df.dropna(subset=['customer_id'])
        df['customer_id'] = df['customer_id'].astype(str)
        df['order_date'] = pd.to_datetime(df['order_date'], errors='coerce')
        df['amount'] = pd.to_numeric(df['amount'], errors='coerce')

        df = df.dropna(subset=required_cols)
        # Remove negative/zero amounts
        return df[df['amount'] > 0].copy()
class FeatureEngineering:
    """Advanced feature engineering for customer analytics"""

    @staticmethod
    def calculate_rfm_features(df):
        """Calculate RFM and additional behavioral features per customer.

        Args:
            df: Transactions with ``customer_id``, ``order_date`` (datetime)
                and ``amount`` columns.

        Returns:
            One row per customer with: first_order, last_order, frequency,
            monetary, avg_amount, std_amount, min_amount, max_amount,
            recency_days, customer_lifetime_days, order_frequency,
            amount_trend and amount_consistency.  Ratios whose denominator is
            zero are reported as NaN (amount_trend) or treated as zero
            variation (amount_consistency) rather than as +/-inf.
        """
        # Recency is measured from the day after the latest order in the data.
        current_date = df['order_date'].max() + timedelta(days=1)

        features = df.groupby('customer_id').agg(
            first_order=('order_date', 'min'),
            last_order=('order_date', 'max'),
            frequency=('order_date', 'count'),
            monetary=('amount', 'sum'),
            avg_amount=('amount', 'mean'),
            std_amount=('amount', 'std'),
            min_amount=('amount', 'min'),
            max_amount=('amount', 'max'),
        )

        features['recency_days'] = (current_date - features['last_order']).dt.days
        features['customer_lifetime_days'] = (
            features['last_order'] - features['first_order']
        ).dt.days
        # std is NaN for single-order customers; treat that as no spread.
        features['std_amount'] = features['std_amount'].fillna(0)

        # +1 avoids dividing by zero for customers whose orders share one day.
        features['order_frequency'] = features['frequency'] / (
            features['customer_lifetime_days'] + 1
        )

        # A zero denominator would yield +/-inf, which downstream estimators
        # reject; map those to NaN so they are handled like other gaps.
        infinities = [np.inf, -np.inf]
        features['amount_trend'] = (
            features['max_amount'] / features['min_amount']
        ).replace(infinities, np.nan)
        variation = (
            features['std_amount'] / features['avg_amount']
        ).replace(infinities, np.nan).fillna(0)
        features['amount_consistency'] = 1 - variation

        return features.reset_index()
class CustomerSegmenter:
    """Customer segmentation using RFM analysis"""

    @staticmethod
    def perform_segmentation(customer_features, quantiles=None):
        """Segment customers based on RFM scores.

        Args:
            customer_features: Per-customer frame with ``recency_days``,
                ``frequency`` and ``monetary`` columns.
            quantiles: Number of quantile bins for the scores.  Defaults to
                ``CONFIG['rfm_quantiles']``.  The segment rules in
                ``_assign_segment`` are written for a 1-5 scale.

        Returns:
            A copy of the input with integer ``r_score``, ``f_score`` and
            ``m_score`` columns plus ``segment`` and ``churn_risk`` labels.
        """
        if quantiles is None:
            quantiles = CONFIG['rfm_quantiles']
        df = customer_features.copy()

        if df.empty:
            # Binning and row-wise apply both fail on empty input; return the
            # expected columns with no rows instead.
            for col in ('r_score', 'f_score', 'm_score'):
                df[col] = pd.Series(dtype=int)
            df['segment'] = pd.Series(dtype=object)
            df['churn_risk'] = pd.Series(dtype=object)
            return df

        # Quantile scoring needs at least one customer per bin; otherwise use
        # three equal-width bins (scores 1-3).
        use_quantiles = len(df) >= quantiles
        n_bins = quantiles if use_quantiles else 3

        # Low recency is good, so its score scale is reversed.
        df['r_score'] = CustomerSegmenter._score(
            df['recency_days'], n_bins, use_quantiles, reverse=True)
        df['f_score'] = CustomerSegmenter._score(
            df['frequency'], n_bins, use_quantiles)
        df['m_score'] = CustomerSegmenter._score(
            df['monetary'], n_bins, use_quantiles)

        df['segment'] = df.apply(CustomerSegmenter._assign_segment, axis=1)
        df['churn_risk'] = df['segment'].map(CustomerSegmenter._get_risk_mapping())
        return df

    @staticmethod
    def _score(values, n_bins, use_quantiles, reverse=False):
        """Bin a numeric Series into integer scores.

        Bin codes are requested instead of explicit labels: when tied values
        collapse quantile edges (``duplicates='drop'``) the number of bins
        shrinks, and a fixed-length label list would raise ValueError.
        Values that cannot be binned fall back to the neutral score 3.
        """
        if use_quantiles:
            codes = pd.qcut(values, n_bins, labels=False, duplicates='drop')
        else:
            codes = pd.cut(values, bins=n_bins, labels=False)
        scores = (n_bins - codes) if reverse else (codes + 1)
        return scores.fillna(3).astype(int)

    @staticmethod
    def _assign_segment(row):
        """Assign customer segment based on RFM scores.

        Rules are evaluated top to bottom; the first match wins.
        """
        r, f, m = row['r_score'], row['f_score'], row['m_score']
        if r >= 4 and f >= 4 and m >= 4:
            return 'Champions'
        elif r >= 3 and f >= 3 and m >= 3:
            return 'Loyal Customers'
        elif r >= 3 and f >= 2:
            return 'Potential Loyalists'
        elif r >= 4 and f <= 2:
            return 'New Customers'
        elif r <= 2 and f >= 3:
            return 'At Risk'
        elif r <= 2 and f <= 2 and m >= 3:
            return 'Cannot Lose'
        elif r <= 2 and f <= 2 and m <= 2:
            return 'Lost'
        else:
            return 'Others'

    @staticmethod
    def _get_risk_mapping():
        """Map segments to risk levels"""
        return {
            'Champions': 'Low',
            'Loyal Customers': 'Low',
            'Potential Loyalists': 'Medium',
            'New Customers': 'Low',
            'At Risk': 'High',
            'Cannot Lose': 'High',
            'Lost': 'High',
            'Others': 'Medium'
        }
class ChurnPredictor:
    """Machine learning model for churn prediction"""

    # recency_days is deliberately NOT a feature: the churn label is defined
    # as recency_days > CONFIG['churn_threshold_days'], so including it lets
    # the model read the answer directly (target leakage).
    FEATURE_COLS = [
        'frequency', 'monetary', 'avg_amount', 'std_amount',
        'customer_lifetime_days', 'order_frequency', 'amount_trend',
        'amount_consistency'
    ]

    def __init__(self):
        self.model = None
        self.feature_importance = None

    def train(self, customer_features):
        """Train churn prediction model.

        Args:
            customer_features: Per-customer frame containing ``recency_days``
                and every column in ``FEATURE_COLS``.

        Returns:
            Dict with ``model_type``, ``cv_auc_mean``, ``cv_auc_std``,
            ``feature_importance`` (DataFrame sorted descending) and
            ``predictions`` (input copy plus ``churn_label`` and
            ``churn_probability``).  The AUC values are NaN when the minority
            class is too small to cross-validate.

        Raises:
            ValueError: With fewer than ``CONFIG['min_customers_for_training']``
                customers, or when every customer has the same churn label.
        """
        df = customer_features.copy()

        # Create target variable
        df['churn_label'] = (df['recency_days'] > CONFIG['churn_threshold_days']).astype(int)

        # Validate data
        if len(df) < CONFIG['min_customers_for_training']:
            raise ValueError(f"Insufficient data: need at least {CONFIG['min_customers_for_training']} customers")
        if df['churn_label'].nunique() < 2:
            raise ValueError("All customers have same churn status - cannot train model")

        feature_cols = list(self.FEATURE_COLS)
        # Ratio features can carry +/-inf or NaN; estimators reject inf.
        X = df[feature_cols].replace([np.inf, -np.inf], np.nan).fillna(0)
        y = df['churn_label']

        # Train model
        self.model = self._get_best_model()
        self.model.fit(X, y)

        # Feature importance
        self.feature_importance = pd.DataFrame({
            'feature': feature_cols,
            'importance': self.model.feature_importances_
        }).sort_values('importance', ascending=False)

        # Model evaluation
        cv_scores = self._cross_validate(X, y)

        # Predictions for all customers
        df['churn_probability'] = self.model.predict_proba(X)[:, 1]

        return {
            'model_type': type(self.model).__name__,
            'cv_auc_mean': cv_scores.mean(),
            'cv_auc_std': cv_scores.std(),
            'feature_importance': self.feature_importance,
            'predictions': df
        }

    def _cross_validate(self, X, y):
        """Return ROC-AUC scores from stratified CV, or [NaN] if infeasible.

        The fold count is capped at the minority-class size so every fold
        contains both classes; ROC-AUC is undefined otherwise.
        """
        folds = min(5, int(y.value_counts().min()))
        if folds < 2:
            return np.array([np.nan])
        return cross_val_score(self.model, X, y, cv=folds, scoring='roc_auc')

    def _get_best_model(self):
        """Select best available model (XGBoost if usable, else RandomForest)."""
        if XGBOOST_AVAILABLE:
            try:
                return xgb.XGBClassifier(random_state=42, eval_metric='logloss')
            except Exception as exc:
                # Fall back rather than fail, but leave a trace of why.
                print(f"XGBoost unavailable, falling back to RandomForest: {exc}")
        return RandomForestClassifier(random_state=42, n_estimators=100)
class Visualizer:
    """Builders for the Plotly figures shown in the app."""

    @staticmethod
    def create_segment_chart(df):
        """Return a donut chart of customer counts per segment."""
        counts = df['segment'].value_counts()
        pie_options = dict(
            values=counts.values,
            names=counts.index,
            title='Customer Segment Distribution',
            hole=0.4,
            color_discrete_sequence=px.colors.qualitative.Set3,
        )
        chart = px.pie(**pie_options)
        chart.update_layout(height=400, title_x=0.5)
        return chart

    @staticmethod
    def create_rfm_scatter(df):
        """Return a recency-vs-frequency scatter sized by monetary value."""
        axis_labels = {
            'recency_days': 'Days Since Last Order',
            'frequency': 'Order Count',
        }
        chart = px.scatter(
            df,
            x='recency_days',
            y='frequency',
            size='monetary',
            color='segment',
            title='Customer Behavior Matrix (RFM)',
            labels=axis_labels,
        )
        chart.update_layout(height=400, title_x=0.5)
        return chart

    @staticmethod
    def create_churn_distribution(df):
        """Return the churn chart for *df*.

        Without a ``churn_probability`` column this is a bar chart of the
        rule-based ``churn_risk`` levels; with one it is a probability
        histogram marked with the high-risk threshold.
        """
        if 'churn_probability' not in df.columns:
            level_counts = df['churn_risk'].value_counts()
            risk_palette = {
                'High': COLORS['danger'],
                'Medium': COLORS['warning'],
                'Low': COLORS['success'],
            }
            chart = px.bar(
                x=level_counts.index,
                y=level_counts.values,
                title='Churn Risk Distribution',
                color=level_counts.index,
                color_discrete_map=risk_palette,
            )
        else:
            chart = px.histogram(
                df,
                x='churn_probability',
                nbins=20,
                title='Churn Probability Distribution',
                labels={'churn_probability': 'Churn Probability'},
            )
            chart.add_vline(
                x=CONFIG['high_risk_probability'],
                line_dash="dash",
                line_color="red",
                annotation_text="High Risk Threshold",
            )
        chart.update_layout(height=400, title_x=0.5)
        return chart

    @staticmethod
    def create_feature_importance_chart(feature_importance):
        """Return a horizontal bar chart of the eight top-listed features."""
        top_rows = feature_importance.head(8)
        chart = px.bar(
            top_rows,
            x='importance',
            y='feature',
            orientation='h',
            title='Feature Importance Analysis',
            color='importance',
            color_continuous_scale='viridis',
        )
        layout_options = {
            'height': 500,
            'title_x': 0.5,
            'yaxis': {'categoryorder': 'total ascending'},
        }
        chart.update_layout(**layout_options)
        return chart
class ReportGenerator:
    """Generate dashboards and PDF reports"""
    @staticmethod
    def create_dashboard(df, model_results=None):
        """Generate HTML dashboard.

        Args:
            df: Per-customer frame; reads the ``monetary``, ``avg_amount`` and
                ``churn_risk`` columns.
            model_results: Optional dict with ``model_type``, ``cv_auc_mean``
                and ``cv_auc_std``; when truthy a model-performance panel is
                appended.

        Returns:
            An HTML string with four KPI cards (customers, revenue in
            thousands, average order value, high-risk count).
        """
        total_customers = len(df)
        total_revenue = df['monetary'].sum()
        # Mean of per-customer averages, not a transaction-weighted mean.
        avg_order_value = df['avg_amount'].mean()
        high_risk_count = len(df[df['churn_risk'] == 'High'])
        dashboard_html = f"""
        <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 1rem; margin-bottom: 2rem;">
            <div style="background: linear-gradient(135deg, {COLORS['primary']}, #4f46e5); padding: 1.5rem; border-radius: 12px; color: white; text-align: center;">
                <h3 style="margin: 0 0 0.5rem 0; font-size: 0.9rem; opacity: 0.9;">Total Customers</h3>
                <div style="font-size: 2.5rem; font-weight: bold;">{total_customers:,}</div>
            </div>
            <div style="background: linear-gradient(135deg, {COLORS['success']}, #047857); padding: 1.5rem; border-radius: 12px; color: white; text-align: center;">
                <h3 style="margin: 0 0 0.5rem 0; font-size: 0.9rem; opacity: 0.9;">Total Revenue</h3>
                <div style="font-size: 2.5rem; font-weight: bold;">${total_revenue/1000:.0f}K</div>
            </div>
            <div style="background: linear-gradient(135deg, {COLORS['purple']}, #6d28d9); padding: 1.5rem; border-radius: 12px; color: white; text-align: center;">
                <h3 style="margin: 0 0 0.5rem 0; font-size: 0.9rem; opacity: 0.9;">Avg Order Value</h3>
                <div style="font-size: 2.5rem; font-weight: bold;">${avg_order_value:.0f}</div>
            </div>
            <div style="background: linear-gradient(135deg, {COLORS['danger']}, #dc2626); padding: 1.5rem; border-radius: 12px; color: white; text-align: center;">
                <h3 style="margin: 0 0 0.5rem 0; font-size: 0.9rem; opacity: 0.9;">High Risk</h3>
                <div style="font-size: 2.5rem; font-weight: bold;">{high_risk_count}</div>
            </div>
        </div>
        """
        if model_results:
            dashboard_html += f"""
            <div style="background: #f8fafc; padding: 1.5rem; border-radius: 12px; border-left: 4px solid {COLORS['primary']}; margin-top: 1rem;">
                <h4 style="margin: 0 0 1rem 0; color: #374151;">Model Performance</h4>
                <p><strong>Model:</strong> {model_results['model_type']}</p>
                <p><strong>Cross-validation AUC:</strong> {model_results['cv_auc_mean']:.3f} ± {model_results['cv_auc_std']:.3f}</p>
            </div>
            """
        return dashboard_html
    @staticmethod
    def generate_pdf_report(df, model_results=None):
        """Generate comprehensive PDF report.

        Args:
            df: Per-customer frame; reads the ``monetary`` and ``segment``
                columns.
            model_results: Optional dict with ``model_type``, ``cv_auc_mean``,
                ``cv_auc_std`` and a ``feature_importance`` DataFrame.

        Returns:
            The rendered PDF as ``bytes``.

        Raises:
            ImportError: If ReportLab could not be imported at module load.
        """
        if not REPORTLAB_AVAILABLE:
            raise ImportError("ReportLab is required for PDF generation")
        # NOTE(review): SimpleDocTemplate, Paragraph, Spacer, Table and
        # TableStyle are not imported in the visible part of the file --
        # presumably from reportlab.platypus in a line outside this hunk; confirm.
        buffer = io.BytesIO()
        doc = SimpleDocTemplate(buffer, pagesize=A4, rightMargin=72, leftMargin=72,
                               topMargin=72, bottomMargin=18)
        styles = getSampleStyleSheet()
        # Flowables are appended in reading order and laid out by doc.build().
        story = []
        # Title
        title_style = ParagraphStyle('CustomTitle', parent=styles['Title'],
                                    fontSize=24, spaceAfter=30, alignment=1)
        story.append(Paragraph("B2B Customer Analytics Report", title_style))
        story.append(Spacer(1, 12))
        # Executive Summary
        story.append(Paragraph("Executive Summary", styles['Heading2']))
        total_customers = len(df)
        total_revenue = df['monetary'].sum()
        # Mean revenue per customer, reported below as "lifetime value".
        avg_revenue = df['monetary'].mean()
        summary_text = f"""
        <para>This comprehensive analysis covers <b>{total_customers:,}</b> customers with
        total revenue of <b>${total_revenue:,.0f}</b>. The average customer lifetime value
        is <b>${avg_revenue:.0f}</b>.</para>
        <para>Customers have been segmented using advanced RFM analysis, and machine learning
        models have been applied for churn prediction.</para>
        """
        story.append(Paragraph(summary_text, styles['Normal']))
        story.append(Spacer(1, 12))
        # Customer Segments
        story.append(Paragraph("Customer Segmentation", styles['Heading2']))
        segment_data = df['segment'].value_counts()
        # First row is the table header; one row per segment follows.
        segment_table_data = [['Segment', 'Count', 'Percentage']]
        for segment, count in segment_data.items():
            percentage = f"{count/len(df)*100:.1f}%"
            segment_table_data.append([segment, str(count), percentage])
        segment_table = Table(segment_table_data)
        # Style tuples are (command, (col, row) start, (col, row) end, value);
        # row 0 is the header, -1 means the last row/column.
        segment_table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
            ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
            ('FONTSIZE', (0, 0), (-1, 0), 14),
            ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
            ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
            ('GRID', (0, 0), (-1, -1), 1, colors.black)
        ]))
        story.append(segment_table)
        story.append(Spacer(1, 12))
        # Model Performance (only when a model has been trained)
        if model_results:
            story.append(Paragraph("Churn Prediction Model", styles['Heading2']))
            model_text = f"""
            <para><b>Model Type:</b> {model_results['model_type']}</para>
            <para><b>Cross-validation AUC:</b> {model_results['cv_auc_mean']:.3f} ± {model_results['cv_auc_std']:.3f}</para>
            <para>The model uses advanced feature engineering including behavioral patterns
            and customer lifecycle metrics for accurate churn prediction.</para>
            """
            story.append(Paragraph(model_text, styles['Normal']))
            story.append(Spacer(1, 12))
            # Top features
            if not model_results['feature_importance'].empty:
                story.append(Paragraph("Key Predictive Features", styles['Heading3']))
                feature_table_data = [['Feature', 'Importance']]
                # Only the first five rows of the importance frame are listed.
                for _, row in model_results['feature_importance'].head(5).iterrows():
                    feature_table_data.append([row['feature'].replace('_', ' ').title(), f"{row['importance']:.3f}"])
                feature_table = Table(feature_table_data)
                feature_table.setStyle(TableStyle([
                    ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
                    ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
                    ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
                    ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
                    ('GRID', (0, 0), (-1, -1), 1, colors.black)
                ]))
                story.append(feature_table)
        # Build PDF
        doc.build(story)
        # Read the bytes out before closing; the buffer is unusable afterwards.
        pdf_bytes = buffer.getvalue()
        buffer.close()
        return pdf_bytes
class B2BAnalyticsApp:
    """Main application orchestrator.

    Holds the state for one loaded dataset (raw transactions, per-customer
    features, segments, optional model results) and exposes the callbacks
    used by the UI.  Callbacks report failures through their return values
    instead of raising.
    """

    def __init__(self):
        self.raw_data = None
        self.customer_features = None
        self.segmented_data = None
        self.model_results = None
        self.predictor = ChurnPredictor()

    def load_data(self, file):
        """Load and process uploaded file.

        Args:
            file: Uploaded file object exposing ``.name``, or a plain path
                string; ``None`` when nothing was uploaded.

        Returns:
            ``(status, dashboard_html, preview_frame)``; the last two are
            ``None`` on failure.
        """
        if file is None:
            return "Please upload a CSV file", None, None
        try:
            # The upload widget may hand over a temp-file wrapper or a path.
            path = getattr(file, 'name', file)
            raw_data = DataProcessor.load_and_validate(path)
            customer_features = FeatureEngineering.calculate_rfm_features(raw_data)
            segmented_data = CustomerSegmenter.perform_segmentation(customer_features)
            dashboard = ReportGenerator.create_dashboard(segmented_data)
        except Exception as e:
            return f"Error: {str(e)}", None, None

        # Commit only after every step succeeded, so a failed upload cannot
        # leave the app holding a mix of old and new data.
        self.raw_data = raw_data
        self.customer_features = customer_features
        self.segmented_data = segmented_data
        # Results of a model trained on the previous dataset are now stale.
        self.model_results = None

        preview = segmented_data.head(20)
        status = f"Successfully processed {len(segmented_data)} customers from {len(raw_data)} transactions"
        return status, dashboard, preview

    def train_churn_model(self):
        """Train churn prediction model.

        Returns:
            ``(dashboard_html, feature_importance_figure)`` on success, or
            ``(message, None)`` when no data is loaded or training fails.
        """
        if self.segmented_data is None:
            return "Please load data first", None
        try:
            self.model_results = self.predictor.train(self.segmented_data)
            # Update dashboard with model results
            dashboard = ReportGenerator.create_dashboard(self.segmented_data, self.model_results)
            importance_chart = Visualizer.create_feature_importance_chart(
                self.model_results['feature_importance']
            )
            return dashboard, importance_chart
        except Exception as e:
            return f"Error: {str(e)}", None

    def create_visualizations(self):
        """Generate all visualization charts.

        Returns:
            ``(segment_chart, rfm_chart, churn_chart)``, or three ``None``
            values when no data is loaded or chart creation fails.
        """
        if self.segmented_data is None:
            return None, None, None
        try:
            # Use predictions if available, otherwise use segmented data
            data_for_viz = (self.model_results['predictions'] if self.model_results
                            else self.segmented_data)
            segment_chart = Visualizer.create_segment_chart(data_for_viz)
            rfm_chart = Visualizer.create_rfm_scatter(data_for_viz)
            churn_chart = Visualizer.create_churn_distribution(data_for_viz)
            return segment_chart, rfm_chart, churn_chart
        except Exception as e:
            print(f"Visualization error: {e}")
            return None, None, None

    def get_customer_summary_table(self):
        """Generate customer summary table.

        Returns:
            Up to 100 rows with display-ready column names, or ``None`` when
            no data is loaded or formatting fails.  Without a trained model
            the "Churn Risk (%)" column holds the placeholder 50.0.
        """
        if self.segmented_data is None:
            return None
        try:
            display_data = self.segmented_data.copy()
            if self.model_results:
                pred_data = self.model_results['predictions']
                display_data = display_data.merge(
                    pred_data[['customer_id', 'churn_probability']],
                    on='customer_id', how='left'
                )
                # Probability (0-1) -> percentage with one decimal.
                display_data['churn_probability'] = (display_data['churn_probability'] * 100).round(1)
            else:
                display_data['churn_probability'] = 50.0

            summary_table = display_data[[
                'customer_id', 'segment', 'churn_risk', 'recency_days',
                'frequency', 'monetary', 'avg_amount', 'churn_probability'
            ]].round(2)
            summary_table.columns = [
                'Customer ID', 'Segment', 'Risk Level', 'Recency (Days)',
                'Orders', 'Total Revenue ($)', 'Avg Order ($)', 'Churn Risk (%)'
            ]
            return summary_table.head(100)
        except Exception as e:
            print(f"Table generation error: {e}")
            return None

    def generate_pdf_report(self):
        """Generate the PDF report and return the path of the written file.

        Returns:
            Path to a ``.pdf`` temp file, or ``None`` when no data is loaded
            or generation fails.  The file is not deleted automatically.
        """
        import tempfile

        if self.segmented_data is None:
            return None
        try:
            pdf_bytes = ReportGenerator.generate_pdf_report(
                self.segmented_data, self.model_results
            )
            # delete=False: the file must outlive this call to be downloadable.
            with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
                tmp_file.write(pdf_bytes)
            return tmp_file.name
        except Exception as e:
            print(f"PDF generation error: {e}")
            return None
+def create_interface():
+    """Create Gradio interface"""
+    app = B2BAnalyticsApp()
+    with gr.Blocks(theme=gr.themes.Soft(), title="B2B Customer Analytics") as demo:
         gr.HTML("""
+        <div style="background: linear-gradient(135deg, #6366f1 0%, #8b5cf6 100%);
+                    padding: 2rem; border-radius: 1rem; color: white; text-align: center; margin-bottom: 2rem;">
+            <h1 style="font-size: 2.5rem; font-weight: bold; margin-bottom: 0.5rem;">
+                B2B Customer Analytics Platform
+            </h1>
+            <p style="font-size: 1.1rem; opacity: 0.9;">
+                Advanced Customer Segmentation & Churn Prediction
+            </p>
         </div>
         """)
         with gr.Tabs():
+            # Data Upload Tab
             with gr.Tab("Data Upload & Dashboard"):
                 with gr.Row():
+                    file_input = gr.File(label="Upload Customer Data CSV", file_types=[".csv"])
+                    load_btn = gr.Button("Load & Process Data", variant="primary", size="lg")
+                load_status = gr.Textbox(label="Status", interactive=False)
+                dashboard_display = gr.HTML()
                 data_preview = gr.DataFrame(label="Data Preview")
+            # Segmentation Tab
             with gr.Tab("Customer Segmentation"):
                 with gr.Row():
+                    segment_chart = gr.Plot(label="Customer Segments")
+                    rfm_chart = gr.Plot(label="RFM Analysis")
+                customer_table = gr.DataFrame(label="Customer Summary")
+            # Churn Prediction Tab
             with gr.Tab("Churn Prediction"):
                 train_btn = gr.Button("Train Churn Model", variant="primary", size="lg")
+                model_dashboard = gr.HTML()
                 with gr.Row():
+                    importance_chart = gr.Plot(label="Feature Importance")
+                    churn_dist_chart = gr.Plot(label="Churn Risk Distribution")
+            # Reports Tab
             with gr.Tab("Reports"):
                 report_btn = gr.Button("Generate PDF Report", variant="primary", size="lg")
+                report_status = gr.Textbox(label="Status", interactive=False)
                 report_file = gr.File(label="Download Report")
+        # Event handlers
+        def load_and_visualize(file):
+            status, dashboard, preview = app.load_data(file)
+            if "Successfully" in status:
+                charts = app.create_visualizations()
+                table = app.get_customer_summary_table()
+                return status, dashboard, preview, charts[0], charts[1], table
+            return status, dashboard, preview, None, None, None
+        def train_and_update():
+            dashboard, importance = app.train_churn_model()
+            if "Error" not in dashboard:
+                charts = app.create_visualizations()
+                return dashboard, importance, charts[2]
+            return dashboard, importance, None
+        def generate_report():
+            report_path = app.generate_pdf_report()
+            if report_path:
+                return "PDF report generated successfully", report_path
+            return "Error generating PDF report", None
+        # Connect events
         load_btn.click(
+            fn=load_and_visualize,
             inputs=[file_input],
+            outputs=[load_status, dashboard_display, data_preview,
+                    segment_chart, rfm_chart, customer_table]
         )
         train_btn.click(
+            fn=train_and_update,
+            outputs=[model_dashboard, importance_chart, churn_dist_chart]
+        )