Spaces:

entropy25
/

customer

Sleeping

App Files Files Community

entropy25 committed on Aug 28, 2025

Commit

45a90de

verified ·

1 Parent(s): 0a4c6d8

Create app.py

Browse files

Files changed (1) hide show

app.py +544 -0

app.py ADDED Viewed

	@@ -0,0 +1,544 @@

+import gradio as gr
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+from sklearn.model_selection import train_test_split
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
+import xgboost as xgb
+from datetime import datetime, timedelta
+import plotly.express as px
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+import plotly.io as pio
+from reportlab.lib.pagesizes import letter, A4
+from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image, Table, TableStyle, PageBreak
+from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
+from reportlab.lib.units import inch
+from reportlab.lib import colors
+import io
+import base64
+import warnings
+warnings.filterwarnings('ignore')  # no category given, so every warning is silenced
+# Set plotting style: matplotlib's 'default' style sheet and seaborn's "husl" palette
+plt.style.use('default')
+sns.set_palette("husl")
+class B2BCustomerAnalytics:
+    def __init__(self):
+        self.df = None
+        self.model = None
+        self.feature_importance = None
+        self.predictions = None
+    def load_and_process_data(self, file):
+        """Load and process the uploaded CSV file"""
+        try:
+            if file is None:
+                return "Please upload a CSV file", None, None
+            # Read the CSV file
+            self.df = pd.read_csv(file.name)
+            # Basic data validation
+            required_columns = ['customer_id', 'order_date', 'amount']
+            missing_cols = [col for col in required_columns if col not in self.df.columns]
+            if missing_cols:
+                return f"Missing required columns: {missing_cols}", None, None
+            # Convert order_date to datetime
+            self.df['order_date'] = pd.to_datetime(self.df['order_date'])
+            # Calculate RFM metrics if not present
+            if 'recency_days' not in self.df.columns or 'frequency' not in self.df.columns or 'monetary' not in self.df.columns:
+                self.df = self.calculate_rfm_metrics(self.df)
+            # Customer segmentation
+            self.df = self.perform_customer_segmentation(self.df)
+            # Prepare summary
+            summary = self.generate_data_summary()
+            return "Data loaded successfully!", summary, self.df.head(10)
+        except Exception as e:
+            return f"Error loading data: {str(e)}", None, None
+    def calculate_rfm_metrics(self, df):
+        """Calculate RFM metrics from transaction data"""
+        current_date = df['order_date'].max() + timedelta(days=1)
+        # Group by customer
+        customer_metrics = df.groupby('customer_id').agg({
+            'order_date': ['max', 'count'],
+            'amount': ['sum', 'mean']
+        }).round(2)
+        customer_metrics.columns = ['last_order_date', 'frequency', 'monetary', 'avg_order_value']
+        customer_metrics['recency_days'] = (current_date - customer_metrics['last_order_date']).dt.days
+        # Merge back with original data
+        df_with_rfm = df.merge(customer_metrics[['recency_days', 'frequency', 'monetary']],
+                               left_on='customer_id', right_index=True, how='left')
+        return df_with_rfm
+    def perform_customer_segmentation(self, df):
+        """Perform customer segmentation based on RFM analysis"""
+        customer_df = df.groupby('customer_id').agg({
+            'recency_days': 'first',
+            'frequency': 'first',
+            'monetary': 'first'
+        }).reset_index()
+        # Create RFM scores (1-5 scale)
+        customer_df['R_Score'] = pd.qcut(customer_df['recency_days'].rank(method='first'), 5, labels=[5,4,3,2,1])
+        customer_df['F_Score'] = pd.qcut(customer_df['frequency'].rank(method='first'), 5, labels=[1,2,3,4,5])
+        customer_df['M_Score'] = pd.qcut(customer_df['monetary'].rank(method='first'), 5, labels=[1,2,3,4,5])
+        # Convert to numeric
+        customer_df['R_Score'] = customer_df['R_Score'].astype(int)
+        customer_df['F_Score'] = customer_df['F_Score'].astype(int)
+        customer_df['M_Score'] = customer_df['M_Score'].astype(int)
+        # Create segments
+        def segment_customers(row):
+            if row['R_Score'] >= 4 and row['F_Score'] >= 4 and row['M_Score'] >= 4:
+                return 'Champions'
+            elif row['R_Score'] >= 3 and row['F_Score'] >= 3 and row['M_Score'] >= 3:
+                return 'Loyal Customers'
+            elif row['R_Score'] >= 3 and row['F_Score'] >= 2:
+                return 'Potential Loyalists'
+            elif row['R_Score'] >= 4 and row['F_Score'] <= 2:
+                return 'New Customers'
+            elif row['R_Score'] <= 2 and row['F_Score'] >= 3:
+                return 'At Risk'
+            elif row['R_Score'] <= 2 and row['F_Score'] <= 2 and row['M_Score'] >= 3:
+                return 'Cannot Lose Them'
+            elif row['R_Score'] <= 2 and row['F_Score'] <= 2 and row['M_Score'] <= 2:
+                return 'Lost Customers'
+            else:
+                return 'Others'
+        customer_df['Segment'] = customer_df.apply(segment_customers, axis=1)
+        # Calculate churn risk
+        customer_df['Churn_Risk'] = customer_df.apply(lambda x:
+            'High' if x['Segment'] in ['Lost Customers', 'At Risk'] else
+            'Medium' if x['Segment'] in ['Others', 'Cannot Lose Them'] else 'Low', axis=1)
+        # Merge segments back to original data
+        segment_data = customer_df[['customer_id', 'Segment', 'Churn_Risk', 'R_Score', 'F_Score', 'M_Score']]
+        df_with_segments = df.merge(segment_data, on='customer_id', how='left')
+        return df_with_segments
+    def generate_data_summary(self):
+        """Generate data summary statistics"""
+        if self.df is None:
+            return "No data loaded"
+        total_customers = self.df['customer_id'].nunique()
+        total_orders = len(self.df)
+        total_revenue = self.df['amount'].sum()
+        avg_order_value = self.df['amount'].mean()
+        # Segment distribution
+        segment_dist = self.df.groupby('customer_id')['Segment'].first().value_counts()
+        summary = f"""
+        📊 **DATA OVERVIEW**
+        • Total Customers: {total_customers:,}
+        • Total Orders: {total_orders:,}
+        • Total Revenue: ${total_revenue:,.2f}
+        • Average Order Value: ${avg_order_value:.2f}
+        🎯 **CUSTOMER SEGMENTS**
+        {segment_dist.to_string()}
+        ⚠️ **CHURN ANALYSIS**
+        • High Risk: {len(self.df[self.df['Churn_Risk'] == 'High']['customer_id'].unique())} customers
+        • Medium Risk: {len(self.df[self.df['Churn_Risk'] == 'Medium']['customer_id'].unique())} customers
+        • Low Risk: {len(self.df[self.df['Churn_Risk'] == 'Low']['customer_id'].unique())} customers
+        """
+        return summary
+    def train_churn_model(self):
+        """Train churn prediction model"""
+        if self.df is None:
+            return "No data available. Please upload a CSV file first."
+        try:
+            # Prepare data for modeling
+            customer_features = self.df.groupby('customer_id').agg({
+                'recency_days': 'first',
+                'frequency': 'first',
+                'monetary': 'first',
+                'amount': ['mean', 'std', 'min', 'max'],
+                'order_date': ['min', 'max']
+            }).reset_index()
+            # Flatten column names
+            customer_features.columns = ['customer_id', 'recency_days', 'frequency', 'monetary',
+                                       'avg_amount', 'std_amount', 'min_amount', 'max_amount',
+                                       'first_order', 'last_order']
+            # Fill NaN values
+            customer_features['std_amount'].fillna(0, inplace=True)
+            # Calculate additional features
+            customer_features['customer_lifetime'] = (customer_features['last_order'] - customer_features['first_order']).dt.days
+            customer_features['customer_lifetime'].fillna(0, inplace=True)
+            # Create churn labels (if not present)
+            if 'churn_label' not in self.df.columns:
+                # Define churn based on recency (customers who haven't ordered in 90+ days)
+                customer_features['churn_label'] = (customer_features['recency_days'] > 90).astype(int)
+            else:
+                churn_labels = self.df.groupby('customer_id')['churn_label'].first().reset_index()
+                customer_features = customer_features.merge(churn_labels, on='customer_id')
+            # Select features for modeling
+            feature_cols = ['recency_days', 'frequency', 'monetary', 'avg_amount', 'std_amount',
+                           'min_amount', 'max_amount', 'customer_lifetime']
+            X = customer_features[feature_cols]
+            y = customer_features['churn_label']
+            # Split data
+            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
+            # Train XGBoost model
+            self.model = xgb.XGBClassifier(random_state=42, eval_metric='logloss')
+            self.model.fit(X_train, y_train)
+            # Make predictions
+            y_pred = self.model.predict(X_test)
+            y_pred_proba = self.model.predict_proba(X_test)[:, 1]
+            # Calculate feature importance
+            self.feature_importance = pd.DataFrame({
+                'feature': feature_cols,
+                'importance': self.model.feature_importances_
+            }).sort_values('importance', ascending=False)
+            # Generate predictions for all customers
+            all_predictions = self.model.predict_proba(X)[:, 1]
+            customer_features['churn_probability'] = all_predictions
+            self.predictions = customer_features
+            # Model performance
+            accuracy = accuracy_score(y_test, y_pred)
+            return f"""
+            🤖 **MODEL TRAINING COMPLETED**
+            • Model: XGBoost Classifier
+            • Accuracy: {accuracy:.3f}
+            • Features Used: {len(feature_cols)}
+            • Training Samples: {len(X_train)}
+            • Test Samples: {len(X_test)}
+            🔍 **TOP FEATURES**
+            {self.feature_importance.head().to_string(index=False)}
+            """
+        except Exception as e:
+            return f"Error training model: {str(e)}"
+    def create_visualizations(self):
+        """Create comprehensive visualizations"""
+        if self.df is None:
+            return None, None, None, None
+        fig1 = self.create_segment_analysis()
+        fig2 = self.create_rfm_analysis()
+        fig3 = self.create_churn_analysis()
+        fig4 = self.create_revenue_trends()
+        return fig1, fig2, fig3, fig4
+    def create_segment_analysis(self):
+        """Create customer segment analysis visualization"""
+        # Customer segment distribution
+        segment_data = self.df.groupby('customer_id')['Segment'].first().value_counts().reset_index()
+        segment_data.columns = ['Segment', 'Count']
+        fig = px.pie(segment_data, values='Count', names='Segment',
+                    title='Customer Segment Distribution',
+                    color_discrete_sequence=px.colors.qualitative.Set3)
+        fig.update_traces(textposition='inside', textinfo='percent+label')
+        fig.update_layout(height=400, showlegend=True)
+        return fig
+    def create_rfm_analysis(self):
+        """Create RFM analysis visualization"""
+        customer_rfm = self.df.groupby('customer_id').agg({
+            'recency_days': 'first',
+            'frequency': 'first',
+            'monetary': 'first',
+            'Segment': 'first'
+        }).reset_index()
+        fig = px.scatter_3d(customer_rfm, x='recency_days', y='frequency', z='monetary',
+                           color='Segment', title='RFM Analysis - 3D Customer Mapping',
+                           labels={'recency_days': 'Recency (Days)',
+                                  'frequency': 'Frequency (Orders)',
+                                  'monetary': 'Monetary (Revenue)'})
+        fig.update_layout(height=500)
+        return fig
+    def create_churn_analysis(self):
+        """Create churn risk analysis"""
+        if self.predictions is not None:
+            fig = px.histogram(self.predictions, x='churn_probability', nbins=20,
+                              title='Churn Probability Distribution',
+                              labels={'churn_probability': 'Churn Probability',
+                                     'count': 'Number of Customers'})
+            fig.add_vline(x=0.5, line_dash="dash", line_color="red",
+                         annotation_text="High Risk Threshold")
+            fig.update_layout(height=400)
+            return fig
+        else:
+            # Fallback to risk level distribution
+            risk_data = self.df.groupby('customer_id')['Churn_Risk'].first().value_counts().reset_index()
+            risk_data.columns = ['Risk_Level', 'Count']
+            colors_map = {'High': 'red', 'Medium': 'orange', 'Low': 'green'}
+            fig = px.bar(risk_data, x='Risk_Level', y='Count',
+                        title='Customer Churn Risk Distribution',
+                        color='Risk_Level', color_discrete_map=colors_map)
+            fig.update_layout(height=400, showlegend=False)
+            return fig
+    def create_revenue_trends(self):
+        """Create revenue trend analysis"""
+        # Monthly revenue trends
+        self.df['order_month'] = self.df['order_date'].dt.to_period('M')
+        monthly_revenue = self.df.groupby('order_month')['amount'].sum().reset_index()
+        monthly_revenue['order_month'] = monthly_revenue['order_month'].astype(str)
+        fig = px.line(monthly_revenue, x='order_month', y='amount',
+                     title='Monthly Revenue Trends',
+                     labels={'amount': 'Revenue ($)', 'order_month': 'Month'})
+        fig.update_layout(height=400, xaxis_tickangle=-45)
+        return fig
+    def generate_pdf_report(self):
+        """Generate comprehensive PDF report"""
+        if self.df is None:
+            return None
+        try:
+            buffer = io.BytesIO()
+            doc = SimpleDocTemplate(buffer, pagesize=A4, rightMargin=72, leftMargin=72,
+                                  topMargin=72, bottomMargin=18)
+            styles = getSampleStyleSheet()
+            title_style = ParagraphStyle(
+                'CustomTitle',
+                parent=styles['Heading1'],
+                fontSize=24,
+                spaceAfter=30,
+                textColor=colors.darkblue,
+                alignment=1  # Center alignment
+            )
+            story = []
+            # Title
+            story.append(Paragraph("B2B Customer Analytics Report", title_style))
+            story.append(Spacer(1, 20))
+            # Executive Summary
+            story.append(Paragraph("Executive Summary", styles['Heading2']))
+            total_customers = self.df['customer_id'].nunique()
+            total_revenue = self.df['amount'].sum()
+            avg_order_value = self.df['amount'].mean()
+            high_risk_customers = len(self.df[self.df['Churn_Risk'] == 'High']['customer_id'].unique())
+            summary_text = f"""
+            This report provides a comprehensive analysis of {total_customers} B2B customers based on their
+            transaction history and behavioral patterns. The analysis reveals total revenue of ${total_revenue:,.2f}
+            with an average order value of ${avg_order_value:.2f}.
+            Key findings indicate {high_risk_customers} customers are at high risk of churning, requiring
+            immediate attention to prevent revenue loss. The customer segmentation analysis identifies
+            opportunities for targeted marketing and retention strategies.
+            """
+            story.append(Paragraph(summary_text, styles['Normal']))
+            story.append(Spacer(1, 20))
+            # Key Metrics Table
+            story.append(Paragraph("Key Performance Metrics", styles['Heading2']))
+            segment_dist = self.df.groupby('customer_id')['Segment'].first().value_counts()
+            risk_dist = self.df.groupby('customer_id')['Churn_Risk'].first().value_counts()
+            metrics_data = [
+                ['Metric', 'Value'],
+                ['Total Customers', f"{total_customers:,}"],
+                ['Total Revenue', f"${total_revenue:,.2f}"],
+                ['Average Order Value', f"${avg_order_value:.2f}"],
+                ['Champions', f"{segment_dist.get('Champions', 0)}"],
+                ['At Risk Customers', f"{segment_dist.get('At Risk', 0)}"],
+                ['High Risk Churn', f"{risk_dist.get('High', 0)}"],
+                ['Low Risk Churn', f"{risk_dist.get('Low', 0)}"]
+            ]
+            metrics_table = Table(metrics_data, colWidths=[3*inch, 2*inch])
+            metrics_table.setStyle(TableStyle([
+                ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
+                ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
+                ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
+                ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
+                ('FONTSIZE', (0, 0), (-1, 0), 14),
+                ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
+                ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
+                ('GRID', (0, 0), (-1, -1), 1, colors.black)
+            ]))
+            story.append(metrics_table)
+            story.append(Spacer(1, 20))
+            # Customer Segments Analysis
+            story.append(Paragraph("Customer Segmentation Analysis", styles['Heading2']))
+            segment_analysis = """
+            Customer segmentation based on RFM (Recency, Frequency, Monetary) analysis reveals distinct
+            customer groups with different behavioral patterns and value propositions:
+            • Champions: High-value customers who buy frequently and recently
+            • Loyal Customers: Consistent buyers with good purchase history
+            • At Risk: Previously good customers showing declining engagement
+            • Lost Customers: Haven't purchased recently, need win-back campaigns
+            """
+            story.append(Paragraph(segment_analysis, styles['Normal']))
+            story.append(Spacer(1, 20))
+            # Recommendations
+            story.append(Paragraph("Strategic Recommendations", styles['Heading2']))
+            recommendations = """
+            Based on the analysis, we recommend the following actions:
+            1. Immediate Attention: Contact high-risk customers within 48 hours to prevent churn
+            2. Retention Programs: Develop targeted campaigns for 'At Risk' segment customers
+            3. Loyalty Rewards: Enhance programs for Champions and Loyal Customers to maintain engagement
+            4. Win-back Campaigns: Create special offers for Lost Customers to reactivate them
+            5. Predictive Monitoring: Implement real-time churn prediction alerts
+            """
+            story.append(Paragraph(recommendations, styles['Normal']))
+            # Build PDF
+            doc.build(story)
+            buffer.seek(0)
+            return buffer.getvalue()
+        except Exception as e:
+            print(f"Error generating PDF: {str(e)}")
+            return None
+# Initialize the analytics engine: a single module-level instance used by both process_file and download_report
+analytics = B2BCustomerAnalytics()
+def process_file(file):
+    """Process uploaded file and return analysis"""
+    if file is None:
+        return "Please upload a CSV file", "", None, None, None, None, None
+    # Load and process data
+    status, summary, preview = analytics.load_and_process_data(file)
+    if "successfully" in status:
+        # Train model
+        model_results = analytics.train_churn_model()
+        # Create visualizations
+        fig1, fig2, fig3, fig4 = analytics.create_visualizations()
+        return status, summary, preview, model_results, fig1, fig2, fig3, fig4
+    else:
+        return status, summary, preview, "", None, None, None, None
+def download_report():
+    """Generate and return PDF report"""
+    pdf_data = analytics.generate_pdf_report()
+    if pdf_data:
+        return pdf_data
+    else:
+        return None
+# Create Gradio Interface
+with gr.Blocks(title="B2B Customer Analytics", theme=gr.themes.Soft()) as app:
+    gr.Markdown("""
+    # 🏢 B2B Customer Analytics Platform
+    Upload your customer transaction data (CSV format) to get comprehensive insights including:
+    - **Customer Segmentation** (RFM Analysis)
+    - **Churn Prediction** (ML-powered)
+    - **Revenue Analysis** & Trends
+    - **Strategic Recommendations**
+    - **Downloadable PDF Report**
+    ### Required CSV Format:
+    `customer_id, order_id, order_date, amount` (minimum required columns)
+    Optional columns: `recency_days, frequency, monetary, churn_label`
+    """)
+    with gr.Row():
+        with gr.Column():
+            file_input = gr.File(label="Upload Customer Data (CSV)", file_types=[".csv"])
+            analyze_btn = gr.Button("🔍 Analyze Customer Data", variant="primary", size="lg")
+        with gr.Column():
+            download_btn = gr.Button("📄 Download PDF Report", variant="secondary", size="lg")
+            pdf_output = gr.File(label="PDF Report", visible=False)
+    # Status and Summary
+    with gr.Row():
+        status_output = gr.Textbox(label="Status", interactive=False)
+        summary_output = gr.Markdown(label="Data Summary")
+    # Data Preview
+    data_preview = gr.Dataframe(label="Data Preview", interactive=False)
+    # Model Results
+    model_output = gr.Markdown(label="Model Training Results")
+    # Visualizations
+    with gr.Row():
+        with gr.Column():
+            plot1 = gr.Plot(label="Customer Segments")
+            plot3 = gr.Plot(label="Churn Analysis")
+        with gr.Column():
+            plot2 = gr.Plot(label="RFM Analysis")
+            plot4 = gr.Plot(label="Revenue Trends")
+    # Event handlers
+    analyze_btn.click(
+        fn=process_file,
+        inputs=[file_input],
+        outputs=[status_output, summary_output, data_preview, model_output, plot1, plot2, plot3, plot4]
+    )
+    download_btn.click(
+        fn=download_report,
+        outputs=[pdf_output]
+    )
+if __name__ == "__main__":
+    app.launch()