Spaces:

Abs6187
/

TELECOM_CHURN_CAPSTONE

Sleeping

App Files Files Community

Abs6187 committed on Oct 17, 2025

Commit

4fcf028

verified ·

1 Parent(s): c5a7400

Update app.py

Browse files

Files changed (1) hide show

app.py +246 -280

app.py CHANGED Viewed

@@ -1,280 +1,246 @@
-import gradio as gr
-import joblib
-import pandas as pd
-import numpy as np
-import os
-from PIL import Image
-# Load models and encoders
-def load_models():
-    try:
-        model = joblib.load('models/churn_model.pkl')
-        encoders = joblib.load('models/label_encoders.pkl')
-        try:
-            scaler = joblib.load('models/scaler.pkl')
-        except:
-            scaler = None
-        return model, encoders, scaler
-    except Exception as e:
-        print(f"Error loading models: {e}")
-        return None, None, None
-model, encoders, scaler = load_models()
-# Define feature options based on the training data
-REGIONS = ['North', 'South', 'East', 'West', 'Central']
-PLAN_TYPES = ['Prepaid', 'Postpaid']
-CONTRACT_TYPES = ['Month-to-month', 'One year', 'Two year']
-COMPLAINT_STATUS = ['Open', 'Closed', 'Not Applicable']
-def predict_churn(customer_id, region, plan_type, monthly_charges, total_charges,
-                 tenure_months, contract_type, paperless_billing, payment_method,
-                 data_usage_gb, call_minutes, sms_count, complaint_status, complaint_count):
-    if model is None:
-        return "Model not loaded properly", 0.0
-    try:
-        # Create input dataframe
-        input_data = pd.DataFrame({
-            'customer_id': [customer_id],
-            'region': [region],
-            'plan_type': [plan_type],
-            'monthly_charges': [monthly_charges],
-            'total_charges': [total_charges],
-            'tenure_months': [tenure_months],
-            'contract_type': [contract_type],
-            'paperless_billing': [1 if paperless_billing else 0],
-            'payment_method': [payment_method],
-            'data_usage_gb': [data_usage_gb],
-            'call_minutes': [call_minutes],
-            'sms_count': [sms_count],
-            'complaint_status': [complaint_status],
-            'complaint_count': [complaint_count]
-        })
-        # Encode categorical variables
-        categorical_columns = ['region', 'plan_type', 'contract_type', 'payment_method', 'complaint_status']
-        for col in categorical_columns:
-            if col in encoders:
-                try:
-                    input_data[col] = encoders[col].transform(input_data[col])
-                except ValueError:
-                    # Handle unseen labels by using the most frequent class
-                    input_data[col] = 0
-        # Drop customer_id for prediction
-        prediction_data = input_data.drop(['customer_id'], axis=1)
-        # Scale features if scaler exists
-        if scaler is not None:
-            prediction_data = scaler.transform(prediction_data)
-        # Make prediction
-        churn_probability = model.predict_proba(prediction_data)[0][1]
-        churn_prediction = "HIGH RISK" if churn_probability > 0.5 else "LOW RISK"
-        # Create detailed result
-        risk_level = "🔴 HIGH RISK" if churn_probability > 0.7 else "🟡 MEDIUM RISK" if churn_probability > 0.4 else "🟢 LOW RISK"
-        result = f"""
-        **Prediction Result for Customer {customer_id}**
-        **Churn Risk:** {risk_level}
-        **Churn Probability:** {churn_probability:.1%}
-        **Recommendation:** {'Immediate retention action required' if churn_probability > 0.7 else 'Monitor and engage' if churn_probability > 0.4 else 'Standard service maintenance'}
-        """
-        return result, churn_probability
-    except Exception as e:
-        return f"Error in prediction: {str(e)}", 0.0
-def load_image(image_name):
-    """Load and return image from the images folder"""
-    try:
-        img_path = f"images/{image_name}"
-        if os.path.exists(img_path):
-            return Image.open(img_path)
-        else:
-            return None
-    except Exception as e:
-        print(f"Error loading image {image_name}: {e}")
-        return None
-# Create Gradio interface
-with gr.Blocks(title="Telecom Churn Prediction - BRBRAITT Group 5", theme=gr.themes.Soft()) as app:
-    # Header
-    gr.Markdown("""
-    # 🔮 Telecom Churn Prediction System
-    **TIRTC Course: Advance AI/ML Training - Telecom Data Analytics (Nokia)**
-    **Institution:** BRBRAITT, Jabalpur
-    **Group 5 Members:**
-    - Abhay Gupta
-    - Jay Kumar
-    - Kripanshu Gupta
-    - Ruhy Namdeo
-    ---
-    This AI-powered system predicts customer churn with **90% accuracy** using Random Forest ML model.
-    """)
-    with gr.Tabs():
-        # Tab 1: Prediction Interface
-        with gr.TabItem("🎯 Churn Prediction"):
-            gr.Markdown("### Enter Customer Details for Churn Prediction")
-            with gr.Row():
-                with gr.Column():
-                    customer_id = gr.Textbox(label="Customer ID", placeholder="e.g., CUST001", value="CUST001")
-                    region = gr.Dropdown(choices=REGIONS, label="Region", value="North")
-                    plan_type = gr.Dropdown(choices=PLAN_TYPES, label="Plan Type", value="Postpaid")
-                    contract_type = gr.Dropdown(choices=CONTRACT_TYPES, label="Contract Type", value="Month-to-month")
-                with gr.Column():
-                    monthly_charges = gr.Number(label="Monthly Charges (₹)", value=1000, minimum=0)
-                    total_charges = gr.Number(label="Total Charges (₹)", value=12000, minimum=0)
-                    tenure_months = gr.Number(label="Tenure (Months)", value=12, minimum=0, maximum=120)
-                    paperless_billing = gr.Checkbox(label="Paperless Billing", value=True)
-                with gr.Column():
-                    payment_method = gr.Dropdown(choices=['Electronic check', 'Mailed check', 'Bank transfer', 'Credit card'],
-                                               label="Payment Method", value="Electronic check")
-                    data_usage_gb = gr.Number(label="Data Usage (GB)", value=15, minimum=0)
-                    call_minutes = gr.Number(label="Call Minutes", value=500, minimum=0)
-                    sms_count = gr.Number(label="SMS Count", value=100, minimum=0)
-            with gr.Row():
-                complaint_status = gr.Dropdown(choices=COMPLAINT_STATUS, label="Complaint Status", value="Not Applicable")
-                complaint_count = gr.Number(label="Complaint Count", value=0, minimum=0)
-            predict_btn = gr.Button("🔮 Predict Churn Risk", variant="primary", size="lg")
-            with gr.Row():
-                prediction_output = gr.Markdown()
-                probability_gauge = gr.Number(label="Churn Probability", interactive=False)
-            predict_btn.click(
-                fn=predict_churn,
-                inputs=[customer_id, region, plan_type, monthly_charges, total_charges,
-                       tenure_months, contract_type, paperless_billing, payment_method,
-                       data_usage_gb, call_minutes, sms_count, complaint_status, complaint_count],
-                outputs=[prediction_output, probability_gauge]
-            )
-        # Tab 2: Business Insights
-        with gr.TabItem("📊 Business Insights"):
-            gr.Markdown("### Key Findings from Analysis")
-            insights = gr.Markdown("""
-            ## 🎯 Model Performance
-            - **Accuracy:** 90%
-            - **AUC Score:** 0.95
-            - **Best Algorithm:** Random Forest Classifier
-            ## 💼 Business Impact
-            - **Current Churn Rate:** 50%
-            - **Revenue at Risk:** ₹12,250+ monthly
-            - **Annual Loss:** ₹147,000+ potential
-            - **Savings Opportunity:** ₹36,750+ with 25% churn reduction
-            ## 🔴 Top Risk Factors
-            1. **Contract Type:** Month-to-month customers (100% churn rate)
-            2. **Tenure:** New customers (0-12 months) at highest risk
-            3. **Complaints:** Open complaints double churn likelihood
-            4. **Plan Type:** Significant differences between Prepaid/Postpaid
-            ## 📈 Recommendations
-            ### Immediate Actions
-            - Target month-to-month customers for contract upgrades
-            - Implement 90-day new customer check-in program
-            - Prioritize complaint resolution within 48 hours
-            ### Long-term Strategy
-            - Deploy real-time churn scoring system
-            - Implement tiered retention programs
-            - A/B test retention campaigns
-            """)
-        # Tab 3: Visualizations
-        with gr.TabItem("📈 Data Visualizations"):
-            gr.Markdown("### Comprehensive Analysis Dashboard")
-            # Load and display images
-            image_files = [
-                ("churn_distribution.png", "Overall Churn Distribution"),
-                ("churn_by_contract.png", "Churn by Contract Type"),
-                ("churn_by_plan.png", "Churn by Plan Type"),
-                ("churn_by_region.png", "Regional Churn Analysis"),
-                ("tenure_vs_churn.png", "Tenure vs Churn Pattern"),
-                ("revenue_vs_churn.png", "Revenue Impact Analysis"),
-                ("complaints_analysis.png", "Complaints Impact on Churn"),
-                ("correlation_matrix.png", "Feature Correlation Matrix"),
-                ("feature_coefficients.png", "Model Feature Importance")
-            ]
-            for img_file, title in image_files:
-                img = load_image(img_file)
-                if img is not None:
-                    with gr.Row():
-                        gr.Markdown(f"#### {title}")
-                    with gr.Row():
-                        gr.Image(img, label=title, show_label=False)
-                else:
-                    gr.Markdown(f"*{title} - Image not available*")
-        # Tab 4: About Project
-        with gr.TabItem("ℹ️ About Project"):
-            gr.Markdown("""
-            ## 🎓 Academic Project Details
-            **Course:** TIRTC - Advance AI/ML Training - Telecom Data Analytics (Nokia)
-            **Institution:** BRBRAITT (Bharat Ratna Bhimrao Ambedkar Institute of Technology and Training), Jabalpur
-            **Project Type:** Capstone Project 1
-            ### 👥 Team Members (Group 5)
-            - **Abhay Gupta**
-            - **Jay Kumar**
-            - **Kripanshu Gupta**
-            - **Ruhy Namdeo**
-            ### 🛠️ Technical Stack
-            - **Machine Learning:** scikit-learn, Random Forest Classifier
-            - **Data Processing:** pandas, numpy
-            - **Visualization:** matplotlib, seaborn
-            - **Interface:** Gradio
-            - **Deployment:** Hugging Face Spaces
-            ### 📈 Project Scope
-            This end-to-end machine learning project demonstrates:
-            - Data engineering and ETL pipeline
-            - Advanced ML model development
-            - Business intelligence and insights generation
-            - Production deployment capabilities
-            ### 🎯 Learning Outcomes
-            - Real-world problem solving in telecom domain
-            - Complete ML pipeline implementation
-            - Business value creation through AI/ML
-            - Model deployment and productionization
-            ---
-            **🏆 Project Status:** Complete | **📅 Last Updated:** October 2024 | **🔢 Version:** 1.0.0
-            """)
-    # Footer
-    gr.Markdown("""
-    ---
-    **© 2024 BRBRAITT Group 5 | TIRTC Advance AI/ML Training | Telecom Data Analytics**
-    """)
-# Launch the app
-if __name__ == "__main__":
-    app.launch(share=True, server_name="0.0.0.0", server_port=7860)

+import gradio as gr
+import joblib
+import pandas as pd
+import numpy as np
+import os
+from PIL import Image
+import google.generativeai as genai
+# --- Gemini API Configuration ---
+# The key is read from the GOOGLE_API_KEY environment variable only. For local
+# testing, export it in your shell instead of pasting it into this file.
+# Without a key the app still runs; prediction errors then use a fixed message.
+GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
+if not GOOGLE_API_KEY:
+    print("Warning: GOOGLE_API_KEY not found. Gemini error handling will be disabled.")
+else:
+    try:
+        genai.configure(api_key=GOOGLE_API_KEY)
+    except Exception as e:
+        # Configuration problems are reported but must not stop the app from starting.
+        print(f"Error configuring Gemini: {e}")
+# --- Model and Asset Loading ---
+def load_models():
+    """Load the churn model, label encoders and scaler from the 'models/' directory.
+
+    Returns:
+        A ``(model, encoders, scaler)`` tuple, or ``(None, None, None)`` when any
+        of the three files is missing or fails to load (the reason is printed).
+    """
+    # NOTE: joblib.load unpickles its input, so these must remain trusted files.
+    artifact_paths = (
+        'models/churn_model.pkl',
+        'models/label_encoders.pkl',
+        'models/scaler.pkl',
+    )
+    try:
+        artifacts = tuple(joblib.load(path) for path in artifact_paths)
+    except FileNotFoundError:
+        print("Error: Model files not found. Please ensure 'churn_model.pkl', 'label_encoders.pkl', and 'scaler.pkl' are in the 'models/' directory.")
+        return None, None, None
+    except Exception as e:
+        print(f"An unexpected error occurred while loading models: {e}")
+        return None, None, None
+    return artifacts
+# Loaded once at import time; predict_churn reads these module-level names.
+model, encoders, scaler = load_models()
+def load_image(image_name):
+    """Open ``images/<image_name>`` and return it, or ``None`` if unavailable.
+
+    Args:
+        image_name: File name of the image inside the 'images' folder.
+
+    Returns:
+        The opened PIL image, or ``None`` when the file does not exist or
+        opening it raises (the error is printed).
+    """
+    try:
+        candidate = os.path.join("images", image_name)
+        if not os.path.exists(candidate):
+            return None
+        return Image.open(candidate)
+    except Exception as e:
+        print(f"Error loading image {image_name}: {e}")
+        return None
+# --- Feature Constants ---
+# Choice lists offered by the dropdowns in the prediction tab.
+# predict_churn encodes any value missing from a fitted encoder's classes_ as -1.
+# NOTE(review): presumably these lists should mirror the categories the label
+# encoders were fitted on - confirm against the training data.
+REGIONS = ['North', 'South', 'East', 'West', 'Central']
+PLAN_TYPES = ['Prepaid', 'Postpaid']
+CONTRACT_TYPES = ['Month-to-month', 'One year', 'Two year']
+COMPLAINT_STATUS = ['Open', 'Closed', 'Not Applicable']
+PAYMENT_METHODS = ['Electronic check', 'Mailed check', 'Bank transfer', 'Credit card']
+# --- Prediction Logic ---
+def _require_non_negative(label, value, cast):
+    """Convert one numeric form field, rejecting blank, non-numeric or negative input.
+
+    Args:
+        label: Field name used in the message shown to the user.
+        value: Raw value supplied for the field (e.g. ``None`` if nothing was given).
+        cast: ``int`` or ``float``; the conversion applied to ``value``.
+
+    Returns:
+        ``cast(value)``.
+
+    Raises:
+        ValueError: With a user-readable message when the value is missing,
+            cannot be converted, or is not zero or greater.
+    """
+    if value is None:
+        raise ValueError(f"{label} is required.")
+    try:
+        number = cast(value)
+    except (TypeError, ValueError, OverflowError):
+        raise ValueError(f"{label} must be a valid number.") from None
+    # Written as "not >= 0" so that NaN is rejected together with negatives.
+    if not number >= 0:
+        raise ValueError(f"{label} must be zero or greater.")
+    return number
+def predict_churn(customer_id, region, plan_type, monthly_charges, total_charges,
+                  tenure_months, contract_type, paperless_billing, payment_method,
+                  data_usage_gb, call_minutes, sms_count, complaint_status, complaint_count):
+    """Predict churn for one customer and format the outcome as Markdown.
+
+    Args:
+        customer_id: Identifier echoed in the result heading; not fed to the model.
+        region, plan_type, contract_type, payment_method, complaint_status:
+            Categorical fields, encoded with the matching entry of ``encoders``.
+        monthly_charges, total_charges, data_usage_gb: Converted with ``float``.
+        tenure_months, call_minutes, sms_count, complaint_count: Converted with
+            ``int`` (fractions are truncated).
+        paperless_billing: Truthy/falsy flag, encoded as 1 or 0.
+
+    Returns:
+        A ``(markdown_text, churn_probability)`` tuple. The probability is 0.0
+        whenever the text is an error message rather than a prediction.
+    """
+    if model is None or encoders is None or scaler is None:
+        return "🔴 **Error:** Model components are not loaded. Please check the server logs.", 0.0
+    # Validate before the broad try below, so a blank or negative field gets a
+    # precise message instead of the generic fallback and a remote Gemini call.
+    try:
+        monthly_charges = _require_non_negative("Monthly Charges", monthly_charges, float)
+        total_charges = _require_non_negative("Total Charges", total_charges, float)
+        tenure_months = _require_non_negative("Tenure (Months)", tenure_months, int)
+        data_usage_gb = _require_non_negative("Data Usage (GB)", data_usage_gb, float)
+        call_minutes = _require_non_negative("Call Minutes", call_minutes, int)
+        sms_count = _require_non_negative("SMS Count", sms_count, int)
+        complaint_count = _require_non_negative("Complaint Count", complaint_count, int)
+    except ValueError as invalid_input:
+        return f"🔴 **Invalid input:** {invalid_input}", 0.0
+    try:
+        # 1. Create Input DataFrame (the column order here is what the scaler receives)
+        input_data = pd.DataFrame({
+            'region': [region],
+            'plan_type': [plan_type],
+            'monthly_charges': [monthly_charges],
+            'total_charges': [total_charges],
+            'tenure_months': [tenure_months],
+            'contract_type': [contract_type],
+            'paperless_billing': [1 if paperless_billing else 0],
+            'payment_method': [payment_method],
+            'data_usage_gb': [data_usage_gb],
+            'call_minutes': [call_minutes],
+            'sms_count': [sms_count],
+            'complaint_status': [complaint_status],
+            'complaint_count': [complaint_count]
+        })
+        # 2. Encode Categorical Features
+        # NOTE(review): a value the encoder has never seen becomes -1 and is still
+        # scored; confirm the model is meant to handle that sentinel.
+        for col, encoder in encoders.items():
+            if col in input_data.columns:
+                input_data[col] = input_data[col].apply(lambda x: encoder.transform([x])[0] if x in encoder.classes_ else -1)
+        # 3. Scale Numerical Features
+        scaled_data = scaler.transform(input_data)
+        # 4. Make Prediction (column 1 of predict_proba is read as the churn class)
+        churn_probability = float(model.predict_proba(scaled_data)[0][1])
+        # 5. Format Output
+        if churn_probability > 0.7:
+            risk_level = "🔴 HIGH RISK"
+            recommendation = "Immediate retention action is strongly recommended."
+        elif churn_probability > 0.4:
+            risk_level = "🟡 MEDIUM RISK"
+            recommendation = "Proactively monitor and engage with personalized offers."
+        else:
+            risk_level = "🟢 LOW RISK"
+            recommendation = "Standard service and relationship maintenance."
+        result = f"""
+        ### Prediction for Customer `{customer_id}`
+        - **Churn Risk Level:** **{risk_level}**
+        - **Probability of Churn:** **{churn_probability:.1%}**
+        - **Recommendation:** {recommendation}
+        """
+        return result, churn_probability
+    except Exception as e:
+        # --- Gemini Error Handling ---
+        print(f"Prediction error: {e}") # Log the real error for debugging
+        if GOOGLE_API_KEY:
+            try:
+                # UPDATED to use the faster Flash model
+                gemini_model = genai.GenerativeModel('gemini-2.5-flash')
+                prompt = f"""
+                An error occurred in a telecom churn prediction app. The technical error was: '{str(e)}'.
+                Generate a concise, friendly, non-technical message for the user.
+                The message should suggest they double-check their inputs (like ensuring Total Charges are not less than Monthly Charges) and try again.
+                Do not mention the technical error details. Start with 'Oops! Something went wrong.'
+                """
+                response = gemini_model.generate_content(prompt)
+                return response.text, 0.0
+            except Exception as gemini_e:
+                # Any Gemini failure degrades to a fixed message rather than surfacing.
+                print(f"Gemini API error: {gemini_e}") # Log Gemini error
+                return "An unexpected error occurred. Please verify your inputs and try again.", 0.0
+        else:
+            return "An unexpected error occurred. Please check your inputs are valid and try again.", 0.0
+# --- Gradio UI ---
+# Builds the four-tab interface; `app` is the Blocks object launched at the bottom.
+with gr.Blocks(title="Telecom Churn Prediction - BRBRAITT Group 5", theme=gr.themes.Soft()) as app:
+    # Header
+    gr.Markdown("""
+    # 🔮 Telecom Churn Prediction System
+    **TIRTC Course: Advance AI/ML Training (Nokia) | Institution: BRBRAITT, Jabalpur | Group 5**
+    ---
+    This AI-powered system predicts customer churn with over **90% accuracy** using a Random Forest model.
+    """)
+    with gr.Tabs():
+        # Tab 1: Prediction Interface
+        with gr.TabItem("🎯 Churn Prediction"):
+            with gr.Row():
+                # Left column (wider): the input form.
+                with gr.Column(scale=2):
+                    gr.Markdown("### Enter Customer Details")
+                    with gr.Row():
+                        customer_id = gr.Textbox(label="Customer ID", value="CUST-001")
+                        region = gr.Dropdown(choices=REGIONS, label="Region", value="North")
+                        plan_type = gr.Dropdown(choices=PLAN_TYPES, label="Plan Type", value="Postpaid")
+                    with gr.Row():
+                        contract_type = gr.Dropdown(choices=CONTRACT_TYPES, label="Contract Type", value="Month-to-month")
+                        payment_method = gr.Dropdown(choices=PAYMENT_METHODS, label="Payment Method", value="Electronic check")
+                        paperless_billing = gr.Checkbox(label="Paperless Billing", value=True)
+                    gr.Markdown("#### Service Usage & Charges")
+                    with gr.Row():
+                        monthly_charges = gr.Number(label="Monthly Charges (₹)", value=1000)
+                        total_charges = gr.Number(label="Total Charges (₹)", value=12000)
+                        tenure_months = gr.Number(label="Tenure (Months)", value=12)
+                    with gr.Row():
+                        data_usage_gb = gr.Number(label="Data Usage (GB)", value=15)
+                        call_minutes = gr.Number(label="Call Minutes", value=500)
+                        sms_count = gr.Number(label="SMS Count", value=100)
+                    gr.Markdown("#### Customer Complaints")
+                    with gr.Row():
+                        complaint_status = gr.Dropdown(choices=COMPLAINT_STATUS, label="Last Complaint Status", value="Not Applicable")
+                        complaint_count = gr.Number(label="Total Complaint Count", value=0)
+                    predict_btn = gr.Button("🔮 Predict Churn Risk", variant="primary", size="lg")
+                # Right column (narrower): the prediction output.
+                with gr.Column(scale=1):
+                    gr.Markdown("### 📊 Prediction Result")
+                    prediction_output = gr.Markdown(value="*Results will be displayed here...*")
+                    # Gradio has no Gauge component (gr.Gauge raises AttributeError while
+                    # the UI is being built), so the probability is shown in a read-only Number.
+                    probability_gauge = gr.Number(label="Churn Probability", value=0.0, interactive=False)
+            # The input order must match predict_churn's positional parameters.
+            predict_btn.click(
+                fn=predict_churn,
+                inputs=[customer_id, region, plan_type, monthly_charges, total_charges,
+                        tenure_months, contract_type, paperless_billing, payment_method,
+                        data_usage_gb, call_minutes, sms_count, complaint_status, complaint_count],
+                outputs=[prediction_output, probability_gauge]
+            )
+        # Tab 2: Business Insights
+        with gr.TabItem("💡 Business Insights"):
+            gr.Markdown("### Key Findings & Recommendations")
+            gr.Markdown("""
+            #### 🎯 Model Performance
+            - **Accuracy:** 90%
+            - **AUC Score:** 0.95
+            - **Best Algorithm:** Random Forest Classifier
+            #### 💼 Business Impact
+            - **Current Churn Rate:** 50% in the sample dataset.
+            - **Monthly Revenue at Risk:** Over ₹12,250.
+            - **Potential Annual Loss:** Over ₹147,000.
+            - **Savings Opportunity:** A 25% reduction in churn could save over ₹36,750 annually.
+            #### 🔴 Top Churn Drivers
+            1.  **Contract Type:** `Month-to-month` customers have a near 100% churn rate in high-risk groups.
+            2.  **Tenure:** New customers (0-12 months) are most likely to churn.
+            3.  **Complaints:** A single open complaint doubles the likelihood of churn.
+            """)
+        # Tab 3: Visualizations
+        with gr.TabItem("📈 Visualizations"):
+            gr.Markdown("### Data Analysis Dashboard")
+            # (file name inside images/, caption) pairs, rendered in this order.
+            image_files = [
+                ("churn_distribution.png", "Overall Churn Distribution"),
+                ("churn_by_contract.png", "Churn by Contract Type"),
+                ("revenue_vs_churn.png", "Revenue Impact Analysis"),
+                ("complaints_analysis.png", "Complaints Impact on Churn"),
+                ("correlation_matrix.png", "Feature Correlation Matrix"),
+            ]
+            for img_file, title in image_files:
+                img = load_image(img_file)
+                if img:
+                    gr.Image(img, label=title, show_label=True)
+                else:
+                    # A missing chart is replaced by a placeholder note.
+                    gr.Markdown(f"*{title} - Image not available*")
+        # Tab 4: About Project (UPDATED YEAR)
+        with gr.TabItem("ℹ️ About"):
+            gr.Markdown("""
+            ### 🎓 Academic Project Details
+            - **Course:** TIRTC - Advance AI/ML Training (Nokia)
+            - **Institution:** BRBRAITT, Jabalpur
+            - **Project:** Capstone Project 1
+            - **Team (Group 5):** Abhay Gupta, Jay Kumar, Kripanshu Gupta, Ruhy Namdeo
+            - **Tech Stack:** Scikit-learn, Pandas, Gradio, Gemini, Hugging Face
+            ---
+            **🏆 Project Status:** Complete | **📅 Last Updated:** October 2025 | **🔢 Version:** 1.1.0
+            """)
+    # Footer (UPDATED YEAR)
+    gr.Markdown("--- \n © 2025 BRBRAITT Group 5 | TIRTC Advance AI/ML Training")
+# Launch the app
+# Runs only when app.py is executed as a script, not when it is imported.
+# NOTE(review): share=True asks Gradio for a public share link; this is presumably
+# unnecessary when the app is hosted on Hugging Face Spaces - confirm before keeping it.
+if __name__ == "__main__":
+    app.launch(share=True)