Spaces:

Ade1ola
/

credit_risk_classification

Sleeping

File size: 17,836 Bytes

7600b57

import gradio as gr
import pandas as pd
import numpy as np
import os
import hopsworks
import joblib
from dotenv import load_dotenv

# Load environment variables (for Hopsworks credentials)
load_dotenv()

# Bind every name the prediction code reads up front, so each one exists no
# matter which branch below succeeds or fails.
endpoint = None
model = None
model_available = False
local_model_available = False

# Connect to Hopsworks and get model serving endpoint
try:
    project = hopsworks.login()
    ms = project.get_model_serving()
    endpoint = ms.get_endpoint("credit-risk-endpoint")  # Replace with your endpoint name
    print("✅ Connected to Hopsworks and found model endpoint")
    model_available = True
except Exception as e:
    # Best-effort: any failure here just means we try the local model instead.
    print(f"⚠️ Error connecting to Hopsworks: {e}")
    print("Using local model as fallback if available")

# Try to load local model as fallback (done outside the handler above so a
# failure here is reported on its own instead of as a chained exception).
if not model_available:
    model_path = 'credit_risk_model.pkl'
    if os.path.exists(model_path):
        try:
            # NOTE: joblib.load unpickles the file, which can execute arbitrary
            # code; only ship a model file that comes from a trusted source.
            model = joblib.load(model_path)
        except Exception as load_error:
            print(f"⚠️ Failed to load local model from {model_path}: {load_error}")
        else:
            print(f"✅ Loaded local model from {model_path}")
            local_model_available = True
    else:
        print(f"⚠️ Local model not found at {model_path}")

# ---- HELPER FUNCTIONS ----
def get_age_group(age):
    """Return the age-bracket label for ``age``.

    Each bracket excludes its upper bound. Every age below 30 is labelled
    '20-30' and every age of 70 or more is labelled '70+'.
    """
    brackets = (
        (30, '20-30'),
        (40, '30-40'),
        (50, '40-50'),
        (60, '50-60'),
        (70, '60-70'),
    )
    for upper_bound, label in brackets:
        if age < upper_bound:
            return label
    return '70+'

def get_credit_amount_group(amount):
    """Return the size band ('Low', 'Medium', 'High', 'Very High') for ``amount``.

    Bands exclude their upper bound: below 2000 is 'Low', below 5000 is
    'Medium', below 10000 is 'High', and everything else is 'Very High'.
    """
    bands = ((2000, 'Low'), (5000, 'Medium'), (10000, 'High'))
    for ceiling, band in bands:
        if amount < ceiling:
            return band
    return 'Very High'

def get_duration_group(duration):
    """Return 'Short', 'Medium' or 'Long' for a loan ``duration``.

    Limits are inclusive: up to 12 is 'Short', up to 36 is 'Medium', and
    anything beyond that is 'Long'.
    """
    limits = ((12, 'Short'), (36, 'Medium'))
    return next(
        (label for limit, label in limits if duration <= limit),
        'Long',
    )

def get_employment_stability(emp):
    """Map an employment code ('A71'..'A75') to a stability label.

    Codes that are not in the table fall back to 'Moderate'.
    """
    stability_by_code = {
        'A71': 'Unstable',
        'A72': 'Unstable',
        'A73': 'Moderate',
        'A74': 'Stable',
        'A75': 'Very Stable',
    }
    if emp in stability_by_code:
        return stability_by_code[emp]
    return 'Moderate'

def get_savings_status(savings):
    """Map a savings-account code ('A61'..'A65') to a coarse status label.

    Codes that are not in the table fall back to 'None/Low'.
    """
    codes_by_status = {
        'None/Low': ('A61', 'A65'),
        'Moderate': ('A62', 'A63'),
        'High': ('A64',),
    }
    status_by_code = {
        code: status
        for status, codes in codes_by_status.items()
        for code in codes
    }
    return status_by_code.get(savings, 'None/Low')

def get_credit_history_simple(history):
    """Map a credit-history code ('A30'..'A34') to 'Poor', 'Fair' or 'Good'.

    Codes that are not in the table fall back to 'Fair'.
    """
    rating_by_code = {
        'A30': 'Poor',
        'A31': 'Good',
        'A32': 'Good',
        'A33': 'Fair',
        'A34': 'Poor',
    }
    try:
        return rating_by_code[history]
    except KeyError:
        return 'Fair'

def calculate_risk_flags(age, credit_amount, duration, checking_account):
    """Build the 0/1 rule-based risk flags for one application.

    Returns a dict with the keys 'young_high_credit_flag',
    'high_exposure_flag', 'critical_high_amount_flag',
    'no_checking_high_credit_flag' and 'checking_risk', in that order.
    """
    amount_over_5000 = credit_amount > 5000
    has_no_checking = checking_account == 'A14'

    flags = {}
    flags['young_high_credit_flag'] = 1 if (age < 30 and amount_over_5000) else 0
    # duration is only inspected once the amount threshold is met
    flags['high_exposure_flag'] = 1 if (credit_amount > 7500 and duration > 24) else 0
    flags['critical_high_amount_flag'] = 1 if credit_amount > 10000 else 0
    flags['no_checking_high_credit_flag'] = 1 if (has_no_checking and amount_over_5000) else 0
    flags['checking_risk'] = 1 if checking_account in ('A13', 'A14') else 0
    return flags

def calculate_additional_risk_flags(credit_history, savings_account):
    """Build the credit-history and savings risk indicators.

    'history_risk' is 1 for history codes 'A30'/'A34', 'savings_risk' is 1 for
    savings codes 'A61'/'A65', and 'combined_account_risk' is their sum (0-2).
    """
    risky_history_codes = ('A30', 'A34')
    risky_savings_codes = ('A61', 'A65')

    flags = {
        'history_risk': int(credit_history in risky_history_codes),
        'savings_risk': int(savings_account in risky_savings_codes),
    }
    flags['combined_account_risk'] = flags['history_risk'] + flags['savings_risk']
    return flags

# ---- PREDICTION FUNCTION ----
def predict_credit_risk(checking_account, duration, credit_history, purpose, credit_amount, savings_account, employment_since, installment_rate, personal_status_sex, other_debtors, present_residence, property, age, other_installment_plans, housing, number_credits, job, people_liable, telephone, foreign_worker):
    """Score one loan application and return the result as an HTML snippet.

    The 20 parameters are the raw application fields in the order the UI
    passes them; categorical fields carry the 'Axx' codes offered by the
    dropdowns, while ``duration``, ``credit_amount`` and ``age`` are numeric.
    Derived features and rule-based flags are added before scoring.

    Scoring uses the Hopsworks ``endpoint`` when ``model_available`` is true,
    otherwise the locally loaded ``model``.

    Returns:
        str: an HTML ``<div>``. On success it shows the risk level, the score
        and key factors; on any failure it shows a red error panel. This
        function does not raise: every exception is caught, printed and
        rendered into the error panel.
    """
    import html  # local import: only used to escape exception text for display

    # Check if any model is available.
    # `local_model_available` is a module-level flag that may not exist at all,
    # so look it up in the module namespace. locals() would only ever see this
    # function's own variables and always report the fallback as missing.
    has_local_model = globals().get('local_model_available', False)
    if not model_available and not has_local_model:
        return """
        <div style='padding: 1rem; border-radius: 0.5rem; background-color: #f44336; color: white;'>
            <h2>Error: No Model Available</h2>
            <p>Neither Hopsworks connection nor local model is available. Please check server logs.</p>
        </div>
        """
    
    try:
        # Calculate derived features
        age_group = get_age_group(age)
        credit_amount_group = get_credit_amount_group(credit_amount)
        duration_group = get_duration_group(duration)
        employment_stability = get_employment_stability(employment_since)
        savings_status = get_savings_status(savings_account)
        credit_history_simple = get_credit_history_simple(credit_history)
        # Guard the divisions so a zero duration/age yields 0 instead of raising
        credit_per_month = credit_amount / duration if duration > 0 else 0
        age_to_credit_ratio = credit_amount / age if age > 0 else 0
        # Monthly payment as a percentage of 2000.
        # NOTE(review): 2000 looks like an assumed monthly income - confirm.
        debt_burden = credit_per_month * 100 / 2000
        # Same value as credit_per_month; kept as a separately named feature
        credit_to_duration_ratio = credit_amount / duration if duration > 0 else 0
        
        # Calculate risk flags
        risk_flags = calculate_risk_flags(age, credit_amount, duration, checking_account)
        additional_flags = calculate_additional_risk_flags(credit_history, savings_account)
        
        # Create input data dictionary with all features
        input_data = {
            'index': 0,  # Add index column if needed by your model
            'checking_account': checking_account,
            'duration': duration,
            'credit_history': credit_history,
            'purpose': purpose,
            'credit_amount': credit_amount,
            'savings_account': savings_account,
            'employment_since': employment_since,
            'installment_rate': installment_rate,
            'personal_status_sex': personal_status_sex,
            'other_debtors': other_debtors,
            'present_residence': present_residence,
            'property': property,
            'age': age,
            'other_installment_plans': other_installment_plans,
            'housing': housing,
            'number_credits': number_credits,
            'job': job,
            'people_liable': people_liable,
            'telephone': telephone,
            'foreign_worker': foreign_worker,
            'age_group': age_group,
            'credit_amount_group': credit_amount_group,
            'duration_group': duration_group,
            'credit_per_month': credit_per_month,
            'employment_stability': employment_stability,
            'savings_status': savings_status,
            'credit_history_simple': credit_history_simple,
            'age_to_credit_ratio': age_to_credit_ratio,
            'debt_burden': debt_burden,
            'credit_to_duration_ratio': credit_to_duration_ratio,
            'duration_history_interaction': int(duration > 24 and credit_history in ['A30', 'A33', 'A34']),
            'amount_checking_interaction': int(credit_amount > 5000 and checking_account in ['A13', 'A14']),
            **risk_flags,
            **additional_flags
        }
        
        try:
            # Make prediction using Hopsworks endpoint or local model
            if model_available:
                # Use Hopsworks endpoint for prediction
                response = endpoint.predict(input_data)
                print("Prediction made using Hopsworks endpoint")
                # Extract probability from response (adjust based on your endpoint's return format)
                # NOTE(review): assumes a list response is [[p_class0, p_class1], ...];
                # any other shape is used as-is - confirm against the deployed endpoint.
                y_proba = response[0][1] if isinstance(response, list) else response
            else:
                # Use local model as fallback
                df = pd.DataFrame([input_data])
                y_proba = model.predict_proba(df)[0][1]
                print("Prediction made using local model")
            
            # Determine risk level based on probability
            if y_proba > 0.7:
                risk = "High Risk"
                color = "#f44336"  # Red
                approval = "Loan Rejected"
                icon = "❌"
            elif y_proba > 0.4:
                risk = "Medium Risk"
                color = "#ff9800"  # Orange
                approval = "Further Review Required"
                icon = "⚠️"
            else:
                risk = "Low Risk"
                color = "#4caf50"  # Green
                approval = "Loan Approved"
                icon = "✅"
            
            # Format a detailed response
            return f"""
            <div style='padding: 1.5rem; border-radius: 0.5rem; background-color: {color}; color: white;'>
                <h2 style='margin-top: 0;'>{icon} {risk}: {approval}</h2>
                <p style='font-size: 1.2rem;'>Risk Score: {y_proba:.2%}</p>
                <hr style='border-color: rgba(255,255,255,0.3);'>
                <div style='margin-top: 1rem;'>
                    <p><strong>Key Risk Factors:</strong></p>
                    <ul>
                        <li>Credit Amount: ${credit_amount:,.2f} ({credit_amount_group})</li>
                        <li>Loan Duration: {duration} months ({duration_group})</li>
                        <li>Monthly Payment: ${credit_per_month:,.2f}</li>
                        <li>Credit History: {credit_history_simple}</li>
                        <li>Debt Burden: {debt_burden:.2f}%</li>
                    </ul>
                </div>
            </div>
            """
                
        except Exception as inner_e:
            # Scoring failed (endpoint call, model call, or response shape)
            print(f"Prediction error: {inner_e}")
            # Escape the message: exception text may contain markup characters
            return f"""
            <div style='padding: 1rem; border-radius: 0.5rem; background-color: #f44336; color: white;'>
                <h2>Error in Prediction</h2>
                <p>{html.escape(str(inner_e))}</p>
                <p>Please check the server logs for details.</p>
            </div>
            """
            
    except Exception as e:
        # Feature engineering failed before any model was called
        print(f"Error in processing: {e}")
        return f"""
        <div style='padding: 1rem; border-radius: 0.5rem; background-color: #f44336; color: white;'>
            <h2>Error Processing Request</h2>
            <p>{html.escape(str(e))}</p>
            <p>Please check the server logs for details.</p>
        </div>
        """

# ---- GRADIO INTERFACE ----
# Builds the whole UI: three input columns, a submit button, an HTML result
# panel, clickable example applications and two collapsible help sections.
with gr.Blocks(title="Credit Risk Assessment Tool") as demo:
    gr.Markdown("# 💰 Credit Risk Assessment Tool")
    gr.Markdown("This tool assesses the risk level of a loan application based on applicant details and loan parameters. Fill in all fields below and click 'Evaluate Risk' to get a prediction.")
    
    with gr.Row():
        with gr.Column():
            gr.Markdown("### 👤 Applicant Information")
            # step=1 on count-like sliders: without an explicit step Gradio
            # derives a fractional increment from the range.
            age = gr.Slider(18, 80, 35, step=1, label="Age")
            personal_status_sex = gr.Dropdown(['A91', 'A92', 'A93', 'A94'], label="Personal Status/Sex", value="A91", info="A91=male divorced/separated, A92=female divorced/separated/married, A93=male single, A94=male married/widowed")
            present_residence = gr.Slider(1, 4, 2, step=1, label="Present Residence (years)")
            employment_since = gr.Dropdown(['A71', 'A72', 'A73', 'A74', 'A75'], label="Employment Since", value="A73", info="A71=unemployed, A72<1yr, A73=1-4 yrs, A74=4-7 yrs, A75>7yrs")
            job = gr.Dropdown(['A171', 'A172', 'A173', 'A174'], label="Job Status", value="A173", info="A171=unemployed/unskilled, A172=unskilled resident, A173=skilled, A174=highly skilled")
            people_liable = gr.Slider(1, 2, 1, step=1, label="Number of People Liable")
            telephone = gr.Dropdown(['A191', 'A192'], label="Telephone", value="A192", info="A191=none, A192=yes (registered under customer name)")
            foreign_worker = gr.Dropdown(['A201', 'A202'], label="Foreign Worker", value="A201", info="A201=yes, A202=no")
            
        with gr.Column():
            gr.Markdown("### 💵 Loan Information")
            credit_amount = gr.Slider(500, 15000, 5000, label="Credit Amount")
            duration = gr.Slider(6, 60, 24, step=1, label="Loan Duration (months)")
            purpose = gr.Dropdown(['A40', 'A41', 'A42', 'A43', 'A44', 'A45', 'A46', 'A47', 'A48', 'A49', 'A410'], label="Purpose", value="A43", info="A40=car (new), A41=car (used), A42=furniture, A43=television, A44=appliances, A45=repairs, A46=education, A47=vacation, A48=retraining, A49=business, A410=others")
            installment_rate = gr.Dropdown([1, 2, 3, 4], label="Installment Rate", value=2, info="Percentage of disposable income (1=<20%, 4=>35%)")
            
        with gr.Column():
            gr.Markdown("### 🏦 Financial Information")
            checking_account = gr.Dropdown(['A11', 'A12', 'A13', 'A14'], label="Checking Account", value="A11", info="A11=< 0 DM, A12=0-200 DM, A13=>200 DM, A14=no checking account")
            savings_account = gr.Dropdown(['A61', 'A62', 'A63', 'A64', 'A65'], label="Savings Account", value="A61", info="A61=<100 DM, A62=100-500 DM, A63=500-1000 DM, A64=>1000 DM, A65=no savings account")
            credit_history = gr.Dropdown(['A30', 'A31', 'A32', 'A33', 'A34'], label="Credit History", value="A31", info="A30=no credits, A31=all credits paid, A32=existing credits paid, A33=delay in paying, A34=critical account")
            other_debtors = gr.Dropdown(['A101', 'A102', 'A103'], label="Other Debtors/Guarantors", value="A101", info="A101=none, A102=co-applicant, A103=guarantor")
            property = gr.Dropdown(['A121', 'A122', 'A123', 'A124'], label="Property", value="A121", info="A121=real estate, A122=savings agreement/life insurance, A123=car or other, A124=unknown/no property")
            other_installment_plans = gr.Dropdown(['A141', 'A142', 'A143'], label="Other Installment Plans", value="A142", info="A141=bank, A142=stores, A143=none")
            housing = gr.Dropdown(['A151', 'A152', 'A153'], label="Housing", value="A152", info="A151=rent, A152=own, A153=for free")
            number_credits = gr.Slider(1, 4, 1, step=1, label="Number of Existing Credits")
    
    submit_btn = gr.Button("Evaluate Risk", variant="primary")
    output = gr.HTML(label="Risk Assessment")
    
    # Components in the exact positional order predict_credit_risk expects.
    # Defined once so the examples and the click handler cannot drift apart.
    prediction_inputs = [
        checking_account, duration, credit_history, purpose, credit_amount, 
        savings_account, employment_since, installment_rate, personal_status_sex, 
        other_debtors, present_residence, property, age, other_installment_plans, 
        housing, number_credits, job, people_liable, telephone, foreign_worker
    ]
    
    # Include example inputs (each row follows the order of prediction_inputs)
    gr.Examples(
        examples=[
            # Low risk example
            ['A11', 24, 'A32', 'A43', 3000, 'A65', 'A73', 2, 'A93', 'A101', 2, 'A121', 32, 'A143', 'A152', 1, 'A173', 1, 'A192', 'A201'],
            
            # Medium risk example
            ['A13', 36, 'A33', 'A46', 8000, 'A61', 'A72', 3, 'A92', 'A101', 1, 'A123', 25, 'A142', 'A151', 2, 'A172', 2, 'A191', 'A201'],
            
            # High risk example
            ['A14', 48, 'A30', 'A49', 12000, 'A61', 'A71', 4, 'A93', 'A103', 1, 'A124', 22, 'A141', 'A153', 3, 'A171', 2, 'A191', 'A201']
        ],
        inputs=prediction_inputs,
        outputs=output,
        label="Example Applications"
    )
    
    # Set up event
    submit_btn.click(
        fn=predict_credit_risk,
        inputs=prediction_inputs,
        outputs=output
    )
    
    # Add explanation section
    with gr.Accordion("📋 Understanding the Variables", open=False):
        gr.Markdown("""
        ### Feature Descriptions
        
        #### Personal Information
        - **Age**: The applicant's age in years
        - **Personal Status/Sex**: Marital status and gender combined
        - **Present Residence**: How long the applicant has lived at their current address
        - **Employment Since**: How long the applicant has been employed at their current job
        - **Job Status**: Type and quality of employment
        - **People Liable**: Number of people who are financially dependent on the applicant
        - **Telephone**: Whether the applicant has a registered telephone
        - **Foreign Worker**: Whether the applicant is a foreign worker
        
        #### Loan Information
        - **Credit Amount**: The amount of the loan in currency units
        - **Duration**: Term of the loan in months
        - **Purpose**: Reason for the loan
        - **Installment Rate**: Percentage of disposable income set aside for installments
        
        #### Financial Information
        - **Checking Account**: Status of checking account
        - **Savings Account**: Status of savings account
        - **Credit History**: Applicant's credit history
        - **Other Debtors/Guarantors**: Whether there are co-applicants or guarantors
        - **Property**: The applicant's most valuable property
        - **Other Installment Plans**: Whether the applicant has other installment plans
        - **Housing**: Housing situation of the applicant
        - **Number of Credits**: Number of existing loans at this or other banks
        """)
    
    # Add model information
    with gr.Accordion("🔍 About the Model", open=False):
        gr.Markdown("""
        ### Credit Risk Assessment Model
        
        This application uses a machine learning model trained on historical credit data to predict the risk level of new loan applications. The model is deployed on Hopsworks and accessed via an API endpoint.
        
        **Key performance metrics**:
        - Precision-Recall AUC: 0.8732
        - KS Statistic: 0.5190
        - ROC AUC: 0.7843
        
        The model uses a weighted scoring system that emphasizes:
        - 50% weight on Precision-Recall AUC (focuses on identifying risky loans)
        - 30% weight on KS Statistic (measures separation between good and bad loans)
        - 20% weight on ROC AUC (overall discriminative ability)
        
        **Risk Levels**:
        - **Low Risk** (Green): High confidence in loan repayment
        - **Medium Risk** (Orange): Some concerns, further review recommended
        - **High Risk** (Red): Significant likelihood of default
        """)

# Launch the app
# Only start the Gradio server when this file is executed as a script;
# importing the module builds `demo` but does not call launch().
if __name__ == "__main__":
    demo.launch()