File size: 3,594 Bytes
13d1c10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90

import gradio as gr
import pandas as pd
import joblib
from data_config import region_data, training_columns, unique_pincodes, unique_places

# Load the trained model and scaler once at import time so that every
# prediction request reuses the same objects instead of re-reading them.
# NOTE(review): joblib artifacts are pickle-based; only load files that come
# from a trusted source.
try:
    model = joblib.load('random_forest_model.joblib')
    scaler = joblib.load('scaler.joblib')
except FileNotFoundError as err:
    # The app cannot serve predictions without both artifacts, so abort
    # start-up. Raising SystemExit with a message (rather than calling the
    # interactive ``exit()`` helper) writes the message to stderr, yields a
    # non-zero exit status, and does not depend on the ``site`` module.
    raise SystemExit(
        "Error: Model or scaler file not found. Make sure 'random_forest_model.joblib' and 'scaler.joblib' are in the same directory."
    ) from err

def predict_charges(age, sex, bmi, children, smoker, pincode, place):
    """Predict the insurance charge for a single set of inputs.

    The raw values are placed in a one-row DataFrame, the numeric fields are
    transformed with the module-level ``scaler``, the categorical fields are
    one-hot encoded, the columns are aligned to ``training_columns``, and the
    resulting row is passed to the module-level ``model``.

    Args:
        age: Numeric feature, scaled together with ``bmi`` and ``children``.
        sex: Categorical feature, one-hot encoded.
        bmi: Numeric feature, scaled together with ``age`` and ``children``.
        children: Numeric feature, scaled together with ``age`` and ``bmi``.
        smoker: Categorical feature, one-hot encoded.
        pincode: Categorical feature, one-hot encoded.
        place: Categorical feature, one-hot encoded.

    Returns:
        The first (and only) element of ``model.predict`` for the input row.
    """
    # Column groups as used during training.
    numeric_features = ['age', 'bmi', 'children']
    categorical_features = ['sex', 'smoker', 'pincode', 'place']

    # Single-row frame holding the raw input.
    row = pd.DataFrame([{
        'age': age,
        'sex': sex,
        'bmi': bmi,
        'children': children,
        'smoker': smoker,
        'pincode': pincode,
        'place': place,
    }])

    # Scale numerical features with the already-fitted scaler.
    row[numeric_features] = scaler.transform(row[numeric_features])

    # One-hot encode; drop_first=False keeps every category column, matching
    # how `pd.get_dummies` was used to build the training matrix.
    encoded = pd.get_dummies(row, columns=categorical_features, drop_first=False)

    # Align to the training layout in a single step: columns the single input
    # row did not produce (e.g. sex_male when the input is female) are added
    # as 0, columns unknown to the model are dropped, and the order follows
    # training_columns. This avoids concatenating onto an empty frame, which
    # yields object-dtype columns and relies on deprecated pandas behaviour.
    aligned = encoded.reindex(columns=training_columns, fill_value=0)

    # Newer pandas emits boolean dummy columns; give the model plain 0/1.
    bool_columns = [col for col in aligned.columns if aligned[col].dtype == bool]
    aligned = aligned.astype({col: int for col in bool_columns})

    # Make prediction
    prediction = model.predict(aligned)

    return prediction[0]

# Input widgets, declared in the order predict_charges receives its arguments.
age_input = gr.Slider(label="Age", minimum=18, maximum=100, step=1, value=30)
sex_input = gr.Radio(label="Sex", choices=['female', 'male'], value='female')
bmi_input = gr.Slider(label="BMI", minimum=10.0, maximum=60.0, step=0.1, value=25.0)
children_input = gr.Slider(label="Children", minimum=0, maximum=5, step=1, value=1)
smoker_input = gr.Radio(label="Smoker", choices=['no', 'yes'], value='no')
pincode_input = gr.Dropdown(
    label="Pincode",
    choices=unique_pincodes,
    # Preselect the first option; fall back to no selection when the list is empty.
    value=unique_pincodes[0] if unique_pincodes else None,
)
place_input = gr.Dropdown(
    label="Place",
    choices=unique_places,
    # Preselect the first option; fall back to no selection when the list is empty.
    value=unique_places[0] if unique_places else None,
)


# Bind the prediction function to its widgets. The inputs list follows the
# positional parameter order of predict_charges.
interface = gr.Interface(
    fn=predict_charges,
    inputs=[
        age_input, sex_input, bmi_input, children_input,
        smoker_input, pincode_input, place_input,
    ],
    outputs=gr.Number(label="Predicted Insurance Charges"),
    title="Insurance Charge Predictor",
    description="Enter the details to get an estimated insurance charge.",
)

# Start the UI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    interface.launch(share=True)