Insurance / app.py
pavanammm's picture
first commit
13d1c10 verified
import gradio as gr
import pandas as pd
import joblib
from data_config import region_data, training_columns, unique_pincodes, unique_places
# Load the trained model and scaler once at import time so that every
# prediction call reuses the same in-memory objects.
# NOTE: the relative paths are resolved against the current working directory.
try:
    model = joblib.load('random_forest_model.joblib')
    scaler = joblib.load('scaler.joblib')
except FileNotFoundError as err:
    # Abort start-up with a non-zero exit status. The bare `exit()` helper is
    # injected by the `site` module for interactive use, is not guaranteed to
    # exist, and would report success (status 0) to the process supervisor.
    # SystemExit with a string argument prints it to stderr and exits with 1.
    raise SystemExit(
        "Error: Model or scaler file not found. Make sure "
        "'random_forest_model.joblib' and 'scaler.joblib' are in the same directory."
    ) from err
def predict_charges(age, sex, bmi, children, smoker, pincode, place):
    """Predict the insurance charge for a single set of applicant details.

    The raw values are put into a one-row DataFrame, the numeric columns are
    transformed with the module-level ``scaler``, the categorical columns are
    one-hot encoded, and the result is aligned to ``training_columns`` before
    being passed to the module-level ``model``.

    Args:
        age: Applicant age (scaled together with ``bmi`` and ``children``).
        sex: Category value; one-hot encoded under the ``sex_`` prefix.
        bmi: Body-mass index.
        children: Number of children.
        smoker: Category value; one-hot encoded under the ``smoker_`` prefix.
        pincode: Category value; one-hot encoded under the ``pincode_`` prefix.
        place: Category value; one-hot encoded under the ``place_`` prefix.

    Returns:
        The model's prediction for the single input row, as a plain ``float``.
    """
    # Column groups; presumably these mirror the preprocessing applied when
    # the model was trained -- confirm against the training script.
    numerical_cols_to_scale = ['age', 'bmi', 'children']
    categorical_cols_to_encode = ['sex', 'smoker', 'pincode', 'place']

    input_data = pd.DataFrame([{
        'age': age,
        'sex': sex,
        'bmi': bmi,
        'children': children,
        'smoker': smoker,
        'pincode': pincode,
        'place': place,
    }])

    # Scale numerical features with the pre-fitted scaler.
    input_data[numerical_cols_to_scale] = scaler.transform(
        input_data[numerical_cols_to_scale]
    )

    # One-hot encode the categoricals. With a single row this only produces
    # the indicator columns for the categories actually present in the input.
    input_data_encoded = pd.get_dummies(
        input_data, columns=categorical_cols_to_encode, drop_first=False
    )

    # Align to the training layout in one step: indicator columns that are
    # absent from this row are created and filled with 0, columns that are not
    # in `training_columns` (e.g. an unseen category) are dropped, and the
    # column order follows `training_columns`. Casting to float gives the
    # model a homogeneous numeric frame instead of a bool/object mix.
    final_input = input_data_encoded.reindex(
        columns=training_columns, fill_value=0
    ).astype(float)

    prediction = model.predict(final_input)
    # Return a built-in float rather than a NumPy scalar.
    return float(prediction[0])
# Input widgets for the form: sliders for the numeric values, radio buttons
# for the two-valued fields, and dropdowns for pincode and place.
age_input = gr.Slider(
    label="Age",
    minimum=18,
    maximum=100,
    step=1,
    value=30,
)
sex_input = gr.Radio(
    label="Sex",
    choices=['female', 'male'],
    value='female',
)
bmi_input = gr.Slider(
    label="BMI",
    minimum=10.0,
    maximum=60.0,
    step=0.1,
    value=25.0,
)
children_input = gr.Slider(
    label="Children",
    minimum=0,
    maximum=5,
    step=1,
    value=1,
)
smoker_input = gr.Radio(
    label="Smoker",
    choices=['no', 'yes'],
    value='no',
)
# Dropdowns default to the first available option, or to no selection when
# the corresponding list is empty.
pincode_input = gr.Dropdown(
    label="Pincode",
    choices=unique_pincodes,
    value=unique_pincodes[0] if unique_pincodes else None,
)
place_input = gr.Dropdown(
    label="Place",
    choices=unique_places,
    value=unique_places[0] if unique_places else None,
)
# Wire the widgets to the prediction function. The order of `inputs` must
# match the positional parameter order of `predict_charges`.
interface = gr.Interface(
    title="Insurance Charge Predictor",
    description="Enter the details to get an estimated insurance charge.",
    fn=predict_charges,
    inputs=[
        age_input,
        sex_input,
        bmi_input,
        children_input,
        smoker_input,
        pincode_input,
        place_input,
    ],
    outputs=gr.Number(label="Predicted Insurance Charges"),
)
# Start the web UI only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    # share=True asks Gradio for a public share link -- confirm this is
    # wanted for the target deployment.
    interface.launch(share=True)