"""Gradio front end for an insurance-charge prediction model.

Loads a previously saved model and scaler from disk, converts the form inputs
into the feature layout listed in ``training_columns``, and serves the model's
prediction through a Gradio interface.
"""

import gradio as gr
import joblib
import pandas as pd

from data_config import region_data, training_columns, unique_pincodes, unique_places

# Columns passed through the fitted scaler, and columns that are one-hot encoded.
NUMERICAL_COLS_TO_SCALE = ['age', 'bmi', 'children']
CATEGORICAL_COLS_TO_ENCODE = ['sex', 'smoker', 'pincode', 'place']

# Load the trained model and scaler once at import time so every request
# reuses the same objects.
# NOTE(review): joblib.load unpickles arbitrary objects; only ship artifacts
# from a trusted source alongside this script.
try:
    model = joblib.load('random_forest_model.joblib')
    scaler = joblib.load('scaler.joblib')
except FileNotFoundError:
    print(
        "Error: Model or scaler file not found. Make sure "
        "'random_forest_model.joblib' and 'scaler.joblib' are in the same directory."
    )
    # Terminate with a non-zero status so a supervisor/deployment sees the
    # failure; the bare `exit()` helper reports success and is only injected
    # by the `site` module.
    raise SystemExit(1) from None


def predict_charges(age, sex, bmi, children, smoker, pincode, place):
    """Return the model's predicted charge for a single set of form inputs.

    Args:
        age: Value for the ``age`` feature (scaled before prediction).
        sex: Value for the ``sex`` feature (one-hot encoded).
        bmi: Value for the ``bmi`` feature (scaled before prediction).
        children: Value for the ``children`` feature (scaled before prediction).
        smoker: Value for the ``smoker`` feature (one-hot encoded).
        pincode: Value for the ``pincode`` feature (one-hot encoded).
        place: Value for the ``place`` feature (one-hot encoded).

    Returns:
        The prediction for the single input row, as a float.
    """
    # Build a one-row frame from the raw inputs.
    input_data = pd.DataFrame([{
        'age': age,
        'sex': sex,
        'bmi': bmi,
        'children': children,
        'smoker': smoker,
        'pincode': pincode,
        'place': place,
    }])

    # Scale numerical features with the scaler loaded at import time.
    input_data[NUMERICAL_COLS_TO_SCALE] = scaler.transform(
        input_data[NUMERICAL_COLS_TO_SCALE]
    )

    # One-hot encode the categorical features. drop_first=False keeps every
    # category column, which presumably matches how the training matrix was
    # built -- verify against the training code.
    input_data_encoded = pd.get_dummies(
        input_data,
        columns=CATEGORICAL_COLS_TO_ENCODE,
        drop_first=False,
    )

    # Align with the training layout in one step: reindex adds the one-hot
    # columns that a single row cannot produce (filled with 0), drops any
    # column not listed in training_columns, and puts the columns in
    # training order.
    final_input = input_data_encoded.reindex(columns=training_columns, fill_value=0)

    # get_dummies may emit boolean indicator columns; hand the model 0/1 ints.
    bool_cols = final_input.select_dtypes(include='bool').columns
    if len(bool_cols) > 0:
        final_input[bool_cols] = final_input[bool_cols].astype(int)

    # Make the prediction and unwrap the single result into a plain float.
    prediction = model.predict(final_input)
    return float(prediction[0])


# Define Gradio input components
age_input = gr.Slider(minimum=18, maximum=100, step=1, value=30, label="Age")
sex_input = gr.Radio(choices=['female', 'male'], value='female', label="Sex")
bmi_input = gr.Slider(minimum=10.0, maximum=60.0, step=0.1, value=25.0, label="BMI")
children_input = gr.Slider(minimum=0, maximum=5, step=1, value=1, label="Children")
smoker_input = gr.Radio(choices=['no', 'yes'], value='no', label="Smoker")
# Default each dropdown to its first choice, or to no selection when the
# choice list is empty.
pincode_input = gr.Dropdown(
    choices=unique_pincodes,
    value=unique_pincodes[0] if unique_pincodes else None,
    label="Pincode",
)
place_input = gr.Dropdown(
    choices=unique_places,
    value=unique_places[0] if unique_places else None,
    label="Place",
)

# Create the Gradio Interface; the input order must match the parameter order
# of predict_charges.
interface = gr.Interface(
    fn=predict_charges,
    inputs=[
        age_input,
        sex_input,
        bmi_input,
        children_input,
        smoker_input,
        pincode_input,
        place_input,
    ],
    outputs=gr.Number(label="Predicted Insurance Charges"),
    title="Insurance Charge Predictor",
    description="Enter the details to get an estimated insurance charge.",
)

# Launch the Gradio interface
if __name__ == '__main__':
    # NOTE(review): share=True asks Gradio for a publicly reachable link;
    # confirm that exposure is intended for this deployment.
    interface.launch(share=True)