pavanammm committed on
Commit
13d1c10
·
verified ·
1 Parent(s): 37ae2f3

first commit

Browse files
Files changed (5) hide show
  1. app.py +89 -0
  2. data_config.py +30 -0
  3. random_forest_model.joblib +3 -0
  4. requirements.txt +4 -0
  5. scaler.joblib +3 -0
app.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import gradio as gr
3
+ import pandas as pd
4
+ import joblib
5
+ from data_config import region_data, training_columns, unique_pincodes, unique_places
6
+
7
# Load the trained model and scaler once at import time so that every
# prediction request reuses the same in-memory objects.
# NOTE(review): joblib artifacts are pickle-based; only load files that come
# from a trusted source.
try:
    model = joblib.load('random_forest_model.joblib')
    scaler = joblib.load('scaler.joblib')
except FileNotFoundError as err:
    # Abort start-up with a non-zero exit status and the message on stderr.
    # Raising SystemExit does not depend on the `exit()` helper that the
    # `site` module installs for interactive use.
    raise SystemExit(
        "Error: Model or scaler file not found. Make sure "
        "'random_forest_model.joblib' and 'scaler.joblib' are in the same directory."
    ) from err
15
+
16
def predict_charges(age, sex, bmi, children, smoker, pincode, place):
    """Predict the insurance charge for a single applicant.

    The raw inputs are placed in a one-row DataFrame, the numeric features are
    scaled with the module-level ``scaler``, the categorical features are
    one-hot encoded, and the result is aligned to ``training_columns`` before
    being passed to the module-level ``model``.

    Args:
        age: Numeric feature, scaled before prediction.
        sex: Categorical feature, one-hot encoded as ``sex_<value>``.
        bmi: Numeric feature, scaled before prediction.
        children: Numeric feature, scaled before prediction.
        smoker: Categorical feature, one-hot encoded as ``smoker_<value>``.
        pincode: Categorical feature, one-hot encoded as ``pincode_<value>``.
        place: Categorical feature, one-hot encoded as ``place_<value>``.

    Returns:
        The first element of ``model.predict`` for the row, as a ``float``.
    """
    # NOTE(review): assumes the scaler was fitted on exactly these columns in
    # this order -- confirm against the training code.
    numerical_cols_to_scale = ['age', 'bmi', 'children']
    categorical_cols_to_encode = ['sex', 'smoker', 'pincode', 'place']

    input_data = pd.DataFrame([{
        'age': age,
        'sex': sex,
        'bmi': bmi,
        'children': children,
        'smoker': smoker,
        'pincode': pincode,
        'place': place,
    }])

    # Scale numerical features with the already-fitted scaler.
    input_data[numerical_cols_to_scale] = scaler.transform(
        input_data[numerical_cols_to_scale]
    )

    # One-hot encode the categorical features. A single row only produces the
    # dummy column of the selected category for each feature.
    input_data_encoded = pd.get_dummies(
        input_data, columns=categorical_cols_to_encode, drop_first=False
    )

    # Align to the training layout in one step: reindex adds every dummy
    # column that this row did not produce (filled with 0), drops any column
    # that is not listed in training_columns, and fixes the column order.
    # Casting to float gives the model a purely numeric frame regardless of
    # whether get_dummies emitted bool or integer indicators.
    final_input = input_data_encoded.reindex(
        columns=training_columns, fill_value=0
    ).astype(float)

    prediction = model.predict(final_input)

    # Return a plain Python float rather than a NumPy scalar.
    return float(prediction[0])
59
+
60
# Gradio input widgets. Dropdown defaults fall back to None when the
# corresponding option list is empty.
_default_pincode = unique_pincodes[0] if unique_pincodes else None
_default_place = unique_places[0] if unique_places else None

age_input = gr.Slider(label="Age", minimum=18, maximum=100, step=1, value=30)
sex_input = gr.Radio(label="Sex", choices=['female', 'male'], value='female')
bmi_input = gr.Slider(label="BMI", minimum=10.0, maximum=60.0, step=0.1, value=25.0)
children_input = gr.Slider(label="Children", minimum=0, maximum=5, step=1, value=1)
smoker_input = gr.Radio(label="Smoker", choices=['no', 'yes'], value='no')
pincode_input = gr.Dropdown(
    label="Pincode",
    choices=unique_pincodes,
    value=_default_pincode,
)
place_input = gr.Dropdown(
    label="Place",
    choices=unique_places,
    value=_default_place,
)
68
+
69
+
70
# Widgets are listed in the same order as the parameters of predict_charges,
# because Gradio passes their values to the function positionally.
_input_widgets = [
    age_input,
    sex_input,
    bmi_input,
    children_input,
    smoker_input,
    pincode_input,
    place_input,
]

# Build the web UI around the prediction function.
interface = gr.Interface(
    fn=predict_charges,
    inputs=_input_widgets,
    outputs=gr.Number(label="Predicted Insurance Charges"),
    title="Insurance Charge Predictor",
    description="Enter the details to get an estimated insurance charge.",
)

# Start the server only when this file is executed as a script;
# share=True asks Gradio to create a publicly shareable link.
if __name__ == '__main__':
    interface.launch(share=True)
data_config.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # This file contains configuration data for the insurance charges prediction application.
3
+
4
+ import random
5
+
6
# Pincodes and place names available for each region. Every region maps to a
# dict with a 'pincodes' list and a 'places' list.
region_data = {
    'southeast': {
        'pincodes': ['30301', '33101', '37201', '27514', '29501'],
        'places': [
            'Atlanta, Georgia',
            'Miami, Florida',
            'Nashville, Tennessee',
            'Chapel Hill, North Carolina',
            'Florence, South Carolina',
        ],
    },
    'southwest': {
        'pincodes': ['85001', '73301', '87501', '73102', '85701'],
        'places': [
            'Phoenix, Arizona',
            'Austin, Texas',
            'Santa Fe, New Mexico',
            'Oklahoma City, Oklahoma',
            'Tucson, Arizona',
        ],
    },
    'northwest': {
        'pincodes': ['98101', '97201', '83702', '99201', '59715'],
        'places': [
            'Seattle, Washington',
            'Portland, Oregon',
            'Boise, Idaho',
            'Spokane, Washington',
            'Bozeman, Montana',
        ],
    },
    'northeast': {
        'pincodes': ['10001', '02108', '19103', '07030', '06103'],
        'places': [
            'New York, New York',
            'Boston, Massachusetts',
            'Philadelphia, Pennsylvania',
            'Hoboken, New Jersey',
            'Hartford, Connecticut',
        ],
    },
}
8
+
9
# Column names, in order, that prediction input is aligned to before it is
# passed to the model. The order matters, so do not re-sort this list.
training_columns = [
    # Numeric features
    'age',
    'bmi',
    'children',
    # One-hot columns for 'sex' and 'smoker'
    'sex_female',
    'sex_male',
    'smoker_no',
    'smoker_yes',
    # One-hot columns for 'pincode'
    'pincode_02108',
    'pincode_06103',
    'pincode_07030',
    'pincode_10001',
    'pincode_19103',
    'pincode_27514',
    'pincode_29501',
    'pincode_30301',
    'pincode_33101',
    'pincode_37201',
    'pincode_59715',
    'pincode_73102',
    'pincode_73301',
    'pincode_83702',
    'pincode_85001',
    'pincode_85701',
    'pincode_87501',
    'pincode_97201',
    'pincode_98101',
    'pincode_99201',
    # One-hot columns for 'place'
    'place_Atlanta, Georgia',
    'place_Austin, Texas',
    'place_Boise, Idaho',
    'place_Boston, Massachusetts',
    'place_Bozeman, Montana',
    'place_Chapel Hill, North Carolina',
    'place_Florence, South Carolina',
    'place_Hartford, Connecticut',
    'place_Hoboken, New Jersey',
    'place_Miami, Florida',
    'place_Nashville, Tennessee',
    'place_New York, New York',
    'place_Oklahoma City, Oklahoma',
    'place_Philadelphia, Pennsylvania',
    'place_Phoenix, Arizona',
    'place_Portland, Oregon',
    'place_Santa Fe, New Mexico',
    'place_Seattle, Washington',
    'place_Spokane, Washington',
    'place_Tucson, Arizona',
]
12
+
13
def assign_location_data(row, regions=None):
    """Pick a random (pincode, place) pair for the region of ``row``.

    The 'pincodes' and 'places' lists of a region are treated as parallel:
    one position is drawn at random and the pincode and place at that position
    are returned together, so the two values always belong to the same entry.

    Args:
        row: Mapping-like object (for example a dict or a DataFrame row) with
            a 'region' key.
        regions: Optional mapping of region name to a dict with 'pincodes'
            and 'places' lists. Defaults to the module-level ``region_data``.

    Returns:
        A ``(pincode, place)`` tuple, or ``(None, None)`` when the region is
        not present in ``regions``.
    """
    if regions is None:
        regions = region_data

    region = row['region']
    if region not in regions:
        return None, None

    entry = regions[region]
    # Draw one paired entry instead of sampling each list independently,
    # which could combine a pincode with an unrelated place name.
    return random.choice(list(zip(entry['pincodes'], entry['places'])))
21
+
22
# Flatten the per-region lists from region_data. Comprehensions keep the loop
# variables out of the module namespace.
all_pincodes = [
    pincode
    for entry in region_data.values()
    for pincode in entry['pincodes']
]
all_places = [
    place
    for entry in region_data.values()
    for place in entry['places']
]

# De-duplicated, sorted option lists (used for the dropdown choices).
unique_pincodes = sorted(set(all_pincodes))
unique_places = sorted(set(all_places))
random_forest_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e6afb614fefc63a80834f684cd21f53b194b1db360326c0ca5d9542c24f80bb
3
+ size 22880369
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ pandas
2
+ scikit-learn
3
+ gradio
4
+ joblib
scaler.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f9ae5dad94b3950eb2ddd5b9cc85d383daa67e8944421be1fd1ba2ebc1311f2
3
+ size 943