File size: 4,785 Bytes
af90f0b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4c9c809
 
af90f0b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# Import the libraries
# stdlib
import json
import uuid
from pathlib import Path

# third-party
import joblib
import pandas as pd

# NOTE(review): CommitScheduler is used below but was never imported —
# it lives in huggingface_hub (required for the dataset-logging scheduler).
from huggingface_hub import CommitScheduler

from sklearn.datasets import fetch_openml
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import make_column_transformer
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Train and persist the model (script-style; in a real deployment this would
# live in a separate training script placed next to app.py).
# NOTE(review): the original comment said "linear regression", but the code
# below trains a RandomForestRegressor — comment updated to match the code.

# Run training script
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

import joblib

# Choose and instantiate the RandomForestRegressor model (default hyperparameters)
model = RandomForestRegressor()


# Train the model using the training data.
# NOTE(review): X_train / y_train are not defined anywhere in this file —
# this line will raise NameError unless the data-loading / train_test_split
# code that should precede it exists elsewhere. Confirm before deploying.
model.fit(X_train, y_train)

# Save the trained model to a file so the app can reload it below
joblib.dump(model, 'model.joblib')


# Load the freshly trained model from disk
model = joblib.load('model.joblib')

# Prepare the logging functionality: one JSON-lines file per process run,
# named with a random UUID so concurrent app replicas never collide on a file.
# NOTE(review): Path, uuid (and json, used in predict) must be imported at the
# top of this file — confirm the import block includes them.
log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
log_folder = log_file.parent
log_file.parent.mkdir(exist_ok=True)


#######################################################################################################################
# Periodically commit the local log folder to a Hugging Face dataset repo.
# NOTE(review): CommitScheduler comes from huggingface_hub but is not imported
# anywhere in this file — confirm the import exists. repo_id is a placeholder
# and must be replaced with a real repo name before this will run.
scheduler = CommitScheduler(
    repo_id="-----------",  # provide a name "insurance-charge-mlops-logs" for the repo_id
    repo_type="dataset",
    folder_path=log_folder,   # the local "logs/" directory created above
    path_in_repo="data",      # destination folder inside the dataset repo
    every=2                   # commit interval (minutes)
)

# Define the predict function which will take features, convert to dataframe and make predictions using the saved model
# the functions runs when 'Submit' is clicked or when a API request is made

import pandas as pd
import joblib

# Assuming 'model' is a trained model loaded using joblib.load()

def predict(age, sex, bmi, children, smoker, region):
    """Predict insurance charges for one customer and log the request.

    Runs when 'Submit' is clicked in the UI or when an API request is made.

    Args:
        age (int): Age of the customer.
        sex: Gender of the customer. NOTE(review): the original docstring
            claimed 0/1 integers, but the Gradio Radio input supplies the
            strings "female"/"male" — confirm which encoding the model was
            trained on.
        bmi (float): Body Mass Index of the customer.
        children (int): Number of children the customer has.
        smoker: Smoking status ("yes"/"no" strings from the UI — see the
            note on `sex`).
        region: Customer region (the UI supplies region-name strings).

    Returns:
        float: Predicted insurance charge for the single input row.
    """

    # Single-row DataFrame with the column names the model was trained on
    input_data = pd.DataFrame({
        'age': [age],
        'sex': [sex],
        'bmi': [bmi],
        'children': [children],
        'smoker': [smoker],
        'region': [region]
    })

    # model.predict returns an array; element 0 is our scalar prediction
    prediction = model.predict(input_data)

    # Log both the inputs and the output as one JSON line. Hold the commit
    # scheduler's lock so a background commit never reads a half-written line.
    # BUG FIX: the original function returned *before* this block, so the
    # logging code was unreachable dead code and nothing was ever logged.
    with scheduler.lock:
        with log_file.open("a") as f:
            f.write(json.dumps(
                {
                    'age': age,
                    'bmi': bmi,
                    'children': children,
                    'sex': sex,
                    'smoker': smoker,
                    'region': region,
                    'prediction': prediction[0]
                }
            ))
            f.write("\n")

    return prediction[0]

# Set up UI components for input and output.
# NOTE(review): this block is superseded by the full component setup further
# down; consider deleting one of the two copies.
import gradio as gr

age = gr.inputs.Slider(minimum=18, maximum=64, default=30, step=1, label="Age")
# BUG FIX: the original passed the *builtin* `type` positionally
# (gr.inputs.Radio([...], type)), which is not a valid argument value.
sex = gr.inputs.Radio(["female", "male"], type="value", default="female", label="Sex")


# Create the gradio interface with the title "HealthyLife Insurance Charge Prediction"
import gradio as gr

# ... (previous code for model training and predict function) ...

# Set up UI components for input and output.
# NOTE(review): the gr.inputs.* namespace is the legacy Gradio 1.x/2.x API;
# confirm the pinned gradio version supports it (removed in Gradio 3+).
age = gr.inputs.Slider(minimum=18, maximum=64, default=30, step=1, label="Age")
sex = gr.inputs.Radio(["female", "male"], type="value", default="female", label="Sex")  # Corrected type
bmi = gr.inputs.Number(default=25, label="BMI")
children = gr.inputs.Slider(minimum=0, maximum=5, default=0, step=1, label="Children")
smoker = gr.inputs.Radio(["yes", "no"], type="value", default="no", label="Smoker")  # Corrected type
region = gr.inputs.Dropdown(["southwest", "southeast", "northwest", "northeast"], type="value", default="southwest", label="Region")  # Corrected type

# Create the Gradio interface wiring the six inputs into predict()
demo = gr.Interface(
    fn=predict, 
    inputs=[age, sex, bmi, children, smoker, region], 
    outputs="number",
    title="HealthyLife Insurance Charge Prediction"
)

# Enable the request queue (serializes concurrent requests), then launch.
# NOTE(review): queue() is a request queue, not a load balancer as the
# original comment claimed.
demo.queue()
demo.launch(share=False)