File size: 3,504 Bytes
47c7a21
 
 
 
 
9fc9817
47c7a21
 
 
9fc9817
47c7a21
 
9fc9817
 
 
 
 
 
 
 
47c7a21
9fc9817
 
47c7a21
 
 
9fc9817
47c7a21
925394a
47c7a21
 
 
 
 
9fc9817
47c7a21
 
 
7c67604
9fc9817
47c7a21
 
 
 
9fc9817
 
07337b3
 
 
 
 
 
9fc9817
47c7a21
9fc9817
47c7a21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68909df
 
9fc9817
 
 
 
 
63de8af
0fbd913
 
 
3a9daac
9fc9817
 
 
 
0fbd913
9fc9817
47c7a21
 
9fc9817
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# Import the libraries
import json
import os
import subprocess
import sys
import time
import uuid
from datetime import datetime
from pathlib import Path

import gradio as gr
import joblib
import pandas as pd
from huggingface_hub import CommitScheduler
from huggingface_hub import HfApi
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

# Load the Hugging Face API token
hf_token = os.getenv("HF_TOKEN")

# Initialize Hugging Face API
api = HfApi()

# Run the training script placed in the same directory as app.py.
# The training script will train and persist a linear regression
# model with the filename 'model.joblib'.
# check=True aborts startup with CalledProcessError when training fails, so a
# stale or missing model.joblib is never picked up silently; sys.executable
# runs the script with the same interpreter that is running this app.
subprocess.run([sys.executable, "train.py"], check=True)

# Load the freshly trained model from disk
model = joblib.load("model.joblib")

# Prepare the logging functionality: this process appends to its own uniquely
# named file inside the logs/ folder.
log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
log_folder = log_file.parent

# Periodically push the contents of the log folder to the dataset repo
# "insurance-charge-mlops-logs", under the "data" directory of that repo.
scheduler = CommitScheduler(
    repo_id="insurance-charge-mlops-logs",
    repo_type="dataset",
    folder_path=log_folder,
    path_in_repo="data",
    every=2,  # commit interval; minutes per the huggingface_hub docs
    token=hf_token,
)

# Define the predict function which will take features, convert to dataframe and make predictions using the saved model
# the function runs when 'Submit' is clicked or when an API request is made
def insurance_charge_predictor(age, bmi, children, sex, smoker, region):
    """Predict the insurance charge for one applicant and log the request.

    Args:
        age: Value of the "Age" input.
        bmi: Value of the "BMI" input.
        children: Value of the "Children" input.
        sex: Selected "Sex" option.
        smoker: Selected "Smoker" option.
        region: Selected "Region" option.

    Returns:
        The model's prediction for the single input row, as a builtin float.
    """
    features = {
        'age': age,
        'bmi': bmi,
        'children': children,
        'sex': sex,
        'smoker': smoker,
        'region': region,
    }

    # One-row frame: each feature becomes a column holding a single value.
    input_data = pd.DataFrame({name: [value] for name, value in features.items()})

    # Convert the numpy scalar to a builtin float once. json.dumps only accepts
    # numpy values that happen to subclass float (float64); any other dtype
    # would raise TypeError inside the locked section below.
    charge = float(model.predict(input_data)[0])

    # Log both the inputs and the output as one JSON object per line.
    # Hold the commit scheduler's lock while writing so an upload never runs
    # in parallel with a partially written record.
    with scheduler.lock:
        with log_file.open("a", encoding="utf-8") as f:
            f.write(json.dumps({**features, 'prediction': charge}))
            f.write("\n")

    return charge

# Set up UI components for input and output
# Create the gradio interface, make title "HealthyLife Insurance Charge Prediction"
SEX_CHOICES = ["male", "female"]
SMOKER_CHOICES = ["yes", "no"]
REGION_CHOICES = ["northeast", "northwest", "southeast", "southwest"]

with gr.Blocks() as demo:
    gr.Markdown("HealthyLife Insurance Charge Prediction")
    status = gr.State(["none"])

    with gr.Row():
        # First column: one input widget per model feature.
        with gr.Column():
            age = gr.Slider(label="Age", minimum=18, maximum=100, step=1, value=25)
            bmi = gr.Slider(label="BMI", minimum=10, maximum=50, value=25)
            children = gr.Slider(label="Children", minimum=0, maximum=10, step=1, value=0)
            sex = gr.Radio(label="Sex", choices=SEX_CHOICES)
            smoker = gr.Radio(label="Smoker", choices=SMOKER_CHOICES)
            region = gr.Dropdown(label="Region", choices=REGION_CHOICES)

        # Second column: the prediction result and the button that triggers it.
        with gr.Column():
            output = gr.Number(label="Predicted Insurance Charge")
            predict_btn = gr.Button("Submit")

    # Wire the button to the predictor; the input order matches the
    # predictor's positional parameters.
    predict_btn.click(
        insurance_charge_predictor,
        inputs=[age, bmi, children, sex, smoker, region],
        outputs=output,
    )

# Enable the request queue, then launch the app
demo.queue()
demo.launch(share=True, show_error=True)