# Import the libraries
# Standard library
import json
import os
import subprocess
import time
import uuid
from datetime import datetime
from pathlib import Path

# Third-party
import gradio as gr
import joblib
import pandas as pd
from huggingface_hub import CommitScheduler
from huggingface_hub import HfApi
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
# Load the Hugging Face API token used to authenticate log uploads.
hf_token = os.getenv("HF_TOKEN")

# Hugging Face Hub client (kept available for ad-hoc Hub operations).
api = HfApi()

# Run the training script placed in the same directory as app.py.
# It trains and persists a linear regression model as 'model.joblib'.
# check=True makes a failed training run raise here, instead of the
# confusing FileNotFoundError that joblib.load would produce below;
# the list form avoids shelling out (no shell string parsing).
subprocess.run(["python", "train.py"], check=True)

# Load the freshly trained model from disk.
model = joblib.load('model.joblib')

# Prepare the logging functionality.
# Each app instance writes to its own uniquely named JSON-lines file so
# that concurrent replicas never append to the same file.
log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
log_folder = log_file.parent

# Background scheduler that periodically commits the local log folder
# to a Hub dataset repo (every 2 minutes).
scheduler = CommitScheduler(
    repo_id="insurance-charge-mlops-logs",  # dataset repo that accumulates prediction logs
    repo_type="dataset",
    folder_path=log_folder,
    path_in_repo="data",
    every=2,  # minutes between commits
    token=hf_token
)
# Define the predict function which will take features, convert to dataframe
# and make predictions using the saved model.
# The function runs when 'Submit' is clicked or when an API request is made.
def insurance_charge_predictor(age, bmi, children, sex, smoker, region):
    """Predict the insurance charge for one applicant and log the request.

    Parameters mirror the training features: age, bmi, children (numeric),
    sex, smoker, region (categorical strings).

    Returns:
        float: the predicted insurance charge.
    """
    # Single-row frame whose column names match the training schema.
    input_data = pd.DataFrame({
        'age': [age],
        'bmi': [bmi],
        'children': [children],
        'sex': [sex],
        'smoker': [smoker],
        'region': [region]
    })
    # Convert to a plain Python float once: NumPy scalars (e.g. float32)
    # are not guaranteed to be JSON-serializable by json.dumps.
    charge = float(model.predict(input_data)[0])
    # While the prediction is made, log both the inputs and outputs to a
    # log file. Hold the commit scheduler's lock while writing so a
    # background commit never races a partial write.
    with scheduler.lock:
        with log_file.open("a") as f:
            f.write(json.dumps(
                {
                    'age': age,
                    'bmi': bmi,
                    'children': children,
                    'sex': sex,
                    'smoker': smoker,
                    'region': region,
                    'prediction': charge
                }
            ))
            f.write("\n")
    return charge
# Set up UI components for input and output.
# Create the gradio interface, make title "HealthyLife Insurance Charge Prediction".
# NOTE: component creation order inside the Blocks context determines the
# rendered layout, so the structure below is behavior, not just style.
with gr.Blocks() as demo:
    gr.Markdown("HealthyLife Insurance Charge Prediction")
    # NOTE(review): this State is never read or written by any handler
    # below — looks unused; confirm before removing.
    status = gr.State(["none"])
    with gr.Row():
        # Left column: the six model input features.
        with gr.Column():
            age = gr.Slider(minimum=18, maximum=100, value=25, label="Age", step=1)
            bmi = gr.Slider(minimum=10, maximum=50, value=25, label="BMI")
            children = gr.Slider(minimum=0, maximum=10, value=0, label="Children", step=1)
            sex = gr.Radio(choices=["male", "female"], label="Sex")
            smoker = gr.Radio(choices=["yes", "no"], label="Smoker")
            region = gr.Dropdown(choices=["northeast", "northwest", "southeast", "southwest"], label="Region")
        # Right column: the predicted charge output.
        with gr.Column():
            output = gr.Number(label="Predicted Insurance Charge")
    predict_btn = gr.Button("Submit")
    # Wire the button to the predictor; inputs are passed positionally in
    # the same order as the function's parameters.
    predict_btn.click(insurance_charge_predictor, inputs=[age, bmi, children, sex, smoker, region], outputs=output)
# Enable request queuing so concurrent submissions are serialized/balanced.
demo.queue()
demo.launch(share=True, show_error=True)