"""Gradio app that predicts insurance charges and logs every request.

On start-up the script runs ``train.py`` (expected next to this file), loads
the resulting ``model.joblib``, and serves a Gradio UI. Each prediction is
appended as one JSON line to a per-process log file that a Hugging Face
``CommitScheduler`` periodically pushes to a dataset repo.
"""

# Standard library
import json
import os
import subprocess
import sys
import time
import uuid
from datetime import datetime
from pathlib import Path

# Third-party
import gradio as gr
import joblib
import pandas as pd
from huggingface_hub import CommitScheduler
from huggingface_hub import HfApi
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

# Load the Hugging Face API token
hf_token = os.getenv("HF_TOKEN")

# Initialize Hugging Face API
api = HfApi()

# Run the training script placed in the same directory as app.py.
# The training script trains and persists a model as 'model.joblib'.
# check=True makes a failed training run abort start-up instead of silently
# continuing with a stale or missing model file; sys.executable guarantees the
# script runs under the same interpreter (and installed packages) as this app.
subprocess.run([sys.executable, "train.py"], check=True)

# Load the freshly trained model from disk
model = joblib.load('model.joblib')

# Prepare the logging functionality: one uniquely named JSON-lines file per
# process, inside the folder watched by the commit scheduler.
log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
log_folder = log_file.parent

scheduler = CommitScheduler(
    repo_id="insurance-charge-mlops-logs",
    repo_type="dataset",
    folder_path=log_folder,
    path_in_repo="data",
    every=2,
    token=hf_token,
)


def insurance_charge_predictor(age, bmi, children, sex, smoker, region):
    """Predict the insurance charge for one person and log the request.

    Runs when 'Submit' is clicked or when an API request is made.

    Args:
        age: Age value from the "Age" input.
        bmi: Value from the "BMI" input.
        children: Value from the "Children" input.
        sex: Selected "Sex" option.
        smoker: Selected "Smoker" option.
        region: Selected "Region" option.

    Returns:
        The predicted charge as a built-in ``float``.

    Raises:
        gr.Error: If ``sex``, ``smoker`` or ``region`` is ``None``.
    """
    # The Radio/Dropdown inputs have no default value, so they arrive as None
    # when the user submits without choosing an option.
    missing = [
        label
        for label, value in (("Sex", sex), ("Smoker", smoker), ("Region", region))
        if value is None
    ]
    if missing:
        raise gr.Error("Please select a value for: " + ", ".join(missing))

    features = {
        'age': age,
        'bmi': bmi,
        'children': children,
        'sex': sex,
        'smoker': smoker,
        'region': region,
    }

    # Single-row frame with the same column names and order as before.
    input_data = pd.DataFrame({name: [value] for name, value in features.items()})

    # Convert the NumPy result to a built-in float once, so the same plain
    # value is both JSON-serialised and returned to the UI.
    prediction = float(model.predict(input_data)[0])

    # Log both the inputs and the output. Hold the scheduler lock while
    # writing so the file is not modified during a scheduled commit.
    record = {**features, 'prediction': prediction}
    with scheduler.lock:
        with log_file.open("a", encoding="utf-8") as f:
            f.write(json.dumps(record) + "\n")

    return prediction


# Set up UI components for input and output
with gr.Blocks() as demo:
    gr.Markdown("HealthyLife Insurance Charge Prediction")
    # NOTE(review): `status` is not referenced anywhere else in this file;
    # kept as-is in case something outside this file relies on it.
    status = gr.State(["none"])

    with gr.Row():
        with gr.Column():
            age = gr.Slider(minimum=18, maximum=100, value=25, label="Age", step=1)
            bmi = gr.Slider(minimum=10, maximum=50, value=25, label="BMI")
            children = gr.Slider(minimum=0, maximum=10, value=0, label="Children", step=1)
            sex = gr.Radio(choices=["male", "female"], label="Sex")
            smoker = gr.Radio(choices=["yes", "no"], label="Smoker")
            region = gr.Dropdown(
                choices=["northeast", "northwest", "southeast", "southwest"],
                label="Region",
            )

        with gr.Column():
            output = gr.Number(label="Predicted Insurance Charge")

    predict_btn = gr.Button("Submit")
    predict_btn.click(
        insurance_charge_predictor,
        inputs=[age, bmi, children, sex, smoker, region],
        outputs=output,
    )

# Enable request queuing, then launch the app
demo.queue()
demo.launch(share=True, show_error=True)