"""Gradio front end for the HealthyLife insurance charge model.

The script loads a persisted model from ``model.joblib``, exposes
``predict`` through a Gradio interface, and appends every request/response
pair to a JSON-lines log file that a ``CommitScheduler`` periodically pushes
to a Hugging Face dataset repository.
"""

# Import the libraries
import json
import uuid
from pathlib import Path

import gradio as gr
import joblib
import pandas as pd
from huggingface_hub import CommitScheduler
from sklearn.compose import make_column_transformer
from sklearn.datasets import fetch_openml
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# File written by the training script that lives next to this app.
MODEL_FILENAME = "model.joblib"

# Dataset repository that receives the prediction logs.
LOG_REPO_ID = "insurance-charge-mlops-logs"

# Load the trained model from disk.
# NOTE(review): an earlier revision re-fitted a bare RandomForestRegressor
# here on X_train / y_train. Those names are never defined in this file, so
# the app crashed at start-up. Training belongs to the separate training
# script; run it before starting the app so that MODEL_FILENAME exists.
# TODO confirm the persisted model accepts the raw string categories
# ("female", "yes", "southwest", ...) that the UI below sends.
model = joblib.load(MODEL_FILENAME)

# Prepare the logging functionality. Each process writes to its own
# uniquely named file so that concurrent replicas never share a log file.
log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
log_folder = log_file.parent
log_folder.mkdir(parents=True, exist_ok=True)

# Periodically commit the contents of the log folder to the dataset repo.
scheduler = CommitScheduler(
    repo_id=LOG_REPO_ID,
    repo_type="dataset",
    folder_path=log_folder,
    path_in_repo="data",
    every=2,
)


def predict(age, sex, bmi, children, smoker, region):
    """Predict insurance charges for one customer and log the request.

    Runs when 'Submit' is clicked in the UI or when an API request is made.
    The arguments are forwarded to the model unchanged, as a one-row
    DataFrame whose column names match the parameter names.

    Args:
        age: Age of the customer (the UI sends a number from 18 to 64).
        sex: Sex of the customer (the UI sends "female" or "male").
        bmi: Body Mass Index of the customer.
        children: Number of children (the UI sends a number from 0 to 5).
        smoker: Smoking status (the UI sends "yes" or "no").
        region: Region of the customer (the UI sends "southwest",
            "southeast", "northwest" or "northeast").

    Returns:
        float: Predicted insurance charges.
    """
    # Create a single-row DataFrame with the input features.
    input_data = pd.DataFrame(
        {
            "age": [age],
            "sex": [sex],
            "bmi": [bmi],
            "children": [children],
            "smoker": [smoker],
            "region": [region],
        }
    )

    # Make the prediction using the loaded model. Converting to a builtin
    # float keeps the value JSON-serialisable whatever numpy dtype the
    # model returns.
    predicted_charge = float(model.predict(input_data)[0])

    record = {
        "age": age,
        "bmi": bmi,
        "children": children,
        "sex": sex,
        "smoker": smoker,
        "region": region,
        "prediction": predicted_charge,
    }

    # Log inputs and output before returning. Hold the scheduler lock while
    # writing so a background commit never uploads a half-written line.
    with scheduler.lock, log_file.open("a", encoding="utf-8") as f:
        f.write(json.dumps(record) + "\n")

    return predicted_charge


# Set up UI components for input. The top-level component classes with
# ``value=`` replace the removed ``gr.inputs.*`` namespace and ``default=``.
age = gr.Slider(minimum=18, maximum=64, value=30, step=1, label="Age")
sex = gr.Radio(["female", "male"], type="value", value="female", label="Sex")
bmi = gr.Number(value=25, label="BMI")
children = gr.Slider(minimum=0, maximum=5, value=0, step=1, label="Children")
smoker = gr.Radio(["yes", "no"], type="value", value="no", label="Smoker")
region = gr.Dropdown(
    ["southwest", "southeast", "northwest", "northeast"],
    type="value",
    value="southwest",
    label="Region",
)

# Create the Gradio interface.
demo = gr.Interface(
    fn=predict,
    inputs=[age, sex, bmi, children, smoker, region],
    outputs="number",
    title="HealthyLife Insurance Charge Prediction",
)

# Enable request queuing, then start the server.
demo.queue()
demo.launch(share=False)