Spaces:
Runtime error
Runtime error
| # +++ | |
| # Import the libraries | |
| #--------------------------------------------------------------------------------------------------------- | |
| import os | |
| import uuid | |
| import joblib | |
| import json | |
| # IMPORTANT: I already installed the package "gradio" in my current Virtual Environment (VEnvDSDIL_gpu_Py3.12) as: pip install -q gradio_client | |
| # Do NOT install "gradio_client" package again in Anaconda otherwise it will mess up the package. | |
| import gradio as gr | |
| import pandas as pd | |
| # must install the package "huggingface_hub" first in the current python Virtual Environment, with pip, not with conda, as follows | |
| # pip install huggingface_hub | |
| # i.e., in the command line interface within the activated Virtual Environment: | |
| # (VEnvDSDIL_gpu_Py3.12) epalvarez@DSDILmStation01:~ $ pip install huggingface_hub | |
| from huggingface_hub import CommitScheduler | |
| from pathlib import Path | |
#---------------------------------------------------------------------------------------------------------
# Bootstrap the model: run the companion training script (expected to sit next
# to app.py), which fits the regressor and serializes it as 'model_ic.joblib',
# then deserialize that artifact for use by the prediction function below.
print("\n... Initializing train_ic.py\n")
os.system('python train_ic.py')  # sub-shell run; writes model_ic.joblib into the CWD
print("\n... train_ic.py initialized.\n")

# Resolve the artifact path relative to the current working directory.
working_dir = Path.cwd()
print(f"current_directory: {working_dir}\n")

model_path = working_dir.joinpath("model_ic.joblib")
print(f"saved_model_file_path: {model_path}\n")

# Reconstruct the persisted Python object (the fitted regression pipeline).
insurance_charge_predictor = joblib.load(filename=model_path)
# Logging setup: each app process appends JSON lines to its own uniquely
# named file under logs/, so concurrent replicas never collide.
log_folder = Path("logs/")
log_file = log_folder / f"data_{uuid.uuid4()}.json"
print(f"\nInformation:\n\tlog_file: {log_file}\n\tlog_folder: {log_folder}\n")

# Mirror the log folder into a Hugging Face dataset repo, committing
# after every 2 logged API calls.
scheduler = CommitScheduler(
    repo_id="insurance-charge-mlops-logs",
    repo_type="dataset",
    folder_path=log_folder,
    path_in_repo="data",
    every=2,
)
| # Define the "predict function" which will take features, convert to dataframe and make predictions using the saved model | |
| # the function runs when 'Submit' is clicked or when an API request is made | |
| # IMPORTANT Note: do not modify the names of keys for "sample" and "scheduler"; the keys should be named exactly as the names in the columns in the DataFrame. | |
| # Otherwise, a run-time error will occur. | |
| #------------------------------------------------------------------------------------------------------------------------------------------------------------- | |
def predict_insurance_charge(age, bmi, children, sex, smoker, region):
    """Predict the insurance charge for one applicant and log the call.

    Runs on every 'Submit' click or API request. The feature keys must match
    the column names the model was trained on exactly, otherwise prediction
    fails at runtime.
    """
    features = {
        'age': age,
        'bmi': bmi,
        'children': children,
        'sex': sex,
        'smoker': smoker,
        'region': region
    }
    # Score a single-row DataFrame with the deserialized model; tolist()
    # converts the result to plain Python values for JSON logging.
    prediction = insurance_charge_predictor.predict(pd.DataFrame([features])).tolist()
    predicted_charge = prediction[0][0]

    # Append inputs + output as one JSON line. Hold the scheduler lock so a
    # concurrent dataset commit never sees a half-written record.
    record = dict(features, prediction=predicted_charge)
    with scheduler.lock:
        with log_file.open("a") as f:
            f.write(json.dumps(record))
            f.write("\n")

    print(f"\nPrediction result: {predicted_charge} - {type(predicted_charge)}\n")
    return predicted_charge
#--------------------------------------------------------------------------------------------------------------------------------------------------------------
# Gradio UI wiring: numeric fields for the continuous features, dropdowns for
# the categorical ones, and a Label component for the predicted charge.
age_input = gr.Number(label="Age [attained years]")
bmi_input = gr.Number(label='BMI')
children_input = gr.Number(label='Children [#]')
sex_input = gr.Dropdown(['male', 'female'], label='Sex')
smoker_input = gr.Dropdown(['no', 'yes'], label='Smoker')
region_input = gr.Dropdown(['southeast', 'southwest', 'northeast', 'northwest'], label='Region')

model_output = gr.Label(label="Insurance Charge [$]")

# Assemble the interface around the prediction function defined above.
demo = gr.Interface(
    fn=predict_insurance_charge,
    inputs=[age_input, bmi_input, children_input,
            sex_input, smoker_input, region_input],
    outputs=model_output,
    title="Insurance Charge Predictor",
    description="This API allows you to predict the appropriate insurance charge based on the input parameters.",
    allow_flagging="auto",  # automatically push flagged samples to the HuggingFace Dataset
    concurrency_limit=8
)

demo.queue()              # enable request queuing (acts as a load balancer)
demo.launch(share=False)  # share=True would publish a temporary public gradio.live link
| # To create a public link, set "share=True" in launch() .... but if I execute this app.py locally, then I have to have my computer on for the public users to access the browser interface | |