savi-cyber committed on
Commit
9fc9817
·
verified ·
1 Parent(s): 8592005

uploading updated files

Browse files
Files changed (3) hide show
  1. app.py +45 -37
  2. model.joblib +2 -2
  3. train.py +54 -0
app.py CHANGED
@@ -3,58 +3,58 @@ import os
3
  import uuid
4
  import joblib
5
  import json
6
-
7
  import gradio as gr
8
  import pandas as pd
9
 
 
10
  from huggingface_hub import CommitScheduler
11
  from pathlib import Path
 
 
 
 
 
 
 
 
 
12
 
 
 
13
  # Run the training script placed in the same directory as app.py
14
  # The training script will train and persist a linear regression
15
  # model with the filename 'model.joblib'
 
16
  # Load the freshly trained model from disk
17
-
18
  # Prepare the logging functionality
19
  log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
20
  log_folder = log_file.parent
21
 
22
  scheduler = CommitScheduler(
23
- repo_id="-----------", # provide a name "insurance-charge-mlops-logs" for the repo_id
24
  repo_type="dataset",
25
  folder_path=log_folder,
26
  path_in_repo="data",
27
  every=2
 
28
  )
29
- # Create variable for model.joblib
30
- insurance_charge_predictor = joblib.load('model.joblib')
31
 
32
- age_input = gr.Number(label='Age')
33
- bmi_input = gr.Number(label='BMI')
34
- children_input = gr.Number(label='Children')
35
- sex_input = gr.Number(label='Sex')
36
- smoker_input = gr.Number(label='Smoker')
37
- region_input = gr.Number(label='Region')
38
- type_input = gr.Dropdown(
39
- ['Low', 'Mean', 'High'],
40
- label='Type'
41
- )
42
  # Define the predict function which will take features, convert to dataframe and make predictions using the saved model
43
- def insurance_charge_predictor(age, bmi, children, sex, smoker, region, type):
44
- sample = {
45
- 'Age': age,
46
- 'BMI': bmi,
47
- 'Children': children,
48
- 'Sex': sex,
49
- 'Smoker': smoker,
50
- 'Region': region,
51
- 'Type': type
52
- }
53
  # the functions runs when 'Submit' is clicked or when a API request is made
 
 
 
 
 
 
 
 
 
54
 
 
55
  # While the prediction is made, log both the inputs and outputs to a log file
56
- data_point = pd.DataFrame([sample])
57
- prediction = insurance_charge_predictor.predict(data_point).tolist()
58
  # While writing to the log file, ensure that the commit scheduler is locked to avoid parallel
59
  # access
60
  with scheduler.lock:
@@ -75,16 +75,24 @@ def insurance_charge_predictor(age, bmi, children, sex, smoker, region, type):
75
  return prediction[0]
76
 
77
  # Set up UI components for input and output
78
- demo = gr.Interface(
79
- fn=Predict_Insurance_Charge,
80
- inputs=[age_input, bmi_input, children_input, sex_input, smoker_input, region_input],
81
- outputs=model_output,
82
  # Create the gradio interface, make title "HealthyLife Insurance Charge Prediction"
83
- title="HealthyLife Insurance Charge Prediction",
84
- description="This API allows you to predict optimal insurance charges for customers",
85
- allow_flagging="auto",
86
- concurrency_limit=8
87
- )
 
 
 
 
 
 
 
 
 
 
 
 
88
  # Launch with a load balancer
89
  demo.queue()
90
- demo.launch(share=False)
 
3
  import uuid
4
  import joblib
5
  import json
6
+ import time
7
  import gradio as gr
8
  import pandas as pd
9
 
10
+ from huggingface_hub import HfApi
11
  from huggingface_hub import CommitScheduler
12
  from pathlib import Path
13
+ from sklearn.model_selection import train_test_split
14
+ from sklearn.ensemble import RandomForestRegressor
15
+ from sklearn.pipeline import Pipeline
16
+ from sklearn.compose import ColumnTransformer
17
+ from sklearn.preprocessing import OneHotEncoder
18
+ from datetime import datetime
19
+
20
+ # Load the Hugging Face API token
21
+ hf_token = os.getenv("HF_TOKEN")
22
 
23
+ # Initialize Hugging Face API
24
+ api = HfApi()
25
  # Run the training script placed in the same directory as app.py
26
  # The training script will train and persist a linear regression
27
  # model with the filename 'model.joblib'
28
# Retrain on every app start so the served model matches train.py.
os.system("python train.py")
# Load the freshly trained model from disk.
# FIX: the original called joblib.load without binding the result, so the
# predictor's later call to `model.predict(...)` would raise NameError.
model = joblib.load('model.joblib')
31
  # Prepare the logging functionality
32
  log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
33
  log_folder = log_file.parent
34
 
35
# Periodically commit the local log folder to a Hugging Face dataset repo.
scheduler = CommitScheduler(
    repo_id="insurance-charge-mlops-logs",  # provide a name "insurance-charge-mlops-logs" for the repo_id
    repo_type="dataset",
    folder_path=log_folder,
    path_in_repo="data",
    every=2,  # FIX: comma was missing here before token=, a SyntaxError
    token=hf_token
)
 
 
43
 
 
 
 
 
 
 
 
 
 
 
44
  # Define the predict function which will take features, convert to dataframe and make predictions using the saved model
 
 
 
 
 
 
 
 
 
 
45
  # the function runs when 'Submit' is clicked or when an API request is made
46
+ def insurance_charge_predictor(age, bmi, children, sex, smoker, region):
47
+ input_data = pd.DataFrame({
48
+ 'Age': [age],
49
+ 'BMI': [bmi],
50
+ 'Children': [children],
51
+ 'Sex': [sex],
52
+ 'Smoker': [smoker],
53
+ 'Region': [region]
54
+ })
55
 
56
+ prediction = model.predict(input_data)
57
  # While the prediction is made, log both the inputs and outputs to a log file
 
 
58
  # While writing to the log file, ensure that the commit scheduler is locked to avoid parallel
59
  # access
60
  with scheduler.lock:
 
75
  return prediction[0]
76
 
77
  # Set up UI components for input and output
 
 
 
 
78
  # Create the gradio interface, make title "HealthyLife Insurance Charge Prediction"
79
# Create the gradio interface, title "HealthyLife Insurance Charge Prediction".
# Left column collects the model features; right column shows the prediction.
with gr.Blocks(title="HealthyLife Insurance Charge Prediction") as demo:
    status = gr.State(["none"])
    with gr.Row():
        with gr.Column():
            region = gr.Dropdown(choices=["northeast", "northwest", "southeast", "southwest"], label="Region")
            sex = gr.Radio(choices=["male", "female"], label="Sex")
            smoker = gr.Radio(choices=["yes", "no"], label="Smoker")
            age = gr.Slider(minimum=18, maximum=100, value=25, label="Age", step=1)
            bmi = gr.Slider(minimum=10, maximum=50, value=25, label="BMI")
            children = gr.Slider(minimum=0, maximum=10, value=0, label="Children", step=1)

        with gr.Column():
            output = gr.Number(label="Predicted Insurance Charge")
            predict_btn = gr.Button("Submit")

    # FIX: the click handler was the undefined name `predict_charges`; the
    # function defined above is `insurance_charge_predictor`. Also, Gradio
    # passes `inputs` positionally, so their order must match the handler's
    # signature (age, bmi, children, sex, smoker, region) — the original
    # order [age, sex, bmi, children, ...] would scramble the features.
    predict_btn.click(
        insurance_charge_predictor,
        inputs=[age, bmi, children, sex, smoker, region],
        outputs=output,
    )

# Launch with a load balancer
demo.queue()
demo.launch(share=True, show_error=True)
model.joblib CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e86925f6e1316a2a1a54e48362efd01b134efba34c8249a5b33488539321dbf4
3
- size 3654
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8aa4571e72863112ad036c9d864d5227d4821da21f1044fac06c0a1b983fc4cd
3
+ size 4526
train.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Train and persist the insurance-charge regression model.
#
# Reads insurance.csv from the working directory, fits a preprocessing +
# linear-regression pipeline, prints hold-out metrics, and serializes the
# fitted pipeline to model.joblib (consumed by app.py).

# Import libraries
import pandas as pd
import joblib

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import make_column_transformer
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


def main():
    """Fit the model pipeline on insurance.csv and write model.joblib."""
    # Read the data
    data_df = pd.read_csv("insurance.csv")
    # Process the data: drop the positional column carried over from the
    # CSV export. NOTE(review): this raises KeyError if 'index' is absent —
    # confirm the CSV schema.
    data_df = data_df.drop(columns=['index'])

    X = data_df.drop(columns='charges')
    y = data_df['charges']

    # Scale numeric features; one-hot encode categoricals, tolerating
    # categories unseen at training time.
    categorical_features = X.select_dtypes(include=['object']).columns
    numeric_features = X.select_dtypes(include=['number']).columns

    print("Creating data subsets")

    preprocessor = make_column_transformer(
        (StandardScaler(), numeric_features),
        (OneHotEncoder(handle_unknown='ignore'), categorical_features)
    )

    Xtrain, Xtest, ytrain, ytest = train_test_split(
        X, y,
        test_size=0.2,
        random_state=42  # fixed seed keeps the split reproducible
    )

    print("Estimating Model Pipeline")

    model_pipeline = make_pipeline(
        preprocessor,
        LinearRegression(n_jobs=-1)
    )
    model_pipeline.fit(Xtrain, ytrain)

    print("Logging Metrics")
    predictions = model_pipeline.predict(Xtest)
    print(f"R-squared: {r2_score(ytest, predictions)}")
    # mean_squared_error was imported but never reported in the original;
    # surface it alongside R-squared.
    print(f"MSE: {mean_squared_error(ytest, predictions)}")

    print("Serializing Model")
    joblib.dump(model_pipeline, "model.joblib")


# Guard the entry point so importing this module never triggers training.
if __name__ == "__main__":
    main()