Kavya-Jain commited on
Commit
a603065
·
verified ·
1 Parent(s): 6abfac2

Upload 7 files

Browse files
Files changed (5) hide show
  1. .dockerignore +7 -6
  2. Preprocessing.py +65 -69
  3. main.py +3 -11
  4. requirements.txt +2 -1
  5. train_and_save_model.py +52 -51
.dockerignore CHANGED
@@ -1,7 +1,8 @@
1
- .git
2
- .venv
3
- __pycache__
4
- *.pyc
5
  *.ipynb
6
- .DS_Store
7
- *.log
 
 
1
+ .git
2
+ .venv
3
+ __pycache__
4
+ *.pyc
5
  *.ipynb
6
+ .DS_Store
7
+ *.log
8
+ *.pkl
Preprocessing.py CHANGED
@@ -4,49 +4,44 @@ import joblib
4
  import os
5
  from sklearn.impute import SimpleImputer
6
  from sklearn.preprocessing import LabelEncoder, StandardScaler
 
7
 
8
- NUM_IMPUTER_PATH = "numerical_imputer.pkl"
9
- CAT_IMPUTER_PATH = "categorical_imputer.pkl"
10
- LE_STRUCTURE_TYPE_PATH = "label_encoder_structure_type.pkl"
11
- SCALER_PATH = "scaler.pkl"
12
 
13
- numerical_imputer = None
14
- categorical_imputer = None
15
- le_structure_type = None
16
- scaler = None
17
- #I have done this to set them as a placeholder in this file....therefore no discrepancies related to it will occur
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
- try:
20
- numerical_imputer = joblib.load(NUM_IMPUTER_PATH)
21
- print(f"Loaded {NUM_IMPUTER_PATH}. Expected features: {getattr(numerical_imputer, 'feature_names_in_', 'N/A')}")
22
- except FileNotFoundError :
23
- print(f"Warning : {NUM_IMPUTER_PATH} not found")
24
- except Exception as e :
25
- print(f"Error loading {NUM_IMPUTER_PATH}: {e}")
26
-
27
- try:
28
- categorical_imputer = joblib.load(CAT_IMPUTER_PATH)
29
- print(f"Loaded {CAT_IMPUTER_PATH}. Expected features: {getattr(categorical_imputer, 'feature_names_in_', 'N/A')}")
30
- except FileNotFoundError :
31
- print(f"Warning: {CAT_IMPUTER_PATH} not found")
32
- except Exception as e :
33
- print(f"Error loading {CAT_IMPUTER_PATH}: {e}")
34
-
35
- try:
36
- le_structure_type = joblib.load(LE_STRUCTURE_TYPE_PATH)
37
- print(f"Loaded {LE_STRUCTURE_TYPE_PATH}")
38
- except FileNotFoundError :
39
- print(f"Warning: {LE_STRUCTURE_TYPE_PATH} not found")
40
- except Exception as e :
41
- print(f"Error loading {LE_STRUCTURE_TYPE_PATH}: {e}")
42
-
43
- try:
44
- scaler = joblib.load(SCALER_PATH)
45
- print(f"Loaded {SCALER_PATH}. Expected features: {getattr(scaler, 'feature_names_in_', 'N/A')}")
46
- except FileNotFoundError :
47
- print(f"Warning: {SCALER_PATH} not found")
48
- except Exception as e :
49
- print(f"Error loading {SCALER_PATH}: {e}")
50
 
51
  #You can see that I've used the try and except model for loading the data so that if error occurs I'm completely aware of it
52
 
@@ -56,7 +51,8 @@ NUMERICAL_FEATURES = [
56
  CATEGORICAL_FEATURES = ['structure_type']
57
 
58
  FINAL_MODEL_EXPECTED_FEATURES = [
59
- 'site_area', 'structure_type', 'water_consumption', 'recycling_rate', 'utilisation_rate', 'air_qality_index', 'issue_reolution_time', 'resident_count'
 
60
  ]
61
  #Final model expected features contains the list of the final output of the trained data
62
 
@@ -69,14 +65,14 @@ def preprocess_input(input_data: dict) -> pd.DataFrame:
69
  if 'structure_type' in df_processed.columns:
70
  df_processed['structure_type'] = df_processed['structure_type'].astype(str).str.lower().str.strip()
71
  print(f"'structure_type' standardized to: '{df_processed['structure_type'].iloc[0]}'")
72
-
73
  if numerical_imputer is not None and NUMERICAL_FEATURES:
74
  missing_input = [col for col in NUMERICAL_FEATURES if col not in df_processed.columns]
75
-
76
  if missing_input:
77
  raise ValueError(f"Error : Numerical features {missing_input} are missing from input DataFrame!")
78
- #This is only to verify...It will give us the missing columns which should be present while doing numerical imputation....basically, I'm trying to handle all the errors possible
79
-
80
  try:
81
  df_processed[NUMERICAL_FEATURES] = numerical_imputer.transform(df_processed[NUMERICAL_FEATURES])
82
 
@@ -89,17 +85,17 @@ def preprocess_input(input_data: dict) -> pd.DataFrame:
89
 
90
  if categorical_imputer is not None and CATEGORICAL_FEATURES:
91
  missing_input = [col for col in CATEGORICAL_FEATURES if col not in df_processed.columns]
92
-
93
- if missing_input:
94
- raise ValueError(f"Error : Categorical features {missing_input} are missing from input DataFrame!")
95
- try:
96
- df_processed[CATEGORICAL_FEATURES] = categorical_imputer.transform(df_processed[CATEGORICAL_FEATURES])
97
-
98
- except Exception as e:
99
- raise RuntimeError(
100
- f"Error during categorical imputation\n"
101
- f"Error : {e}"
102
- )
103
 
104
  if le_structure_type is not None and 'structure_type' in df_processed.columns:
105
  try:
@@ -114,18 +110,18 @@ def preprocess_input(input_data: dict) -> pd.DataFrame:
114
 
115
  if scaler is not None and NUMERICAL_FEATURES:
116
  missing_input = [col for col in NUMERICAL_FEATURES if col not in df_processed.columns]
117
-
118
- if missing_input:
119
- raise ValueError(f"Error : Numerical features {missing_input} are missing from input DataFrame")
120
-
121
- try:
122
- df_processed[NUMERICAL_FEATURES] = scaler.transform(df_processed[NUMERICAL_FEATURES])
123
-
124
- except Exception as e:
125
- raise RuntimeError(
126
- f"Error during scaling\n"
127
- f"Error: {e}"
128
- )
129
 
130
  print(f"Current df_processed columns before final reorder: {df_processed.columns.tolist()}")
131
  #Checkpoint
@@ -141,4 +137,4 @@ def preprocess_input(input_data: dict) -> pd.DataFrame:
141
  return df_final
142
 
143
  #The function I created above was based upon the numerical and categorical imputation, label encoding, scaling or basically all the data preprocessing that should be done after training all the models.....
144
- #I have show all the error messages in my coding lines because I got stuck in this process many time and to highlight the mistakes I have created some checkpoints also in between....Therefore, now all the data operations are done and the next thing is DEPLOYMENT-> creation of FastAPI and deployment on AWS etc.
 
4
  import os
5
  from sklearn.impute import SimpleImputer
6
  from sklearn.preprocessing import LabelEncoder, StandardScaler
7
+ import requests
8
 
9
+ GITHUB_BASE_URL = "https://raw.githubusercontent.com/jainkavya738/Electricity-ML-Artifacts/main/"
 
 
 
10
 
11
# Download locations of the fitted preprocessing artifacts and the trained model.
# Keep the literals free of leading/trailing whitespace: a stray space would be
# sent as part of the request path and the download would fail.
NUM_IMPUTER_URL = "https://github.com/jainkavya738/Electricity-ML-Artifacts/raw/refs/heads/main/numerical_imputer.pkl"
CAT_IMPUTER_URL = "https://github.com/jainkavya738/Electricity-ML-Artifacts/raw/refs/heads/main/categorical_imputer.pkl"
LE_STRUCTURE_TYPE_URL = "https://github.com/jainkavya738/Electricity-ML-Artifacts/raw/refs/heads/main/label_encoder_structure_type.pkl"
SCALER_URL = "https://github.com/jainkavya738/Electricity-ML-Artifacts/raw/refs/heads/main/scaler.pkl"
MODEL_URL = "https://github.com/jainkavya738/Electricity-ML-Artifacts/raw/refs/heads/main/model.pkl"
16
+
17
def download_and_load_pkl(url, filename, timeout=30):
    """Download a serialized artifact and return the object loaded from it.

    The payload at ``url`` is streamed to ``filename`` in the current working
    directory, loaded with ``joblib.load``, and the file is deleted again
    whether or not loading succeeded.

    Args:
        url: HTTP(S) address of the ``.pkl`` artifact. Surrounding whitespace
            is ignored.
        filename: Local scratch path used while loading; also used in the
            log messages.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot block the caller indefinitely.

    Returns:
        The deserialized object, or ``None`` if the download or the load
        failed (the failure is printed, not raised).
    """
    # A stray space in a URL constant would otherwise become part of the path.
    url = url.strip()
    print(f"Attempting to download {filename} from {url}")
    try:
        # The context manager releases the streamed connection on every path.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()

            with open(filename, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        # SECURITY: joblib.load unpickles the file, which can execute arbitrary
        # code. Only point this function at artifact sources you fully trust.
        obj = joblib.load(filename)
        print(f"Successfully downloaded and loaded {filename}")
        return obj
    except requests.exceptions.RequestException as e:
        print(f"Error downloading {filename} from {url}: {e}")
        return None
    except Exception as e:
        print(f"Error loading {filename} after download: {e}")
        return None
    finally:
        # Clean up the scratch file even when the download or load failed, so a
        # partial or corrupt file is never left behind.
        try:
            os.remove(filename)
        except OSError:
            pass  # nothing was written, or the file is already gone
37
+
38
+ numerical_imputer = download_and_load_pkl(NUM_IMPUTER_URL, "numerical_imputer.pkl")
39
+ categorical_imputer = download_and_load_pkl(CAT_IMPUTER_URL, "categorical_imputer.pkl")
40
+ le_structure_type = download_and_load_pkl(LE_STRUCTURE_TYPE_URL, "label_encoder_structure_type.pkl")
41
+ scaler = download_and_load_pkl(SCALER_URL, "scaler.pkl")
42
+ model = download_and_load_pkl(MODEL_URL, "model.pkl")
43
 
44
+ #The imputers, encoder, scaler and model are downloaded once when this module is imported; any object that fails to load is left as None, and preprocess_input skips the corresponding step
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
  #You can see that I've used the try and except model for loading the data so that if error occurs I'm completely aware of it
47
 
 
51
  CATEGORICAL_FEATURES = ['structure_type']
52
 
53
  FINAL_MODEL_EXPECTED_FEATURES = [
54
+ 'site_area', 'water_consumption', 'recycling_rate', 'utilisation_rate',
55
+ 'air_qality_index', 'issue_reolution_time', 'resident_count', 'structure_type'
56
  ]
57
  #Final model expected features contains the list of the final output of the trained data
58
 
 
65
  if 'structure_type' in df_processed.columns:
66
  df_processed['structure_type'] = df_processed['structure_type'].astype(str).str.lower().str.strip()
67
  print(f"'structure_type' standardized to: '{df_processed['structure_type'].iloc[0]}'")
68
+
69
  if numerical_imputer is not None and NUMERICAL_FEATURES:
70
  missing_input = [col for col in NUMERICAL_FEATURES if col not in df_processed.columns]
71
+
72
  if missing_input:
73
  raise ValueError(f"Error : Numerical features {missing_input} are missing from input DataFrame!")
74
+ #This is only to verify...It will give us the missing columns which should  be present while doing numerical imputation....basically, I'm trying to handle all the errors possible
75
+
76
  try:
77
  df_processed[NUMERICAL_FEATURES] = numerical_imputer.transform(df_processed[NUMERICAL_FEATURES])
78
 
 
85
 
86
  if categorical_imputer is not None and CATEGORICAL_FEATURES:
87
  missing_input = [col for col in CATEGORICAL_FEATURES if col not in df_processed.columns]
88
+
89
+ if missing_input:
90
+ raise ValueError(f"Error : Categorical features {missing_input} are missing from input DataFrame!")
91
+ try:
92
+ df_processed[CATEGORICAL_FEATURES] = categorical_imputer.transform(df_processed[CATEGORICAL_FEATURES])
93
+
94
+ except Exception as e:
95
+ raise RuntimeError(
96
+ f"Error during categorical imputation\n"
97
+ f"Error : {e}"
98
+ )
99
 
100
  if le_structure_type is not None and 'structure_type' in df_processed.columns:
101
  try:
 
110
 
111
  if scaler is not None and NUMERICAL_FEATURES:
112
  missing_input = [col for col in NUMERICAL_FEATURES if col not in df_processed.columns]
113
+
114
+ if missing_input:
115
+ raise ValueError(f"Error : Numerical features {missing_input} are missing from input DataFrame")
116
+
117
+ try:
118
+ df_processed[NUMERICAL_FEATURES] = scaler.transform(df_processed[NUMERICAL_FEATURES])
119
+
120
+ except Exception as e:
121
+ raise RuntimeError(
122
+ f"Error during scaling\n"
123
+ f"Error: {e}"
124
+ )
125
 
126
  print(f"Current df_processed columns before final reorder: {df_processed.columns.tolist()}")
127
  #Checkpoint
 
137
  return df_final
138
 
139
  #The function I created above was based upon the numerical and categorical imputation, label encoding, scaling or basically all the data preprocessing that should be done after training all the models.....
140
+ #I have shown all the error messages in my code because I got stuck in this process many times, and I have also added some checkpoints in between to highlight mistakes. Now that all the data operations are done, the next step is DEPLOYMENT -> creating the FastAPI app and deploying it on AWS etc.
main.py CHANGED
@@ -4,23 +4,15 @@ import joblib
4
  import pandas as pd
5
  import os
6
 
7
- from Preprocessing import preprocess_input
8
 
9
  app = FastAPI(
10
  title="Electricity Cost Prediction API",
11
  description="Predicts electricity cost based on facility and operational parameters"
12
  )
13
 
14
- MODEL_PATH = "model.pkl"
15
-
16
- if not os.path.exists(MODEL_PATH):
17
- raise FileNotFoundError(
18
- "Model file not found"
19
- )
20
- try:
21
- model = joblib.load(MODEL_PATH)
22
- except Exception as e:
23
- raise RuntimeError(f"Error loading model from {MODEL_PATH}: {e}")
24
 
25
  class ElectricityInput(BaseModel):
26
  site_area: float = Field(..., description="Area of the site in square units")
 
4
  import pandas as pd
5
  import os
6
 
7
+ from Preprocessing import preprocess_input,model
8
 
9
  app = FastAPI(
10
  title="Electricity Cost Prediction API",
11
  description="Predicts electricity cost based on facility and operational parameters"
12
  )
13
 
14
+ if model is None:
15
+ raise RuntimeError("Critical Error: ML model failed to load from external source during application startup.")
 
 
 
 
 
 
 
 
16
 
17
  class ElectricityInput(BaseModel):
18
  site_area: float = Field(..., description="Area of the site in square units")
requirements.txt CHANGED
@@ -5,4 +5,5 @@ scikit-learn
5
  joblib
6
  pydantic
7
  gunicorn
8
- openpyxl
 
 
5
  joblib
6
  pydantic
7
  gunicorn
8
+ openpyxl
9
+ requests
train_and_save_model.py CHANGED
@@ -8,62 +8,63 @@ from sklearn.preprocessing import LabelEncoder, StandardScaler
8
  from sklearn.model_selection import train_test_split
9
  from sklearn.linear_model import LinearRegression
10
  import joblib
11
- import os
12
  import re
13
 
14
- DATASET_PATH = "C:/Users/kavya/Documents/GDG_Files_Kavya/electricity_predictor_API/electricity_cost_dataset.csv.xlsx"
15
- MODEL_OUTPUT_DIR = "."
 
16
 
17
  os.makedirs(MODEL_OUTPUT_DIR, exist_ok=True)
18
 
19
  def RenamingColumns(Column_Name):
20
- Column_Name = re.sub(r'\s+', '_', Column_Name)
21
- Column_Name = re.sub(r'[^\w_]', '', Column_Name)
22
- return Column_Name.lower()
23
 
24
  try:
25
- df = pd.read_excel(DATASET_PATH)
26
- print("Original columns ->\n")
27
- print(df.columns.tolist())
28
 
29
- new_columns = []
30
-
31
- #As I've to rename the columns....I'm using a for loop to do this->
32
- #If, the column names given as an input in the FastAPI are not same as the column names in the dataset...an error will be occured on the fastAPI application
33
-
34
- for col in df.columns:
35
- new_col = RenamingColumns(col)
36
- new_columns.append(new_col)
37
 
38
- df.columns = new_columns
39
 
40
- print("Renamed Columns ->\n")
41
- print(df.columns.tolist())
42
 
43
  except FileNotFoundError:
44
- print(f"Error: Dataset not found! Please ensure the file is in the same directory")
45
- exit()
46
-
47
  except Exception as e:
48
- print(f"Error : {e}")
49
- exit()
50
-
51
  #I used try and except blocks for ERROR HANDLING
52
  #Now, all the names have been changed and I've converted same as the datset ones...Therefor from here, I've used new names
53
 
54
  TARGET_COL = 'electricity_cost'
55
 
56
  if TARGET_COL not in df.columns:
57
- print(f"Error: Target column '{TARGET_COL}' not found!")
58
- exit()
59
 
60
  features_df = df.drop(columns=[TARGET_COL])
61
  #Using .drop, I removed the feature which will not be used in calculation
62
  y = df[TARGET_COL]
63
 
64
  NUMERICAL_FEATURES = [
65
- 'site_area', 'water_consumption', 'recycling_rate', 'utilisation_rate',
66
- 'air_qality_index', 'issue_reolution_time', 'resident_count'
67
  ]
68
  CATEGORICAL_FEATURES = ['structure_type']
69
 
@@ -71,44 +72,44 @@ all_expected_features = NUMERICAL_FEATURES + CATEGORICAL_FEATURES
71
  missing_features = [col for col in all_expected_features if col not in features_df.columns]
72
 
73
  if missing_features:
74
- print(f"Error: The following expected features are missing from the data after renaming: {missing_features}")
75
- exit()
76
  #The above steps were only for the safety purpose...to recheck if there is any missing features.
77
  #Actually, I did it just because I was facing many errors...therefore just to check I added some checkpoints.
78
 
79
  numerical_imputer = SimpleImputer(strategy='mean')
80
  if NUMERICAL_FEATURES:
81
- features_df[NUMERICAL_FEATURES] = numerical_imputer.fit_transform(features_df[NUMERICAL_FEATURES])
82
- joblib.dump(numerical_imputer, os.path.join(MODEL_OUTPUT_DIR, 'numerical_imputer.pkl'))
83
- print("Numerical imputer fitted and saved")
84
  else:
85
- print("No numerical columns to impute")
86
 
87
  categorical_imputer = SimpleImputer(strategy='most_frequent')
88
  if CATEGORICAL_FEATURES:
89
- features_df[CATEGORICAL_FEATURES] = categorical_imputer.fit_transform(features_df[CATEGORICAL_FEATURES])
90
- joblib.dump(categorical_imputer, os.path.join(MODEL_OUTPUT_DIR, 'categorical_imputer.pkl'))
91
- print("Categorical imputer fitted and saved")
92
  else:
93
- print("No categorical columns to impute")
94
  #I used joblib because I wanted to use this data later as well...therefore, whenever I will be in need of it I will load this with joblib.load()
95
 
96
  if 'structure_type' in features_df.columns:
97
- features_df['structure_type'] = features_df['structure_type'].astype(str).str.lower().str.strip()
98
- le_structure_type = LabelEncoder()
99
- features_df['structure_type'] = le_structure_type.fit_transform(features_df['structure_type'])
100
- joblib.dump(le_structure_type, os.path.join(MODEL_OUTPUT_DIR, 'label_encoder_structure_type.pkl'))
101
- print("LabelEncoder for 'structure_type' fitted and saved.")
102
  else:
103
- print("structure_type column not found or not categorical, skipping LabelEncoder.")
104
 
105
  if NUMERICAL_FEATURES:
106
- scaler = StandardScaler()
107
- features_df[NUMERICAL_FEATURES] = scaler.fit_transform(features_df[NUMERICAL_FEATURES])
108
- joblib.dump(scaler, os.path.join(MODEL_OUTPUT_DIR, 'scaler.pkl'))
109
- print("StandardScaler fitted and saved.")
110
  else:
111
- print("No numerical columns to scale.")
112
 
113
  #You can see that, I've used joblib.dump to create a separate directory for each imputer and encoder made
114
 
 
8
  from sklearn.model_selection import train_test_split
9
  from sklearn.linear_model import LinearRegression
10
  import joblib
11
+ import os
12
  import re
13
 
14
+ # Changed to a relative path for better portability on deployment platforms
15
+ DATASET_PATH = "electricity_cost_dataset.csv.xlsx"
16
+ MODEL_OUTPUT_DIR = "."
17
 
18
  os.makedirs(MODEL_OUTPUT_DIR, exist_ok=True)
19
 
20
  def RenamingColumns(Column_Name):
21
+ Column_Name = re.sub(r'\s+', '_', Column_Name)
22
+ Column_Name = re.sub(r'[^\w_]', '', Column_Name)
23
+ return Column_Name.lower()
24
 
25
  try:
26
+ df = pd.read_excel(DATASET_PATH)
27
+ print("Original columns ->\n")
28
+ print(df.columns.tolist())
29
 
30
+ new_columns = []
31
+
32
+ #As I've to rename the columns....I'm using a for loop to do this->
33
+ #If the column names sent to the FastAPI application do not match the column names in the dataset, the application will raise an error
34
+
35
+ for col in df.columns:
36
+ new_col = RenamingColumns(col)
37
+ new_columns.append(new_col)
38
 
39
+ df.columns = new_columns
40
 
41
+ print("Renamed Columns ->\n")
42
+ print(df.columns.tolist())
43
 
44
  except FileNotFoundError:
45
+ print(f"Error: Dataset not found! Please ensure the file is in the same directory")
46
+ exit()
47
+
48
  except Exception as e:
49
+ print(f"Error : {e}")
50
+ exit()
51
+
52
  #I used try and except blocks for ERROR HANDLING
53
  #Now all the columns have been renamed to match the dataset, so from here on I use the new names
54
 
55
  TARGET_COL = 'electricity_cost'
56
 
57
  if TARGET_COL not in df.columns:
58
+ print(f"Error: Target column '{TARGET_COL}' not found!")
59
+ exit()
60
 
61
  features_df = df.drop(columns=[TARGET_COL])
62
  #Using .drop, I removed the feature which will not be used in calculation
63
  y = df[TARGET_COL]
64
 
65
  NUMERICAL_FEATURES = [
66
+ 'site_area', 'water_consumption', 'recycling_rate', 'utilisation_rate',
67
+ 'air_qality_index', 'issue_reolution_time', 'resident_count'
68
  ]
69
  CATEGORICAL_FEATURES = ['structure_type']
70
 
 
72
  missing_features = [col for col in all_expected_features if col not in features_df.columns]
73
 
74
  if missing_features:
75
+ print(f"Error: The following expected features are missing from the data after renaming: {missing_features}")
76
+ exit()
77
  #The above steps were only for the safety purpose...to recheck if there is any missing features.
78
  #Actually, I did it just because I was facing many errors...therefore just to check I added some checkpoints.
79
 
80
  numerical_imputer = SimpleImputer(strategy='mean')
81
  if NUMERICAL_FEATURES:
82
+ features_df[NUMERICAL_FEATURES] = numerical_imputer.fit_transform(features_df[NUMERICAL_FEATURES])
83
+ joblib.dump(numerical_imputer, os.path.join(MODEL_OUTPUT_DIR, 'numerical_imputer.pkl'))
84
+ print("Numerical imputer fitted and saved")
85
  else:
86
+ print("No numerical columns to impute")
87
 
88
  categorical_imputer = SimpleImputer(strategy='most_frequent')
89
  if CATEGORICAL_FEATURES:
90
+ features_df[CATEGORICAL_FEATURES] = categorical_imputer.fit_transform(features_df[CATEGORICAL_FEATURES])
91
+ joblib.dump(categorical_imputer, os.path.join(MODEL_OUTPUT_DIR, 'categorical_imputer.pkl'))
92
+ print("Categorical imputer fitted and saved")
93
  else:
94
+ print("No categorical columns to impute")
95
  #I used joblib because I wanted to use this data later as well...therefore, whenever I will be in need of it I will load this with joblib.load()
96
 
97
  if 'structure_type' in features_df.columns:
98
+ features_df['structure_type'] = features_df['structure_type'].astype(str).str.lower().str.strip()
99
+ le_structure_type = LabelEncoder()
100
+ features_df['structure_type'] = le_structure_type.fit_transform(features_df['structure_type'])
101
+ joblib.dump(le_structure_type, os.path.join(MODEL_OUTPUT_DIR, 'label_encoder_structure_type.pkl'))
102
+ print("LabelEncoder for 'structure_type' fitted and saved.")
103
  else:
104
+ print("structure_type column not found or not categorical, skipping LabelEncoder.")
105
 
106
  if NUMERICAL_FEATURES:
107
+ scaler = StandardScaler()
108
+ features_df[NUMERICAL_FEATURES] = scaler.fit_transform(features_df[NUMERICAL_FEATURES])
109
+ joblib.dump(scaler, os.path.join(MODEL_OUTPUT_DIR, 'scaler.pkl'))
110
+ print("StandardScaler fitted and saved.")
111
  else:
112
+ print("No numerical columns to scale.")
113
 
114
  #As you can see, I've used joblib.dump to save each imputer, encoder and scaler to its own .pkl file
115