# Scraped page header - Spaces: Sleeping; file size: 4,711 bytes; revision a21e473.
import sys
import os
import numpy as np
import pandas as pd
from sklearn.impute import KNNImputer
from sklearn.pipeline import Pipeline
from src.constant.training_pipeline import TARGET_COLUMN
from src.constant.training_pipeline import DATA_TRANSFORMATION_IMPUTER_PARAMS
from src.entity.artifact_entity import (
DataTransformationArtifact,
DataValidationArtifact,
)
from src.exception.exception import NetworkSecurityException
from src.logging.logger import logging
from src.utils.main_utils.utils import save_np_array, save_object
from src.entity.config_entity import Data_transformation_config
class DataTransformation:
    """Impute missing feature values and persist model-ready train/test arrays.

    The validated train and test CSV files named by the data-validation
    artifact are read, a KNN-imputer pipeline is fitted on the training
    features only, and the fitted pipeline is applied to both splits. The
    transformed arrays (features with the target appended as the last column)
    and the fitted preprocessor are then saved to the paths given by the
    transformation config.
    """

    # Fixed extra location where the fitted preprocessor is saved, in addition
    # to the configured ``transformed_object_file_path``.
    FINAL_PREPROCESSOR_PATH = "final_model/preprocessor.pkl"

    def __init__(
        self,
        data_validation_artifact: DataValidationArtifact,
        data_transformation_config: Data_transformation_config,
    ):
        """Store the validation artifact and the transformation config.

        Args:
            data_validation_artifact: Supplies ``valid_train_file_path`` and
                ``valid_test_file_path``.
            data_transformation_config: Supplies the output paths for the
                transformed arrays and the fitted preprocessor.
        """
        # Plain attribute assignment cannot fail, so no exception wrapper is
        # needed here.
        self.data_validation_artifact = data_validation_artifact
        self.data_transformation_config = data_transformation_config

    @staticmethod
    def read_data(file_path) -> pd.DataFrame:
        """Load the CSV file at *file_path* into a DataFrame.

        Raises:
            NetworkSecurityException: If the file cannot be read or parsed.
        """
        try:
            return pd.read_csv(file_path)
        except Exception as e:
            raise NetworkSecurityException(e, sys) from e

    @staticmethod
    def _split_features_and_target(df: pd.DataFrame, target_column: str):
        """Split *df* into ``(features, target)``.

        ``features`` is *df* without *target_column*; ``target`` is that
        column with every ``-1`` label replaced by ``0``. *df* itself is not
        modified.
        """
        features = df.drop(columns=[target_column])
        target = df[target_column].replace(-1, 0)
        return features, target

    def get_data_transformer_object(self) -> Pipeline:
        """Build the (unfitted) preprocessing pipeline.

        Returns:
            A ``Pipeline`` whose single step, named ``"imputer"``, is a
            ``KNNImputer`` constructed from
            ``DATA_TRANSFORMATION_IMPUTER_PARAMS``.

        Raises:
            NetworkSecurityException: If the imputer or pipeline cannot be
                constructed.
        """
        logging.info(
            "Entered get_data_transformer_object method of DataTransformation class"
        )
        try:
            knn_imputer = KNNImputer(**DATA_TRANSFORMATION_IMPUTER_PARAMS)
            logging.info(f"intialise knn imputer with {DATA_TRANSFORMATION_IMPUTER_PARAMS}")
            return Pipeline(steps=[("imputer", knn_imputer)])
        except Exception as e:
            raise NetworkSecurityException(e, sys) from e

    def initiate_data_transformation(self) -> DataTransformationArtifact:
        """Run the transformation step end to end.

        Returns:
            A ``DataTransformationArtifact`` carrying the configured paths of
            the saved preprocessor and of the transformed train/test arrays.

        Raises:
            NetworkSecurityException: If any stage (reading, fitting,
                transforming, or saving) fails.
        """
        try:
            logging.info("Started data transformation!")
            validation = self.data_validation_artifact
            config = self.data_transformation_config

            train_df = DataTransformation.read_data(validation.valid_train_file_path)
            test_df = DataTransformation.read_data(validation.valid_test_file_path)

            input_feature_train_df, target_feature_train_df = (
                self._split_features_and_target(train_df, TARGET_COLUMN)
            )
            input_feature_test_df, target_feature_test_df = (
                self._split_features_and_target(test_df, TARGET_COLUMN)
            )

            # Fit on the training features only; the test split is only
            # transformed with the already-fitted preprocessor.
            preprocessor_obj = self.get_data_transformer_object().fit(
                input_feature_train_df
            )
            logging.info("Preprocessor object created and fitted on training data")
            transformed_input_train_feature = preprocessor_obj.transform(
                input_feature_train_df
            )
            transformed_input_test_feature = preprocessor_obj.transform(
                input_feature_test_df
            )

            # Append the target as the last column of each transformed array.
            train_arr = np.c_[
                transformed_input_train_feature, np.array(target_feature_train_df)
            ]
            test_arr = np.c_[
                transformed_input_test_feature, np.array(target_feature_test_df)
            ]

            save_np_array(config.transformed_train_file_path, array=train_arr)
            save_np_array(config.transformed_test_file_path, array=test_arr)
            save_object(config.transformed_object_file_path, preprocessor_obj)
            save_object(self.FINAL_PREPROCESSOR_PATH, preprocessor_obj)

            return DataTransformationArtifact(
                transformed_object_file_path=config.transformed_object_file_path,
                transformed_train_file_path=config.transformed_train_file_path,
                transformed_test_file_path=config.transformed_test_file_path,
            )
        except NetworkSecurityException:
            # Already wrapped by read_data / get_data_transformer_object;
            # re-raise unchanged instead of wrapping it a second time.
            raise
        except Exception as e:
            raise NetworkSecurityException(e, sys) from e
|