Spaces:
Build error
Build error
| import sys | |
| from dataclasses import dataclass | |
| import numpy as np | |
| import pandas as pd | |
| from sklearn.compose import ColumnTransformer | |
| from sklearn.impute import SimpleImputer | |
| from sklearn.pipeline import Pipeline | |
| from sklearn.preprocessing import OrdinalEncoder,StandardScaler | |
| from ..exception import CustomException | |
| from src.logger import logging | |
| import os | |
| from src.utils import save_object | |
@dataclass
class DataTransformationConfig:
    """Configuration for the data-transformation stage.

    Declared as a dataclass so the default path can be overridden per
    instance, e.g. ``DataTransformationConfig(preprocessor_obj_file_path=...)``,
    while ``DataTransformationConfig()`` keeps the original default.
    """

    # Path the preprocessing object is saved to (a relative path built from
    # the 'artifacts' directory and the 'preprocessor.pkl' file name).
    preprocessor_obj_file_path: str = os.path.join('artifacts', 'preprocessor.pkl')
class DataTransformation:
    """Build, fit and persist the preprocessing pipeline for train/test CSV data."""

    def __init__(self):
        self.data_transformation_config = DataTransformationConfig()

    def get_data_transformation_object(self):
        """Build the (unfitted) preprocessing ``ColumnTransformer``.

        Numerical columns are median-imputed and standardised. Categorical
        columns are imputed with the most frequent value, ordinal-encoded
        using the explicit category order defined below, and standardised.

        Returns:
            The unfitted ``ColumnTransformer`` combining both pipelines.

        Raises:
            CustomException: wraps any exception raised while building it.
        """
        try:
            # Column name -> ordered categories. Keeping both in one mapping
            # guarantees the encoder's category lists stay aligned with the
            # columns they belong to (dicts preserve insertion order).
            ordinal_categories = {
                'Sex': ['M', 'F'],
                'ChestPainType': ['ATA', 'NAP', 'ASY', 'TA'],
                'RestingECG': ['Normal', 'ST', 'LVH'],
                'ExerciseAngina': ['N', 'Y'],
                'ST_Slope': ['Up', 'Flat', 'Down'],
            }
            categorical_cols = list(ordinal_categories)
            numerical_cols = ['Age', 'RestingBP', 'Cholesterol', 'FastingBS', 'MaxHR', 'Oldpeak']

            # Numerical pipeline
            num_pipeline = Pipeline(
                steps=[
                    ('imputer', SimpleImputer(strategy='median')),
                    ('scaler', StandardScaler()),
                ]
            )

            # Categorical pipeline
            cat_pipeline = Pipeline(
                steps=[
                    ('imputer', SimpleImputer(strategy='most_frequent')),
                    (
                        'ordinal_encoder',
                        OrdinalEncoder(categories=list(ordinal_categories.values())),
                    ),
                    ('scaler', StandardScaler()),
                ]
            )

            logging.info(f'Categorical Columns : {categorical_cols}')
            logging.info(f'Numerical Columns : {numerical_cols}')

            preprocessor = ColumnTransformer(
                [
                    ('num_pipeline', num_pipeline, numerical_cols),
                    ('cat_pipeline', cat_pipeline, categorical_cols),
                ]
            )
            return preprocessor
        except Exception as e:
            # logging.exception records the traceback alongside the message.
            logging.exception('Exception occured in Data Transformation Phase')
            raise CustomException(e, sys) from e

    @staticmethod
    def _split_features_and_target(df, target_column_name):
        """Return ``(features, target)``: *df* without the target column, and that column."""
        return df.drop(columns=[target_column_name]), df[target_column_name]

    def initate_data_transformation(self, train_path, test_path, target_column_name='HeartDisease'):
        """Fit the preprocessor on the training CSV and transform both CSVs.

        Args:
            train_path: Path of the training CSV file.
            test_path: Path of the test CSV file.
            target_column_name: Name of the target column present in both
                files; defaults to ``'HeartDisease'``.

        Returns:
            A tuple ``(train_arr, test_arr, preprocessor_path)``. Each array
            holds the transformed features with the target values appended as
            the last column; ``preprocessor_path`` is the configured path the
            fitted preprocessor was saved to.

        Raises:
            CustomException: wraps any exception raised during the process.
        """
        try:
            # Reading train and test data
            train_df = pd.read_csv(train_path)
            test_df = pd.read_csv(test_path)

            logging.info('Read train and test data completed')
            logging.info(f'Train Dataframe Head : \n{train_df.head().to_string()}')
            logging.info(f'Test Dataframe Head : \n{test_df.head().to_string()}')

            logging.info('Obtaining preprocessing object')
            preprocessing_obj = self.get_data_transformation_object()

            input_feature_train_df, target_feature_train_df = (
                self._split_features_and_target(train_df, target_column_name)
            )
            input_feature_test_df, target_feature_test_df = (
                self._split_features_and_target(test_df, target_column_name)
            )

            logging.info("Applying preprocessing object on training and testing datasets.")
            # Fit on the training data only; the test data is transformed with
            # the statistics learned from the training data.
            input_feature_train_arr = preprocessing_obj.fit_transform(input_feature_train_df)
            input_feature_test_arr = preprocessing_obj.transform(input_feature_test_df)

            # Append the target as the last column of each array.
            train_arr = np.c_[input_feature_train_arr, np.array(target_feature_train_df)]
            test_arr = np.c_[input_feature_test_arr, np.array(target_feature_test_df)]

            save_object(
                file_path=self.data_transformation_config.preprocessor_obj_file_path,
                obj=preprocessing_obj,
            )
            logging.info('Preprocessor pickle file saved')

            return (
                train_arr,
                test_arr,
                self.data_transformation_config.preprocessor_obj_file_path,
            )
        except CustomException:
            # Already logged and wrapped by get_data_transformation_object;
            # re-raise as-is instead of wrapping and logging a second time.
            raise
        except Exception as e:
            logging.exception('Exception occured in initiate_data_transformation function')
            raise CustomException(e, sys) from e