Upload 15 files
Browse files- src/__pycache__/exception.cpython-312.pyc +0 -0
- src/__pycache__/utils.cpython-312.pyc +0 -0
- src/_init_.py +0 -0
- src/components/_init_.py +0 -0
- src/components/data_ingestion.py +80 -0
- src/components/data_transformation.py +138 -0
- src/components/model_trainer.py +119 -0
- src/exception.py +19 -0
- src/logger.py +25 -0
- src/pipeline/__init__.py +0 -0
- src/pipeline/__pycache__/__init__.cpython-312.pyc +0 -0
- src/pipeline/__pycache__/predict_pipeline.cpython-312.pyc +0 -0
- src/pipeline/predict_pipeline.py +68 -0
- src/pipeline/train_pipeline.py +0 -0
- src/utils.py +62 -0
src/__pycache__/exception.cpython-312.pyc
ADDED
|
Binary file (1.6 kB). View file
|
|
|
src/__pycache__/utils.cpython-312.pyc
ADDED
|
Binary file (2.82 kB). View file
|
|
|
src/_init_.py
ADDED
|
File without changes
|
src/components/_init_.py
ADDED
|
File without changes
|
src/components/data_ingestion.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Import necessary libraries and modules
import os   # For file and directory operations
import sys  # For system-specific parameters and functions

import pandas as pd  # For data manipulation and analysis
from sklearn.model_selection import train_test_split  # For splitting data into training/testing sets
from dataclasses import dataclass  # For creating data classes

# Use absolute imports consistently: every other module in this package imports
# via `src.` (see data_transformation.py, model_trainer.py). The previous
# relative `from ..exception import CustomException` also breaks when this file
# is executed directly as a script (python src/components/data_ingestion.py).
from src.exception import CustomException
from src.logger import logging

# Import components for data transformation and model training
from src.components.data_transformation import DataTransformation    # Data transformation class
from src.components.data_transformation import DataTransformationConfig  # Configuration for data transformation
from src.components.model_trainer import ModelTrainerConfig  # Configuration for model training
from src.components.model_trainer import ModelTrainer        # Model training class
+
|
| 19 |
+
# Define a data class for Data Ingestion Configuration
|
| 20 |
+
@dataclass
|
| 21 |
+
class DataIngestionConfig:
|
| 22 |
+
# Specify file paths for training, testing, and raw data
|
| 23 |
+
train_data_path: str = os.path.join('artifacts', "train.csv")
|
| 24 |
+
test_data_path: str = os.path.join('artifacts', "test.csv")
|
| 25 |
+
raw_data_path: str = os.path.join('artifacts', "data.csv")
|
| 26 |
+
|
class DataIngestion:
    """Reads the source dataset and materialises raw/train/test CSV artifacts."""

    def __init__(self):
        # All output file locations come from the ingestion config object.
        self.ingestion_config = DataIngestionConfig()

    def initiate_data_ingestion(self):
        """Run the ingestion step end to end.

        Returns:
            tuple[str, str]: paths of the written train and test CSV files.

        Raises:
            CustomException: wraps any error raised while reading or writing.
        """
        logging.info("Entered the data ingestion method or component")
        try:
            # Source dataset location, relative to the project root.
            source_df = pd.read_csv('notebook/data/stud.csv')
            logging.info('Read the dataset as dataframe')

            # Ensure the artifacts directory exists before writing anything.
            os.makedirs(os.path.dirname(self.ingestion_config.train_data_path), exist_ok=True)

            # Keep an untouched copy of the raw data alongside the splits.
            source_df.to_csv(self.ingestion_config.raw_data_path, index=False, header=True)

            logging.info("Train test split initiated")
            # 80/20 split with a fixed seed so repeated runs are reproducible.
            train_set, test_set = train_test_split(source_df, test_size=0.2, random_state=42)

            train_set.to_csv(self.ingestion_config.train_data_path, index=False, header=True)
            test_set.to_csv(self.ingestion_config.test_data_path, index=False, header=True)

            logging.info("Ingestion of the data is completed")

            return (
                self.ingestion_config.train_data_path,
                self.ingestion_config.test_data_path,
            )
        except Exception as e:
            raise CustomException(e, sys)  # Wrap any failure in the project exception
# Script entry point: run ingestion -> transformation -> training end to end.
if __name__ == "__main__":
    # Step 1: ingest the raw dataset and produce the train/test CSVs.
    ingestion = DataIngestion()
    train_data, test_data = ingestion.initiate_data_ingestion()

    # Step 2: fit the preprocessor and transform both splits into arrays.
    transformation = DataTransformation()
    train_arr, test_arr, _ = transformation.initiate_data_transformation(train_data, test_data)

    # Step 3: train the candidate models and report the best test R^2.
    trainer = ModelTrainer()
    print(trainer.initiate_model_trainer(train_arr, test_arr))
src/components/data_transformation.py
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
from dataclasses import dataclass
|
| 3 |
+
|
| 4 |
+
# Importing necessary libraries to handle data manipulation and machine learning.
|
| 5 |
+
import numpy as np
|
| 6 |
+
import pandas as pd
|
| 7 |
+
from sklearn.compose import ColumnTransformer
|
| 8 |
+
from sklearn.impute import SimpleImputer
|
| 9 |
+
from sklearn.pipeline import Pipeline
|
| 10 |
+
from sklearn.preprocessing import OneHotEncoder, StandardScaler
|
| 11 |
+
|
| 12 |
+
# Custom modules for handling exceptions and logging messages.
|
| 13 |
+
from src.exception import CustomException
|
| 14 |
+
from src.logger import logging
|
| 15 |
+
|
| 16 |
+
import os
|
| 17 |
+
|
| 18 |
+
# Utility function to save files (used later).
|
| 19 |
+
from src.utils import save_object
|
| 20 |
+
|
# Configuration for the data-transformation step.
@dataclass
class DataTransformationConfig:
    """Holds the location where the fitted preprocessor object is persisted."""

    # Annotated as `str` so this is a real dataclass *field* (a bare assignment
    # under @dataclass is just a shared class attribute and is not picked up by
    # the generated __init__), matching DataIngestionConfig's style.
    preprocessor_obj_file_path: str = os.path.join('artifacts', "preprocessor.pkl")
# Main class responsible for data transformation.
class DataTransformation:
    """Builds the preprocessing pipeline and applies it to the train/test data."""

    def __init__(self):
        # Configuration (e.g. where to persist the fitted preprocessor).
        self.data_transformation_config = DataTransformationConfig()

    def get_data_transformation_object(self):
        '''
        Create and return a ColumnTransformer that imputes/scales the numerical
        columns and imputes/one-hot-encodes/scales the categorical columns.
        '''
        try:
            # Column lists must match the schema produced by data ingestion.
            numerical_columns = ["writing_score", "reading_score"]
            categorical_columns = [
                "gender",
                "race_ethnicity",
                "parental_level_of_education",
                "lunch",
                "test_preparation_course",
            ]

            # Numerical features: median imputation, then standard scaling.
            num_pipeline = Pipeline(
                steps=[
                    ("imputer", SimpleImputer(strategy="median")),
                    ("scaler", StandardScaler())
                ]
            )

            # Categorical features: most-frequent imputation, one-hot encoding,
            # then scaling without mean-centering (the encoded matrix is sparse,
            # and centering would densify it).
            cat_pipeline = Pipeline(
                steps=[
                    ("imputer", SimpleImputer(strategy="most_frequent")),
                    ("one_hot_encoder", OneHotEncoder()),
                    ("scaler", StandardScaler(with_mean=False))
                ]
            )

            logging.info(f"Categorical columns: {categorical_columns}")
            logging.info(f"Numerical columns: {numerical_columns}")

            # Combine both pipelines into a single column-wise transformer.
            preprocessor = ColumnTransformer(
                [
                    ("num_pipeline", num_pipeline, numerical_columns),
                    ("cat_pipeline", cat_pipeline, categorical_columns)
                ]
            )

            return preprocessor

        except Exception as e:
            raise CustomException(e, sys)

    def initiate_data_transformation(self, train_path, test_path):
        """Fit the preprocessor on the train set, transform both sets, persist it.

        Args:
            train_path: path to the training CSV produced by ingestion.
            test_path: path to the testing CSV produced by ingestion.

        Returns:
            tuple: (train_array, test_array, path_to_saved_preprocessor), where
            the target column is appended as the last column of each array.

        Raises:
            CustomException: wraps any error raised during transformation.
        """
        try:
            train_df = pd.read_csv(train_path)
            test_df = pd.read_csv(test_path)

            logging.info("Reading train and test data completed.")

            logging.info("Obtaining preprocessing object.")
            preprocessing_obj = self.get_data_transformation_object()

            # The column we want to predict.
            target_column_name = "math_score"

            # Separate input features from the target for both splits.
            input_feature_train_df = train_df.drop(columns=[target_column_name], axis=1)
            target_feature_train_df = train_df[target_column_name]

            input_feature_test_df = test_df.drop(columns=[target_column_name], axis=1)
            target_feature_test_df = test_df[target_column_name]

            logging.info("Applying preprocessing object on training and testing data.")

            # Fit only on the training data to avoid leaking test-set statistics.
            input_feature_train_arr = preprocessing_obj.fit_transform(input_feature_train_df)
            input_feature_test_arr = preprocessing_obj.transform(input_feature_test_df)

            # Re-attach the target as the last column of each array.
            train_arr = np.c_[input_feature_train_arr, np.array(target_feature_train_df)]
            test_arr = np.c_[input_feature_test_arr, np.array(target_feature_test_df)]

            # Persist the fitted preprocessor for reuse at prediction time.
            save_object(
                file_path=self.data_transformation_config.preprocessor_obj_file_path,
                obj=preprocessing_obj
            )
            # Fix: log *after* save_object actually ran — previously this line
            # executed before the save, which misled readers when saving failed.
            logging.info("Saved preprocessing object.")

            return (
                train_arr,
                test_arr,
                self.data_transformation_config.preprocessor_obj_file_path,
            )
        except Exception as e:
            raise CustomException(e, sys)
src/components/model_trainer.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import sys
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
|
| 5 |
+
from catboost import CatBoostRegressor
|
| 6 |
+
from sklearn.ensemble import (
|
| 7 |
+
AdaBoostRegressor,
|
| 8 |
+
GradientBoostingRegressor,
|
| 9 |
+
RandomForestRegressor,
|
| 10 |
+
)
|
| 11 |
+
from sklearn.linear_model import LinearRegression
|
| 12 |
+
from sklearn.metrics import r2_score
|
| 13 |
+
from sklearn.neighbors import KNeighborsRegressor
|
| 14 |
+
from sklearn.tree import DecisionTreeRegressor
|
| 15 |
+
from xgboost import XGBRegressor
|
| 16 |
+
|
| 17 |
+
from src.exception import CustomException
|
| 18 |
+
from src.logger import logging
|
| 19 |
+
|
| 20 |
+
from src.utils import save_object,evaluate_models
|
| 21 |
+
|
@dataclass
class ModelTrainerConfig:
    """Configuration for the model-training step."""

    # Annotated as `str` so this is a real dataclass *field* (a bare assignment
    # under @dataclass is only a shared class attribute), consistent with
    # DataIngestionConfig.
    trained_model_file_path: str = os.path.join("artifacts", "model.pkl")
class ModelTrainer:
    """Grid-searches several candidate regressors, saves the best, reports R^2."""

    def __init__(self):
        self.model_trainer_config = ModelTrainerConfig()

    def initiate_model_trainer(self, train_array, test_array):
        """Train all candidate models and persist the best one.

        Args:
            train_array: 2-D array whose last column is the target.
            test_array: 2-D array with the same layout.

        Returns:
            float: the best model's R^2 score on the test set.

        Raises:
            CustomException: if no model reaches an R^2 of 0.6, or on any error.
        """
        try:
            logging.info("Split training and test input data")
            # Last column is the target; everything before it is a feature.
            X_train, y_train, X_test, y_test = (
                train_array[:, :-1],
                train_array[:, -1],
                test_array[:, :-1],
                test_array[:, -1],
            )
            models = {
                "Random Forest": RandomForestRegressor(),
                "Decision Tree": DecisionTreeRegressor(),
                "Gradient Boosting": GradientBoostingRegressor(),
                "Linear Regression": LinearRegression(),
                "XGBRegressor": XGBRegressor(),
                "CatBoosting Regressor": CatBoostRegressor(verbose=False),
                "AdaBoost Regressor": AdaBoostRegressor(),
            }
            # Hyper-parameter grids, keyed by the same names as `models`.
            params = {
                "Decision Tree": {
                    'criterion': ['squared_error', 'friedman_mse', 'absolute_error', 'poisson'],
                },
                "Random Forest": {
                    'n_estimators': [8, 16, 32, 64, 128, 256]
                },
                "Gradient Boosting": {
                    'learning_rate': [.1, .01, .05, .001],
                    'subsample': [0.6, 0.7, 0.75, 0.8, 0.85, 0.9],
                    'n_estimators': [8, 16, 32, 64, 128, 256]
                },
                "Linear Regression": {},
                "XGBRegressor": {
                    'learning_rate': [.1, .01, .05, .001],
                    'n_estimators': [8, 16, 32, 64, 128, 256]
                },
                "CatBoosting Regressor": {
                    'depth': [6, 8, 10],
                    'learning_rate': [0.01, 0.05, 0.1],
                    'iterations': [30, 50, 100]
                },
                "AdaBoost Regressor": {
                    'learning_rate': [.1, .01, 0.5, .001],
                    'n_estimators': [8, 16, 32, 64, 128, 256]
                }
            }

            model_report: dict = evaluate_models(
                X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test,
                models=models, param=params,
            )

            # Pick the best model by test-set R^2 directly from the dict.
            # (The original `max(sorted(values))` sort was redundant, and the
            # name lookup via list(...).index was an O(n) round-trip.)
            best_model_name = max(model_report, key=model_report.get)
            best_model_score = model_report[best_model_name]
            best_model = models[best_model_name]

            if best_model_score < 0.6:
                # Fix: CustomException requires the sys module as its second
                # argument; the original one-argument call raised TypeError
                # instead of the intended "No best model found" error.
                raise CustomException("No best model found", sys)
            logging.info("Best found model on both training and testing dataset")

            save_object(
                file_path=self.model_trainer_config.trained_model_file_path,
                obj=best_model,
            )

            # Recompute the best model's test-set R^2 as the return value.
            predicted = best_model.predict(X_test)
            r2_square = r2_score(y_test, predicted)
            return r2_square

        except Exception as e:
            raise CustomException(e, sys)
src/exception.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# src/exception.py
|
| 2 |
+
import sys
|
| 3 |
+
import logging
|
| 4 |
+
|
class CustomException(Exception):
    """Project-wide exception that embeds the script name and line number of
    the originating error in its message."""

    def __init__(self, error_message: Exception, error_detail: sys):
        super().__init__(error_message)
        # Pre-render the detailed message once so __str__ is cheap.
        self.error_message = CustomException.get_detailed_error_message(error_message, error_detail)

    @staticmethod
    def get_detailed_error_message(error: Exception, error_detail: sys):
        """Build 'Error occurred in script: <file> at line <n>: <error>'.

        Falls back to the plain error text when there is no active exception:
        sys.exc_info() returns (None, None, None) outside an except block,
        which previously crashed with AttributeError on `None.tb_frame`.
        """
        _, _, exc_tb = error_detail.exc_info()
        if exc_tb is None:
            return str(error)
        file_name = exc_tb.tb_frame.f_code.co_filename
        line_number = exc_tb.tb_lineno
        error_message = f"Error occurred in script: {file_name} at line {line_number}: {str(error)}"
        return error_message

    def __str__(self):
        return self.error_message
src/logger.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Project-wide logging setup.

Importing this module creates a ``logs/`` directory under the current working
directory and configures the root logger to append INFO-and-above messages to
a file named after the import timestamp.
"""
import logging
import os
from datetime import datetime

# One log file per run, named by the import timestamp: MM_DD_YYYY_HH_MM_SS.log
LOG_FILE = f"{datetime.now().strftime('%m_%d_%Y_%H_%M_%S')}.log"

# All log files live in <cwd>/logs; create the directory on first import.
logs_dir = os.path.join(os.getcwd(), "logs")
os.makedirs(logs_dir, exist_ok=True)

# Absolute path of this run's log file.
LOG_FILE_PATH = os.path.join(logs_dir, LOG_FILE)

# Route root-logger output to the file with a timestamp/line/level prefix.
logging.basicConfig(
    filename=LOG_FILE_PATH,
    format="[ %(asctime)s ] %(lineno)d %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Emit one record so a fresh file proves the configuration took effect.
logging.info("Logging has been configured successfully.")
src/pipeline/__init__.py
ADDED
|
File without changes
|
src/pipeline/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (190 Bytes). View file
|
|
|
src/pipeline/__pycache__/predict_pipeline.cpython-312.pyc
ADDED
|
Binary file (2.94 kB). View file
|
|
|
src/pipeline/predict_pipeline.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
import pandas as pd
|
| 3 |
+
from src.exception import CustomException
|
| 4 |
+
from src.utils import load_object
|
| 5 |
+
import os
|
| 6 |
+
|
| 7 |
+
|
class PredictPipeline:
    """Loads the persisted preprocessor and model and serves predictions."""

    def __init__(self):
        pass

    def predict(self, features):
        """Transform `features` with the saved preprocessor and predict.

        Args:
            features: DataFrame with the same columns the preprocessor was fit on
                (see CustomData.get_data_as_data_frame).

        Returns:
            The model's predictions for `features`.

        Raises:
            CustomException: on any failure while loading artifacts or predicting.
        """
        try:
            # Artifacts produced by the training pipeline.
            model_path = os.path.join("artifacts", "model.pkl")
            preprocessor_path = os.path.join('artifacts', 'preprocessor.pkl')
            # Fix: removed leftover debug print("Before Loading") /
            # print("After Loading") calls that polluted stdout per prediction.
            model = load_object(file_path=model_path)
            preprocessor = load_object(file_path=preprocessor_path)
            data_scaled = preprocessor.transform(features)
            preds = model.predict(data_scaled)
            return preds

        except Exception as e:
            raise CustomException(e, sys)
| 26 |
+
|
| 27 |
+
|
| 28 |
+
|
class CustomData:
    """Container for one prediction request's raw input fields."""

    def __init__(self,
                 gender: str,
                 race_ethnicity: str,
                 parental_level_of_education,
                 lunch: str,
                 test_preparation_course: str,
                 reading_score: int,
                 writing_score: int):
        # Plain attribute assignment; no validation is performed here.
        self.gender = gender
        self.race_ethnicity = race_ethnicity
        self.parental_level_of_education = parental_level_of_education
        self.lunch = lunch
        self.test_preparation_course = test_preparation_course
        self.reading_score = reading_score
        self.writing_score = writing_score

    def get_data_as_data_frame(self):
        """Return the fields as a single-row DataFrame matching the training schema."""
        try:
            # One-element lists produce a DataFrame with exactly one row.
            return pd.DataFrame({
                "gender": [self.gender],
                "race_ethnicity": [self.race_ethnicity],
                "parental_level_of_education": [self.parental_level_of_education],
                "lunch": [self.lunch],
                "test_preparation_course": [self.test_preparation_course],
                "reading_score": [self.reading_score],
                "writing_score": [self.writing_score],
            })

        except Exception as e:
            raise CustomException(e, sys)
src/pipeline/train_pipeline.py
ADDED
|
File without changes
|
src/utils.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import sys
|
| 3 |
+
|
| 4 |
+
import numpy as np
|
| 5 |
+
import pandas as pd
|
| 6 |
+
import dill
|
| 7 |
+
import pickle
|
| 8 |
+
from sklearn.metrics import r2_score
|
| 9 |
+
from sklearn.model_selection import GridSearchCV
|
| 10 |
+
|
| 11 |
+
from src.exception import CustomException
|
| 12 |
+
|
def save_object(file_path, obj):
    """Pickle `obj` to `file_path`, creating parent directories as needed.

    Raises:
        CustomException: wrapping any I/O or pickling error.
    """
    try:
        # Ensure the destination directory exists before opening the file.
        os.makedirs(os.path.dirname(file_path), exist_ok=True)

        with open(file_path, "wb") as file_obj:
            pickle.dump(obj, file_obj)

    except Exception as e:
        raise CustomException(e, sys)
| 24 |
+
|
def evaluate_models(X_train, y_train, X_test, y_test, models, param):
    """Grid-search and fit each candidate model; return its test-set R^2.

    Args:
        X_train, y_train: training features and target.
        X_test, y_test: held-out features and target.
        models: mapping of model name -> estimator instance.
        param: mapping of model name -> hyper-parameter grid for GridSearchCV.

    Returns:
        dict: {model_name: R^2 score on the test set}.

    Raises:
        CustomException: wrapping any error raised during search or fitting.
    """
    try:
        report = {}

        # Iterate the mapping directly instead of indexing parallel lists
        # via range(len(list(models))).
        for name, model in models.items():
            grid = param[name]

            # 3-fold grid search to pick the best hyper-parameters.
            gs = GridSearchCV(model, grid, cv=3)
            gs.fit(X_train, y_train)

            # Refit the estimator itself with the winning parameters so the
            # caller's `models` dict holds the tuned, fitted instances.
            model.set_params(**gs.best_params_)
            model.fit(X_train, y_train)

            # Score on the held-out test set. (The train-set R^2 was computed
            # but never used in the original; dropped.)
            report[name] = r2_score(y_test, model.predict(X_test))

        return report

    except Exception as e:
        raise CustomException(e, sys)
| 55 |
+
|
def load_object(file_path):
    """Unpickle and return the object stored at `file_path`.

    Raises:
        CustomException: wrapping any I/O or unpickling error.
    """
    try:
        with open(file_path, "rb") as file_obj:
            obj = pickle.load(file_obj)
        return obj

    except Exception as e:
        raise CustomException(e, sys)