| import pandas as pd | |
| from src.logger import logging | |
| from src.exception import FertilizerException | |
| from src.config import mongo_client | |
| import os | |
| import sys | |
| import numpy as np | |
| import yaml | |
| import dill | |
def get_collection_as_dataframe(
    database_name: str, collection_name: str
) -> pd.DataFrame:
    """
    Description: Load every document of a MongoDB collection into a dataframe
    =========================================================
    Params:
    database_name: name of the database to read from
    collection_name: name of the collection inside that database
    =========================================================
    return Pandas dataframe of the collection with the '_id' column removed
    raise FertilizerException wrapping any error raised while reading
    """
    try:
        logging.info(
            f"Reading data from database: {database_name} and collection: {collection_name}"
        )
        collection = mongo_client[database_name][collection_name]
        records = list(collection.find())
        frame = pd.DataFrame(records)
        logging.info(f"{database_name} found in the mongodb")

        # Drop the '_id' column when the fetched documents carry one.
        if "_id" in frame.columns:
            logging.info("Dropping column: '_id'")
            frame = frame.drop(columns=["_id"])

        logging.info(f"Row and columns in df: {frame.shape}")
        return frame
    except Exception as error:
        raise FertilizerException(error, sys)
def seperate_dependant_column(df: pd.DataFrame, exclude_column: list) -> pd.DataFrame:
    """
    Description: Return a new dataframe without the given columns
    =========================================================
    Params:
    df: dataframe to take the columns from (left unmodified)
    exclude_column: column labels to remove
    =========================================================
    return Pandas dataframe holding the remaining columns
    """
    return df.drop(labels=exclude_column, axis=1)
def get_column_indices(numerical_features: list, categorical_features: list, base_file_path: str):
    """
    Description: Look up the positional index of feature columns in a CSV file
    =========================================================
    Params:
    numerical_features: names of the numerical feature columns
    categorical_features: names of the categorical feature columns
    base_file_path: path of the CSV file whose header defines the column order
    =========================================================
    return tuple (numerical_feature_indices, categorical_feature_indices),
    each a list of column positions in the same order as the given names
    raise KeyError when a feature name is not a column of the file
    """
    # Only the header is needed to resolve positions, so skip parsing the rows.
    columns = pd.read_csv(base_file_path, nrows=0).columns
    numerical_feature_indices = [columns.get_loc(feature) for feature in numerical_features]
    categorical_feature_indices = [columns.get_loc(feature) for feature in categorical_features]
    return numerical_feature_indices, categorical_feature_indices
def write_yaml_file(file_path, data: dict):
    """
    Description: Dump a dictionary to a YAML file, creating parent folders
    =========================================================
    Params:
    file_path: destination of the YAML file
    data: dictionary to serialize
    =========================================================
    return None
    raise FertilizerException wrapping any error raised while writing
    """
    try:
        file_dir = os.path.dirname(file_path)
        # A bare filename has an empty dirname and os.makedirs("") raises
        # FileNotFoundError, so only create the folder when there is one.
        if file_dir:
            os.makedirs(file_dir, exist_ok=True)
        with open(file_path, "w") as file_writer:
            yaml.dump(data, file_writer)
    except Exception as e:
        raise FertilizerException(e, sys) from e
def save_object(file_path: str, obj: object) -> None:
    """
    Description: Serialize an object to a file with dill, creating parent folders
    =========================================================
    Params:
    file_path: destination of the serialized object
    obj: object to serialize
    =========================================================
    return None
    raise FertilizerException wrapping any error raised while saving
    """
    try:
        logging.info("Entered the save object method of utils")
        dir_path = os.path.dirname(file_path)
        # A bare filename has an empty dirname and os.makedirs("") raises
        # FileNotFoundError, so only create the folder when there is one.
        if dir_path:
            os.makedirs(dir_path, exist_ok=True)
        with open(file_path, "wb") as file_obj:
            dill.dump(obj, file_obj)
        logging.info("Exited the save object method of utils")
    except Exception as e:
        raise FertilizerException(e, sys) from e
def load_object(file_path: str) -> object:
    """
    Description: Read back an object that was serialized with dill
    =========================================================
    Params:
    file_path: location of the serialized object
    =========================================================
    return the deserialized object
    raise FertilizerException when the file is missing or cannot be loaded
    """
    try:
        file_missing = not os.path.exists(file_path)
        if file_missing:
            raise Exception(f"The file: {file_path} is not exists")

        with open(file_path, "rb") as source:
            loaded = dill.load(source)
        return loaded
    except Exception as error:
        raise FertilizerException(error, sys)
def save_numpy_array_data(file_path: str, array: np.ndarray):
    """
    save numpy array data to file
    file_path : str location of the file to save
    array: np.ndarray data to save
    raise: FertilizerException wrapping any error raised while saving
    """
    try:
        dir_path = os.path.dirname(file_path)
        # A bare filename has an empty dirname and os.makedirs("") raises
        # FileNotFoundError, so only create the folder when there is one.
        if dir_path:
            os.makedirs(dir_path, exist_ok=True)
        # The handle name must match the one passed to np.save; the previous
        # mismatch ("file_ojb" vs "file_obj") made every call fail.
        with open(file_path, "wb") as file_obj:
            np.save(file_obj, array)
    except Exception as e:
        raise FertilizerException(e, sys) from e
def load_numpy_array_data(file_path: str) -> np.ndarray:
    """
    load numpy array data from file
    file_path: str location of file to load
    return: np.ndarray data loaded
    raise: FertilizerException wrapping any error raised while loading
    """
    try:
        with open(file_path, "rb") as file_obj:
            # NOTE(security): allow_pickle=True can execute arbitrary code when
            # the file holds pickled objects; only load files this project wrote.
            return np.load(file_obj, allow_pickle=True)
    except Exception as e:
        # Use this module's exception type; the previously referenced
        # CropException is not defined here and caused a NameError instead.
        raise FertilizerException(e, sys) from e