Spaces:
Runtime error
Runtime error
| from networksecurity.exception.exception import NetworkSecurityException | |
| from networksecurity.logging.logger import logging | |
| import os | |
| import sys | |
| import numpy as np | |
| import pandas as pd | |
| from typing import List | |
| import pymongo | |
| from sklearn.model_selection import train_test_split | |
| ## Configuration of the data ingestion config | |
| from networksecurity.entity.config_entity import DataIngestionConfig | |
| from networksecurity.entity.artifact_entity import DataIngestionArtifact | |
| from dotenv import load_dotenv | |
# Pull variables from a local .env file (if one exists) into the process
# environment before the connection string is looked up.
load_dotenv()

# MongoDB connection string; None when the variable is not set.
MONGO_DB_URL = os.environ.get("MONGO_DB_URL")
class DataIngestion:
    """Load the raw dataset, persist it to the feature store and split it.

    The dataset is read from MongoDB when possible and from a CSV file
    otherwise. Failures are reported as ``NetworkSecurityException``.
    """

    def __init__(self, data_ingestion_config: "DataIngestionConfig"):
        """Store the ingestion settings.

        Args:
            data_ingestion_config: Object exposing ``database_name``,
                ``collection_name``, ``feature_store_file_path``,
                ``training_file_path``, ``testing_file_path`` and
                ``train_test_split_ratio``.
        """
        self.data_ingestion_config = data_ingestion_config

    @staticmethod
    def _default_csv_path() -> str:
        """Return the path of the sample CSV, two directories above this file's folder."""
        current_dir = os.path.dirname(os.path.abspath(__file__))
        root_dir = os.path.dirname(os.path.dirname(current_dir))
        return os.path.join(root_dir, "Network_data", "phisingData.csv")

    @staticmethod
    def _ensure_parent_dir(file_path: str) -> None:
        """Create the directory that will contain *file_path*, if it names one."""
        parent = os.path.dirname(file_path)
        # os.makedirs("") raises, so a bare file name needs no directory.
        if parent:
            os.makedirs(parent, exist_ok=True)

    @staticmethod
    def _normalise_missing_values(df: pd.DataFrame) -> pd.DataFrame:
        """Return *df* with the literal string "na" replaced by NaN."""
        return df.replace({"na": np.nan})

    def _read_from_mongodb(self) -> pd.DataFrame:
        """Fetch every document of the configured collection as a DataFrame."""
        database_name = self.data_ingestion_config.database_name
        collection_name = self.data_ingestion_config.collection_name
        self.mongo_client = pymongo.MongoClient(MONGO_DB_URL)
        try:
            collection = self.mongo_client[database_name][collection_name]
            # Materialise the cursor while the connection is still open.
            records = list(collection.find())
        finally:
            # Release the client's connections whether or not the read worked.
            self.mongo_client.close()

        df = pd.DataFrame(records)
        if "_id" in df.columns:
            df = df.drop(columns=["_id"])
        return df

    def _read_from_csv(self, csv_path: str) -> pd.DataFrame:
        """Load *csv_path*, wrapping any failure in NetworkSecurityException."""
        try:
            logging.info(f"MongoDB unavailable. Attempting to load CSV from: {csv_path}")
            df = pd.read_csv(csv_path)
            logging.info(f" Loaded {len(df)} rows from CSV")
            return df
        except FileNotFoundError as e:
            raise NetworkSecurityException(
                f"Sample CSV not found at {csv_path}. CWD: {os.getcwd()}", sys
            ) from e
        except Exception as e:
            # Parser, permission and empty-file errors are wrapped too, so
            # callers only ever have to handle one exception type.
            raise NetworkSecurityException(e, sys) from e

    def export_collection_as_dataframe(self, *, fallback_csv_path=None) -> pd.DataFrame:
        """Read the dataset from MongoDB, falling back to a CSV file.

        The MongoDB client is closed once the documents have been read, so
        ``self.mongo_client`` must not be reused afterwards.

        Args:
            fallback_csv_path: CSV file to load when the MongoDB read fails.
                Defaults to ``Network_data/phisingData.csv`` under the
                directory two levels above this file's folder.

        Returns:
            The dataset without the ``_id`` column and with "na" strings
            replaced by NaN, whichever source it came from.

        Raises:
            NetworkSecurityException: If the MongoDB read fails and the
                fallback CSV cannot be loaded either.
        """
        try:
            df = self._read_from_mongodb()
        except Exception as e:
            # Deliberate best effort: any MongoDB problem triggers the fallback.
            logging.warning(f"MongoDB unavailable, using sample CSV: {str(e)}")
            if fallback_csv_path is None:
                fallback_csv_path = self._default_csv_path()
            df = self._read_from_csv(fallback_csv_path)
        return self._normalise_missing_values(df)

    def export_data_into_feature_store(self, dataframe: pd.DataFrame) -> pd.DataFrame:
        """Write *dataframe* to the configured feature-store CSV.

        Args:
            dataframe: Data to persist.

        Returns:
            The same ``dataframe`` object that was passed in.

        Raises:
            NetworkSecurityException: If the file cannot be written.
        """
        try:
            feature_store_file_path = self.data_ingestion_config.feature_store_file_path
            self._ensure_parent_dir(feature_store_file_path)
            dataframe.to_csv(feature_store_file_path, index=False, header=True)
            return dataframe
        except Exception as e:
            raise NetworkSecurityException(e, sys) from e

    def split_data_as_train_test(self, dataframe: pd.DataFrame) -> None:
        """Split *dataframe* and write the train and test CSV files.

        Args:
            dataframe: Data to split; the test fraction is taken from
                ``train_test_split_ratio`` in the configuration.

        Raises:
            NetworkSecurityException: If splitting or writing fails.
        """
        try:
            config = self.data_ingestion_config
            train_set, test_set = train_test_split(
                dataframe, test_size=config.train_test_split_ratio
            )
            logging.info("Performed train test split on the dataframe")

            # The two files may live in different directories; create both.
            self._ensure_parent_dir(config.training_file_path)
            self._ensure_parent_dir(config.testing_file_path)

            logging.info("Exporting train and test file path")
            train_set.to_csv(config.training_file_path, index=False, header=True)
            test_set.to_csv(config.testing_file_path, index=False, header=True)
            logging.info("Exported train and test file path")
            logging.info("Exited split_data_as_train_test method of Data_Ingestion class")
        except Exception as e:
            raise NetworkSecurityException(e, sys) from e

    def initiate_data_ingestion(self) -> "DataIngestionArtifact":
        """Run the full ingestion: load, store, split.

        Returns:
            A ``DataIngestionArtifact`` holding the configured training and
            testing file paths.

        Raises:
            NetworkSecurityException: If any step fails.
        """
        try:
            dataframe = self.export_collection_as_dataframe()
            dataframe = self.export_data_into_feature_store(dataframe)
            self.split_data_as_train_test(dataframe)
            return DataIngestionArtifact(
                training_file_path=self.data_ingestion_config.training_file_path,
                testing_file_path=self.data_ingestion_config.testing_file_path,
            )
        except Exception as e:
            raise NetworkSecurityException(e, sys) from e