Spaces:
Runtime error
Runtime error
Inder-26 committed on
Commit ·
f228efb
1
Parent(s): 42da6ea
Data Validation done
Browse files- data_schema/schema.yaml +66 -0
- main.py +9 -2
- networksecurity/components/data_validation.py +125 -0
- networksecurity/constant/training_pipeline/__init__.py +12 -0
- networksecurity/entity/artifact_entity.py +10 -1
- networksecurity/entity/config_entity.py +53 -1
- networksecurity/utils/main_utils/__init__.py +0 -0
- networksecurity/utils/main_utils/utils.py +33 -0
- requirements.txt +2 -1
data_schema/schema.yaml
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
columns:
  - having_IP_Address: int64
  - URL_Length: int64
  - Shortining_Service: int64
  - having_At_Symbol: int64
  - double_slash_redirecting: int64
  - Prefix_Suffix: int64
  - having_Sub_Domain: int64
  - SSLfinal_State: int64
  - Domain_registeration_length: int64
  - Favicon: int64
  - port: int64
  - HTTPS_token: int64
  - Request_URL: int64
  - URL_of_Anchor: int64
  - Links_in_tags: int64
  - SFH: int64
  - Submitting_to_email: int64
  - Abnormal_URL: int64
  - Redirect: int64
  - on_mouseover: int64
  - RightClick: int64
  - popUpWidnow: int64
  - Iframe: int64
  - age_of_domain: int64
  - DNSRecord: int64
  - web_traffic: int64
  - Page_Rank: int64
  - Google_Index: int64
  - Links_pointing_to_page: int64
  - Statistical_report: int64
  - Result: int64


numerical_columns:
  - having_IP_Address
  - URL_Length
  - Shortining_Service
  - having_At_Symbol
  - double_slash_redirecting
  - Prefix_Suffix
  - having_Sub_Domain
  - SSLfinal_State
  - Domain_registeration_length
  - Favicon
  - port
  - HTTPS_token
  - Request_URL
  - URL_of_Anchor
  - Links_in_tags
  - SFH
  - Submitting_to_email
  - Abnormal_URL
  - Redirect
  - on_mouseover
  - RightClick
  - popUpWidnow
  - Iframe
  - age_of_domain
  - DNSRecord
  - web_traffic
  - Page_Rank
  - Google_Index
  - Links_pointing_to_page
  - Statistical_report
  - Result
|
main.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
| 1 |
from networksecurity.components.data_ingestion import DataIngestion
|
|
|
|
| 2 |
from networksecurity.exception.exception import NetworkSecurityException
|
| 3 |
from networksecurity.logging.logger import logging
|
| 4 |
-
from networksecurity.entity.config_entity import DataIngestionConfig
|
| 5 |
from networksecurity.entity.config_entity import TraningPipelineConfig
|
| 6 |
import sys
|
| 7 |
|
|
@@ -13,6 +14,12 @@ if __name__ == "__main__":
|
|
| 13 |
logging.info("Initiate the data ingestion")
|
| 14 |
dataingestionartifact=dataingestion.initiate_data_ingestion()
|
| 15 |
print(dataingestionartifact)
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
except Exception as e:
|
| 18 |
raise NetworkSecurityException(e, sys)
|
|
|
|
| 1 |
from networksecurity.components.data_ingestion import DataIngestion
|
| 2 |
+
from networksecurity.components.data_validation import DataValidation
|
| 3 |
from networksecurity.exception.exception import NetworkSecurityException
|
| 4 |
from networksecurity.logging.logger import logging
|
| 5 |
+
from networksecurity.entity.config_entity import DataIngestionConfig,DataValidationConfig
|
| 6 |
from networksecurity.entity.config_entity import TraningPipelineConfig
|
| 7 |
import sys
|
| 8 |
|
|
|
|
| 14 |
logging.info("Initiate the data ingestion")
|
| 15 |
dataingestionartifact=dataingestion.initiate_data_ingestion()
|
| 16 |
print(dataingestionartifact)
|
| 17 |
+
data_validation_config=DataValidationConfig(traningpipelineconfig)
|
| 18 |
+
datavalidation=DataValidation(dataingestionartifact,data_validation_config)
|
| 19 |
+
logging.info("Initiate the data validation")
|
| 20 |
+
data_validation_artifact=datavalidation.initiate_data_validation()
|
| 21 |
+
logging.info(f"Data validation completed {data_validation_artifact}")
|
| 22 |
+
print(data_validation_artifact)
|
| 23 |
+
|
| 24 |
except Exception as e:
|
| 25 |
raise NetworkSecurityException(e, sys)
|
networksecurity/components/data_validation.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from networksecurity.entity.artifact_entity import DataIngestionArtifact, DataValidationArtifact
|
| 2 |
+
from networksecurity.entity.config_entity import DataValidationConfig
|
| 3 |
+
from networksecurity.exception.exception import NetworkSecurityException
|
| 4 |
+
from networksecurity.constant.training_pipeline import SCHEMA_FILE_PATH
|
| 5 |
+
from networksecurity.utils.main_utils.utils import read_yaml_file
|
| 6 |
+
from networksecurity.utils.main_utils.utils import write_yaml_file
|
| 7 |
+
from networksecurity.logging.logger import logging
|
| 8 |
+
from scipy.stats import ks_2samp
|
| 9 |
+
import pandas as pd
|
| 10 |
+
import os,sys
|
| 11 |
+
|
| 12 |
+
class DataValidation:
    """Validates the ingested train/test datasets against the project schema,
    runs a per-column drift check, persists the validated CSVs, and returns a
    DataValidationArtifact describing the results.
    """

    def __init__(self, data_ingestion_artifact: DataIngestionArtifact,
                 data_validation_config: DataValidationConfig):
        """
        Args:
            data_ingestion_artifact: Paths of the ingested train/test CSVs.
            data_validation_config: Output paths for the validation stage.

        Raises:
            NetworkSecurityException: If the schema file cannot be read.
        """
        try:
            self.data_ingestion_artifact = data_ingestion_artifact
            self.data_validation_config = data_validation_config
            # Expected column layout, loaded once from data_schema/schema.yaml.
            self._schema_config = read_yaml_file(SCHEMA_FILE_PATH)
        except Exception as e:
            raise NetworkSecurityException(e, sys)

    @staticmethod
    def read_data(file_path) -> pd.DataFrame:
        """Read a CSV file into a pandas DataFrame.

        Raises:
            NetworkSecurityException: If the file is missing or unparseable.
        """
        try:
            return pd.read_csv(file_path)
        except Exception as e:
            raise NetworkSecurityException(e, sys)

    def validate_number_of_columns(self, dataframe: pd.DataFrame) -> bool:
        """Return True when `dataframe` has exactly as many columns as the
        schema's `columns` section declares."""
        try:
            # BUG FIX: the original used len(self._schema_config), which counts
            # the *top-level keys* of the schema ("columns",
            # "numerical_columns") — i.e. 2 — so this check could never pass.
            # Count the entries under "columns" instead.
            number_of_columns = len(self._schema_config["columns"])
            logging.info(f"Required number of columns: {number_of_columns}")
            logging.info(f"DataFrame columns count: {len(dataframe.columns)}")

            return len(dataframe.columns) == number_of_columns
        except Exception as e:
            raise NetworkSecurityException(e, sys)

    def validate_number_of_numerical_columns(self, dataframe: pd.DataFrame) -> bool:
        """Return True when the dataframe's int64 column count matches the
        schema's `numerical_columns` section."""
        try:
            # Required numerical columns from schema
            required_numerical_columns = self._schema_config["numerical_columns"]
            required_count = len(required_numerical_columns)

            # NOTE(review): only int64 dtypes are counted, so float columns
            # would be missed — acceptable while the schema is all-int64.
            numerical_df = dataframe.select_dtypes(include=["int64"])
            actual_count = len(numerical_df.columns)

            logging.info(f"Required number of numerical columns: {required_count}")
            logging.info(f"Dataframe numerical columns count: {actual_count}")

            return actual_count == required_count
        except Exception as e:
            raise NetworkSecurityException(e, sys)

    def detect_dataset_drift(self, base_df, current_df, threshold=0.01) -> bool:
        """Run a two-sample Kolmogorov-Smirnov test per column and write a
        YAML drift report.

        Args:
            base_df: Reference dataframe (train split).
            current_df: Dataframe to compare against the reference (test split).
            threshold: p-value below which a column is flagged as drifted.

        Returns:
            True when no column drifted (all p-values >= threshold), else False.
        """
        try:
            status = True
            report = {}
            for column in base_df.columns:
                d1 = base_df[column]
                d2 = current_df[column]
                is_same_dist = ks_2samp(d1, d2)
                # A small p-value means the two samples are unlikely to come
                # from the same distribution => drift detected.
                is_found = is_same_dist.pvalue < threshold
                if is_found:
                    status = False
                report[column] = {
                    "p_value": float(is_same_dist.pvalue),
                    "drift_status": is_found,
                }
            drift_report_file_path = self.data_validation_config.drift_report_file_path
            dir_path = os.path.dirname(drift_report_file_path)
            os.makedirs(dir_path, exist_ok=True)
            write_yaml_file(file_path=drift_report_file_path, content=report)
            # BUG FIX: the original fell off the end and implicitly returned
            # None despite the `-> bool` annotation, so the caller's
            # validation_status was always None.
            return status
        except Exception as e:
            raise NetworkSecurityException(e, sys)

    def initiate_data_validation(self) -> DataValidationArtifact:
        """Validate both splits, run the drift check, persist validated CSVs,
        and return the resulting artifact.

        Raises:
            NetworkSecurityException: On any failure in the pipeline step.
        """
        try:
            train_file_path = self.data_ingestion_artifact.training_file_path
            test_file_path = self.data_ingestion_artifact.testing_file_path

            ## Read data from train and test
            train_dataframe = DataValidation.read_data(train_file_path)
            test_dataframe = DataValidation.read_data(test_file_path)

            ## Validate number of columns.
            # BUG FIX: the original assigned error messages to a local and
            # silently discarded them — surface the failures in the log.
            if not self.validate_number_of_columns(dataframe=train_dataframe):
                logging.error("Train dataframe does not contain all columns")
            if not self.validate_number_of_columns(dataframe=test_dataframe):
                logging.error("Test dataframe does not contain all columns")
            if not self.validate_number_of_numerical_columns(dataframe=train_dataframe):
                logging.error("Train dataframe does not contain all numerical columns")
            if not self.validate_number_of_numerical_columns(dataframe=test_dataframe):
                logging.error("Test dataframe does not contain all numerical columns")

            ## Check data drift; the overall status is the drift result.
            status = self.detect_dataset_drift(base_df=train_dataframe, current_df=test_dataframe)
            dir_path = os.path.dirname(self.data_validation_config.valid_train_file_path)
            os.makedirs(dir_path, exist_ok=True)

            train_dataframe.to_csv(self.data_validation_config.valid_train_file_path, index=False, header=True)
            test_dataframe.to_csv(self.data_validation_config.valid_test_file_path, index=False, header=True)

            data_validation_artifact = DataValidationArtifact(
                validation_status=status,
                valid_train_file_path=self.data_validation_config.valid_train_file_path,
                valid_test_file_path=self.data_validation_config.valid_test_file_path,
                # Invalid splits are not produced yet — kept None as before.
                invalid_train_file_path=None,
                invalid_test_file_path=None,
                drift_report_file_path=self.data_validation_config.drift_report_file_path
            )
            return data_validation_artifact

        except Exception as e:
            raise NetworkSecurityException(e, sys)
|
networksecurity/constant/training_pipeline/__init__.py
CHANGED
|
@@ -14,6 +14,8 @@ FILE_NAME: str = "phisingkData.csv"
|
|
| 14 |
TRAIN_FILE_NAME: str = "train.csv"
|
| 15 |
TEST_FILE_NAME: str = "test.csv"
|
| 16 |
|
|
|
|
|
|
|
| 17 |
"""
|
| 18 |
Data Ingestion realted constant start with DATA_INGESTION VAR NAME
|
| 19 |
"""
|
|
@@ -24,3 +26,13 @@ DATA_INGESTION_DIR_NAME: str = "data_ingestion"
|
|
| 24 |
DATA_INGESTION_FEATURE_STORE_DIR: str = "feature_store"
|
| 25 |
DATA_INGESTION_INGESTED_DIR: str = "ingested"
|
| 26 |
DATA_INGESTION_TRAIN_TEST_SPLIT_RATION: float = 0.2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
TRAIN_FILE_NAME: str = "train.csv"
TEST_FILE_NAME: str = "test.csv"

# Schema describing the expected dataset columns; consumed by data validation.
# Annotated `str` for consistency with the other file/path constants.
SCHEMA_FILE_PATH: str = os.path.join("data_schema", "schema.yaml")

"""
Data Ingestion related constant start with DATA_INGESTION VAR NAME
"""
DATA_INGESTION_FEATURE_STORE_DIR: str = "feature_store"
DATA_INGESTION_INGESTED_DIR: str = "ingested"
# Fraction of rows held out as the test split.
DATA_INGESTION_TRAIN_TEST_SPLIT_RATION: float = 0.2
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
"""
|
| 32 |
+
Data Validation related constant start with DATA_VALIDATION VAR NAME
|
| 33 |
+
"""
|
| 34 |
+
DATA_VALIDATION_DIR_NAME: str ="data_validation"
|
| 35 |
+
DATA_VALIDATION_VALID_DIR: str ="validated"
|
| 36 |
+
DATA_VALIDATION_INVALID_DIR: str ="invalid"
|
| 37 |
+
DATA_VALIDATION_DRIFT_REPORT_DIR: str ="drift_report"
|
| 38 |
+
DATA_VALIDATION_DRIFT_REPORT_FILE_NAME: str ="report.yaml"
|
networksecurity/entity/artifact_entity.py
CHANGED
|
@@ -3,4 +3,13 @@ from dataclasses import dataclass
|
|
| 3 |
@dataclass
|
| 4 |
class DataIngestionArtifact:
|
| 5 |
training_file_path:str
|
| 6 |
-
testing_file_path:str
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
@dataclass
class DataIngestionArtifact:
    # Output of the data-ingestion component: file-system locations of the
    # train/test split CSVs.
    training_file_path:str
    testing_file_path:str
|
| 7 |
+
|
| 8 |
+
@dataclass
class DataValidationArtifact:
    # Output of the data-validation component.
    validation_status: bool  # overall result of the validation/drift check
    valid_train_file_path: str
    valid_test_file_path: str
    # Set to None by initiate_data_validation in this commit — invalid rows
    # are not split out yet.
    invalid_train_file_path: str
    invalid_test_file_path: str
    drift_report_file_path: str  # YAML report of per-column drift results
|
networksecurity/entity/config_entity.py
CHANGED
|
@@ -30,4 +30,56 @@ class DataIngestionConfig:
|
|
| 30 |
)
|
| 31 |
self.train_test_split_ratio: float = training_pipeline.DATA_INGESTION_TRAIN_TEST_SPLIT_RATION
|
| 32 |
self.collection_name: str = training_pipeline.DATA_INGESTION_COLLECTION_NAME
|
| 33 |
-
self.database_name: str = training_pipeline.DATA_INGESTION_DATABASE_NAME
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
)
|
| 31 |
self.train_test_split_ratio: float = training_pipeline.DATA_INGESTION_TRAIN_TEST_SPLIT_RATION
|
| 32 |
self.collection_name: str = training_pipeline.DATA_INGESTION_COLLECTION_NAME
|
| 33 |
+
self.database_name: str = training_pipeline.DATA_INGESTION_DATABASE_NAME
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
class DataValidationConfig:
    """Filesystem layout for the data-validation stage.

    All paths hang off the pipeline's artifact directory:
    <artifact_dir>/data_validation/{validated,invalid,drift_report}/...
    """

    def __init__(self, training_pipeline_config: TraningPipelineConfig):
        # Root directory for all data-validation outputs.
        self.data_validation_dir: str = os.path.join(
            training_pipeline_config.artifact_dir,
            training_pipeline.DATA_VALIDATION_DIR_NAME,
        )

        # Sub-directories for records that pass / fail validation.
        self.valid_data_dir: str = os.path.join(
            self.data_validation_dir, training_pipeline.DATA_VALIDATION_VALID_DIR)
        self.invalid_data_dir: str = os.path.join(
            self.data_validation_dir, training_pipeline.DATA_VALIDATION_INVALID_DIR)

        # Train/test CSV locations inside each of those directories.
        self.valid_train_file_path: str = os.path.join(
            self.valid_data_dir, training_pipeline.TRAIN_FILE_NAME)
        self.valid_test_file_path: str = os.path.join(
            self.valid_data_dir, training_pipeline.TEST_FILE_NAME)
        self.invalid_train_file_path: str = os.path.join(
            self.invalid_data_dir, training_pipeline.TRAIN_FILE_NAME)
        self.invalid_test_file_path: str = os.path.join(
            self.invalid_data_dir, training_pipeline.TEST_FILE_NAME)

        # YAML report produced by the drift check.
        self.drift_report_file_path: str = os.path.join(
            self.data_validation_dir,
            training_pipeline.DATA_VALIDATION_DRIFT_REPORT_DIR,
            training_pipeline.DATA_VALIDATION_DRIFT_REPORT_FILE_NAME,
        )
|
networksecurity/utils/main_utils/__init__.py
ADDED
|
File without changes
|
networksecurity/utils/main_utils/utils.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import yaml
|
| 2 |
+
from networksecurity.exception.exception import NetworkSecurityException
|
| 3 |
+
from networksecurity.logging.logger import logging
|
| 4 |
+
import os,sys
|
| 5 |
+
import numpy as np
|
| 6 |
+
import dill
|
| 7 |
+
import pickle
|
| 8 |
+
|
| 9 |
+
def read_yaml_file(file_path: str) -> dict:
    """Load a YAML file and return its parsed contents.

    Args:
        file_path: Path to the YAML file on disk.

    Returns:
        The deserialized YAML content (typically a dict).

    Raises:
        NetworkSecurityException: If the file cannot be opened or parsed.
    """
    try:
        with open(file_path, 'rb') as fh:
            return yaml.safe_load(fh)
    except Exception as exc:
        raise NetworkSecurityException(exc, sys)
|
| 23 |
+
|
| 24 |
+
def write_yaml_file(file_path: str, content: object, replace: bool = False) -> None:
    """Serialize *content* as YAML to *file_path*, creating parent directories.

    Args:
        file_path: Destination path for the YAML file.
        content: Any YAML-serializable object.
        replace: When True, delete a pre-existing file first.

    Raises:
        NetworkSecurityException: If the file cannot be written.
    """
    try:
        if replace and os.path.exists(file_path):
            os.remove(file_path)
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        with open(file_path, 'w') as fh:
            yaml.dump(content, fh)
    except Exception as exc:
        raise NetworkSecurityException(exc, sys)
|
requirements.txt
CHANGED
|
@@ -6,5 +6,6 @@ pymongo
|
|
| 6 |
certifi
|
| 7 |
pymongo[srv]==3.11
|
| 8 |
scikit-learn
|
| 9 |
-
|
|
|
|
| 10 |
#-e .
|
|
|
|
| 6 |
certifi
pymongo[srv]==3.11
scikit-learn
dill
# PyYAML provides the `yaml` module imported by utils.py ("pyaml" is a
# different wrapper package).
pyyaml
#-e .
|