ALYYAN committed
Commit 30672d3 · 1 Parent(s): 8cafdb8

Define DVC pipeline for data ingestion and training

.dvc/config CHANGED
@@ -0,0 +1,4 @@
+[core]
+    remote = myremote
+['remote "myremote"']
+    url = ../../dvc-storage
.github/workflows/.gitkeep ADDED
File without changes
app.py ADDED
File without changes
config/config.yaml ADDED
@@ -0,0 +1,18 @@
+artifacts_root: artifacts
+
+data_ingestion:
+  root_dir: artifacts/data_ingestion
+  dataset_name: frabbisw/facial-age
+  local_data_file: artifacts/data_ingestion/data.zip
+  unzip_dir: artifacts/data_ingestion
+
+data_preparation:
+  root_dir: artifacts/data_preparation
+  data_path: artifacts/data_ingestion/face_age
+  dataset_name: facial_age_prepared_dataset
+
+model_trainer:
+  root_dir: artifacts/model_trainer
+  trained_model_path: artifacts/model_trainer/facial_age_detector_model
+  # Using EfficientFormer-L1, a much lighter model than ViT
+  model_name: "snap-research/efficientformer-l1"
dvc.yaml ADDED
@@ -0,0 +1,20 @@
+stages:
+  data_ingestion:
+    cmd: python src/cnnClassifier/pipeline/stage_01_data_ingestion.py
+    deps:
+      - src/cnnClassifier/pipeline/stage_01_data_ingestion.py
+      - src/cnnClassifier/components/data_ingestion.py
+      - config/config.yaml
+    outs:
+      - artifacts/data_ingestion
+
+  model_training:
+    cmd: python src/cnnClassifier/pipeline/stage_02_model_training.py
+    deps:
+      - src/cnnClassifier/pipeline/stage_02_model_training.py
+      - src/cnnClassifier/components/model_trainer.py
+      - config/config.yaml
+      - params.yaml
+      - artifacts/data_ingestion  # Depends on the output of the previous stage
+    outs:
+      - artifacts/model_trainer
+
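Note (not part of the commit): with this dvc.yaml, `artifacts/data_ingestion` is both an output of the first stage and a dependency of the second, so DVC runs the stages in order and skips any stage whose deps are unchanged. A minimal sketch of driving the pipeline from Python (e.g. in a CI step) — the CLI commands are standard DVC, the wrapper script itself is hypothetical:

```python
# Hypothetical helper: reproduce the DVC pipeline and push versioned outputs
# to the configured remote ("myremote", a local directory per .dvc/config).
import subprocess

subprocess.run(["dvc", "repro"], check=True)  # re-runs only stages whose deps changed
subprocess.run(["dvc", "push"], check=True)   # uploads tracked outputs to the remote
```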
main.py ADDED
@@ -0,0 +1,23 @@
+from cnnClassifier import logger
+from cnnClassifier.pipeline.stage_01_data_ingestion import DataIngestionTrainingPipeline
+from cnnClassifier.pipeline.stage_02_model_training import ModelTrainingPipeline
+
+STAGE_NAME = "Data Ingestion stage"
+try:
+    logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
+    data_ingestion = DataIngestionTrainingPipeline()
+    data_ingestion.main()
+    logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
+except Exception as e:
+    logger.exception(e)
+    raise e
+
+STAGE_NAME = "Model Training stage"
+try:
+    logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
+    model_training = ModelTrainingPipeline()
+    model_training.main()
+    logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
+except Exception as e:
+    logger.exception(e)
+    raise e
params.yaml ADDED
@@ -0,0 +1,9 @@
+# Training Parameters
+IMAGE_SIZE: 224  # EfficientFormer-L1 was trained on 224x224
+LEARNING_RATE: 2.0e-5  # written with a decimal point so YAML parses it as a float
+BATCH_SIZE: 32
+NUM_TRAIN_EPOCHS: 20  # Adjust as needed
+WEIGHT_DECAY: 0.01
+WARMUP_STEPS: 100
+TEST_SPLIT_SIZE: 0.2
+RANDOM_STATE: 42
readme.md ADDED
File without changes
requirements.txt ADDED
@@ -0,0 +1,34 @@
+# For PyTorch with CUDA 11.8 - MUST be installed with the extra index URL
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch==2.1.0+cu118
+torchvision==0.16.0+cu118
+torchaudio==2.1.0
+
+# Pin NumPy to a version compatible with Torch 2.1.0
+numpy<2.0
+
+# Hugging Face
+transformers
+datasets>=2.14.5
+evaluate
+accelerate>=0.27
+
+# MLOps and Utilities
+mlflow
+dvc[s3]  # Assuming you might use S3 with DVC for AWS
+python-box
+PyYAML
+ensure
+pandas
+scikit-learn
+Pillow
+tqdm
+imblearn
+
+# Frontend and Real-time Processing
+streamlit
+opencv-python
+mtcnn
+
+# AWS Deployment
+boto3
research/trials.ipynb ADDED
File without changes
setup.py ADDED
@@ -0,0 +1,28 @@
+import setuptools
+
+with open("README.md", "r", encoding="utf-8") as f:
+    long_description = f.read()
+
+__version__ = "0.0.0"
+
+REPO_NAME = "Facial-Age-Detection"
+AUTHOR_USER_NAME = "AlyyanAhmed21"  # Change this
+SRC_REPO = "cnnClassifier"
+AUTHOR_EMAIL = "alyyanawan19@gmail.com"  # Change this
+
+
+setuptools.setup(
+    name=SRC_REPO,
+    version=__version__,
+    author=AUTHOR_USER_NAME,
+    author_email=AUTHOR_EMAIL,
+    description="A python package for facial age detection app",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    url=f"https://github.com/{AUTHOR_USER_NAME}/{REPO_NAME}",
+    project_urls={
+        "Bug Tracker": f"https://github.com/{AUTHOR_USER_NAME}/{REPO_NAME}/issues",
+    },
+    package_dir={"": "src"},
+    packages=setuptools.find_packages(where="src")
+)
src/cnnClassifier/__init__.py ADDED
File without changes
src/cnnClassifier/components/__init__.py ADDED
File without changes
src/cnnClassifier/components/data_ingestion.py ADDED
@@ -0,0 +1,34 @@
+import os
+import zipfile
+from cnnClassifier import logger
+from cnnClassifier.entity.config_entity import DataIngestionConfig
+
+class DataIngestion:
+    def __init__(self, config: DataIngestionConfig):
+        self.config = config
+
+    def download_file(self):
+        """
+        Downloads the dataset from Kaggle.
+        Make sure to have your kaggle.json file in ~/.kaggle/ or set KAGGLE_USERNAME and KAGGLE_KEY env variables.
+        """
+        try:
+            logger.info(f"Downloading dataset from kaggle: {self.config.dataset_name}")
+            os.system(f"kaggle datasets download {self.config.dataset_name} -p {os.path.dirname(self.config.local_data_file)}")
+            # The downloaded file will be named 'facial-age.zip'. We need to rename it to 'data.zip' as per our config.
+            downloaded_zip_path = os.path.join(os.path.dirname(self.config.local_data_file), 'facial-age.zip')
+            os.rename(downloaded_zip_path, self.config.local_data_file)
+            logger.info(f"Dataset downloaded and saved at {self.config.local_data_file}")
+        except Exception as e:
+            logger.error(f"Failed to download dataset. Error: {e}")
+            raise e
+
+    def extract_zip_file(self):
+        """
+        Extracts the zip file into the data directory
+        """
+        unzip_path = self.config.unzip_dir
+        os.makedirs(unzip_path, exist_ok=True)
+        with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref:
+            zip_ref.extractall(unzip_path)
+        logger.info(f"Dataset extracted to {unzip_path}")
src/cnnClassifier/components/model_trainer.py ADDED
@@ -0,0 +1,174 @@
+import torch
+import pandas as pd
+from pathlib import Path
+from tqdm import tqdm
+from datasets import Dataset, Image, ClassLabel
+from imblearn.over_sampling import RandomOverSampler
+from transformers import (
+    EfficientFormerImageProcessor,
+    EfficientFormerForImageClassification,
+    TrainingArguments,
+    Trainer,
+    DefaultDataCollator
+)
+from torchvision.transforms import (
+    Compose,
+    Normalize,
+    RandomRotation,
+    RandomResizedCrop,
+    RandomHorizontalFlip,
+    Resize,
+    ToTensor
+)
+import evaluate
+from cnnClassifier import logger
+from cnnClassifier.entity.config_entity import ModelTrainerConfig
+
+class ModelTrainer:
+    def __init__(self, config: ModelTrainerConfig):
+        self.config = config
+        self.label2id = None
+        self.id2label = None
+
+    def _prepare_data(self):
+        logger.info("Preparing data...")
+        label_dict = {'001': '01', '002': '02', '003': '03', '004': '04', '005': '05',
+                      '006': '06-07', '007': '06-07', '008': '08-09', '009': '08-09',
+                      '010': '10-12', '011': '10-12', '012': '10-12', '013': '13-15',
+                      '014': '13-15', '015': '13-15', '016': '16-20', '017': '16-20',
+                      '018': '16-20', '019': '16-20', '020': '16-20', '021': '21-25',
+                      '022': '21-25', '023': '21-25', '024': '21-25', '025': '21-25',
+                      '026': '26-30', '027': '26-30', '028': '26-30', '029': '26-30',
+                      '030': '26-30', '031': '31-35', '032': '31-35', '033': '31-35',
+                      '034': '31-35', '035': '31-35', '036': '36-40', '037': '36-40',
+                      '038': '36-40', '039': '36-40', '040': '36-40', '041': '41-45',
+                      '042': '41-45', '043': '41-45', '044': '41-45', '045': '41-45',
+                      '046': '46-50', '047': '46-50', '048': '46-50', '049': '46-50',
+                      '050': '46-50', '051': '51-55', '052': '51-55', '053': '51-55',
+                      '054': '51-55', '055': '51-55', '056': '56-60', '057': '56-60',
+                      '058': '56-60', '059': '56-60', '060': '56-60', '061': '61-65',
+                      '062': '61-65', '063': '61-65', '064': '61-65', '065': '61-65',
+                      '066': '66-70', '067': '66-70', '068': '66-70', '069': '66-70',
+                      '070': '66-70', '071': '71-80', '072': '71-80', '073': '71-80',
+                      '074': '71-80', '075': '71-80', '076': '71-80', '077': '71-80',
+                      '078': '71-80', '079': '71-80', '080': '71-80', '081': '81-90',
+                      '082': '81-90', '083': '81-90', '084': '81-90', '085': '81-90',
+                      '086': '81-90', '087': '81-90', '088': '81-90', '089': '81-90',
+                      '090': '81-90', '091': '90+', '092': '90+', '093': '90+',
+                      '095': '90+', '096': '90+', '099': '90+', '100': '90+',
+                      '101': '90+', '110': '90+'}
+
+        file_names, labels = [], []
+        data_path = Path(self.config.data_path)
+        for file in tqdm(sorted(data_path.glob('*/*.*'))):
+            label = str(file).split('/')[-2]
+            labels.append(label_dict[label])
+            file_names.append(str(file))
+
+        df = pd.DataFrame.from_dict({"image": file_names, "label": labels})
+
+        # Random oversampling
+        ros = RandomOverSampler(random_state=self.config.random_state)
+        df_resampled, y_resampled = ros.fit_resample(df[['image']], df['label'])
+        df = pd.concat([df_resampled, y_resampled], axis=1)
+
+        dataset = Dataset.from_pandas(df).cast_column("image", Image())
+
+        labels_list = sorted(list(set(labels)))
+        self.label2id = {label: i for i, label in enumerate(labels_list)}
+        self.id2label = {i: label for i, label in enumerate(labels_list)}
+
+        ClassLabels = ClassLabel(num_classes=len(labels_list), names=labels_list)
+        dataset = dataset.map(lambda x: {'label': ClassLabels.str2int(x['label'])}, batched=True)
+        dataset = dataset.cast_column('label', ClassLabels)
+
+        return dataset.train_test_split(test_size=self.config.test_split_size, shuffle=True, stratify_by_column="label")
+
+    def train(self):
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        logger.info(f"Using device: {device}")
+
+        split_dataset = self._prepare_data()
+        train_data = split_dataset['train']
+        test_data = split_dataset['test']
+
+        processor = EfficientFormerImageProcessor.from_pretrained(self.config.model_name)
+
+        image_mean, image_std = processor.image_mean, processor.image_std
+        size = self.config.image_size
+
+        normalize = Normalize(mean=image_mean, std=image_std)
+        _train_transforms = Compose([
+            Resize((size, size)),
+            RandomRotation(15),
+            RandomHorizontalFlip(0.5),
+            ToTensor(),
+            normalize
+        ])
+        _val_transforms = Compose([
+            Resize((size, size)),
+            ToTensor(),
+            normalize
+        ])
+
+        def train_transforms(examples):
+            examples['pixel_values'] = [_train_transforms(image.convert("RGB")) for image in examples['image']]
+            return examples
+
+        def val_transforms(examples):
+            examples['pixel_values'] = [_val_transforms(image.convert("RGB")) for image in examples['image']]
+            return examples
+
+        train_data.set_transform(train_transforms)
+        test_data.set_transform(val_transforms)
+
+        def collate_fn(examples):
+            pixel_values = torch.stack([example["pixel_values"] for example in examples])
+            labels = torch.tensor([example['label'] for example in examples])
+            return {"pixel_values": pixel_values, "labels": labels}
+
+        model = EfficientFormerForImageClassification.from_pretrained(
+            self.config.model_name,
+            num_labels=len(self.id2label),
+            id2label=self.id2label,
+            label2id=self.label2id,
+            ignore_mismatched_sizes=True  # Important for transfer learning
+        ).to(device)
+
+        accuracy = evaluate.load("accuracy")
+        def compute_metrics(eval_pred):
+            predictions, label_ids = eval_pred
+            predicted_labels = predictions.argmax(axis=1)
+            return accuracy.compute(predictions=predicted_labels, references=label_ids)
+
+        args = TrainingArguments(
+            output_dir=self.config.root_dir,
+            logging_dir=f'{self.config.root_dir}/logs',
+            evaluation_strategy="epoch",
+            learning_rate=self.config.learning_rate,
+            per_device_train_batch_size=self.config.batch_size,
+            per_device_eval_batch_size=self.config.batch_size,
+            num_train_epochs=self.config.num_train_epochs,
+            weight_decay=self.config.weight_decay,
+            warmup_steps=self.config.warmup_steps,
+            save_strategy='epoch',
+            load_best_model_at_end=True,
+            metric_for_best_model="accuracy",
+            save_total_limit=1,
+            report_to="none"
+        )
+
+        trainer = Trainer(
+            model=model,
+            args=args,
+            train_dataset=train_data,
+            eval_dataset=test_data,
+            data_collator=collate_fn,
+            compute_metrics=compute_metrics,
+            tokenizer=processor,
+        )
+
+        trainer.train()
+
+        logger.info(f"Saving best model to {self.config.trained_model_path}")
+        trainer.save_model(self.config.trained_model_path)
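Because the Trainer is constructed with `tokenizer=processor`, `trainer.save_model(...)` writes the image-processor config alongside the fine-tuned weights, so the checkpoint can be reloaded for inference (for example in app.py). A hedged sketch, assuming a completed training run produced `artifacts/model_trainer/facial_age_detector_model`; `some_face.jpg` is a placeholder path:

```python
# Inference sketch (not part of this commit): load the checkpoint saved by
# ModelTrainer.train() and predict an age group for a single image.
import torch
from PIL import Image
from transformers import AutoImageProcessor, EfficientFormerForImageClassification

model_dir = "artifacts/model_trainer/facial_age_detector_model"
processor = AutoImageProcessor.from_pretrained(model_dir)
model = EfficientFormerForImageClassification.from_pretrained(model_dir)
model.eval()

image = Image.open("some_face.jpg").convert("RGB")  # placeholder image path
inputs = processor(images=image, return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits
print(model.config.id2label[int(logits.argmax(-1))])  # e.g. "26-30"
```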
src/cnnClassifier/config/__init__.py ADDED
File without changes
src/cnnClassifier/config/configuration.py ADDED
@@ -0,0 +1,58 @@
+from cnnClassifier.constants import *
+from cnnClassifier.utils.common import read_yaml, create_directories
+from cnnClassifier.entity.config_entity import DataIngestionConfig, DataPreparationConfig, ModelTrainerConfig
+
+class ConfigurationManager:
+    def __init__(
+        self,
+        config_filepath = CONFIG_FILE_PATH,
+        params_filepath = PARAMS_FILE_PATH):
+
+        self.config = read_yaml(config_filepath)
+        self.params = read_yaml(params_filepath)
+
+        create_directories([self.config.artifacts_root])
+
+    def get_data_ingestion_config(self) -> DataIngestionConfig:
+        config = self.config.data_ingestion
+
+        create_directories([config.root_dir])
+
+        data_ingestion_config = DataIngestionConfig(
+            root_dir=config.root_dir,
+            dataset_name=config.dataset_name,
+            local_data_file=config.local_data_file,
+            unzip_dir=config.unzip_dir
+        )
+        return data_ingestion_config
+
+    def get_data_preparation_config(self) -> DataPreparationConfig:
+        config = self.config.data_preparation
+        create_directories([config.root_dir])
+
+        data_preparation_config = DataPreparationConfig(
+            root_dir=config.root_dir,
+            data_path=config.data_path,
+            dataset_name=config.dataset_name
+        )
+        return data_preparation_config
+
+    def get_model_trainer_config(self) -> ModelTrainerConfig:
+        config = self.config.model_trainer
+        params = self.params
+        create_directories([config.root_dir])
+
+        model_trainer_config = ModelTrainerConfig(
+            root_dir=Path(config.root_dir),
+            trained_model_path=Path(config.trained_model_path),
+            model_name=config.model_name,
+            image_size=params.IMAGE_SIZE,
+            learning_rate=params.LEARNING_RATE,
+            batch_size=params.BATCH_SIZE,
+            num_train_epochs=params.NUM_TRAIN_EPOCHS,
+            weight_decay=params.WEIGHT_DECAY,
+            warmup_steps=params.WARMUP_STEPS,
+            test_split_size=params.TEST_SPLIT_SIZE,
+            random_state=params.RANDOM_STATE
+        )
+        return model_trainer_config
src/cnnClassifier/constants/__init__.py ADDED
@@ -0,0 +1,4 @@
+from pathlib import Path
+
+CONFIG_FILE_PATH = Path("config/config.yaml")
+PARAMS_FILE_PATH = Path("params.yaml")
src/cnnClassifier/entity/__init__.py ADDED
File without changes
src/cnnClassifier/entity/config_entity.py ADDED
@@ -0,0 +1,29 @@
+from dataclasses import dataclass
+from pathlib import Path
+
+@dataclass(frozen=True)
+class DataIngestionConfig:
+    root_dir: Path
+    dataset_name: str
+    local_data_file: Path
+    unzip_dir: Path
+
+@dataclass(frozen=True)
+class DataPreparationConfig:
+    root_dir: Path
+    data_path: Path
+    dataset_name: str
+
+@dataclass(frozen=True)
+class ModelTrainerConfig:
+    root_dir: Path
+    trained_model_path: Path
+    model_name: str
+    image_size: int
+    learning_rate: float
+    batch_size: int
+    num_train_epochs: int
+    weight_decay: float
+    warmup_steps: int
+    test_split_size: float
+    random_state: int
src/cnnClassifier/pipeline/__init__.py ADDED
File without changes
src/cnnClassifier/pipeline/stage_01_data_ingestion.py ADDED
@@ -0,0 +1,27 @@
+from cnnClassifier.config.configuration import ConfigurationManager
+from cnnClassifier.components.data_ingestion import DataIngestion
+from cnnClassifier import logger
+
+STAGE_NAME = "Data Ingestion stage"
+
+class DataIngestionTrainingPipeline:
+    def __init__(self):
+        pass
+
+    def main(self):
+        config = ConfigurationManager()
+        data_ingestion_config = config.get_data_ingestion_config()
+        data_ingestion = DataIngestion(config=data_ingestion_config)
+        data_ingestion.download_file()
+        data_ingestion.extract_zip_file()
+
+
+if __name__ == '__main__':
+    try:
+        logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
+        obj = DataIngestionTrainingPipeline()
+        obj.main()
+        logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
+    except Exception as e:
+        logger.exception(e)
+        raise e
src/cnnClassifier/pipeline/stage_02_model_training.py ADDED
@@ -0,0 +1,26 @@
+from cnnClassifier.config.configuration import ConfigurationManager
+from cnnClassifier.components.model_trainer import ModelTrainer
+from cnnClassifier import logger
+
+STAGE_NAME = "Model Training stage"
+
+class ModelTrainingPipeline:
+    def __init__(self):
+        pass
+
+    def main(self):
+        config = ConfigurationManager()
+        model_trainer_config = config.get_model_trainer_config()
+        model_trainer = ModelTrainer(config=model_trainer_config)
+        model_trainer.train()
+
+
+if __name__ == '__main__':
+    try:
+        logger.info(f">>>>>> stage {STAGE_NAME} started <<<<<<")
+        obj = ModelTrainingPipeline()
+        obj.main()
+        logger.info(f">>>>>> stage {STAGE_NAME} completed <<<<<<\n\nx==========x")
+    except Exception as e:
+        logger.exception(e)
+        raise e
src/cnnClassifier/utils/__init__.py ADDED
File without changes
src/cnnClassifier/utils/common.py ADDED
@@ -0,0 +1,43 @@
+import os
+from box.exceptions import BoxValueError
+import yaml
+from cnnClassifier import logger
+import json
+import joblib
+from ensure import ensure_annotations
+from box import ConfigBox
+from pathlib import Path
+from typing import Any
+
+@ensure_annotations
+def read_yaml(path_to_yaml: Path) -> ConfigBox:
+    """Reads a YAML file and returns its contents as a ConfigBox.
+    Args:
+        path_to_yaml (Path): path to the YAML file
+    Raises:
+        ValueError: if the YAML file is empty
+        e: any other error raised while reading the file
+    Returns:
+        ConfigBox: file contents with attribute-style access
+    """
+    try:
+        with open(path_to_yaml) as yaml_file:
+            content = yaml.safe_load(yaml_file)
+            logger.info(f"yaml file: {path_to_yaml} loaded successfully")
+            return ConfigBox(content)
+    except BoxValueError:
+        raise ValueError("yaml file is empty")
+    except Exception as e:
+        raise e
+
+@ensure_annotations
+def create_directories(path_to_directories: list, verbose=True):
+    """Creates a list of directories.
+    Args:
+        path_to_directories (list): list of directory paths to create
+        verbose (bool, optional): log each created directory. Defaults to True.
+    """
+    for path in path_to_directories:
+        os.makedirs(path, exist_ok=True)
+        if verbose:
+            logger.info(f"created directory at: {path}")
template.py ADDED
@@ -0,0 +1,44 @@
+import os
+from pathlib import Path
+import logging
+
+logging.basicConfig(level=logging.INFO, format='[%(asctime)s]: %(message)s:')
+
+project_name = "cnnClassifier"
+
+list_of_files = [
+    ".github/workflows/.gitkeep",
+    f"src/{project_name}/__init__.py",
+    f"src/{project_name}/components/__init__.py",
+    f"src/{project_name}/utils/__init__.py",
+    f"src/{project_name}/utils/common.py",
+    f"src/{project_name}/config/__init__.py",
+    f"src/{project_name}/config/configuration.py",
+    f"src/{project_name}/pipeline/__init__.py",
+    f"src/{project_name}/entity/__init__.py",
+    f"src/{project_name}/entity/config_entity.py",
+    f"src/{project_name}/constants/__init__.py",
+    "config/config.yaml",
+    "dvc.yaml",
+    "params.yaml",
+    "requirements.txt",
+    "setup.py",
+    "research/trials.ipynb",
+    "templates/index.html",
+    "app.py"  # For Streamlit
+]
+
+for filepath in list_of_files:
+    filepath = Path(filepath)
+    filedir, filename = os.path.split(filepath)
+
+    if filedir != "":
+        os.makedirs(filedir, exist_ok=True)
+        logging.info(f"Creating directory: {filedir} for the file: {filename}")
+
+    if (not os.path.exists(filepath)) or (os.path.getsize(filepath) == 0):
+        with open(filepath, "w") as f:
+            pass
+        logging.info(f"Creating empty file: {filepath}")
+    else:
+        logging.info(f"{filename} already exists")
templates/index.html ADDED
File without changes