import os
import zipfile
from pathlib import Path

import gdown

from cnnClassifier import logger
from cnnClassifier.utils.common import get_size
from cnnClassifier.entity.config_entity import DataIngestionConfig


class DataIngestion:
    """Fetch a zip archive with gdown and unpack it.

    All locations come from the supplied config object; this class reads
    its ``source_URL``, ``local_data_file`` and ``unzip_dir`` attributes.
    """

    def __init__(self, config: DataIngestionConfig):
        """Store the ingestion configuration for later use.

        Args:
            config: Object providing ``source_URL``, ``local_data_file``
                and ``unzip_dir``.
        """
        self.config = config

    def download_file(self) -> None:
        """Download the source archive unless it is already on disk.

        If ``local_data_file`` already exists, nothing is downloaded and
        the size reported by ``get_size`` is logged instead.

        Raises:
            RuntimeError: If ``gdown.download`` reports failure by
                returning ``None``.
        """
        local_file = self.config.local_data_file

        if os.path.exists(local_file):
            logger.info(f"File already exists of size: {get_size(Path(local_file))}")
            return

        # Make sure the destination directory exists before downloading;
        # an empty dirname means "current directory" and needs no creation.
        parent_dir = os.path.dirname(str(local_file))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        # NOTE(review): fuzzy=True presumably lets gdown extract the file id
        # from a Google Drive share link - confirm against the gdown docs.
        downloaded = gdown.download(
            self.config.source_URL,
            str(local_file),
            quiet=False,
            fuzzy=True,
        )

        # Some gdown releases signal a failed retrieval by returning None
        # rather than raising; do not log success in that case.
        if downloaded is None:
            raise RuntimeError(
                f"Failed to download data from {self.config.source_URL} "
                f"to {local_file}"
            )

        logger.info(f"Downloaded data to {local_file}")

    def extract_zip_file(self) -> None:
        """Extract the downloaded zip file into the unzip directory.

        The unzip directory is created first if it does not exist.
        """
        unzip_path = self.config.unzip_dir
        os.makedirs(unzip_path, exist_ok=True)
        with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref:
            zip_ref.extractall(unzip_path)