@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable settings for the data-ingestion stage.

    Instances are built by ``ConfigurationManager.get_data_ingestion_config``
    from the ``data_ingestion`` section of the configuration file.
    """

    root_dir: Path          # directory created for this stage before the config is returned
    source_URL: str         # URL the archive is downloaded from
    local_data_file: Path   # path the downloaded archive is written to
    unzip_dir: Path         # directory the archive is extracted into


class ConfigurationManager:
    """Reads the configuration and parameter YAML files and builds stage configs."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        """Load both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Location of the configuration YAML file.
            params_filepath: Location of the parameters YAML file.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the data-ingestion settings from the ``data_ingestion`` section.

        Side effect: the stage's ``root_dir`` is created before returning.

        Returns:
            DataIngestionConfig whose path fields are ``pathlib.Path`` objects,
            matching the dataclass annotations.
        """
        section = self.config.data_ingestion

        create_directories([section.root_dir])

        # The dataclass declares these fields as Path, but dataclasses do not
        # enforce annotations at runtime, so the raw configuration values are
        # converted explicitly here.
        return DataIngestionConfig(
            root_dir=Path(section.root_dir),
            source_URL=section.source_URL,
            local_data_file=Path(section.local_data_file),
            unzip_dir=Path(section.unzip_dir),
        )
class DataIngestion:
    """Downloads the dataset archive and unpacks it according to its config."""

    def __init__(self, config: "DataIngestionConfig"):
        """Store the ingestion settings.

        Args:
            config: Object exposing ``source_URL``, ``local_data_file`` and
                ``unzip_dir``.
        """
        self.config = config

    def download_file(self):
        """Download ``config.source_URL`` to ``config.local_data_file``.

        Nothing is downloaded when the target file already exists; its size is
        logged instead. The download is written to a sibling ``.part`` file and
        moved into place only after it completes, so an interrupted transfer
        never leaves a truncated archive that later runs would mistake for a
        finished download.

        Raises:
            Whatever ``urllib.request.urlretrieve`` or the filesystem calls
            raise; the partial file is removed before the error propagates.
        """
        target = self.config.local_data_file

        if os.path.exists(target):
            logger.info(f"File already exists of size: {get_size(Path(target))}")
            return

        parent_dir = os.path.dirname(target)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        partial = f"{target}.part"
        try:
            _, headers = request.urlretrieve(
                url=self.config.source_URL,
                filename=partial,
            )
            os.replace(partial, target)
        except BaseException:
            # BaseException so that a kernel interrupt (KeyboardInterrupt)
            # also cleans up the incomplete file.
            if os.path.exists(partial):
                os.remove(partial)
            raise

        logger.info(f"{target} download! with following info: \n{headers}")

    def extract_zip_file(self):
        """Extract ``config.local_data_file`` into ``config.unzip_dir``.

        The destination directory is created if it does not exist.
        Returns None.
        """
        unzip_path = self.config.unzip_dir
        os.makedirs(unzip_path, exist_ok=True)
        with zipfile.ZipFile(self.config.local_data_file, 'r') as zip_ref:
            zip_ref.extractall(unzip_path)
# Run the data-ingestion stage end to end: load the configuration, download
# the archive (skipped when it is already on disk), then extract it.
# Errors are left to propagate; a handler that only re-raises adds nothing
# but an extra traceback frame.
config = ConfigurationManager()
data_ingestion_config = config.get_data_ingestion_config()
data_ingestion = DataIngestion(config=data_ingestion_config)
data_ingestion.download_file()
data_ingestion.extract_zip_file()