# %% [markdown]
# ## Import libraries

# %%
import os
import time
import warnings
from os import listdir

# Silence library warnings before the heavy third-party imports below emit them.
warnings.filterwarnings('ignore')

import cv2
import imutils
import matplotlib.pyplot as plt
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator

# Notebook magic, kept in commented (jupytext "percent") form so this text is valid Python:
# %matplotlib inline

# %% [markdown]
# ## Format an elapsed time as a readable string

# %%
def hms_string(sec_elapsed):
    """Format a duration in seconds as ``h:m:s`` with seconds shown to 0.1 s.

    Args:
        sec_elapsed: Elapsed time in seconds (int or float).

    Returns:
        A string such as ``"0:2:2.3"``; the fields are not zero-padded.
    """
    # Round the whole duration first so that e.g. 59.96 s carries into the
    # minutes field ("0:1:0.0") instead of being rendered as "0:0:60.0".
    total = round(sec_elapsed, 1)
    hours, remainder = divmod(total, 60 * 60)
    minutes, seconds = divmod(remainder, 60)
    return f"{int(hours)}:{int(minutes)}:{round(seconds, 1)}"

# %% [markdown]
# ## Augmentation helper

# %%
def augment_data(file_dir, n_generated_samples, save_to_dir):
    """Write randomly augmented copies of every image found in a directory.

    Arguments:
        file_dir: A string, the directory holding the images to augment.
        n_generated_samples: An int, the number of augmented images to
            generate from each source image.
        save_to_dir: A string, the directory the generated JPEG files are
            written to. It is created if it does not exist.
    """
    data_gen = ImageDataGenerator(
        rotation_range=10,
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=0.1,
        brightness_range=(0.3, 1.0),
        horizontal_flip=True,
        vertical_flip=True,
        fill_mode='nearest',
    )

    # The generator writes into save_to_dir, so make sure it is there first.
    os.makedirs(save_to_dir, exist_ok=True)

    for filename in sorted(listdir(file_dir)):
        # os.path.join keeps this portable; a hard-coded '\\' only works on Windows.
        image = cv2.imread(os.path.join(file_dir, filename))
        if image is None:
            # cv2.imread returns None for anything it cannot decode
            # (sub-folders, Thumbs.db, .DS_Store, corrupt files).
            print(f"Skipping {filename}: not a readable image")
            continue

        # OpenCV loads pixels as BGR, whereas Keras saves arrays as RGB;
        # convert so colour images are not written with red/blue swapped.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # Add a leading batch axis: (height, width, channels) -> (1, height, width, channels).
        image = image.reshape((1,) + image.shape)

        # Prefix of the names for the generated samples; splitext copes with
        # extensions of any length (".jpeg", ".png", none at all).
        save_prefix = 'aug_' + os.path.splitext(filename)[0]

        batches = data_gen.flow(x=image, batch_size=1, save_to_dir=save_to_dir,
                                save_prefix=save_prefix, save_format='jpg')
        # Each draw from the iterator writes one file to save_to_dir, so draw
        # exactly n_generated_samples times.
        for _ in range(n_generated_samples):
            next(batches)

# %% [markdown]
# ## Create the output folders

# %%
# Create one output folder per label folder of the source dataset.
name = 'aug_data'
# NOTE(review): `root` was never defined in the notebook; './Dataset' is assumed
# because the augmentation calls below read from './Dataset/yes' and './Dataset/no'.
root = './Dataset'
aug_root = os.path.join('.', name)

if os.path.exists(aug_root):
    print(f"{name} Already exists")

# exist_ok=True makes this cell safe to re-run and also completes a partially
# created folder tree.
for label in sorted(os.listdir(root)):
    if os.path.isdir(os.path.join(root, label)):
        os.makedirs(os.path.join(aug_root, label), exist_ok=True)

# %% [markdown]
# ## Run the augmentation

# %%
# NOTE: re-running this cell adds further generated files to the output
# folders; it does not replace the ones written by an earlier run.
start_time = time.time()

# Augment data for the examples with the label 'yes'
augment_data(file_dir=os.path.join(root, 'yes'), n_generated_samples=6,
             save_to_dir=os.path.join(aug_root, 'yes'))
# Augment data for the examples with the label 'no'
augment_data(file_dir=os.path.join(root, 'no'), n_generated_samples=9,
             save_to_dir=os.path.join(aug_root, 'no'))

execution_time = time.time() - start_time
print(f"Elapsed time: {hms_string(execution_time)}")

# %%
print(execution_time)
"output_type": "stream", "text": [ "Number of examples: 2064\n", "Percentage of positive examples: 52.56782945736434%, number of pos examples: 1085\n", "Percentage of negative examples: 47.43217054263566%, number of neg examples: 979\n" ] } ], "source": [ "def data_summary(main_path):\n", " \n", " yes_path = main_path+'yes'\n", " no_path = main_path+'no'\n", " \n", " # number of files (images) that are in the the folder named 'yes' that represent tumorous (positive) examples\n", " m_pos = len(listdir(yes_path))\n", " # number of files (images) that are in the the folder named 'no' that represent non-tumorous (negative) examples\n", " m_neg = len(listdir(no_path))\n", " # number of all examples\n", " m = (m_pos+m_neg)\n", " \n", " pos_prec = (m_pos* 100.0)/ m\n", " neg_prec = (m_neg* 100.0)/ m\n", " \n", " print(f\"Number of examples: {m}\")\n", " print(f\"Percentage of positive examples: {pos_prec}%, number of pos examples: {m_pos}\") \n", " print(f\"Percentage of negative examples: {neg_prec}%, number of neg examples: {m_neg}\") \n", "\n", "\n", "data_summary('aug_data/')" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.6" } }, "nbformat": 4, "nbformat_minor": 5 }