{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "e45dab2a",
   "metadata": {},
   "source": [
    "Import important libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "1f940871",
   "metadata": {},
   "outputs": [],
   "source": [
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "import tensorflow as tf\n",
    "from keras.preprocessing.image import ImageDataGenerator\n",
    "import cv2\n",
    "import imutils\n",
    "import matplotlib.pyplot as plt\n",
    "from os import listdir\n",
    "import time\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "beac1e35",
   "metadata": {},
   "source": [
    "calculate time in Nicely format"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "32a0851a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Nicely formatted time string\n",
    "def hms_string(sec_elapsed):\n",
    "    h = int(sec_elapsed / (60 * 60))\n",
    "    m = int((sec_elapsed % (60 * 60)) / 60)\n",
    "    s = sec_elapsed % 60\n",
    "    return f\"{h}:{m}:{round(s,1)}\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "380d8b93",
   "metadata": {},
   "outputs": [],
   "source": [
    "def augment_data(file_dir, n_generated_samples, save_to_dir):\n",
    "    \"\"\"\n",
    "    Arguments:\n",
    "        file_dir: A string representing the directory where images that we want to augment are found.\n",
    "        n_generated_samples: A string representing the number of generated samples using the given image.\n",
    "        save_to_dir: A string representing the directory in which the generated images will be saved.\n",
    "    \"\"\"\n",
    "    \n",
    "    #from keras.preprocessing.image import ImageDataGenerator\n",
    "    #from os import listdir\n",
    "    \n",
    "    data_gen = ImageDataGenerator(rotation_range=10, \n",
    "                                  width_shift_range=0.1, \n",
    "                                  height_shift_range=0.1, \n",
    "                                  shear_range=0.1, \n",
    "                                  brightness_range=(0.3, 1.0),\n",
    "                                  horizontal_flip=True, \n",
    "                                  vertical_flip=True, \n",
    "                                  fill_mode='nearest'\n",
    "                                 )\n",
    "\n",
    "    \n",
    "    for filename in listdir(file_dir):\n",
    "        # load the image\n",
    "        image = cv2.imread(file_dir + '\\\\' + filename)\n",
    "        # reshape the image\n",
    "        image = image.reshape((1,)+image.shape)\n",
    "        # prefix of the names for the generated sampels.\n",
    "        save_prefix = 'aug_' + filename[:-4]\n",
    "        # generate 'n_generated_samples' sample images\n",
    "        i=0\n",
    "        for batch in data_gen.flow(x=image, batch_size=1, save_to_dir=save_to_dir, \n",
    "                                           save_prefix=save_prefix, save_format='jpg'):\n",
    "            i += 1\n",
    "            if i > n_generated_samples:\n",
    "                break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "7871ee0d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "aug_data Already exists\n"
     ]
    }
   ],
   "source": [
    "#Create augmented images folders\n",
    "#L = ['aug_train', 'aug_test', 'aug_val']\n",
    "name = 'aug_data'\n",
    "if not os.path.exists(\"./\"+name):\n",
    "    os.mkdir(\"./\"+name)\n",
    "    for dir in os.listdir(root):\n",
    "        os.makedirs(\"./\"+name+\"/\"+dir)\n",
    "else:\n",
    "    print(f\"{name} Already exists\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "6f1fd7ec",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elapsed time: 0:2:2.3\n"
     ]
    }
   ],
   "source": [
    "import time\n",
    "start_time = time.time()\n",
    "\n",
    "\n",
    "#Augment data for the examples with the label 'yes' in the training set\n",
    "augment_data(file_dir='./Dataset/yes',n_generated_samples=6, save_to_dir='./aug_data/yes')\n",
    "#Augment data for the examples with the label 'no' in the training set\n",
    "augment_data(file_dir='./Dataset/no', n_generated_samples=9, save_to_dir='./aug_data/no')\n",
    "\n",
    "end_time = time.time()\n",
    "execution_time = (end_time - start_time)\n",
    "print(f\"Elapsed time: {hms_string(execution_time)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "9977a4ac",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "151.41872715950012\n"
     ]
    }
   ],
   "source": [
    "print(execution_time)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "f0a30885",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of examples: 2064\n",
      "Percentage of positive examples: 52.56782945736434%, number of pos examples: 1085\n",
      "Percentage of negative examples: 47.43217054263566%, number of neg examples: 979\n"
     ]
    }
   ],
   "source": [
    "def data_summary(main_path):\n",
    "    \n",
    "    yes_path = main_path+'yes'\n",
    "    no_path = main_path+'no'\n",
    "        \n",
    "    # number of files (images) that are in the the folder named 'yes' that represent tumorous (positive) examples\n",
    "    m_pos = len(listdir(yes_path))\n",
    "    # number of files (images) that are in the the folder named 'no' that represent non-tumorous (negative) examples\n",
    "    m_neg = len(listdir(no_path))\n",
    "    # number of all examples\n",
    "    m = (m_pos+m_neg)\n",
    "    \n",
    "    pos_prec = (m_pos* 100.0)/ m\n",
    "    neg_prec = (m_neg* 100.0)/ m\n",
    "    \n",
    "    print(f\"Number of examples: {m}\")\n",
    "    print(f\"Percentage of positive examples: {pos_prec}%, number of pos examples: {m_pos}\") \n",
    "    print(f\"Percentage of negative examples: {neg_prec}%, number of neg examples: {m_neg}\") \n",
    "\n",
    "\n",
    "data_summary('aug_data/')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}