import os

import numpy as np
import torch
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms
from tqdm import tqdm
from transformers import AdamW, ViTImageProcessor, ViTForImageClassification

from NWRD_dataset import NWRD

# ---------------------------------------------------------------------------
# CUDA debugging switches
# ---------------------------------------------------------------------------
# A bare Python assignment such as `CUDA_LAUNCH_BLOCKING = 1` only creates a
# notebook variable; nothing outside the interpreter can see it. The values are
# therefore mirrored into the process environment, and this is done *before*
# the first `torch.cuda` call so the setting is in place when CUDA starts up.
# `setdefault` lets a value exported in the shell (e.g. CUDA_LAUNCH_BLOCKING=0
# for full-speed runs) take precedence over the notebook default.
CUDA_LAUNCH_BLOCKING = 1
TORCH_USE_CUDA_DSA = 1
os.environ.setdefault("CUDA_LAUNCH_BLOCKING", str(CUDA_LAUNCH_BLOCKING))
# NOTE(review): PyTorch's error text describes TORCH_USE_CUDA_DSA as a
# build-time option, so exporting it at runtime may be a no-op -- confirm.
os.environ.setdefault("TORCH_USE_CUDA_DSA", str(TORCH_USE_CUDA_DSA))

# ---------------------------------------------------------------------------
# Reproducibility
# ---------------------------------------------------------------------------
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)
# If you are using CUDA, set this for further deterministic behavior
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if you are using multi-GPU.
    # Below settings are recommended for deterministic behavior when using
    # specific convolution operations, but may reduce performance
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# ---------------------------------------------------------------------------
# Device selection
# ---------------------------------------------------------------------------
# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

# ---------------------------------------------------------------------------
# Image preprocessing applied by the dataset
# ---------------------------------------------------------------------------
transformations = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize the image to 224x224
    transforms.ToTensor()           # Convert the image to a PyTorch tensor
])
# ---------------------------------------------------------------------------
# Dataset location
# ---------------------------------------------------------------------------
# The dataset root used to be a hardcoded absolute Windows path, which raised
# FileNotFoundError as soon as the notebook ran on another machine. Set the
# NWRD_DATA_ROOT environment variable to point somewhere else; when it is not
# set, the original location is used so existing setups keep working.
DATA_ROOT = os.environ.get(
    "NWRD_DATA_ROOT",
    "C:\\Users\\hasee\\Desktop\\Germany_2024\\Dataset\\NWRDprocessed",
)
# Folder name exactly as used by the original notebook (spelling included).
CLASSIFICATION_SUBDIR = "calssification"
BATCH_SIZE = 8
NUM_CLASSES = 2
PRETRAINED_CHECKPOINT = 'google/vit-base-patch16-224'


def _split_dir(split):
    """Return the image folder for one dataset split ("train" or "val").

    Raises:
        FileNotFoundError: if the folder does not exist. Failing here gives a
            message that names the override variable instead of an error from
            deep inside the dataset class.
    """
    path = os.path.join(DATA_ROOT, split, CLASSIFICATION_SUBDIR)
    if not os.path.isdir(path):
        raise FileNotFoundError(
            f"NWRD {split} folder not found: {path!r}. "
            "Set the NWRD_DATA_ROOT environment variable to the dataset root."
        )
    return path


# ---------------------------------------------------------------------------
# Datasets and loaders
# ---------------------------------------------------------------------------
train_ds = NWRD(root_dir=_split_dir("train"), train=True, transform=transformations)
val_ds = NWRD(root_dir=_split_dir("val"), train=False, transform=transformations)

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
# Validation is only averaged, so a fixed order is used: the reported loss no
# longer depends on the state of the random number generator.
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False)

# ---------------------------------------------------------------------------
# Pretrained processor and model
# ---------------------------------------------------------------------------
# ImageNet channel statistics. They are currently NOT applied: the processor
# below is loaded with the checkpoint's own preprocessing settings. To use
# these values instead, assign them to `processor.image_mean` and
# `processor.image_std` after loading.
mean = [0.485, 0.456, 0.406]  # Mean values for RGB channels
std = [0.229, 0.224, 0.225]   # Standard deviation values for RGB channels

processor = ViTImageProcessor.from_pretrained(PRETRAINED_CHECKPOINT)
model = ViTForImageClassification.from_pretrained(PRETRAINED_CHECKPOINT)

# Swap the pretrained classification layer for a freshly initialised
# two-output linear layer, then move the whole model to the selected device.
model.classifier = torch.nn.Linear(model.config.hidden_size, NUM_CLASSES)
model.to(device)
import os

import wandb

# ---------------------------------------------------------------------------
# Optional warm start (disabled)
# ---------------------------------------------------------------------------
# Uncomment to continue from an earlier run. The checkpoint is loaded as a
# whole pickled model object and only its state dict is copied over.
# model_weights = torch.load('/home/Hirra/coding_files/crossvit/weights/wandb_vit_base_final_med_val_NWRD_epoch_50_lr_0.000000001_wd_0.001_batch_size_8_unaugmented_unequlaized/49.pth')
# model.load_state_dict(model_weights.state_dict())

# ---------------------------------------------------------------------------
# Optimizer, loss and checkpoint folder
# ---------------------------------------------------------------------------
LEARNING_RATE = 0.00000003
WEIGHT_DECAY = 0.001

optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
criterion = torch.nn.CrossEntropyLoss()

# NOTE(review): the folder name advertises lr_0.000000003 (3e-9) while the
# optimizer above runs with 3e-8. The name is left untouched so existing
# checkpoint paths stay valid -- confirm which value is the intended one.
weights_directory = 'wandb_vit_base_final_for_time_NWRD_epoch_50_lr_0.000000003_wd_0.001_batch_size_8_unaugmented_training'
weight_loc = f"weights/{weights_directory}"
os.makedirs(weight_loc, exist_ok=True)

# ---------------------------------------------------------------------------
# Weights & Biases configuration
# ---------------------------------------------------------------------------
# These variables are set BEFORE logging in: wandb reports that environment
# changes made after its session has started are ignored. The project name was
# previously set and then immediately overwritten with an empty string, which
# sent runs to the "uncategorized" project.
os.environ["WANDB_PROJECT"] = "crossvit_rust_classifier_new"
# NOTE(review): kept from the original notebook; it is unclear whether any
# library reads WANDB_REPORT_TO -- confirm or remove.
os.environ["WANDB_REPORT_TO"] = "wandb"

# SECURITY: an API key used to be hardcoded in this cell. It has been removed
# and must be revoked, because it was saved together with the notebook.
# Provide credentials through the WANDB_API_KEY environment variable or by
# running `wandb login` once on the command line.
wandb.login()
NUM_EPOCHS = 50
# Fallback project name, used when WANDB_PROJECT is unset or empty.
DEFAULT_WANDB_PROJECT = "crossvit_rust_classifier_new"


def _batch_loss(images, labels):
    """Run one batch through the model and return its cross-entropy loss.

    Uses the notebook-level `processor`, `model`, `criterion` and `device`.
    `do_rescale=False` is passed to the processor exactly as in the original
    loop (presumably because the dataset transform already yields float
    tensors -- confirm against the dataset class).
    """
    inputs = processor(images=images, return_tensors="pt", do_rescale=False).to(device)
    labels = labels.to(device)
    logits = model(**inputs).logits
    return criterion(logits, labels)


# Name the project explicitly so the run does not land in "uncategorized"
# when the environment variable is missing or empty.
wandb.init(project=os.environ.get("WANDB_PROJECT") or DEFAULT_WANDB_PROJECT)

# Filled in below with the epoch that reached the lowest validation loss:
# {"epoch": int, "val_loss": float}. Empty until the first validation pass.
best_epoch = {}
train_losses = []
run_failed = True
try:
    for epoch in range(NUM_EPOCHS):
        # ------------------------------ training ------------------------------
        model.train()  # must be *called*; a bare `model.train` does nothing
        train_losses = []
        loop = tqdm(enumerate(train_loader), total=len(train_loader))
        for batch_idx, (images, labels) in loop:
            # Clear gradients every step. Previously this happened once per
            # epoch, so each update used the sum of all earlier batches'
            # gradients.
            optimizer.zero_grad()
            loss = _batch_loss(images, labels)
            loss.backward()
            optimizer.step()

            train_losses.append(loss.item())
            loop.set_description(f"Epoch {epoch} train Loss {np.mean(train_losses):.4f}")

        train_loss = np.mean(train_losses)
        print("Epoch "+str(epoch)+" Train Loss "+str(train_loss))
        # The whole model object is pickled (not just the state dict); the
        # warm-start code elsewhere in this notebook expects that format.
        torch.save(model, weight_loc+'/{}.pth'.format(epoch))
        wandb.log({"train_loss": train_loss, "epoch": epoch})

        # ----------------------------- validation -----------------------------
        model.eval()  # must be *called*; a bare `model.eval` does nothing
        val_losses = []

        loop = tqdm(enumerate(val_loader), total=len(val_loader))
        with torch.no_grad():
            for batch_idx, (images, labels) in loop:
                # One loss per batch (it used to be computed and stored twice).
                val_losses.append(_batch_loss(images, labels).item())
                loop.set_description(f"Epoch {epoch} Val Loss {np.mean(val_losses):.4f}")

        val_loss = np.mean(val_losses)
        wandb.log({"val_loss": val_loss, "epoch": epoch})

        if not best_epoch or val_loss < best_epoch["val_loss"]:
            best_epoch = {"epoch": epoch, "val_loss": float(val_loss)}

    run_failed = False
finally:
    # Always close the run, including on an error or a manual interrupt, and
    # record a non-zero exit code when training did not complete.
    wandb.finish(exit_code=1 if run_failed else 0)
    torch.cuda.empty_cache()