Teeradej Sawettraporn committed on
Commit
d6f1da2
·
verified ·
1 Parent(s): 097e069

Upload torch_audio_classification_demo.ipynb

Browse files
Torch_audio_classification/torch_audio_classification_demo.ipynb ADDED
@@ -0,0 +1,443 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 14,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import torch\n",
10
+ "import librosa\n",
11
+ "import torch.nn as nn\n",
12
+ "import numpy as np\n",
13
+ "import os\n",
14
+ "import matplotlib.pyplot as plt\n",
15
+ "from torch.utils.data import DataLoader, Dataset, random_split\n",
16
+ "\n",
17
def _load_audio(wav_file_path, offset=0, duration=30):
    """Decode an audio file with librosa; return (waveform, sample_rate).

    The defaults reproduce the original behavior of every feature helper:
    the first 30 seconds starting at offset 0, at librosa's default
    resampling rate.
    """
    return librosa.load(wav_file_path, offset=offset, duration=duration)


def get_mfcc(wav_file_path):
    """Return the MFCC matrix (coefficients x frames) of the first 30 s."""
    y, sr = _load_audio(wav_file_path)
    return np.array(librosa.feature.mfcc(y=y, sr=sr))


def get_melspectrogram(wav_file_path):
    """Return the mel spectrogram (mel bands x frames) of the first 30 s."""
    y, sr = _load_audio(wav_file_path)
    return np.array(librosa.feature.melspectrogram(y=y, sr=sr))


def get_chroma_vector(wav_file_path):
    """Return the chroma STFT matrix (pitch classes x frames) of the first 30 s."""
    y, sr = _load_audio(wav_file_path)
    return np.array(librosa.feature.chroma_stft(y=y, sr=sr))


def get_tonnetz(wav_file_path):
    """Return the tonnetz (tonal centroid) features of the first 30 s."""
    y, sr = _load_audio(wav_file_path)
    return np.array(librosa.feature.tonnetz(y=y, sr=sr))
39
+ ]
40
+ },
41
+ {
42
+ "cell_type": "code",
43
+ "execution_count": 23,
44
+ "metadata": {},
45
+ "outputs": [],
46
+ "source": [
47
def get_feature(file_path):
    """Build a fixed-size (128, 128) feature map for one audio file.

    For each of four librosa features — chroma STFT, mel spectrogram, MFCC,
    tonnetz — the per-coefficient mean, min and max over time frames are
    concatenated into one 1-D vector, which is then resized to 128x128.

    The audio is decoded ONCE here instead of once per feature (the original
    called the four get_* helpers, each of which reloaded the file); the
    librosa calls and their arguments are unchanged, so the output is
    identical while decoding cost drops ~4x.

    NOTE(review): np.resize repeats/truncates the concatenated vector to fill
    all 128*128 = 16384 cells — confirm this tiling is intentional rather
    than zero-padding.
    """
    y, sr = librosa.load(file_path, offset=0, duration=30)

    def _stats(matrix):
        # mean/min/max over the time axis, concatenated per coefficient
        return np.concatenate((matrix.mean(axis=1), matrix.min(axis=1), matrix.max(axis=1)))

    # Same feature extractors and parameters as the standalone helpers.
    mfcc_feature = _stats(np.array(librosa.feature.mfcc(y=y, sr=sr)))
    melspectrogram_feature = _stats(np.array(librosa.feature.melspectrogram(y=y, sr=sr)))
    chroma_feature = _stats(np.array(librosa.feature.chroma_stft(y=y, sr=sr)))
    tntz_feature = _stats(np.array(librosa.feature.tonnetz(y=y, sr=sr)))

    # Order matters: chroma, mel, mfcc, tonnetz — as in the original.
    feature = np.concatenate((chroma_feature, melspectrogram_feature, mfcc_feature, tntz_feature))

    # Reshape to a fixed size so every sample has the same tensor shape.
    return np.resize(feature, (128, 128))
81
+ ]
82
+ },
83
+ {
84
+ "cell_type": "code",
85
+ "execution_count": 32,
86
+ "metadata": {},
87
+ "outputs": [],
88
+ "source": [
89
+ "# Define a custom dataset\n",
90
class AudioDataset(Dataset):
    """Dataset of per-file audio feature maps with integer genre labels.

    Walks one sub-folder per genre under *directory*, extracts a feature map
    per file via get_feature, and labels each sample with the genre's index
    in *genres*.
    """

    def __init__(self, directory, genres):
        feature_list = []
        label_list = []
        for genre in genres:
            print("Calculating features for genre: " + genre)
            genre_dir = os.path.join(directory, genre)
            genre_label = genres.index(genre)
            for file in os.listdir(genre_dir):
                feature_list.append(get_feature(os.path.join(genre_dir, file)))
                label_list.append(genre_label)

        self.features = np.array(feature_list)
        self.labels = np.array(label_list)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        # Prepend a channel dimension -> (1, H, W) so Conv2d accepts it.
        x = torch.tensor(self.features[idx], dtype=torch.float32).unsqueeze(0)
        y = torch.tensor(self.labels[idx], dtype=torch.long)
        return x, y
113
+ ]
114
+ },
115
+ {
116
+ "cell_type": "code",
117
+ "execution_count": 33,
118
+ "metadata": {},
119
+ "outputs": [
120
+ {
121
+ "name": "stdout",
122
+ "output_type": "stream",
123
+ "text": [
124
+ "Calculating features for genre: blues\n",
125
+ "Calculating features for genre: classical\n",
126
+ "Calculating features for genre: metal\n"
127
+ ]
128
+ }
129
+ ],
130
+ "source": [
131
+ "# Data Preparation\n",
132
+ "directory = 'd:/Coding/audio_dl_tf/dataset'\n",
133
+ "genres = ['blues', 'classical', 'metal']\n",
134
+ "\n",
135
+ "dataset = AudioDataset(directory, genres)\n",
136
+ "\n"
137
+ ]
138
+ },
139
+ {
140
+ "cell_type": "code",
141
+ "execution_count": 34,
142
+ "metadata": {},
143
+ "outputs": [],
144
+ "source": [
145
# Random 60 / 20 / 20 split into train / validation / test subsets.
n_total = len(dataset)
train_size = int(0.6 * n_total)
val_size = int(0.2 * n_total)
test_size = n_total - train_size - val_size  # remainder goes to test

train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

# Shuffle only the training loader; evaluation order does not matter.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
155
+ ]
156
+ },
157
+ {
158
+ "cell_type": "code",
159
+ "execution_count": 35,
160
+ "metadata": {},
161
+ "outputs": [],
162
+ "source": [
163
# VGG configurations: ints are conv output-channel counts, 'M' is a 2x2 max-pool.
VGG_types = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}

class VGG_net(nn.Module):
    """VGG-style CNN classifier for 2-D single/multi-channel inputs.

    Args:
        in_channels: number of input channels (1 for the feature maps here).
        num_classes: size of the final classification layer.
        vgg_type: key into VGG_types; default 'VGG16' preserves the original
            hard-coded behavior while allowing the other configs.
        input_size: (H, W) of the expected input, used only to size fc1;
            default (128, 128) matches get_feature's output.
    """

    def __init__(self, in_channels=1, num_classes=3, vgg_type='VGG16', input_size=(128, 128)):
        super(VGG_net, self).__init__()
        self.in_channels = in_channels
        self.input_size = input_size
        self.conv_layers = self.create_conv_layers(VGG_types[vgg_type])

        self.flatten = nn.Flatten()

        # fc1 input size is inferred by running a dummy tensor through the convs.
        self.fc1 = nn.Linear(self.calculate_flatten_dim(), 4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, num_classes)

        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        """Return raw class logits for a batch x of shape (N, C, H, W)."""
        x = self.conv_layers(x)
        x = self.flatten(x)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)
        return x

    def create_conv_layers(self, architecture):
        """Build the conv stack: Conv-BN-ReLU per int entry, MaxPool per 'M'."""
        layers = []
        in_channels = self.in_channels

        for x in architecture:
            if type(x) == int:
                out_channels = x

                layers += [nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                                     kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
                           nn.BatchNorm2d(x),
                           nn.ReLU()]
                in_channels = x
            elif x == 'M':
                layers += [nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))]
        return nn.Sequential(*layers)

    def calculate_flatten_dim(self):
        """Infer fc1's input width by tracing a zero tensor through the convs."""
        with torch.no_grad():
            # Fall back to the historical 128x128 if input_size is absent.
            h, w = getattr(self, 'input_size', (128, 128))
            sample_input = torch.zeros((1, self.in_channels, h, w))
            sample_output = self.conv_layers(sample_input)
            return sample_output.numel()
218
+ ]
219
+ },
220
+ {
221
+ "cell_type": "code",
222
+ "execution_count": 36,
223
+ "metadata": {},
224
+ "outputs": [],
225
+ "source": [
226
+ "# Initialize the model\n",
227
+ "device = 'cuda' if torch.cuda.is_available() else 'cpu' \n",
228
+ "model = VGG_net(in_channels=1, num_classes=len(genres)).to(device)\n",
229
+ "\n"
230
+ ]
231
+ },
232
+ {
233
+ "cell_type": "code",
234
+ "execution_count": 37,
235
+ "metadata": {},
236
+ "outputs": [
237
+ {
238
+ "name": "stdout",
239
+ "output_type": "stream",
240
+ "text": [
241
+ "torch.Size([32, 3])\n"
242
+ ]
243
+ }
244
+ ],
245
+ "source": [
246
+ "# Test forward pass\n",
247
+ "sample_data, _ = next(iter(train_loader))\n",
248
+ "sample_data = sample_data.to(device) # Add channel dimension\n",
249
+ "print(model(sample_data).shape)"
250
+ ]
251
+ },
252
+ {
253
+ "cell_type": "code",
254
+ "execution_count": 38,
255
+ "metadata": {},
256
+ "outputs": [],
257
+ "source": [
258
# Training and evaluation functions
def train(model, dataloader, criterion, optimizer, device):
    """Run one training epoch; return (mean loss per sample, accuracy)."""
    model.train()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    for batch_x, batch_y in dataloader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        optimizer.zero_grad()
        logits = model(batch_x)
        batch_loss = criterion(logits, batch_y)
        batch_loss.backward()
        optimizer.step()

        # criterion returns the batch mean; weight by batch size to accumulate a sum
        loss_sum += batch_loss.item() * batch_x.size(0)
        preds = logits.argmax(dim=1)
        n_seen += batch_y.size(0)
        n_correct += (preds == batch_y).sum().item()

    return loss_sum / len(dataloader.dataset), n_correct / n_seen
278
+ "\n"
279
+ ]
280
+ },
281
+ {
282
+ "cell_type": "code",
283
+ "execution_count": 39,
284
+ "metadata": {},
285
+ "outputs": [],
286
+ "source": [
287
def evaluate(model, dataloader, criterion, device):
    """Evaluate without gradient tracking; return (mean loss per sample, accuracy)."""
    model.eval()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    with torch.no_grad():
        for batch_x, batch_y in dataloader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            logits = model(batch_x)
            # weight the batch-mean loss by batch size to accumulate a sum
            loss_sum += criterion(logits, batch_y).item() * batch_x.size(0)
            preds = logits.argmax(dim=1)
            n_seen += batch_y.size(0)
            n_correct += (preds == batch_y).sum().item()
    return loss_sum / len(dataloader.dataset), n_correct / n_seen
304
+ ]
305
+ },
306
+ {
307
+ "cell_type": "code",
308
+ "execution_count": 40,
309
+ "metadata": {},
310
+ "outputs": [
311
+ {
312
+ "name": "stdout",
313
+ "output_type": "stream",
314
+ "text": [
315
+ "Epoch 1/20\n",
316
+ "Train Loss: 8.2556, Train Acc: 0.3500\n",
317
+ "Val Loss: 4557.6196, Val Acc: 0.3000\n",
318
+ "Model saved!\n"
319
+ ]
320
+ },
321
+ {
322
+ "ename": "KeyboardInterrupt",
323
+ "evalue": "",
324
+ "output_type": "error",
325
+ "traceback": [
326
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
327
+ "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
328
+ "Cell \u001b[1;32mIn[40], line 15\u001b[0m\n\u001b[0;32m 12\u001b[0m best_val_acc \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0.0\u001b[39m\n\u001b[0;32m 14\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m epoch \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(num_epochs):\n\u001b[1;32m---> 15\u001b[0m train_loss, train_acc \u001b[38;5;241m=\u001b[39m \u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtrain_loader\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcriterion\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moptimizer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdevice\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 16\u001b[0m val_loss, val_acc \u001b[38;5;241m=\u001b[39m evaluate(model, val_loader, criterion, device)\n\u001b[0;32m 18\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mEpoch \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mepoch\u001b[38;5;241m+\u001b[39m\u001b[38;5;241m1\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mnum_epochs\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n",
329
+ "Cell \u001b[1;32mIn[38], line 10\u001b[0m, in \u001b[0;36mtrain\u001b[1;34m(model, dataloader, criterion, optimizer, device)\u001b[0m\n\u001b[0;32m 8\u001b[0m inputs, labels \u001b[38;5;241m=\u001b[39m inputs\u001b[38;5;241m.\u001b[39mto(device), labels\u001b[38;5;241m.\u001b[39mto(device)\n\u001b[0;32m 9\u001b[0m optimizer\u001b[38;5;241m.\u001b[39mzero_grad()\n\u001b[1;32m---> 10\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[43mmodel\u001b[49m\u001b[43m(\u001b[49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 11\u001b[0m loss \u001b[38;5;241m=\u001b[39m criterion(outputs, labels)\n\u001b[0;32m 12\u001b[0m loss\u001b[38;5;241m.\u001b[39mbackward()\n",
330
+ "File \u001b[1;32md:\\Coding\\audio_dl_torch\\torch_dl\\lib\\site-packages\\torch\\nn\\modules\\module.py:1532\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1530\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[0;32m 1531\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 1532\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
331
+ "File \u001b[1;32md:\\Coding\\audio_dl_torch\\torch_dl\\lib\\site-packages\\torch\\nn\\modules\\module.py:1541\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1536\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[0;32m 1537\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[0;32m 1538\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[0;32m 1539\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[0;32m 1540\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[1;32m-> 1541\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m forward_call(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 1543\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 1544\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
332
+ "Cell \u001b[1;32mIn[35], line 25\u001b[0m, in \u001b[0;36mVGG_net.forward\u001b[1;34m(self, x)\u001b[0m\n\u001b[0;32m 24\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, x):\n\u001b[1;32m---> 25\u001b[0m x \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconv_layers\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 26\u001b[0m x \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mflatten(x)\n\u001b[0;32m 27\u001b[0m x \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrelu(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfc1(x))\n",
333
+ "File \u001b[1;32md:\\Coding\\audio_dl_torch\\torch_dl\\lib\\site-packages\\torch\\nn\\modules\\module.py:1532\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1530\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[0;32m 1531\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 1532\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
334
+ "File \u001b[1;32md:\\Coding\\audio_dl_torch\\torch_dl\\lib\\site-packages\\torch\\nn\\modules\\module.py:1541\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1536\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[0;32m 1537\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[0;32m 1538\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[0;32m 1539\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[0;32m 1540\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[1;32m-> 1541\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m forward_call(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 1543\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 1544\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
335
+ "File \u001b[1;32md:\\Coding\\audio_dl_torch\\torch_dl\\lib\\site-packages\\torch\\nn\\modules\\container.py:217\u001b[0m, in \u001b[0;36mSequential.forward\u001b[1;34m(self, input)\u001b[0m\n\u001b[0;32m 215\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;28minput\u001b[39m):\n\u001b[0;32m 216\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m module \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m:\n\u001b[1;32m--> 217\u001b[0m \u001b[38;5;28minput\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[43mmodule\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[0;32m 218\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28minput\u001b[39m\n",
336
+ "File \u001b[1;32md:\\Coding\\audio_dl_torch\\torch_dl\\lib\\site-packages\\torch\\nn\\modules\\module.py:1532\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1530\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[0;32m 1531\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 1532\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
337
+ "File \u001b[1;32md:\\Coding\\audio_dl_torch\\torch_dl\\lib\\site-packages\\torch\\nn\\modules\\module.py:1541\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1536\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[0;32m 1537\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[0;32m 1538\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[0;32m 1539\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[0;32m 1540\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[1;32m-> 1541\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m forward_call(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 1543\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 1544\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
338
+ "File \u001b[1;32md:\\Coding\\audio_dl_torch\\torch_dl\\lib\\site-packages\\torch\\nn\\modules\\conv.py:460\u001b[0m, in \u001b[0;36mConv2d.forward\u001b[1;34m(self, input)\u001b[0m\n\u001b[0;32m 459\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;28minput\u001b[39m: Tensor) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Tensor:\n\u001b[1;32m--> 460\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_conv_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mweight\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbias\u001b[49m\u001b[43m)\u001b[49m\n",
339
+ "File \u001b[1;32md:\\Coding\\audio_dl_torch\\torch_dl\\lib\\site-packages\\torch\\nn\\modules\\conv.py:456\u001b[0m, in \u001b[0;36mConv2d._conv_forward\u001b[1;34m(self, input, weight, bias)\u001b[0m\n\u001b[0;32m 452\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpadding_mode \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mzeros\u001b[39m\u001b[38;5;124m'\u001b[39m:\n\u001b[0;32m 453\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m F\u001b[38;5;241m.\u001b[39mconv2d(F\u001b[38;5;241m.\u001b[39mpad(\u001b[38;5;28minput\u001b[39m, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_reversed_padding_repeated_twice, mode\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpadding_mode),\n\u001b[0;32m 454\u001b[0m weight, bias, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstride,\n\u001b[0;32m 455\u001b[0m _pair(\u001b[38;5;241m0\u001b[39m), \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdilation, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgroups)\n\u001b[1;32m--> 456\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mF\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconv2d\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mweight\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbias\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstride\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 457\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpadding\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdilation\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgroups\u001b[49m\u001b[43m)\u001b[49m\n",
340
+ "\u001b[1;31mKeyboardInterrupt\u001b[0m: "
341
+ ]
342
+ }
343
+ ],
344
+ "source": [
345
import torch.optim as optim

# Hyperparameters
num_epochs = 20
learning_rate = 0.001

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training and validation: keep the checkpoint with the best validation accuracy.
best_val_acc = 0.0

for epoch in range(num_epochs):
    train_loss, train_acc = train(model, train_loader, criterion, optimizer, device)
    val_loss, val_acc = evaluate(model, val_loader, criterion, device)

    print(f"Epoch {epoch+1}/{num_epochs}")
    print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")
    print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

    # Save the best model
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), 'best_model.pth')
        print("Model saved!")

# Load the best model.
# map_location keeps this working when the checkpoint was written from a
# different device than the one we are loading on (e.g. saved on CUDA,
# reloaded on a CPU-only machine).
model.load_state_dict(torch.load('best_model.pth', map_location=device))
374
+ ]
375
+ },
376
+ {
377
+ "cell_type": "code",
378
+ "execution_count": null,
379
+ "metadata": {},
380
+ "outputs": [],
381
+ "source": [
382
+ "def train(model, dataloader, criterion, optimizer, device):\n",
383
+ " model.train()\n",
384
+ " running_loss = 0.0\n",
385
+ " correct = 0\n",
386
+ " total = 0\n",
387
+ " for inputs, labels in dataloader:\n",
388
+ " inputs, labels = inputs.to(device), labels.to(device)\n",
389
+ " optimizer.zero_grad()\n",
390
+ " outputs = model(inputs)\n",
391
+ " loss = criterion(outputs, labels)\n",
392
+ " loss.backward()\n",
393
+ " optimizer.step()\n",
394
+ " running_loss += loss.item() * inputs.size(0)\n",
395
+ " _, predicted = torch.max(outputs, 1)\n",
396
+ " total += labels.size(0)\n",
397
+ " correct += (predicted == labels).sum().item()\n",
398
+ " epoch_loss = running_loss / len(dataloader.dataset)\n",
399
+ " epoch_acc = correct / total\n",
400
+ " return epoch_loss, epoch_acc\n",
401
+ "\n",
402
+ "def evaluate(model, dataloader, criterion, device):\n",
403
+ " model.eval()\n",
404
+ " running_loss = 0.0\n",
405
+ " correct = 0\n",
406
+ " total = 0\n",
407
+ " with torch.no_grad():\n",
408
+ " for inputs, labels in dataloader:\n",
409
+ " inputs, labels = inputs.to(device), labels.to(device)\n",
410
+ " outputs = model(inputs)\n",
411
+ " loss = criterion(outputs, labels)\n",
412
+ " running_loss += loss.item() * inputs.size(0)\n",
413
+ " _, predicted = torch.max(outputs, 1)\n",
414
+ " total += labels.size(0)\n",
415
+ " correct += (predicted == labels).sum().item()\n",
416
+ " epoch_loss = running_loss / len(dataloader.dataset)\n",
417
+ " epoch_acc = correct / total\n",
418
+ " return epoch_loss, epoch_acc\n"
419
+ ]
420
+ }
421
+ ],
422
+ "metadata": {
423
+ "kernelspec": {
424
+ "display_name": "torch_dl",
425
+ "language": "python",
426
+ "name": "python3"
427
+ },
428
+ "language_info": {
429
+ "codemirror_mode": {
430
+ "name": "ipython",
431
+ "version": 3
432
+ },
433
+ "file_extension": ".py",
434
+ "mimetype": "text/x-python",
435
+ "name": "python",
436
+ "nbconvert_exporter": "python",
437
+ "pygments_lexer": "ipython3",
438
+ "version": "3.10.11"
439
+ }
440
+ },
441
+ "nbformat": 4,
442
+ "nbformat_minor": 2
443
+ }