{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Music genre classification with a VGG-style CNN\n",
    "\n",
    "Per-clip summary statistics (mean / min / max) of four librosa features (chroma, mel spectrogram, MFCC, tonnetz) are concatenated, tiled into a fixed-size single-channel array and classified with a VGG network.\n",
    "\n",
    "**Caveats**\n",
    "- The fixed-size input is produced with `np.resize`, which repeats the flat feature vector until the array is full; it is not a spectrogram image.\n",
    "- TODO: features are fed to the network un-normalised; standardising them with train-split statistics may stabilise training (to be confirmed experimentally).\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import librosa\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.optim as optim\n",
    "from torch.utils.data import DataLoader, Dataset, random_split\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configuration: everything a reader might want to tune lives in this cell.\n",
    "SEED = 42\n",
    "CLIP_SECONDS = 30            # only the first 30 s of each file are analysed\n",
    "FEATURE_SHAPE = (128, 128)   # fixed (height, width) of the array fed to the CNN\n",
    "AUDIO_EXTENSIONS = ('.wav', '.au', '.mp3', '.flac', '.ogg')\n",
    "BATCH_SIZE = 32\n",
    "BEST_MODEL_PATH = 'best_model.pth'\n",
    "\n",
    "# Dataset root with one sub-directory per genre.\n",
    "# Set the AUDIO_DATASET_DIR environment variable to override the default location.\n",
    "directory = os.environ.get('AUDIO_DATASET_DIR', 'd:/Coding/audio_dl_tf/dataset')\n",
    "genres = ['blues', 'classical', 'metal']\n",
    "\n",
    "# Seed the RNGs so weight init, shuffling and dropout are repeatable.\n",
    "np.random.seed(SEED)\n",
    "torch.manual_seed(SEED)\n",
    "\n",
    "device = 'cuda' if torch.cuda.is_available() else 'cpu'\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Feature extraction\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_clip(wav_file_path):\n",
    "    \"\"\"Load the first CLIP_SECONDS seconds of an audio file.\n",
    "\n",
    "    Returns the (samples, sample_rate) pair produced by librosa.load.\n",
    "    \"\"\"\n",
    "    return librosa.load(wav_file_path, offset=0, duration=CLIP_SECONDS)\n",
    "\n",
    "\n",
    "def _resolve_clip(wav_file_path, clip):\n",
    "    \"\"\"Return clip if the caller already loaded the audio, otherwise load it from disk.\"\"\"\n",
    "    return clip if clip is not None else load_clip(wav_file_path)\n",
    "\n",
    "\n",
    "def get_mfcc(wav_file_path, clip=None):\n",
    "    \"\"\"Return the MFCC matrix of a file; pass clip=(y, sr) to reuse already-loaded audio.\"\"\"\n",
    "    y, sr = _resolve_clip(wav_file_path, clip)\n",
    "    return np.array(librosa.feature.mfcc(y=y, sr=sr))\n",
    "\n",
    "\n",
    "def get_melspectrogram(wav_file_path, clip=None):\n",
    "    \"\"\"Return the mel spectrogram of a file; pass clip=(y, sr) to reuse already-loaded audio.\"\"\"\n",
    "    y, sr = _resolve_clip(wav_file_path, clip)\n",
    "    return np.array(librosa.feature.melspectrogram(y=y, sr=sr))\n",
    "\n",
    "\n",
    "def get_chroma_vector(wav_file_path, clip=None):\n",
    "    \"\"\"Return the STFT chromagram of a file; pass clip=(y, sr) to reuse already-loaded audio.\"\"\"\n",
    "    y, sr = _resolve_clip(wav_file_path, clip)\n",
    "    return np.array(librosa.feature.chroma_stft(y=y, sr=sr))\n",
    "\n",
    "\n",
    "def get_tonnetz(wav_file_path, clip=None):\n",
    "    \"\"\"Return the tonnetz features of a file; pass clip=(y, sr) to reuse already-loaded audio.\"\"\"\n",
    "    y, sr = _resolve_clip(wav_file_path, clip)\n",
    "    return np.array(librosa.feature.tonnetz(y=y, sr=sr))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _summarise(matrix):\n",
    "    \"\"\"Reduce a 2-D feature matrix along axis 1 to its mean, min and max, concatenated.\"\"\"\n",
    "    return np.concatenate((matrix.mean(axis=1), matrix.min(axis=1), matrix.max(axis=1)))\n",
    "\n",
    "\n",
    "def get_feature(file_path):\n",
    "    \"\"\"Build the fixed-size feature array for one audio file.\n",
    "\n",
    "    The audio is decoded once and shared by all four extractors. Their summaries\n",
    "    are concatenated in the order chroma, mel spectrogram, MFCC, tonnetz.\n",
    "\n",
    "    Returns:\n",
    "        np.ndarray of shape FEATURE_SHAPE.\n",
    "    \"\"\"\n",
    "    clip = load_clip(file_path)\n",
    "    feature = np.concatenate((\n",
    "        _summarise(get_chroma_vector(file_path, clip)),\n",
    "        _summarise(get_melspectrogram(file_path, clip)),\n",
    "        _summarise(get_mfcc(file_path, clip)),\n",
    "        _summarise(get_tonnetz(file_path, clip)),\n",
    "    ))\n",
    "    # np.resize fills the target shape with repeated copies of the flat vector.\n",
    "    return np.resize(feature, FEATURE_SHAPE)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Dataset\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class AudioDataset(Dataset):\n",
    "    \"\"\"In-memory dataset of (1, H, W) float32 feature tensors and integer genre labels.\n",
    "\n",
    "    Args:\n",
    "        directory: root folder containing one sub-folder per genre.\n",
    "        genres: sub-folder names; a clip's label is its genre's index in this list.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if no audio file is found under any genre folder.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, directory, genres):\n",
    "        features, labels = [], []\n",
    "        for genre in genres:\n",
    "            print('Calculating features for genre: ' + genre)\n",
    "            label = genres.index(genre)\n",
    "            genre_dir = os.path.join(directory, genre)\n",
    "            # Sorted so the sample order (and hence the seeded split) is reproducible.\n",
    "            for file in sorted(os.listdir(genre_dir)):\n",
    "                # Skip stray non-audio entries such as OS metadata files.\n",
    "                if not file.lower().endswith(AUDIO_EXTENSIONS):\n",
    "                    continue\n",
    "                features.append(get_feature(os.path.join(genre_dir, file)))\n",
    "                labels.append(label)\n",
    "\n",
    "        if not features:\n",
    "            raise ValueError(f'No audio files found under {directory!r} for genres {genres!r}')\n",
    "\n",
    "        self.features = np.array(features)\n",
    "        self.labels = np.array(labels)\n",
    "\n",
    "    def __len__(self):\n",
    "        return len(self.labels)\n",
    "\n",
    "    def __getitem__(self, idx):\n",
    "        feature = torch.tensor(self.features[idx], dtype=torch.float32)\n",
    "        label = torch.tensor(self.labels[idx], dtype=torch.long)\n",
    "        return feature.unsqueeze(0), label  # unsqueeze adds the channel dimension\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Data preparation (slow: every clip is decoded and analysed here)\n",
    "dataset = AudioDataset(directory, genres)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 60 / 20 / 20 train / validation / test split, seeded for reproducibility\n",
    "train_size = int(0.6 * len(dataset))\n",
    "val_size = int(0.2 * len(dataset))\n",
    "test_size = len(dataset) - train_size - val_size\n",
    "\n",
    "split_generator = torch.Generator().manual_seed(SEED)\n",
    "train_dataset, val_dataset, test_dataset = random_split(\n",
    "    dataset, [train_size, val_size, test_size], generator=split_generator\n",
    ")\n",
    "assert len(train_dataset) + len(val_dataset) + len(test_dataset) == len(dataset)\n",
    "\n",
    "train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)\n",
    "val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)\n",
    "test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Model\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Conv stacks: an int is the output channel count of a 3x3 conv, 'M' is a 2x2 max-pool.\n",
    "VGG_types = {\n",
    "    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],\n",
    "    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],\n",
    "    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],\n",
    "    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],\n",
    "}\n",
    "\n",
    "\n",
    "class VGG_net(nn.Module):\n",
    "    \"\"\"VGG-style CNN: conv/BatchNorm/ReLU stacks with max-pooling, then three linear layers.\n",
    "\n",
    "    Args:\n",
    "        in_channels: number of channels of the input tensor.\n",
    "        num_classes: length of the output logit vector.\n",
    "        architecture: key of VGG_types selecting the conv stack.\n",
    "        input_size: (height, width) of the inputs; only used to size the first linear layer.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, in_channels=1, num_classes=3, architecture='VGG16', input_size=FEATURE_SHAPE):\n",
    "        super().__init__()\n",
    "        self.in_channels = in_channels\n",
    "        self.input_size = tuple(input_size)\n",
    "        self.conv_layers = self.create_conv_layers(VGG_types[architecture])\n",
    "\n",
    "        self.flatten = nn.Flatten()\n",
    "\n",
    "        self.fc1 = nn.Linear(self.calculate_flatten_dim(), 4096)\n",
    "        self.fc2 = nn.Linear(4096, 4096)\n",
    "        self.fc3 = nn.Linear(4096, num_classes)\n",
    "\n",
    "        self.relu = nn.ReLU()\n",
    "        self.dropout = nn.Dropout(p=0.5)\n",
    "\n",
    "    def forward(self, x):\n",
    "        \"\"\"Map a (batch, in_channels, H, W) tensor to (batch, num_classes) logits.\"\"\"\n",
    "        x = self.conv_layers(x)\n",
    "        x = self.flatten(x)\n",
    "        x = self.relu(self.fc1(x))\n",
    "        x = self.dropout(x)\n",
    "        x = self.relu(self.fc2(x))\n",
    "        x = self.dropout(x)\n",
    "        return self.fc3(x)\n",
    "\n",
    "    def create_conv_layers(self, architecture):\n",
    "        \"\"\"Build the conv stack described by a VGG_types entry.\n",
    "\n",
    "        Raises:\n",
    "            ValueError: if the list contains anything other than an int or 'M'.\n",
    "        \"\"\"\n",
    "        layers = []\n",
    "        in_channels = self.in_channels\n",
    "\n",
    "        for x in architecture:\n",
    "            if isinstance(x, int):\n",
    "                layers += [\n",
    "                    nn.Conv2d(in_channels=in_channels, out_channels=x,\n",
    "                              kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),\n",
    "                    nn.BatchNorm2d(x),\n",
    "                    nn.ReLU(),\n",
    "                ]\n",
    "                in_channels = x\n",
    "            elif x == 'M':\n",
    "                layers.append(nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)))\n",
    "            else:\n",
    "                raise ValueError(f'Unknown architecture entry: {x!r}')\n",
    "        return nn.Sequential(*layers)\n",
    "\n",
    "    def calculate_flatten_dim(self):\n",
    "        \"\"\"Return the number of features the conv stack emits for one input_size sample.\"\"\"\n",
    "        was_training = self.conv_layers.training\n",
    "        # Eval mode: in training mode BatchNorm would fold this dummy batch\n",
    "        # into its running statistics, even under no_grad.\n",
    "        self.conv_layers.eval()\n",
    "        try:\n",
    "            with torch.no_grad():\n",
    "                sample_input = torch.zeros((1, self.in_channels, *self.input_size))\n",
    "                sample_output = self.conv_layers(sample_input)\n",
    "        finally:\n",
    "            self.conv_layers.train(was_training)\n",
    "        return sample_output.numel()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialize the model\n",
    "model = VGG_net(in_channels=1, num_classes=len(genres)).to(device)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Forward-pass sanity check. Eval mode + no_grad so the check leaves the\n",
    "# BatchNorm statistics and the weights untouched.\n",
    "sample_data, _ = next(iter(train_loader))\n",
    "model.eval()\n",
    "with torch.no_grad():\n",
    "    sample_logits = model(sample_data.to(device))\n",
    "assert sample_logits.shape == (sample_data.size(0), len(genres))\n",
    "sample_logits.shape\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Training\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _run_epoch(model, dataloader, criterion, device, optimizer=None):\n",
    "    \"\"\"Run one pass over dataloader; weights are updated only when an optimizer is given.\n",
    "\n",
    "    Returns:\n",
    "        (mean loss per sample, accuracy), both over the samples actually seen.\n",
    "\n",
    "    Raises:\n",
    "        ZeroDivisionError: if the dataloader yields no samples.\n",
    "    \"\"\"\n",
    "    training = optimizer is not None\n",
    "    model.train(training)\n",
    "    running_loss, correct, total = 0.0, 0, 0\n",
    "    with torch.set_grad_enabled(training):\n",
    "        for inputs, labels in dataloader:\n",
    "            inputs, labels = inputs.to(device), labels.to(device)\n",
    "            if training:\n",
    "                optimizer.zero_grad()\n",
    "            outputs = model(inputs)\n",
    "            loss = criterion(outputs, labels)\n",
    "            if training:\n",
    "                loss.backward()\n",
    "                optimizer.step()\n",
    "            # Assumes criterion returns the batch mean (the CrossEntropyLoss default),\n",
    "            # so weight it by the batch size before summing.\n",
    "            running_loss += loss.item() * inputs.size(0)\n",
    "            correct += (outputs.argmax(dim=1) == labels).sum().item()\n",
    "            total += labels.size(0)\n",
    "    return running_loss / total, correct / total\n",
    "\n",
    "\n",
    "def train(model, dataloader, criterion, optimizer, device):\n",
    "    \"\"\"Train model for one epoch; returns (epoch_loss, epoch_acc).\"\"\"\n",
    "    return _run_epoch(model, dataloader, criterion, device, optimizer=optimizer)\n",
    "\n",
    "\n",
    "def evaluate(model, dataloader, criterion, device):\n",
    "    \"\"\"Evaluate model without gradient tracking; returns (epoch_loss, epoch_acc).\"\"\"\n",
    "    return _run_epoch(model, dataloader, criterion, device)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hyperparameters\n",
    "num_epochs = 20\n",
    "learning_rate = 0.001\n",
    "\n",
    "# Loss and optimizer\n",
    "criterion = nn.CrossEntropyLoss()\n",
    "optimizer = optim.Adam(model.parameters(), lr=learning_rate)\n",
    "\n",
    "# Start below any reachable accuracy so the first epoch always writes a checkpoint.\n",
    "best_val_acc = float('-inf')\n",
    "checkpoint_written = False\n",
    "\n",
    "for epoch in range(num_epochs):\n",
    "    train_loss, train_acc = train(model, train_loader, criterion, optimizer, device)\n",
    "    val_loss, val_acc = evaluate(model, val_loader, criterion, device)\n",
    "\n",
    "    print(f'Epoch {epoch+1}/{num_epochs}')\n",
    "    print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}')\n",
    "    print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')\n",
    "\n",
    "    # Save the best model\n",
    "    if val_acc > best_val_acc:\n",
    "        best_val_acc = val_acc\n",
    "        torch.save(model.state_dict(), BEST_MODEL_PATH)\n",
    "        checkpoint_written = True\n",
    "        print('Model saved!')\n",
    "\n",
    "# Restore the best weights of this run; never pick up a stale file from an earlier run.\n",
    "if checkpoint_written:\n",
    "    model.load_state_dict(torch.load(BEST_MODEL_PATH, map_location=device, weights_only=True))\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Results\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Held-out performance of the restored best model\n",
    "test_loss, test_acc = evaluate(model, test_loader, criterion, device)\n",
    "print(f'Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}')\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "torch_dl",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}