AlexTolstenko committed on
Commit
4805274
·
1 Parent(s): fcef97c

Upload model.py

Browse files
Files changed (1) hide show
  1. model.py +237 -0
model.py ADDED
@@ -0,0 +1,237 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np # linear algebra
2
+ import pandas as pd
3
+ import os
4
+ import matplotlib.pylab as plt
5
+
6
+ from glob import glob
7
+ import tqdm
8
+ import librosa as lr
9
+ import librosa.display
10
+ import IPython.display as ipd
11
+ from sklearn.model_selection import train_test_split
12
+ from sklearn.preprocessing import OneHotEncoder
13
+ from PIL import Image
14
+ import pathlib
15
+ import time
16
+ import threading
17
+ import queue
18
+
19
+ import torch
20
+ import torch.nn as nn
21
+ import pytorch_lightning as pl
22
+ from torch.utils.data.dataset import Dataset
23
+ from torchmetrics.functional import accuracy
24
+
25
+ from torch.utils.data import DataLoader
26
+
27
+ import gradio
28
+
29
+ import wandb
30
+
31
+ from sklearn.preprocessing import StandardScaler
32
+
33
+ from itertools import cycle
34
+
35
# LOAD AUDIO
SAMPLE_RATE = 16000  # Hz — presumably the rate clips are (re)sampled to; confirm against the loader
DURATION = 3         # seconds — presumably the fixed clip length per example

# GET MFCC — parameters for librosa MFCC extraction
N_MFCC = 50          # number of MFCC coefficients (matches the model's 50-row input)
WIN_LENGTH = 2048    # STFT window length in samples
WINDOW = 'hann'      # STFT window function
HOP_LENGTH = 512     # samples between successive frames

# GET MELSPEC — parameters for librosa mel-spectrogram extraction
N_FFT = 1024
WIN_LENGTH_MEL_SPEC = 512
WINDOW_MEL_SPEC = 'hamming'
HOP_LENGTH_MEL_SPEC = 256
N_MELS = 128
FMAX = SAMPLE_RATE/2  # Nyquist frequency — upper bound of the mel filterbank

# HYPERPARAMS
EPOCHS = 200
BATCH_SIZE = 32
NUM_OF_CLASSES = 14
57
+
58
class MFCC_CNN(pl.LightningModule):
    """CNN classifier over MFCC features.

    Input is expected as ``(N, 1, 50, 94)`` (50 MFCC coefficients x 94 frames
    — TODO confirm frame count against the feature extractor). Five
    Conv2d -> BatchNorm2d -> LeakyReLU -> MaxPool2d blocks (the first three
    with Dropout) feed three fully connected layers that produce raw class
    logits.

    Args:
        num_of_classes: number of output classes.
    """

    def __init__(self, num_of_classes):
        super(MFCC_CNN, self).__init__()

        # Only the shape matters here (Lightning uses it to trace/log the
        # graph); torch.zeros replaces the original uninitialized
        # torch.Tensor(...), which could contain NaN/inf garbage.
        self.example_input_array = torch.zeros(32, 1, 50, 94)

        # Per-step metric buffers, flushed by the epoch-end hooks below.
        self.train_loss_output = []
        self.train_acc_output = []
        self.val_acc_output = []
        self.val_loss_output = []

        self.number_of_classes = num_of_classes

        def _conv_block(in_ch, out_ch, kernel_size=3, padding=1, dropout=None):
            """Conv -> BN -> LeakyReLU -> MaxPool(2) [-> Dropout].

            Layer order/indices match the original hand-written Sequentials,
            so state_dict keys of existing checkpoints are preserved.
            """
            layers = [
                nn.Conv2d(in_channels=in_ch,
                          out_channels=out_ch,
                          kernel_size=kernel_size,
                          padding=padding,
                          stride=1),
                nn.BatchNorm2d(out_ch),
                nn.LeakyReLU(),
                nn.MaxPool2d(kernel_size=2),
            ]
            if dropout is not None:
                layers.append(nn.Dropout(dropout))
            return nn.Sequential(*layers)

        self.conv_1 = _conv_block(1, 64, dropout=0.1)
        self.conv_2 = _conv_block(64, 128, dropout=0.1)
        self.conv_3 = _conv_block(128, 256, dropout=0.1)
        self.conv_4 = _conv_block(256, 512)
        # Final block shrinks the remaining 2x? feature map with a 2x2 kernel
        # and no padding before pooling.
        self.conv_5 = _conv_block(512, 512, kernel_size=2, padding=0)

        self.drop = nn.Dropout(0.1)
        self.lin_1 = nn.Linear(1024, 128)
        self.lin_2 = nn.Linear(128, 64)
        self.lin_3 = nn.Linear(64, num_of_classes)

        self.relu = nn.ReLU()
        # dim=1 made explicit (implicit-dim Softmax is deprecated). Kept as an
        # attribute for callers that want probabilities from the logits.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return raw (unnormalized) class logits of shape ``(N, num_classes)``.

        BUGFIX: the original applied ``self.softmax`` here and then fed the
        result to ``nn.CrossEntropyLoss``, which applies log-softmax
        internally — a double softmax that flattens gradients and hurts
        training. Logits are returned instead; apply ``self.softmax``
        externally if probabilities are needed (argmax predictions are
        unaffected).
        """
        out = self.conv_1(x)
        out = self.conv_2(out)
        out = self.conv_3(out)
        out = self.conv_4(out)
        out = self.conv_5(out)

        out = torch.flatten(out, start_dim=1)

        # NOTE(review): ReLU is applied *before* each Linear and Dropout is
        # applied to each Linear's output (including the final logits) —
        # unusual ordering, preserved from the original design.
        out = self.drop(self.lin_1(self.relu(out)))
        out = self.drop(self.lin_2(self.relu(out)))
        out = self.drop(self.lin_3(self.relu(out)))

        return out

    def loss_fn(self, out, target):
        """Cross-entropy between logits ``out`` and (one-hot) ``target``."""
        return nn.CrossEntropyLoss()(input=out.view(-1, self.number_of_classes),
                                     target=target)

    def configure_optimizers(self):
        """Adam + ReduceLROnPlateau on ``val_loss`` (halve LR after 5 stale epochs)."""
        LR = 1e-3
        optimizer = torch.optim.Adam(self.parameters(), lr=LR, weight_decay=1e-3)
        # ``verbose=True`` dropped: deprecated/removed in recent torch releases.
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                               mode='min',
                                                               factor=0.5,
                                                               patience=5)

        return {
            'optimizer': optimizer,
            "lr_scheduler": {
                "scheduler": scheduler,
                "monitor": "val_loss",
                'interval': 'epoch',
                'frequency': 1
            },
        }

    def _shared_step(self, batch):
        """Forward one batch; return ``(loss, accuracy)`` tensors.

        Factors out the logic duplicated between training_step and
        validation_step. Labels arrive one-hot encoded (see the OneHotEncoder
        import — TODO confirm against the dataset).
        """
        mfcc, label = batch
        mfcc = mfcc.view(-1, 1, 50, 94)
        label = label.view(-1, self.number_of_classes)

        logits = self(mfcc)
        loss = self.loss_fn(out=logits, target=label)

        targets = torch.argmax(label, dim=1)
        predictions = torch.argmax(logits, dim=1)
        acc = torch.sum(targets == predictions) / float(len(targets))
        return loss, acc

    def training_step(self, batch, batch_idx):
        loss, acc = self._shared_step(batch)

        # .cpu() added: .detach().numpy() raises on CUDA tensors.
        self.train_acc_output.append(acc.detach().cpu().numpy())
        self.train_loss_output.append(loss.detach().cpu().numpy())

        self.log('train_accuracy', acc, prog_bar=True, on_epoch=True, on_step=False)
        self.log('train_loss', loss, prog_bar=True, on_epoch=True, on_step=False)
        return loss

    def validation_step(self, batch, batch_idx):
        loss, acc = self._shared_step(batch)

        self.val_acc_output.append(acc.detach().cpu().numpy())
        self.val_loss_output.append(loss.detach().cpu().numpy())

        self.log('val_accuracy', acc, prog_bar=True, on_epoch=True)
        self.log('val_loss', loss, prog_bar=True, on_epoch=True)

        return loss

    def on_train_epoch_end(self):
        # Push epoch-mean metrics straight to wandb, then reset the buffers.
        wandb.log({'train_loss_epoch': np.mean(self.train_loss_output),
                   'train_acc_epoch': np.mean(self.train_acc_output)})

        self.train_loss_output.clear()
        self.train_acc_output.clear()

    def on_validation_epoch_end(self):
        wandb.log({'val_loss_epoch': np.mean(self.val_loss_output),
                   'val_acc_epoch': np.mean(self.val_acc_output)})

        self.val_acc_output.clear()
        self.val_loss_output.clear()