# speech_eMEOWtion / model.py
# Author: Tanishq
# (uploaded via "Upload 9 files", commit 4dee9c4, verified)
from torch import nn as nn
# NOTE(review): the commented-out block below is the earlier unidirectional
# model, superseded by the bidirectional version further down; kept for reference.
# class SpeechEmotionModel(nn.Module):
# def __init__(self):
# super(SpeechEmotionModel, self).__init__()
# self.lstm = nn.LSTM(input_size=1, hidden_size=256, num_layers=1, batch_first=True)
# self.dropout1 = nn.Dropout(0.2)
# self.fc1 = nn.Linear(256, 128)
# self.dropout2 = nn.Dropout(0.2)
# self.relu1 = nn.ReLU()
# self.fc2 = nn.Linear(128, 64)
# self.dropout3 = nn.Dropout(0.2)
# self.relu2 = nn.ReLU()
# self.fc3 = nn.Linear(64, 7)
# self.softmax = nn.Softmax(dim=1)
#
# def forward(self, x):
# x, _ = self.lstm(x)
# x = x[:, -1, :]
# x = self.dropout1(x)
# x = self.relu1(self.fc1(x))
# x = self.dropout2(x)
# x = self.relu2(self.fc2(x))
# x = self.dropout3(x)
# x = self.fc3(x)
# x = self.softmax(x)
# return x
class SpeechEmotionModel(nn.Module):
    """Bidirectional-LSTM classifier over single-feature sequences.

    Consumes input of shape (batch, seq_len, 1), encodes it with a
    1-layer bidirectional LSTM (hidden 256 per direction -> 512 features),
    and maps the last timestep through two batch-normalized, dropout-
    regularized linear layers to 7 class probabilities (softmax output).

    Attribute names and construction order are preserved exactly so that
    previously saved state_dicts keep loading.
    """

    def __init__(self):
        super(SpeechEmotionModel, self).__init__()
        # Encoder: bidirectional, so the feature width is 2 * 256 = 512.
        self.lstm = nn.LSTM(input_size=1, hidden_size=256, num_layers=1,
                            batch_first=True, bidirectional=True)
        # Classifier head: (BatchNorm -> Dropout) around each Linear+ReLU.
        self.batch_norm1 = nn.BatchNorm1d(512)
        self.dropout1 = nn.Dropout(0.2)
        self.fc1 = nn.Linear(512, 128)
        self.batch_norm2 = nn.BatchNorm1d(128)
        self.dropout2 = nn.Dropout(0.2)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(128, 64)
        self.batch_norm3 = nn.BatchNorm1d(64)
        self.dropout3 = nn.Dropout(0.2)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(64, 7)
        # NOTE(review): softmax baked into forward() — if this model is
        # trained with nn.CrossEntropyLoss the probabilities get log-softmaxed
        # twice; confirm the training loss before reusing. Left untouched to
        # match the uploaded weights.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return per-class probabilities, shape (batch, 7).

        Args:
            x: float tensor of shape (batch, seq_len, 1) — assumed; the
               LSTM's input_size=1 is the only constraint visible here.
        """
        seq_out, _ = self.lstm(x)
        # NOTE(review): taking only the final timestep means the backward
        # direction contributes just one step of context; the trained
        # weights depend on this, so it is preserved as-is.
        features = seq_out[:, -1, :]
        features = self.dropout1(self.batch_norm1(features))
        features = self.dropout2(self.batch_norm2(self.relu1(self.fc1(features))))
        features = self.dropout3(self.batch_norm3(self.relu2(self.fc2(features))))
        return self.softmax(self.fc3(features))