from torch import nn as nn


class SpeechEmotionModel(nn.Module):
    """Bidirectional-LSTM classifier for 7-class speech emotion recognition.

    Expects input of shape (batch, seq_len, 1): a univariate feature
    sequence per sample. The last LSTM timestep (512 features, since the
    LSTM is bidirectional with hidden_size=256) is pushed through two
    batch-normed, dropout-regularized linear layers down to 7 class
    probabilities.
    """

    def __init__(self):
        super(SpeechEmotionModel, self).__init__()
        # Bidirectional => output feature size is 2 * hidden_size = 512.
        self.lstm = nn.LSTM(input_size=1, hidden_size=256, num_layers=1,
                            batch_first=True, bidirectional=True)
        self.batch_norm1 = nn.BatchNorm1d(512)
        self.dropout1 = nn.Dropout(0.2)
        self.fc1 = nn.Linear(512, 128)
        self.batch_norm2 = nn.BatchNorm1d(128)
        self.dropout2 = nn.Dropout(0.2)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(128, 64)
        self.batch_norm3 = nn.BatchNorm1d(64)
        self.dropout3 = nn.Dropout(0.2)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(64, 7)
        # NOTE(review): the model emits probabilities, not logits. If it is
        # trained with nn.CrossEntropyLoss (which applies log-softmax
        # internally), this final Softmax should be removed — confirm
        # against the training loop.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Run a forward pass.

        Args:
            x: float tensor of shape (batch, seq_len, 1).

        Returns:
            Tensor of shape (batch, 7); rows sum to 1 (softmax output).
        """
        x, _ = self.lstm(x)          # (batch, seq_len, 512)
        x = x[:, -1, :]              # keep only the final timestep -> (batch, 512)
        x = self.batch_norm1(x)
        x = self.dropout1(x)
        x = self.relu1(self.fc1(x))  # (batch, 128)
        x = self.batch_norm2(x)
        x = self.dropout2(x)
        x = self.relu2(self.fc2(x))  # (batch, 64)
        x = self.batch_norm3(x)
        x = self.dropout3(x)
        x = self.fc3(x)              # (batch, 7) logits
        x = self.softmax(x)          # (batch, 7) probabilities
        return x