from torch import nn as nn


class SpeechEmotionModel(nn.Module):
    """Bidirectional-LSTM classifier for 7-class speech emotion recognition.

    Expects input of shape (batch, seq_len, 1): a univariate feature
    sequence per sample. The last LSTM timestep (512 features, since the
    LSTM is bidirectional with hidden_size=256) is pushed through two
    batch-normed, dropout-regularized linear layers down to 7 class
    probabilities.
    """

    def __init__(self):
        super(SpeechEmotionModel, self).__init__()
        # Bidirectional => output feature size is 2 * hidden_size = 512.
        self.lstm = nn.LSTM(input_size=1, hidden_size=256, num_layers=1,
                            batch_first=True, bidirectional=True)
        self.batch_norm1 = nn.BatchNorm1d(512)
        self.dropout1 = nn.Dropout(0.2)
        self.fc1 = nn.Linear(512, 128)
        self.batch_norm2 = nn.BatchNorm1d(128)
        self.dropout2 = nn.Dropout(0.2)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(128, 64)
        self.batch_norm3 = nn.BatchNorm1d(64)
        self.dropout3 = nn.Dropout(0.2)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(64, 7)
        # NOTE(review): the model emits probabilities, not logits. If it is
        # trained with nn.CrossEntropyLoss (which applies log-softmax
        # internally), this final Softmax should be removed — confirm
        # against the training loop.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Run a forward pass.

        Args:
            x: float tensor of shape (batch, seq_len, 1).

        Returns:
            Tensor of shape (batch, 7); rows sum to 1 (softmax output).
        """
        x, _ = self.lstm(x)          # (batch, seq_len, 512)
        x = x[:, -1, :]              # keep only the final timestep -> (batch, 512)
        x = self.batch_norm1(x)
        x = self.dropout1(x)
        x = self.relu1(self.fc1(x))  # (batch, 128)
        x = self.batch_norm2(x)
        x = self.dropout2(x)
        x = self.relu2(self.fc2(x))  # (batch, 64)
        x = self.batch_norm3(x)
        x = self.dropout3(x)
        x = self.fc3(x)              # (batch, 7) logits
        x = self.softmax(x)          # (batch, 7) probabilities
        return x