|
|
import torch |
|
|
import torch.nn as nn |
|
|
from torch import optim |
|
|
from torch.utils.data import DataLoader |
|
|
from NN import OffensiveLanguageClassifier, OffensiveLanguageDataset |
|
|
|
|
|
|
|
|
from process_data import train |
|
|
|
|
|
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
|
|
|
|
|
|
|
|
# ---- Training hyperparameters ----
num_epochs = 2    # passes over the training set
batch_size = 2    # samples per optimizer step

# ---- Model hyperparameters ----
vocab_size = 23885   # size of the tokenizer vocabulary
hidden_size = 128    # hidden dimension of the classifier
output_size = 3      # number of target classes
num_layers = 2       # stacked recurrent/encoder layers
|
|
|
|
|
|
|
|
# Build the classifier and place it on the chosen device in one step
# (nn.Module.to moves parameters in place and returns the same module).
model = OffensiveLanguageClassifier(
    vocab_size,
    hidden_size,
    output_size,
    num_layers,
    dropout=0.3,
).to(device)


# Multi-class objective over the raw logits.
loss_fn = nn.CrossEntropyLoss()


# Adam with its default learning rate (1e-3).
optimizer = optim.Adam(model.parameters())
|
|
|
|
|
|
|
|
|
|
|
# Wrap the preprocessed training data in a Dataset.
# NOTE(review): train[0] appears to select the text/feature column
# (column label 0) and train["class"] the label column — confirm against
# the output schema of process_data.
train_dataset = OffensiveLanguageDataset(train[0], train["class"])


# Batch the examples and reshuffle them at the start of every epoch.
dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)


# Debug output left in place; prints <class 'torch.utils.data.dataloader.DataLoader'>.
print(type(dataloader))
|
|
|
|
|
# Training loop: one full pass over the dataloader per epoch.
for epoch in range(num_epochs):

    # Bug fix: enable training mode so the configured dropout (0.3) is
    # actually applied during optimization.
    model.train()

    for data, labels in dataloader:

        # Collate the batch into a single tensor.
        # NOTE(review): torch.stack assumes the dataloader yields a
        # list/tuple of tensors per batch rather than an already-stacked
        # tensor — confirm against OffensiveLanguageDataset.__getitem__.
        data = torch.stack(data)

        # Bug fix: move inputs and targets onto the same device as the
        # model; previously they stayed on the CPU, which raises a
        # device-mismatch error whenever CUDA is selected.
        data = data.to(device)
        labels = labels.to(device)

        # Forward pass and loss.
        logits = model(data)
        loss = loss_fn(logits, labels)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Report the last batch's loss; .item() extracts the Python float
    # from the 0-dim tensor so formatting is version-independent.
    print(f'Epoch {epoch+1}: loss = {loss.item():.4f}')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|