"""Train a small feed-forward binary text classifier on TF-IDF features.

The script reads ``imdb_balanced_10k.csv`` (columns ``text`` and ``label``),
builds unigram+bigram TF-IDF features, and trains a three-layer MLP with
full-batch Adam updates, printing the training loss and held-out accuracy
after every epoch.

NOTE(review): ``label`` is assumed to hold numeric 0/1 values -- the cast to
float32 below fails for string labels, and the loss expects targets in
[0, 1]. Confirm against the CSV.
"""
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split

# One seed for both the data split and the torch RNG (weight init, dropout),
# so that repeated runs are comparable.
SEED = 42
torch.manual_seed(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ---------------------------------------------------------------- data -----
df = pd.read_csv("imdb_balanced_10k.csv")
texts = df["text"].astype(str).values
labels = df["label"].values
y = np.array(labels, dtype=np.float32)

texts_train, texts_test, y_train, y_test = train_test_split(
    texts, y, test_size=0.2, random_state=SEED
)

# ------------------------------------------------------------ features -----
# dtype=float32 makes the vectorizer emit float32 directly, so the dense
# matrices below are half the size of the float64 default and need no
# further conversion before being handed to torch.
tfidf = TfidfVectorizer(
    max_features=15000,
    stop_words="english",
    ngram_range=(1, 2),
    min_df=2,
    max_df=0.85,
    dtype=np.float32,
)

# The vocabulary is fitted on the training texts only; the test texts are
# transformed with that fitted vocabulary.
X_train = tfidf.fit_transform(texts_train).toarray()
X_test = tfidf.transform(texts_test).toarray()

# from_numpy wraps the dense array without copying it a second time.
X_train = torch.from_numpy(X_train).to(device)
X_test = torch.from_numpy(X_test).to(device)

# Targets are reshaped to (N, 1) to match the model's single output column.
y_train = torch.tensor(y_train, dtype=torch.float32).view(-1, 1).to(device)
y_test = torch.tensor(y_test, dtype=torch.float32).view(-1, 1).to(device)


# --------------------------------------------------------------- model -----
class Model(nn.Module):
    """Three-layer MLP (in_dim -> 256 -> 128 -> 1) with ReLU and dropout.

    ``forward`` returns sigmoid probabilities; ``logits`` returns the raw
    pre-sigmoid scores used by the training loss.
    """

    def __init__(self, in_dim):
        """Create the layers.

        Args:
            in_dim: Number of input features per sample.
        """
        super().__init__()
        self.fc1 = nn.Linear(in_dim, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 1)
        self.drop = nn.Dropout(0.3)

    def logits(self, x):
        """Return the raw (pre-sigmoid) output for the input batch ``x``."""
        x = self.drop(torch.relu(self.fc1(x)))
        x = self.drop(torch.relu(self.fc2(x)))
        return self.fc3(x)

    def forward(self, x):
        """Return the sigmoid of ``logits(x)``, i.e. values in (0, 1)."""
        return torch.sigmoid(self.logits(x))


model = Model(X_train.shape[1]).to(device)

# BCEWithLogitsLoss fuses the sigmoid into the loss, which is numerically
# more stable than applying Sigmoid followed by BCELoss.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0003)

# ------------------------------------------------------------ training -----
print("Training start...")

epochs = 100

for epoch in range(epochs):
    # One full-batch gradient step per epoch.
    model.train()
    train_logits = model.logits(X_train)
    loss = criterion(train_logits, y_train)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Held-out accuracy with dropout disabled and no gradient tracking.
    model.eval()
    with torch.no_grad():
        acc = ((model(X_test) > 0.5) == y_test).float().mean().item()

    print(f"Epoch {epoch+1:2d} | Loss: {loss.item():.4f} | Acc: {acc:.4f}")

print("\n Done!")