Sentimen-Analysis / train_gnn.py
noranisa's picture
Update train_gnn.py
1a5fce3 verified
"""
train_gnn.py — Script training GNN untuk deteksi bot/sentiment.
Jalankan secara lokal, BUKAN saat startup app.
"""
def build_edge_index(sim, threshold=0.7):
    """Build an undirected edge index from a pairwise similarity matrix.

    Args:
        sim: Square ``(n, n)`` array-like of pairwise similarities.
        threshold: Two distinct nodes are connected when their similarity is
            strictly greater than this value.

    Returns:
        A ``torch.long`` tensor of shape ``(2, 2 * P)``, where ``P`` is the
        number of unordered pairs ``i < j`` above the threshold. The first
        ``P`` columns hold the ``(i, j)`` pairs in row-major order and the
        last ``P`` columns hold the mirrored ``(j, i)`` pairs. The shape is
        ``(2, 0)`` when no pair qualifies.
    """
    # Imported lazily so that importing this module stays free of heavy
    # dependencies; only calling the helper needs them.
    import numpy as np
    import torch

    above = np.asarray(sim) > threshold
    # Strict upper triangle: every unordered pair once, no self-pairs.
    pairs = np.argwhere(np.triu(above, k=1))
    forward = torch.as_tensor(pairs, dtype=torch.long).t()
    # Message passing follows edge direction, so a similarity (undirected)
    # edge has to be stored in both directions.
    return torch.cat([forward, forward.flip(0)], dim=1).contiguous()


def encode_labels(labels):
    """Map raw integer labels to contiguous class indices ``0..C-1``.

    Args:
        labels: Sequence of raw integer labels, one per sample.

    Returns:
        A ``(classes, indices)`` tuple: ``classes`` is the sorted list of
        distinct raw labels and ``indices[k]`` is the position of
        ``labels[k]`` inside ``classes``. Labels that already are
        ``0..C-1`` come back unchanged.
    """
    import numpy as np

    classes, indices = np.unique(np.asarray(labels), return_inverse=True)
    return classes.tolist(), indices.tolist()


if __name__ == "__main__":
    # The training dependencies are imported only when the file is executed
    # as a script, never on a plain import of the module.
    import torch
    import pandas as pd
    import numpy as np
    from torch_geometric.data import Data
    from torch_geometric.nn import GCNConv
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import cosine_similarity

    # ── LOAD DATA ──
    df = pd.read_csv("data/train.csv")  # columns: text, label
    texts = df["text"].astype(str).tolist()
    labels = df["label"].astype(int).tolist()

    # ── TF-IDF FEATURES ──
    vectorizer = TfidfVectorizer(max_features=300)
    X = vectorizer.fit_transform(texts).toarray()
    x = torch.tensor(X, dtype=torch.float)

    # ── BUILD GRAPH ──
    # Nodes are documents; an edge links two documents whose TF-IDF cosine
    # similarity exceeds the threshold.
    edge_index = build_edge_index(cosine_similarity(X), threshold=0.7)
    if edge_index.shape[1] == 0:
        print("⚠️ Tidak ada edge yang terbentuk. Coba turunkan threshold similarity.")
        # Nothing to train on: stop with a non-zero status.
        raise SystemExit(1)

    # CrossEntropyLoss expects targets in 0..C-1, so raw labels are remapped
    # (a no-op when they already are 0..C-1).
    classes, targets = encode_labels(labels)
    if classes != list(range(len(classes))):
        print(f"Label mapping (class index -> raw label): {dict(enumerate(classes))}")
    y = torch.tensor(targets, dtype=torch.long)
    data = Data(x=x, edge_index=edge_index, y=y)

    # ── MODEL ──
    class GCN(torch.nn.Module):
        """Two-layer graph convolutional network returning per-node class logits."""

        def __init__(self, in_channels, num_classes, hidden_channels=32):
            super().__init__()
            # Attribute names are kept as conv1/conv2 so the saved
            # state-dict keys stay the same.
            self.conv1 = GCNConv(in_channels, hidden_channels)
            self.conv2 = GCNConv(hidden_channels, num_classes)

        def forward(self, data):
            hidden = torch.relu(self.conv1(data.x, data.edge_index))
            return self.conv2(hidden, data.edge_index)

    model = GCN(in_channels=x.shape[1], num_classes=len(classes))
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    loss_fn = torch.nn.CrossEntropyLoss()

    # ── TRAINING ──
    # Full-batch training: every epoch is one forward/backward pass over the
    # whole graph, with the loss taken over all nodes.
    model.train()
    for epoch in range(100):
        optimizer.zero_grad()
        out = model(data)
        loss = loss_fn(out, y)
        loss.backward()
        optimizer.step()
        if epoch % 10 == 0:
            print(f"Epoch {epoch:3d} | Loss: {loss.item():.4f}")

    # ── SAVE ──
    # NOTE(review): only the model weights are written; the fitted
    # TfidfVectorizer is not persisted here — confirm inference rebuilds
    # identical features elsewhere.
    torch.save(model.state_dict(), "model_gnn.pt")
    print("✅ Model saved: model_gnn.pt")