"""
train_gnn.py — GNN training script for bot/sentiment detection.

Run it locally, NOT at app startup.

Pipeline: read ``data/train.csv`` (columns ``text`` and ``label``), turn the
texts into TF-IDF vectors, connect every pair of texts whose cosine
similarity exceeds a threshold, train a two-layer GCN on the resulting graph
and write the learned weights to ``model_gnn.pt``.
"""

# Training configuration (same values the script has always used).
TRAIN_CSV_PATH = "data/train.csv"
MODEL_PATH = "model_gnn.pt"
MAX_FEATURES = 300
SIMILARITY_THRESHOLD = 0.7
HIDDEN_CHANNELS = 32
LEARNING_RATE = 0.01
EPOCHS = 100
LOG_EVERY = 10


def build_undirected_edges(sim, threshold):
    """Return the edges of the similarity graph in both directions.

    Args:
        sim: Square, array-like similarity matrix; ``sim[i][j]`` is the
            similarity between node ``i`` and node ``j``.
        threshold: A pair ``(i, j)`` with ``i < j`` becomes an edge only when
            ``sim[i][j]`` is strictly greater than this value.

    Returns:
        Integer NumPy array of shape ``(2, 2 * E)`` where ``E`` is the number
        of qualifying pairs. The first ``E`` columns are the ``i -> j`` edges
        (``i < j``, row-major order); the last ``E`` columns are the matching
        ``j -> i`` edges. The shape is ``(2, 0)`` when no pair qualifies.
    """
    # Imported here so that importing this module stays dependency-free.
    import numpy as np

    above = np.asarray(sim) > threshold
    # k=1 keeps only the strict upper triangle: no self-pairs, and each
    # unordered pair is considered exactly once.
    src, dst = np.nonzero(np.triu(above, k=1))
    # Message passing treats edge_index as directed, so an undirected
    # similarity link has to be listed once per direction.
    return np.stack([np.concatenate([src, dst]), np.concatenate([dst, src])])


def main():
    """Train the GCN on the CSV data and save its state dict to disk."""
    # Heavy third-party imports live inside main() so that merely importing
    # this module never loads torch / torch_geometric / scikit-learn.
    import sys

    import pandas as pd
    import torch
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import cosine_similarity
    from torch_geometric.data import Data
    from torch_geometric.nn import GCNConv

    # ── LOAD DATA ──
    df = pd.read_csv(TRAIN_CSV_PATH)  # columns: text, label
    texts = df["text"].astype(str).tolist()
    labels = df["label"].astype(int).tolist()

    # ── TF-IDF FEATURES ──
    vectorizer = TfidfVectorizer(max_features=MAX_FEATURES)
    features = vectorizer.fit_transform(texts).toarray()
    x = torch.tensor(features, dtype=torch.float)

    # ── BUILD GRAPH ──
    edges = build_undirected_edges(
        cosine_similarity(features), SIMILARITY_THRESHOLD
    )
    if edges.shape[1] == 0:
        print("⚠️ Tidak ada edge yang terbentuk. Coba turunkan threshold similarity.")
        # Non-zero status: no graph could be built, so no model was trained.
        sys.exit(1)

    edge_index = torch.tensor(edges, dtype=torch.long)
    y = torch.tensor(labels, dtype=torch.long)
    data = Data(x=x, edge_index=edge_index, y=y)

    # ── MODEL ──
    class GCN(torch.nn.Module):
        """Two-layer graph convolutional node classifier."""

        def __init__(self, in_channels, hidden_channels, num_classes):
            super().__init__()
            # Attribute names are part of the saved state_dict keys.
            self.conv1 = GCNConv(in_channels, hidden_channels)
            self.conv2 = GCNConv(hidden_channels, num_classes)

        def forward(self, data):
            """Return per-node class logits for the graph in ``data``."""
            hidden = torch.relu(self.conv1(data.x, data.edge_index))
            return self.conv2(hidden, data.edge_index)

    # NOTE(review): the output width is the number of distinct labels, which
    # presumably relies on labels being encoded as 0..K-1 — confirm against
    # the dataset.
    model = GCN(x.shape[1], HIDDEN_CHANNELS, len(set(labels)))
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
    loss_fn = torch.nn.CrossEntropyLoss()

    # ── TRAINING ──
    # Full-batch training: every epoch runs the whole graph through the model.
    model.train()
    for epoch in range(EPOCHS):
        optimizer.zero_grad()
        out = model(data)
        loss = loss_fn(out, y)
        loss.backward()
        optimizer.step()
        if epoch % LOG_EVERY == 0:
            print(f"Epoch {epoch:3d} | Loss: {loss.item():.4f}")

    # ── SAVE ──
    torch.save(model.state_dict(), MODEL_PATH)
    print("✅ Model saved: model_gnn.pt")


if __name__ == "__main__":
    main()