Spaces:
Sleeping
Sleeping
"""
train_gnn.py — GNN training script for bot/sentiment detection.

Run this locally, NOT at app startup.
"""

# A pair of texts is linked in the graph only when their cosine similarity
# is strictly greater than this value.
SIMILARITY_THRESHOLD = 0.7


def build_edge_pairs(sim, threshold=SIMILARITY_THRESHOLD):
    """Return graph edges for every pair of samples more similar than *threshold*.

    Args:
        sim: Square pairwise-similarity matrix (array-like); ``sim[i][j]`` is
            the similarity between samples ``i`` and ``j``.
        threshold: A pair is connected only when its similarity is strictly
            greater than this value.

    Returns:
        A list of ``[source, target]`` index pairs. Every connected pair
        ``i < j`` is listed first as ``[i, j]`` (row-major order) and then
        again, in a second half of the list, reversed as ``[j, i]``.
        Self-pairs are never included. The list is empty when no pair
        exceeds the threshold.
    """
    import numpy as np  # local import keeps importing this module cheap

    above = np.asarray(sim) > threshold
    # The strict upper triangle visits each unordered pair exactly once and
    # skips the diagonal (a sample is always fully similar to itself).
    rows, cols = np.nonzero(np.triu(above, k=1))
    forward = np.stack([rows, cols], axis=1)
    # Message passing follows edge direction, so an undirected similarity
    # graph must contain both (i, j) and (j, i).
    return np.concatenate([forward, forward[:, ::-1]]).tolist()


def main():
    """Train a two-layer GCN on ``data/train.csv`` and save it to ``model_gnn.pt``.

    Reads the ``text`` and ``label`` columns, turns texts into TF-IDF
    features, links similar texts into a graph, trains for 100 epochs on all
    nodes, and writes the model's ``state_dict``.

    Raises:
        SystemExit: With status 1 when no pair of texts is similar enough to
            form an edge.
    """
    # Heavy third-party imports live here so that merely importing this
    # module does not load them.
    import pandas as pd
    import torch
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import cosine_similarity
    from torch_geometric.data import Data
    from torch_geometric.nn import GCNConv

    # -- Load data --
    df = pd.read_csv("data/train.csv")  # columns: text, label
    texts = df["text"].astype(str).tolist()
    labels = df["label"].astype(int).tolist()

    # -- TF-IDF features --
    # NOTE(review): the fitted vectorizer is not saved anywhere; whatever
    # loads model_gnn.pt presumably needs the same vocabulary — confirm.
    vectorizer = TfidfVectorizer(max_features=300)
    features = vectorizer.fit_transform(texts).toarray()
    x = torch.tensor(features, dtype=torch.float)

    # -- Build graph --
    edges = build_edge_pairs(cosine_similarity(features))
    if not edges:
        print("⚠️ Tidak ada edge yang terbentuk. Coba turunkan threshold similarity.")
        # Non-zero status: no model was produced, so callers must see a failure.
        raise SystemExit(1)
    edge_index = torch.tensor(edges, dtype=torch.long).t().contiguous()
    y = torch.tensor(labels, dtype=torch.long)
    data = Data(x=x, edge_index=edge_index, y=y)

    # -- Model --
    class GCN(torch.nn.Module):
        """Two-layer graph convolutional network returning per-node logits."""

        def __init__(self, in_dim, num_classes):
            super().__init__()
            self.conv1 = GCNConv(in_dim, 32)
            self.conv2 = GCNConv(32, num_classes)

        def forward(self, data):
            hidden = torch.relu(self.conv1(data.x, data.edge_index))
            return self.conv2(hidden, data.edge_index)

    # NOTE(review): the class count is the number of distinct labels, which
    # only matches CrossEntropyLoss targets if labels are exactly 0..k-1 —
    # verify against the dataset.
    model = GCN(x.shape[1], len(set(labels)))
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    loss_fn = torch.nn.CrossEntropyLoss()

    # -- Training --
    model.train()  # the mode never changes, so set it once
    for epoch in range(100):
        optimizer.zero_grad()
        loss = loss_fn(model(data), y)
        loss.backward()
        optimizer.step()
        if epoch % 10 == 0:
            print(f"Epoch {epoch:3d} | Loss: {loss.item():.4f}")

    # -- Save --
    torch.save(model.state_dict(), "model_gnn.pt")
    print("✅ Model saved: model_gnn.pt")


if __name__ == "__main__":
    main()