noranisa committed on
Commit
1a5fce3
·
verified ·
1 Parent(s): c37360f

Update train_gnn.py

Browse files
Files changed (1) hide show
  1. train_gnn.py +70 -80
train_gnn.py CHANGED
@@ -1,80 +1,70 @@
1
- import torch
2
- from torch_geometric.data import Data
3
- from torch_geometric.nn import GCNConv
4
- from sklearn.feature_extraction.text import TfidfVectorizer
5
- from sklearn.metrics.pairwise import cosine_similarity
6
- import pandas as pd
7
- import numpy as np
8
-
9
- # =========================
10
- # LOAD DATA
11
- # =========================
12
- df = pd.read_csv("data/train.csv") # kolom: text, label
13
-
14
- texts = df["text"].astype(str).tolist()
15
- labels = df["label"].astype(int).tolist()
16
-
17
- # =========================
18
- # TF-IDF → FEATURE NODE
19
- # =========================
20
- vectorizer = TfidfVectorizer(max_features=300)
21
- X = vectorizer.fit_transform(texts).toarray()
22
-
23
- x = torch.tensor(X, dtype=torch.float)
24
-
25
- # =========================
26
- # BUILD GRAPH (SIMILARITY)
27
- # =========================
28
- sim = cosine_similarity(X)
29
-
30
- edges = []
31
- for i in range(len(texts)):
32
- for j in range(i+1, len(texts)):
33
- if sim[i][j] > 0.7:
34
- edges.append([i, j])
35
-
36
- edge_index = torch.tensor(edges).t().contiguous()
37
-
38
- y = torch.tensor(labels, dtype=torch.long)
39
-
40
- data = Data(x=x, edge_index=edge_index, y=y)
41
-
42
- # =========================
43
- # MODEL GNN
44
- # =========================
45
- class GCN(torch.nn.Module):
46
- def __init__(self):
47
- super().__init__()
48
- self.conv1 = GCNConv(x.shape[1], 32)
49
- self.conv2 = GCNConv(32, 2)
50
-
51
- def forward(self, data):
52
- x, edge_index = data.x, data.edge_index
53
- x = self.conv1(x, edge_index)
54
- x = torch.relu(x)
55
- x = self.conv2(x, edge_index)
56
- return x
57
-
58
- model = GCN()
59
- optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
60
- loss_fn = torch.nn.CrossEntropyLoss()
61
-
62
- # =========================
63
- # TRAINING
64
- # =========================
65
- for epoch in range(50):
66
- model.train()
67
- optimizer.zero_grad()
68
-
69
- out = model(data)
70
- loss = loss_fn(out, y)
71
-
72
- loss.backward()
73
- optimizer.step()
74
-
75
- print(f"Epoch {epoch} Loss: {loss.item():.4f}")
76
-
77
- # =========================
78
- # SAVE MODEL
79
- # =========================
80
- torch.save(model.state_dict(), "model_gnn.pt")
 
1
+ """
2
+ train_gnn.py — Script training GNN untuk deteksi bot/sentiment.
3
+ Jalankan secara lokal, BUKAN saat startup app.
4
+ """
5
+
6
+ if __name__ == "__main__":
7
+ import torch
8
+ import pandas as pd
9
+ import numpy as np
10
+ from torch_geometric.data import Data
11
+ from torch_geometric.nn import GCNConv
12
+ from sklearn.feature_extraction.text import TfidfVectorizer
13
+ from sklearn.metrics.pairwise import cosine_similarity
14
+
15
+ # ── LOAD DATA ──
16
+ df = pd.read_csv("data/train.csv") # kolom: text, label
17
+ texts = df["text"].astype(str).tolist()
18
+ labels = df["label"].astype(int).tolist()
19
+
20
+ # ── TF-IDF FEATURES ──
21
+ vectorizer = TfidfVectorizer(max_features=300)
22
+ X = vectorizer.fit_transform(texts).toarray()
23
+ x = torch.tensor(X, dtype=torch.float)
24
+
25
+ # ── BUILD GRAPH ──
26
+ sim = cosine_similarity(X)
27
+ edges = []
28
+ for i in range(len(texts)):
29
+ for j in range(i + 1, len(texts)):
30
+ if sim[i][j] > 0.7:
31
+ edges.append([i, j])
32
+
33
+ if not edges:
34
+ print("⚠️ Tidak ada edge yang terbentuk. Coba turunkan threshold similarity.")
35
+ exit()
36
+
37
+ edge_index = torch.tensor(edges, dtype=torch.long).t().contiguous()
38
+ y = torch.tensor(labels, dtype=torch.long)
39
+ data = Data(x=x, edge_index=edge_index, y=y)
40
+
41
+ # ── MODEL ──
42
+ class GCN(torch.nn.Module):
43
+ def __init__(self):
44
+ super().__init__()
45
+ self.conv1 = GCNConv(x.shape[1], 32)
46
+ self.conv2 = GCNConv(32, len(set(labels)))
47
+
48
+ def forward(self, data):
49
+ xd, ei = data.x, data.edge_index
50
+ xd = torch.relu(self.conv1(xd, ei))
51
+ return self.conv2(xd, ei)
52
+
53
+ model = GCN()
54
+ optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
55
+ loss_fn = torch.nn.CrossEntropyLoss()
56
+
57
+ # ── TRAINING ──
58
+ for epoch in range(100):
59
+ model.train()
60
+ optimizer.zero_grad()
61
+ out = model(data)
62
+ loss = loss_fn(out, y)
63
+ loss.backward()
64
+ optimizer.step()
65
+ if epoch % 10 == 0:
66
+ print(f"Epoch {epoch:3d} | Loss: {loss.item():.4f}")
67
+
68
+ # ── SAVE ──
69
+ torch.save(model.state_dict(), "model_gnn.pt")
70
+ print("✅ Model saved: model_gnn.pt")