Spaces:
Sleeping
Sleeping
Update train_gnn.py
Browse files- train_gnn.py +70 -80
train_gnn.py
CHANGED
|
@@ -1,80 +1,70 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
import
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
x = torch.tensor(X, dtype=torch.float)
|
| 24 |
-
|
| 25 |
-
#
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
y
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
loss.backward()
|
| 73 |
-
optimizer.step()
|
| 74 |
-
|
| 75 |
-
print(f"Epoch {epoch} Loss: {loss.item():.4f}")
|
| 76 |
-
|
| 77 |
-
# =========================
|
| 78 |
-
# SAVE MODEL
|
| 79 |
-
# =========================
|
| 80 |
-
torch.save(model.state_dict(), "model_gnn.pt")
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
train_gnn.py — Script training GNN untuk deteksi bot/sentiment.
|
| 3 |
+
Jalankan secara lokal, BUKAN saat startup app.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
# Tunable settings, collected here so experiments do not require editing the
# body of the training routine.
DATA_PATH = "data/train.csv"     # CSV with columns: text, label
MODEL_PATH = "model_gnn.pt"      # where the trained state_dict is written
MAX_FEATURES = 300               # TF-IDF vocabulary size (= node feature dim)
SIMILARITY_THRESHOLD = 0.7       # cosine similarity above which two texts are linked
HIDDEN_CHANNELS = 32             # width of the hidden GCN layer
LEARNING_RATE = 0.01
NUM_EPOCHS = 100
LOG_EVERY = 10                   # print the loss every LOG_EVERY epochs


def main():
    """Train a two-layer GCN on a TF-IDF similarity graph and save its weights.

    Steps: read ``DATA_PATH``, turn every text into a TF-IDF vector (one graph
    node per row), connect pairs of nodes whose cosine similarity exceeds
    ``SIMILARITY_THRESHOLD``, train on all nodes with cross-entropy loss, and
    write the model ``state_dict`` to ``MODEL_PATH``.

    Raises:
        SystemExit: with status 1 when no pair of texts is similar enough to
            form an edge.
    """
    # Heavy third-party imports stay inside the entry point so that merely
    # importing this module does not load them.
    import numpy as np
    import pandas as pd
    import torch
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import cosine_similarity
    from torch_geometric.data import Data
    from torch_geometric.nn import GCNConv

    # ── LOAD DATA ──
    df = pd.read_csv(DATA_PATH)  # columns: text, label
    texts = df["text"].astype(str).tolist()
    labels = df["label"].astype(int).tolist()

    # ── TF-IDF FEATURES ──
    vectorizer = TfidfVectorizer(max_features=MAX_FEATURES)
    X = vectorizer.fit_transform(texts).toarray()
    x = torch.tensor(X, dtype=torch.float)
    # NOTE(review): only the model weights are saved below; the fitted
    # vectorizer is not persisted by this script. Inference presumably needs
    # the same vocabulary — confirm how the app rebuilds it.

    # ── BUILD GRAPH ──
    sim = cosine_similarity(X)
    # Strict upper triangle = every unordered pair (i, j) with i < j exactly
    # once, without self-pairs. argwhere yields them in row-major order, the
    # same order the former nested Python loops produced.
    pairs = np.argwhere(np.triu(sim, k=1) > SIMILARITY_THRESHOLD)

    if pairs.size == 0:
        print("⚠️ Tidak ada edge yang terbentuk. Coba turunkan threshold similarity.")
        # Exit with a non-zero status so callers/CI can detect the failure;
        # the bare exit() helper comes from `site` and reported success.
        raise SystemExit(1)

    one_way = torch.tensor(pairs, dtype=torch.long).t()
    # Similarity is symmetric, so the graph is undirected. PyG represents an
    # undirected edge as two directed ones; with only (i, j) present, node i
    # would never receive messages from node j.
    edge_index = torch.cat([one_way, one_way.flip(0)], dim=1).contiguous()

    y = torch.tensor(labels, dtype=torch.long)
    data = Data(x=x, edge_index=edge_index, y=y)

    # CrossEntropyLoss expects class indices in [0, C). Using max + 1 equals
    # the number of distinct labels when ids are contiguous from 0, and still
    # yields a valid output size when some id is missing from the data.
    num_classes = int(y.max().item()) + 1

    # ── MODEL ──
    class GCN(torch.nn.Module):
        """Two-layer graph convolutional network for node classification."""

        def __init__(self, in_channels, hidden_channels, out_channels):
            super().__init__()
            self.conv1 = GCNConv(in_channels, hidden_channels)
            self.conv2 = GCNConv(hidden_channels, out_channels)

        def forward(self, data):
            """Return per-node class logits for the graph in ``data``."""
            hidden = torch.relu(self.conv1(data.x, data.edge_index))
            return self.conv2(hidden, data.edge_index)

    model = GCN(x.shape[1], HIDDEN_CHANNELS, num_classes)
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
    loss_fn = torch.nn.CrossEntropyLoss()

    # ── TRAINING ──
    # Full-batch training on every node; there is no validation split here.
    model.train()
    for epoch in range(NUM_EPOCHS):
        optimizer.zero_grad()
        out = model(data)
        loss = loss_fn(out, y)
        loss.backward()
        optimizer.step()
        if epoch % LOG_EVERY == 0:
            print(f"Epoch {epoch:3d} | Loss: {loss.item():.4f}")

    # ── SAVE ──
    torch.save(model.state_dict(), MODEL_PATH)
    print("✅ Model saved: model_gnn.pt")


if __name__ == "__main__":
    main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|