Spaces:
Sleeping
Sleeping
File size: 2,271 Bytes
"""
train_gnn.py — GNN training script for bot/sentiment detection.
Run this locally, NOT at app startup.
"""
def build_edge_pairs(sim, threshold=0.7):
    """Return graph edges for every pair of rows whose similarity exceeds a threshold.

    Args:
        sim: Square similarity matrix (anything ``numpy.asarray`` accepts).
            Only the strict upper triangle is read, so ``sim[i][j]`` with
            ``i < j`` decides whether samples ``i`` and ``j`` are connected.
        threshold: Strict lower bound; a pair is connected only when
            ``sim[i][j] > threshold``.

    Returns:
        An integer array of shape ``(2 * k, 2)``: the ``k`` qualifying pairs
        ``[i, j]`` (``i < j``, row-major order) followed by the same pairs
        reversed as ``[j, i]``. The array is empty, shape ``(0, 2)``, when no
        pair qualifies.
    """
    # Imported here so that importing this module stays dependency-free.
    import numpy as np

    # k=1 drops the diagonal (self-similarity) and the mirrored lower half.
    upper = np.argwhere(np.triu(np.asarray(sim) > threshold, k=1))
    # PyTorch Geometric treats edge_index as directed, so an undirected
    # similarity link must be listed in both directions; otherwise messages
    # would only ever flow from the lower-numbered node to the higher one.
    return np.concatenate([upper, upper[:, ::-1]], axis=0)


def main(
    *,
    csv_path="data/train.csv",
    model_path="model_gnn.pt",
    threshold=0.7,
    epochs=100,
):
    """Train a two-layer GCN on a TF-IDF similarity graph and save its weights.

    Args:
        csv_path: CSV file with a ``text`` column and an integer ``label`` column.
        model_path: Destination file for the trained model's ``state_dict``.
        threshold: Cosine-similarity cut-off used to connect two samples.
        epochs: Number of full-graph training steps.

    Raises:
        SystemExit: With status 1 when no pair of samples is similar enough
            to form an edge, so no model is trained or saved.
    """
    # Heavy dependencies are imported lazily: this script is meant to be run
    # by hand, and importing the module must not load them.
    import torch
    import pandas as pd
    from torch_geometric.data import Data
    from torch_geometric.nn import GCNConv
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import cosine_similarity

    # --- Load data (columns: text, label) ---
    df = pd.read_csv(csv_path)
    texts = df["text"].astype(str).tolist()
    labels = df["label"].astype(int).tolist()

    # --- TF-IDF features ---
    vectorizer = TfidfVectorizer(max_features=300)
    features = vectorizer.fit_transform(texts).toarray()
    x = torch.tensor(features, dtype=torch.float)

    # --- Build graph ---
    pairs = build_edge_pairs(cosine_similarity(features), threshold)
    if len(pairs) == 0:
        print("⚠️ Tidak ada edge yang terbentuk. Coba turunkan threshold similarity.")
        # exit() is an interactive helper injected by the site module; raise
        # SystemExit directly and report failure through the exit status.
        raise SystemExit(1)
    edge_index = torch.tensor(pairs, dtype=torch.long).t().contiguous()
    y = torch.tensor(labels, dtype=torch.long)
    data = Data(x=x, edge_index=edge_index, y=y)

    # --- Model ---
    class GCN(torch.nn.Module):
        """Two-layer graph convolutional classifier."""

        def __init__(self, in_channels, hidden_channels, num_classes):
            super().__init__()
            self.conv1 = GCNConv(in_channels, hidden_channels)
            self.conv2 = GCNConv(hidden_channels, num_classes)

        def forward(self, data):
            """Return per-node class logits for the graph in *data*."""
            hidden = torch.relu(self.conv1(data.x, data.edge_index))
            return self.conv2(hidden, data.edge_index)

    # CrossEntropyLoss needs every target to be a valid class index, so size
    # the output layer by the largest label id rather than by the number of
    # distinct labels (the two agree when labels are 0..K-1).
    num_classes = max(labels) + 1
    model = GCN(x.shape[1], 32, num_classes)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    loss_fn = torch.nn.CrossEntropyLoss()

    # --- Training ---
    model.train()
    for epoch in range(epochs):
        optimizer.zero_grad()
        out = model(data)
        loss = loss_fn(out, y)
        loss.backward()
        optimizer.step()
        if epoch % 10 == 0:
            print(f"Epoch {epoch:3d} | Loss: {loss.item():.4f}")

    # --- Save ---
    torch.save(model.state_dict(), model_path)
    print(f"✅ Model saved: {model_path}")


if __name__ == "__main__":
    main()