File size: 2,271 Bytes
1a5fce3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
"""
train_gnn.py — Script training GNN untuk deteksi bot/sentiment.
Jalankan secara lokal, BUKAN saat startup app.
"""

def build_similarity_edges(sim, threshold=0.7):
    """Return an undirected COO edge list linking every pair of similar rows.

    Args:
        sim: Square ``(n, n)`` array-like of pairwise similarity scores. Only
            the strict upper triangle is read, so self-similarity on the
            diagonal never produces an edge.
        threshold: Rows ``i`` and ``j`` are connected when ``sim[i][j]`` is
            strictly greater than this value.

    Returns:
        An integer ``numpy.ndarray`` of shape ``(2, 2 * E)``, where ``E`` is
        the number of connected pairs. Each pair appears in both directions
        (``i -> j`` and ``j -> i``); the shape is ``(2, 0)`` when nothing
        exceeds the threshold.
    """
    import numpy as np  # imported lazily so a plain module import stays cheap

    # Vectorised replacement for the nested i < j Python loop.
    above = np.triu(np.asarray(sim) > threshold, k=1)
    src, dst = np.nonzero(above)
    # Message passing follows edge direction, so an undirected similarity
    # graph has to list every edge twice, once per direction.
    return np.stack([np.concatenate([src, dst]), np.concatenate([dst, src])])


def main(
    csv_path="data/train.csv",
    model_path="model_gnn.pt",
    *,
    threshold=0.7,
    hidden_dim=32,
    epochs=100,
    lr=0.01,
):
    """Train a two-layer GCN on a TF-IDF similarity graph and save its weights.

    Each CSV row becomes one graph node whose features are its TF-IDF vector;
    nodes are connected when the cosine similarity of their vectors exceeds
    ``threshold``. The model is trained full-batch on every node and its
    ``state_dict`` is written to ``model_path``.

    Args:
        csv_path: CSV file providing the ``text`` and ``label`` columns.
        model_path: Destination file for the trained ``state_dict``.
        threshold: Cosine-similarity cut-off used to create edges.
        hidden_dim: Width of the hidden GCN layer.
        epochs: Number of full-batch optimisation steps.
        lr: Adam learning rate.

    Raises:
        SystemExit: With status 1 when no pair of texts is similar enough to
            form an edge.
    """
    # Heavy third-party imports stay inside the function so that importing
    # this module has no side effects and does not load the ML stack.
    import pandas as pd
    import torch
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import cosine_similarity
    from torch_geometric.data import Data
    from torch_geometric.nn import GCNConv

    # ── LOAD DATA ──
    df = pd.read_csv(csv_path)
    texts = df["text"].astype(str).tolist()
    labels = df["label"].astype(int).tolist()

    # ── TF-IDF FEATURES ──
    # NOTE(review): the fitted vectorizer is not persisted here; inference
    # code presumably needs the same vocabulary — confirm how it is rebuilt.
    features = TfidfVectorizer(max_features=300).fit_transform(texts).toarray()
    x = torch.tensor(features, dtype=torch.float)
    y = torch.tensor(labels, dtype=torch.long)

    # ── BUILD GRAPH ──
    edges = build_similarity_edges(cosine_similarity(features), threshold)
    if edges.shape[1] == 0:
        print("⚠️  Tidak ada edge yang terbentuk. Coba turunkan threshold similarity.")
        # Non-zero status: the run produced no model, so callers must not
        # mistake it for success (the site-provided exit() returned 0).
        raise SystemExit(1)

    edge_index = torch.as_tensor(edges, dtype=torch.long)
    data = Data(x=x, edge_index=edge_index, y=y)

    # ── MODEL ──
    class GCN(torch.nn.Module):
        """Two GCNConv layers with a ReLU in between; returns class logits."""

        def __init__(self, in_dim, hidden, num_classes):
            super().__init__()
            # The attribute names conv1/conv2 become the state_dict key
            # prefixes, so they must stay stable for whoever loads the file.
            self.conv1 = GCNConv(in_dim, hidden)
            self.conv2 = GCNConv(hidden, num_classes)

        def forward(self, data):
            hidden = torch.relu(self.conv1(data.x, data.edge_index))
            return self.conv2(hidden, data.edge_index)

    # CrossEntropyLoss needs every target in [0, num_classes). Counting the
    # distinct labels undercounts when ids are not exactly 0..K-1, so size the
    # output layer from the largest label id instead.
    num_classes = max(labels) + 1
    model = GCN(x.shape[1], hidden_dim, num_classes)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = torch.nn.CrossEntropyLoss()

    # ── TRAINING ──
    model.train()  # the mode never changes during training; set it once
    for epoch in range(epochs):
        optimizer.zero_grad()
        loss = loss_fn(model(data), y)
        loss.backward()
        optimizer.step()
        if epoch % 10 == 0:
            print(f"Epoch {epoch:3d} | Loss: {loss.item():.4f}")

    # ── SAVE ──
    torch.save(model.state_dict(), model_path)
    print(f"✅ Model saved: {model_path}")


if __name__ == "__main__":
    main()