vortexa64 committed · verified
Commit d958fc1 · 1 Parent(s): 352ec94

Update app.py

Files changed (1)
  1. app.py +113 -117
app.py CHANGED
@@ -1,128 +1,124 @@
 
 
 
  import numpy as np
  import gradio as gr

- # ======== LOAD DATASET ========
  with open("dataset.txt", "r", encoding="utf-8") as f:
      text = f.read().lower()

  chars = sorted(list(set(text)))
  vocab_size = len(chars)
- stoi = {ch: i for i, ch in enumerate(chars)}
- itos = {i: ch for i, ch in enumerate(chars)}

- def encode(s): return [stoi[c] for c in s if c in stoi]
  def decode(l): return "".join([itos[i] for i in l])

- data = np.array(encode(text), dtype=np.int32)
-
- # ======== MODEL SETUP ========
- n_hidden = 128
- Wxh = np.random.randn(n_hidden, vocab_size) * 0.01
- Whh = np.random.randn(n_hidden, n_hidden) * 0.01
- Why = np.random.randn(vocab_size, n_hidden) * 0.01
- bh = np.zeros((n_hidden, 1))
- by = np.zeros((vocab_size, 1))
-
- def softmax(x):
-     e = np.exp(x - np.max(x))
-     return e / np.sum(e)
-
- def sample(p, temperature=1.0):
-     p = np.log(p + 1e-9) / temperature
-     p = np.exp(p) / np.sum(np.exp(p))
-     return np.random.choice(len(p), p=p)
-
- # ======== TRAIN FUNCTION ========
- def train_model(epochs=50):
-     global Wxh, Whh, Why, bh, by
-     seq_len = 25
-     lr = 1e-1
-     for epoch in range(epochs):
-         idx = np.random.randint(0, len(data) - seq_len - 1)
-         inputs = data[idx:idx + seq_len]
-         targets = data[idx + 1:idx + seq_len + 1]
-
-         hs = {-1: np.zeros((n_hidden, 1))}
-         loss = 0
-         xs, ys, ps = {}, {}, {}
-
-         # Forward
-         for t in range(seq_len):
-             xs[t] = np.zeros((vocab_size, 1))
-             xs[t][inputs[t]] = 1
-             hs[t] = np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, hs[t - 1]) + bh)
-             ys[t] = np.dot(Why, hs[t]) + by
-             ps[t] = softmax(ys[t])
-             loss += -np.log(ps[t][targets[t], 0])
-
-         # Backward
-         dWxh, dWhh, dWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
-         dbh, dby = np.zeros_like(bh), np.zeros_like(by)
-         dhnext = np.zeros_like(hs[0])
-
-         for t in reversed(range(seq_len)):
-             dy = np.copy(ps[t])
-             dy[targets[t]] -= 1
-             dWhy += np.dot(dy, hs[t].T)
-             dby += dy
-             dh = np.dot(Why.T, dy) + dhnext
-             dhraw = (1 - hs[t] ** 2) * dh
-             dbh += dhraw
-             dWxh += np.dot(dhraw, xs[t].T)
-             dWhh += np.dot(dhraw, hs[t - 1].T)
-             dhnext = np.dot(Whh.T, dhraw)
-
-         for param, dparam in zip([Wxh, Whh, Why, bh, by],
-                                  [dWxh, dWhh, dWhy, dbh, dby]):
-             param -= lr * dparam
-
-     return f"✅ Training complete ({epochs} epochs)"
-
- # ======== GENERATE FUNCTION ========
- def generate_text(seed="halo ", length=100, temperature=0.8):
-     h = np.zeros((n_hidden, 1))
-     x = np.zeros((vocab_size, 1))
-     for c in seed:
-         if c in stoi:
-             x = np.zeros((vocab_size, 1))
-             x[stoi[c]] = 1
-             h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh)
-
-     out = seed
-     for _ in range(length):
-         y = np.dot(Why, h) + by
-         p = softmax(y)
-         ix = sample(p.ravel(), temperature)
-         x = np.zeros((vocab_size, 1))
-         x[ix] = 1
-         h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh)
-         out += itos[ix]
-
-     return out
-
- # ======== GRADIO UI ========
- with gr.Blocks() as demo:
-     gr.Markdown("## 🧠 Character-Level RNN Text Generator")
-     with gr.Row():
-         seed_input = gr.Textbox(label="Seed Text", value="aku pengen ")
-         length_input = gr.Slider(20, 300, value=100, step=10, label="Generated Length")
-         temp_input = gr.Slider(0.1, 1.5, value=0.8, step=0.1, label="Temperature")
-
-     gen_button = gr.Button("🎉 Generate Text")
-     output_text = gr.Textbox(label="Generated Output", lines=10)
-
-     with gr.Row():
-         epoch_slider = gr.Slider(10, 500, value=50, step=10, label="Training Epochs")
-         train_button = gr.Button("🧠 Train Model")
-
-     train_output = gr.Textbox(label="Training Status")
-
-     gen_button.click(fn=generate_text,
-                      inputs=[seed_input, length_input, temp_input],
-                      outputs=output_text)
-
-     train_button.click(fn=train_model,
-                        inputs=epoch_slider,
-                        outputs=train_output)
-
- demo.launch()
 
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
  import numpy as np
+ import math
  import gradio as gr

+ # ===== DATASET =====
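+ # Build a character-level vocabulary from the raw text; encode() maps any unseen character to index 0.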
  with open("dataset.txt", "r", encoding="utf-8") as f:
      text = f.read().lower()

  chars = sorted(list(set(text)))
  vocab_size = len(chars)
+ stoi = {ch:i for i,ch in enumerate(chars)}
+ itos = {i:ch for i,ch in enumerate(chars)}

+ def encode(s): return [stoi.get(c, 0) for c in s]
  def decode(l): return "".join([itos[i] for i in l])

+ # ===== GPT-Style Transformer Decoder =====
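+ # One decoder block: masked multi-head self-attention plus a position-wise feed-forward network, each wrapped in a post-norm residual connection.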
+ class GPTBlock(nn.Module):
+     def __init__(self, d_model, nhead, dim_feedforward, dropout):
+         super().__init__()
+         self.attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
+         self.ff = nn.Sequential(
+             nn.Linear(d_model, dim_feedforward),
+             nn.GELU(),
+             nn.Linear(dim_feedforward, d_model),
+             nn.Dropout(dropout),
+         )
+         self.ln1 = nn.LayerNorm(d_model)
+         self.ln2 = nn.LayerNorm(d_model)
+
+     def forward(self, x, mask=None):
+         attn_out, _ = self.attn(x, x, x, attn_mask=mask)
+         x = self.ln1(x + attn_out)
+         ff_out = self.ff(x)
+         x = self.ln2(x + ff_out)
+         return x
+
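+ # Full model: token embeddings plus learned positional embeddings, a stack of decoder blocks, a final LayerNorm, and a linear head over the vocabulary.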
+ class GPTModel(nn.Module):
+     def __init__(self, vocab_size, d_model=128, nhead=8, num_layers=4, dim_feedforward=512, max_len=5000, dropout=0.1):
+         super().__init__()
+         self.token_emb = nn.Embedding(vocab_size, d_model)
+         self.pos_emb = nn.Parameter(torch.zeros(1, max_len, d_model))
+         self.blocks = nn.ModuleList([GPTBlock(d_model, nhead, dim_feedforward, dropout) for _ in range(num_layers)])
+         self.ln_f = nn.LayerNorm(d_model)
+         self.head = nn.Linear(d_model, vocab_size)
+
+     def forward(self, x):
+         seq_len = x.size(1)
+         token_embeddings = self.token_emb(x)           # (batch, seq_len, d_model)
+         pos_embeddings = self.pos_emb[:, :seq_len, :]  # (1, seq_len, d_model)
+         x = token_embeddings + pos_embeddings
+         x = x.transpose(0, 1)  # for MultiheadAttention: (seq_len, batch, d_model)
+
+         # causal mask (upper triangular)
+         mask = torch.triu(torch.ones(seq_len, seq_len) * float('-inf'), diagonal=1).to(x.device)
+
+         for block in self.blocks:
+             x = block(x, mask)
+
+         x = x.transpose(0, 1)  # back to (batch, seq_len, d_model)
+         x = self.ln_f(x)
+         logits = self.head(x)  # (batch, seq_len, vocab_size)
+         return logits
+
+ # ===== TRAINING =====
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ model = GPTModel(vocab_size).to(device)
+ optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
+ criterion = nn.CrossEntropyLoss()
+
+ seq_len = 25
+ batch_size = 1
+ epochs = 300
+
+ data_tensor = torch.tensor(encode(text), dtype=torch.long)
+
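+ # Note: training runs once at startup; each "epoch" is a single optimizer step on one randomly sampled chunk of seq_len characters.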
+ for epoch in range(epochs):
+     model.train()
+     idx = np.random.randint(0, len(data_tensor) - seq_len - 1)
+     chunk = data_tensor[idx:idx+seq_len+1].unsqueeze(0).to(device)  # (1, seq_len+1)
+     input_seq = chunk[:, :-1]
+     target_seq = chunk[:, 1:]
+
+     optimizer.zero_grad()
+     logits = model(input_seq)
+     loss = criterion(logits.view(-1, vocab_size), target_seq.view(-1))
+     loss.backward()
+     optimizer.step()
+
+     if epoch % 50 == 0:
+         print(f"Epoch {epoch}, Loss: {loss.item():.4f}")
+
+ # ===== TEXT GENERATION =====
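+ # Autoregressive sampling: feed the growing sequence back through the model and sample the next character from the softmax over the last position.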
+ def generate_text(model, seed, max_len=100):
+     model.eval()
+     input_ids = torch.tensor(encode(seed), dtype=torch.long).unsqueeze(0).to(device)
+     generated = seed
+
+     with torch.no_grad():
+         for _ in range(max_len):
+             logits = model(input_ids)
+             probs = F.softmax(logits[0, -1], dim=-1).cpu().numpy()
+             next_id = np.random.choice(len(probs), p=probs)
+             generated += itos[next_id]
+             next_token = torch.tensor([[next_id]], device=device)
+             input_ids = torch.cat([input_ids, next_token], dim=1)
+
+     return generated
+
+ # ===== GRADIO CHAT =====
+ def chat_with_ai(inp):
+     # return only the newly generated text, without echoing the seed
+     return generate_text(model, inp, max_len=100)[len(inp):]
+
+ iface = gr.Interface(fn=chat_with_ai,
+                      inputs=gr.Textbox(lines=1, placeholder="Type your message..."),
+                      outputs="text",
+                      title="Chat AI Transformer GPT Style",
+                      description="Chat with a simple GPT-style Transformer model")
+
+ iface.launch()