ASomeoneWhoInterestedWithAI committed on
Commit
769e80b
·
verified ·
1 Parent(s): 402ca49

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +7 -94
README.md CHANGED
@@ -17,7 +17,8 @@ There's many space to experimenting like deeper architecture, another activation
17
 
18
  # Code
19
 
20
- ## Original Code
 
21
  import torch
22
  import torch.nn as nn
23
 
@@ -54,8 +55,8 @@ class LookThem(nn.Module):
54
  out_mod2_j = self.mod2[j](x[:, j]) + 1e-7
55
  out_mod2_i = self.mod2[i](x[:, i]) + 1e-7
56
 
57
- compare = self.mod1[i](x[:, i]) / out_mod2_j
58
- compare2 = self.mod1[j](x[:, j]) / out_mod2_i
59
 
60
  # Transformasi hasil interaksi
61
  interaksi = (self.transform[j](compare) * x[:, i] + self.transform[j](compare2) * x[:, j]) / 2
@@ -74,97 +75,9 @@ class LookThem(nn.Module):
74
  x_new = torch.cat(new_x, dim=1)
75
 
76
  return self.mlp(x_new)
77
-
78
  ## Vectorized
79
- import torch
80
- import torch.nn as nn
81
- import math
82
-
83
- class LookThemVectorized(nn.Module):
84
- def __init__(self, num_tokens=5, in_features=1, hidden_dim=5):
85
- super(LookThemVectorized, self).__init__()
86
-
87
- self.num_tokens = num_tokens
88
- self.in_features = in_features
89
- self.hidden_dim = hidden_dim
90
-
91
- # 1. Batched Parameters untuk Mod1
92
- # Shape: [num_tokens, in_features, hidden_dim]
93
- self.mod1_w1 = nn.Parameter(torch.randn(num_tokens, in_features, hidden_dim))
94
- self.mod1_b1 = nn.Parameter(torch.zeros(num_tokens, hidden_dim))
95
- # Shape: [num_tokens, hidden_dim, 1]
96
- self.mod1_w2 = nn.Parameter(torch.randn(num_tokens, hidden_dim, 1))
97
- self.mod1_b2 = nn.Parameter(torch.zeros(num_tokens, 1))
98
-
99
- # 2. Batched Parameters untuk Mod2
100
- self.mod2_w1 = nn.Parameter(torch.randn(num_tokens, in_features, hidden_dim))
101
- self.mod2_b1 = nn.Parameter(torch.zeros(num_tokens, hidden_dim))
102
- self.mod2_w2 = nn.Parameter(torch.randn(num_tokens, hidden_dim, 1))
103
- self.mod2_b2 = nn.Parameter(torch.zeros(num_tokens, 1))
104
-
105
- # 3. Batched Parameters untuk Transformasi Linear j
106
- self.trans_w = nn.Parameter(torch.randn(num_tokens, 1, 1))
107
- self.trans_b = nn.Parameter(torch.zeros(num_tokens, 1))
108
-
109
- # 4. MLP Final disesuaikan dengan jumlah token yang dinamis
110
- self.mlp = nn.Sequential(
111
- nn.Linear(num_tokens, num_tokens * 2),
112
- nn.ReLU(),
113
- nn.Linear(num_tokens * 2, num_tokens)
114
- )
115
-
116
- self._init_weights()
117
-
118
- def _init_weights(self):
119
- # Inisialisasi Kaiming Uniform agar training stabil
120
- for w in [self.mod1_w1, self.mod2_w1]:
121
- nn.init.kaiming_uniform_(w, a=math.sqrt(5))
122
- for w in [self.mod1_w2, self.mod2_w2, self.trans_w]:
123
- nn.init.kaiming_uniform_(w, a=math.sqrt(5))
124
-
125
- def forward(self, x):
126
- # x shape sekarang: [Batch, num_tokens, in_features]
127
- batch_size = x.size(0)
128
- N = self.num_tokens
129
-
130
- # 1. Jalankan Mod1 dan Mod2 secara paralel untuk semua token
131
- h1 = torch.einsum('bti,tij->btj', x, self.mod1_w1) + self.mod1_b1
132
- out_m1 = torch.einsum('btj,tjk->btk', torch.relu(h1), self.mod1_w2) + self.mod1_b2 # [Batch, N, 1]
133
-
134
- h2 = torch.einsum('bti,tij->btj', x, self.mod2_w1) + self.mod2_b1
135
- out_m2 = torch.einsum('btj,tjk->btk', torch.relu(h2), self.mod2_w2) + self.mod2_b2 # [Batch, N, 1]
136
-
137
- # 2. Hitung Rasio Kombinasi i dan j via Broadcasting
138
- out_m2_safe = out_m2 + 1e-7
139
- compare = out_m1.unsqueeze(2) / out_m2_safe.unsqueeze(1) # [Batch, N, N, 1]
140
- compare2 = out_m1.unsqueeze(1) / out_m2_safe.unsqueeze(2) # [Batch, N, N, 1]
141
-
142
- # 3. Transformasikan hasil berdasar indeks j
143
- # View khusus untuk bias agar nge-broadcast pas di koordinat j
144
- bias_reshaped = self.trans_b.view(1, 1, N, 1)
145
- trans_compare = torch.einsum('bije,jef->bijf', compare, self.trans_w) + bias_reshaped
146
- trans_compare2 = torch.einsum('bije,jef->bijf', compare2, self.trans_w) + bias_reshaped
147
-
148
- # 4. Hitung Interaksi Berbobot Fitur menggunakan fitur asli dari x
149
- # x.unsqueeze(2) -> fitur token i, x.unsqueeze(1) -> fitur token j
150
- interaksi = (trans_compare * x.unsqueeze(2) + trans_compare2 * x.unsqueeze(1)) / 2 # [Batch, N, N, in_features]
151
-
152
- # 5. Buat Masking untuk mengabaikan Diri Sendiri (i == j)
153
- mask = 1.0 - torch.eye(N, device=x.device)
154
- interaksi_masked = interaksi * mask.view(1, N, N, 1) # Sesuai ukuran matriks interaksi
155
-
156
- # 6. Rata-ratakan interaksi (dibagi N - 1 karena diri sendiri di-skip)
157
- # Kita lakukan sum pada dimensi j (dim=2), lalu dirata-rata ke dimensi fitur terdalam
158
- out_i = interaksi_masked.sum(dim=2) / (N - 1.0) # [Batch, N, in_features]
159
-
160
- # 7. Siapkan tensor untuk masuk ke MLP final
161
- # Kita rata-ratakan dimensi in_features agar menjadi [Batch, N] sebelum masuk MLP
162
- x_new = out_i.mean(dim=-1)
163
-
164
- return self.mlp(x_new)
165
-
166
- ## Enhanced code (used in Tiny-ImageNet training)
167
-
168
  class LookThemLayer(nn.Module):
169
  def __init__(self, num_tokens, in_features, hidden_dim):
170
  super(LookThemLayer, self).__init__()
@@ -218,7 +131,7 @@ class LookThemLayer(nn.Module):
218
  interaksi_masked = interaksi * mask.view(1, N, N, 1)
219
 
220
  return interaksi_masked.sum(dim=2) / (N - 1.0)
221
-
222
 
223
  ## Colab notebook in this repo
224
 
 
17
 
18
  # Code
19
 
20
+ ## Base Code
21
+ ```
22
  import torch
23
  import torch.nn as nn
24
 
 
55
  out_mod2_j = self.mod2[j](x[:, j]) + 1e-7
56
  out_mod2_i = self.mod2[i](x[:, i]) + 1e-7
57
 
58
+ compare = torch.tanh(self.mod1[i](x[:, i]) / out_mod2_j)
59
+ compare2 = torch.tanh(self.mod1[j](x[:, j]) / out_mod2_i)
60
 
61
  # Transformasi hasil interaksi
62
  interaksi = (self.transform[j](compare) * x[:, i] + self.transform[j](compare2) * x[:, j]) / 2
 
75
  x_new = torch.cat(new_x, dim=1)
76
 
77
  return self.mlp(x_new)
78
+ ```
79
  ## Vectorized
80
+ ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  class LookThemLayer(nn.Module):
82
  def __init__(self, num_tokens, in_features, hidden_dim):
83
  super(LookThemLayer, self).__init__()
 
131
  interaksi_masked = interaksi * mask.view(1, N, N, 1)
132
 
133
  return interaksi_masked.sum(dim=2) / (N - 1.0)
134
+ ```
135
 
136
  ## Colab notebook in this repo
137