muooon committed on
Commit
6ba385b
·
verified ·
1 Parent(s): 273c585

Upload 4 files

Browse files
Files changed (5) hide show
  1. .gitattributes +1 -0
  2. README.md +4 -0
  3. README_JA.md +5 -1
  4. bpc_mask.png +3 -0
  5. drna.py +20 -4
.gitattributes ADDED
@@ -0,0 +1 @@
 
 
1
+ bpc_mask.png filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -155,8 +155,12 @@ The previously static knowledge (existing weights) begins to synchronize with th
155
 
156
  BPC Comparison Chart
157
 
 
158
  <img width="800" alt="bpc_only" src="bpc_only.png" />
159
 
 
 
 
160
  ---
161
 
162
  License:
 
155
 
156
  BPC Comparison Chart
157
 
158
+ non-mask
159
  <img width="800" alt="bpc_only" src="bpc_only.png" />
160
 
161
+ use-mask
162
+ <img width="800" alt="bpc_mask" src="bpc_mask.png" />
163
+
164
  ---
165
 
166
  License:
README_JA.md CHANGED
@@ -146,10 +146,14 @@ class DRNA_ResonantBlock(nn.Module):
146
 
147
  ---
148
 
149
- BPC 比較図
150
 
 
151
  <img width="800" alt="bpc_only" src="bpc_only.png" />
152
 
 
 
 
153
  ---
154
 
155
  ライセンス:
 
146
 
147
  ---
148
 
149
+ BPC 比較図
150
 
151
+ non-mask
152
  <img width="800" alt="bpc_only" src="bpc_only.png" />
153
 
154
+ use-mask
155
+ <img width="800" alt="bpc_mask" src="bpc_mask.png" />
156
+
157
  ---
158
 
159
  ライセンス:
bpc_mask.png ADDED

Git LFS Details

  • SHA256: 8f22245cc3d74d38638bc8949c25c7919e076402ac3b25d85563efa84482c243
  • Pointer size: 131 Bytes
  • Size of remote file: 107 kB
drna.py CHANGED
@@ -4,7 +4,7 @@ import torch.nn.functional as F
4
  import math
5
 
6
  '''
7
- D‑RNA: Dual‑Helix Resonance Neural Architecture (DRNA) 260420
8
  Transformerの全接続性を継承しつつ、二重らせん(Dual-Helix)構造による
9
  「共鳴収縮」(Resonant Contraction)を物理的に再現したニューラルアーキテクチャです
10
  螺旋の同期: Attention(文脈の回想)とMLP(知識の定着)を直列に配置し、情報を一段ずつ絞り込む
@@ -56,7 +56,7 @@ class DRNA_Block(nn.Module):
56
  self.norm2 = nn.LayerNorm(d_model)
57
  self.dropout = nn.Dropout(dropout)
58
 
59
- def forward(self, x, cos, sin):
60
  b, s, d = x.shape
61
 
62
  # --- らせんA (Attention Resonance) ---
@@ -67,6 +67,12 @@ class DRNA_Block(nn.Module):
67
 
68
  # Scaled Dot-Product Attention
69
  attn = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(self.d_head))
 
 
 
 
 
 
70
  attn = F.softmax(attn, dim=-1)
71
 
72
  a_out = (attn @ v).transpose(1, 2).reshape(b, s, d)
@@ -93,12 +99,22 @@ class DRNA_Model(nn.Module):
93
 
94
  self.output_head = nn.Linear(d_model, vocab_size)
95
 
96
- def forward(self, x):
 
 
 
 
 
 
 
 
 
97
  cos, sin = self.rope(x, x.size(1))
98
  x = self.embed(x)
99
 
100
  for layer in self.layers:
101
- x = layer(x, cos, sin)
 
102
 
103
  return self.output_head(x)
104
 
 
4
  import math
5
 
6
  '''
7
+ D‑RNA: Dual‑Helix Resonance Neural Architecture (DRNA) 260422
8
  Transformerの全接続性を継承しつつ、二重らせん(Dual-Helix)構造による
9
  「共鳴収縮」(Resonant Contraction)を物理的に再現したニューラルアーキテクチャです
10
  螺旋の同期: Attention(文脈の回想)とMLP(知識の定着)を直列に配置し、情報を一段ずつ絞り込む
 
56
  self.norm2 = nn.LayerNorm(d_model)
57
  self.dropout = nn.Dropout(dropout)
58
 
59
+ def forward(self, x, cos, sin, mask=None):
60
  b, s, d = x.shape
61
 
62
  # --- らせんA (Attention Resonance) ---
 
67
 
68
  # Scaled Dot-Product Attention
69
  attn = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(self.d_head))
70
+
71
+ # マスク適用(ブロードキャスト対応)
72
+ if mask is not None:
73
+ # mask形状: (s, s) 又は (b, n_heads, s, s) 等に対応可能
74
+ attn = attn + mask
75
+
76
  attn = F.softmax(attn, dim=-1)
77
 
78
  a_out = (attn @ v).transpose(1, 2).reshape(b, s, d)
 
99
 
100
  self.output_head = nn.Linear(d_model, vocab_size)
101
 
102
+ def forward(self, x, mask=None):
103
+ b, s = x.shape
104
+
105
+ # もし外部からマスクが与えられず、かつGPT的な動作をさせたい場合
106
+ # ここでは「汎用GPT型」として、デフォルトで因果マスクを生成するようにします
107
+ if mask is None:
108
+ # 未来を隠すマスク (右上三角形が-inf)
109
+ # 形状: (s, s)
110
+ mask = torch.triu(torch.ones(s, s, device=x.device) * float('-inf'), diagonal=1)
111
+
112
  cos, sin = self.rope(x, x.size(1))
113
  x = self.embed(x)
114
 
115
  for layer in self.layers:
116
+ # 各層に共通のマスクを伝播
117
+ x = layer(x, cos, sin, mask=mask)
118
 
119
  return self.output_head(x)
120