muooon commited on
Commit
273c585
·
verified ·
1 Parent(s): ecb6e44

Upload 6 files

Browse files
Files changed (6) hide show
  1. .gitignore +30 -0
  2. LICENSE +201 -0
  3. README.md +169 -0
  4. README_JA.md +160 -0
  5. bpc_only.png +0 -0
  6. drna.py +108 -0
.gitignore ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Notepad++ の自動保存フォルダ
2
+ nppBackup/
3
+ *.log
4
+ *.bak
5
+
6
+ # Python関連
7
+ __pycache__/
8
+ *.pyc
9
+ *.pyo
10
+ *.log
11
+ *.bak
12
+
13
+ # 機密情報
14
+ .env
15
+ *.env
16
+ settings_local.py
17
+
18
+ # IDE・エディタ
19
+ .vscode/
20
+ .idea/
21
+ *.sublime-project
22
+ *.sublime-workspace
23
+
24
+ # OSごとの不要ファイル
25
+ .DS_Store
26
+ Thumbs.db
27
+
28
+ # その他一時ファイル
29
+ *~
30
+ *.tmp
LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
README.md ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ language:
4
+ - en
5
+ - ja
6
+ tags:
7
+ - machine-learning
8
+ - deep-learning
9
+ - transformer
10
+ - architecture-design
11
+ - adaptive-algorithms
12
+ - resonant-contraction
13
+ - resonant-projection-field
14
+ ---
15
+ # D-RNA: Dual-Helix Resonance Neural Architecture (DRNA)
16
+
17
+ D-RNA is a new neural architecture centered on a dual helix structure and a rotation field produced by RoPE.
18
+
19
+ In this architecture, Attention and MLP are synchronized into a dual helix, and information is holographically compressed through Resonant Contraction.
20
+ This method rearranges sparse representations into dense ones to achieve high expressiveness using the depth‑direction structure alone, without increasing the number of dimensions.
21
+ A key feature of this approach is its ability to preserve the full connectivity of the Transformer architecture while suppressing catastrophic forgetting and retaining subtle fluctuations and phase information.
22
+
23
+ ---
24
+
25
+ ### Features
26
+ High structural compatibility: It has the exact same input–output shape as a standard Transformer Block, allowing it to be smoothly substituted as the core of an architecture.
27
+ Resonant Contraction: By synchronizing Attention and the MLP in a double‑helix pattern and converging information into a phase field, it dramatically increases representational density.
28
+ Depth as an alternative to dimensionality: The spiral rotation (depth‑wise operations) compensates for limited dimensionality and enables holographic information retention without increasing parameter count.
29
+ Excellent learning efficiency: The spiral‑based information attraction (synchronization) achieves astonishing early convergence with far fewer steps than a Transformer.
30
+ Fine‑grained phase preservation: The rotational field powered by RoPE preserves subtle fluctuations and relative contextual relationships that are often lost in conventional architectures.
31
+ Re‑synchronization of knowledge: Existing weights can be transplanted as initialization and gently adapted to the spiral phase with a low learning rate, allowing existing intelligence to be evolved or overwritten into the D-RNA structure.
32
+
33
+
34
+ ### Notes
35
+ Optimization of learning rate (LR):
36
+ Because D-RNA synchronizes information extremely quickly through Resonant Contraction, it converges sufficiently — and rapidly — even with a lower learning rate compared to a standard Transformer.
37
+ If the LR is set too high, the resonance may be excessively amplified and cause oscillation, so starting with a modest LR is recommended.
38
+ Synergistic gradient effects:
39
+ Since Attention (recall) and the MLP (memory) are synchronized in a double‑helix sequence, the “settling” of weights from a single update is very strong.
40
+ This is an advantage for fast convergence, but it also means that careful updates are key to stability.
41
+ Parameter commonality:
42
+ Hyperparameters such as weight initialization seeds and batch size can be inherited directly from standard Transformer settings.
43
+
44
+ ---
45
+
46
+ ### Conceptual Diagram
47
+
48
+ ```
49
+ Synchronizing “searching” (Attention) and “knowing” (MLP) in the phase of a spiral.
50
+
51
+ RoPE Rotation Field (Phase-Preserving)
52
+ Holographic Compression: Turning Sparse into Dense
53
+
54
+ A M
55
+ \ /
56
+ \ / ← This is Resonance
57
+ / \ Synchronization occurs naturally through the seed
58
+ / \ Naturally, meaning emerges through a chain of synchronicities
59
+ A M
60
+
61
+ Repeats in the depth direction to form a dual helix
62
+ (acts as a substitute for increasing dimensionality)
63
+ ```
64
+ ---
65
+
66
+ ### Minimal Block
67
+
68
+ ```python
69
+ class ResonantBlock(nn.Module):
70
+ def __init__(self, dim, n_heads):
71
+ super().__init__()
72
+ self.qkv = nn.Linear(dim, dim * 3)
73
+ self.out = nn.Linear(dim, dim)
74
+ self.mlp = MLP(dim)
75
+ self.norm1 = nn.LayerNorm(dim)
76
+ self.norm2 = nn.LayerNorm(dim)
77
+ self.n_heads = n_heads
78
+ self.d_head = dim // n_heads
79
+
80
+ def forward(self, x, cos, sin):
81
+ # --- Attention ---
82
+ q, k, v = project_qkv(x, self.qkv, self.n_heads, self.d_head)
83
+ q, k = apply_rope(q, k, cos, sin)
84
+ attn_out = attention(q, k, v)
85
+ x = self.norm1(x + self.out(attn_out))
86
+
87
+ # --- MLP ---
88
+ x = self.norm2(x + self.mlp(x))
89
+ return x
90
+ ```
91
+
92
+ ---
93
+
94
+ ### Example: Replacing a Transformer block with a D-RNA block
95
+
96
+ ```python
97
+ class DRNA_ResonantBlock(nn.Module):
98
+ """
99
+ Replace the existing TransformerBlock with this ResonantBlock.
100
+ I/O: [Batch, Seq, Dim] -> [Batch, Seq, Dim] (Fully compatible)
101
+ """
102
+ def __init__(self, dim, n_heads, mlp_dim_forward=None):
103
+ super().__init__()
104
+ self.n_heads = n_heads
105
+ self.d_head = dim // n_heads
106
+
107
+ # 1. Spiral Projection Layer (A)
108
+ self.qkv = nn.Linear(dim, dim * 3)
109
+ self.out = nn.Linear(dim, dim)
110
+
111
+ # 2. Spiral Memory Layer (B)
112
+ mlp_dim = mlp_dim_forward if mlp_dim_forward else dim * 4
113
+ self.mlp = nn.Sequential(
114
+ nn.Linear(dim, mlp_dim),
115
+ nn.GELU(),
116
+ nn.Linear(mlp_dim, dim)
117
+ )
118
+
119
+ # 3. Normalization layer for compression
120
+ self.norm1 = nn.LayerNorm(dim)
121
+ self.norm2 = nn.LayerNorm(dim)
122
+
123
+ def forward(self, x, cos, sin):
124
+ """
125
+ Phase information for RoPE as an argument (cos, sin)
126
+ """
127
+ # Attention:Spiral Projection Layer (A)
128
+ # QKV -> RoPE -> Norm
129
+ q, k, v = project_qkv(x, self.qkv, self.n_heads, self.d_head)
130
+ q, k = apply_rope(q, k, cos, sin)
131
+
132
+ attn_out = attention(q, k, v)
133
+ x = self.norm1(x + self.out(attn_out)) # Synchronization with context
134
+
135
+ # MLP:Spiral Memory Layer (B)
136
+ # MLP -> Norm
137
+ x = self.norm2(x + self.mlp(x)) # Determined by memory
138
+
139
+ return x
140
+ ```
141
+
142
+ ### Replacement and Utilization of D-RNA
143
+ A direct drop‑in replacement is not possible, but it can be utilized through “redefinition and re‑synchronization.”
144
+ Why it cannot be used as‑is:
145
+ While a standard Transformer stores information using an “absolute address” (absolute position), D-RNA processes information using the “phase of a spiral” (relative position), meaning the coordinate systems are fundamentally different.
146
+ Even if the weights are copied directly, the phases do not align and no resonance occurs.
147
+ How to replace it (implementation):
148
+ The network’s input–output shapes are fully compatible.
149
+ By rewriting the existing layers as ResonantBlock and migrating positional information into RoPE’s rotational field, the core upgrade is complete.
150
+ How to utilize and adapt it (training):
151
+ After transferring the existing model’s weights as initialization, continue training with a low learning rate.
152
+ The previously static knowledge (existing weights) begins to synchronize with the spiral rotation, gradually blending into D-RNA’s “Resonant Contraction” process and evolving beyond the original performance.
153
+
154
+ ---
155
+
156
+ BPC Comparison Chart
157
+
158
+ <img width="800" alt="bpc_only" src="bpc_only.png" />
159
+
160
+ ---
161
+
162
+ License:
163
+ This project is licensed under the Apache License 2.0 (see the LICENSE file for details).
164
+
165
+ #### Acknowledgments:
166
+ This work builds upon the foundation established by the Transformer architecture.
167
+ I would like to express my gratitude to the researchers and open-source communities
168
+ whose contributions to attention mechanisms, positional encoding, and large-scale
169
+ model design made this work possible.
README_JA.md ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ language:
4
+ - en
5
+ - ja
6
+ tags:
7
+ - machine-learning
8
+ - deep-learning
9
+ - transformer
10
+ - architecture-design
11
+ - adaptive-algorithms
12
+ - resonant-contraction
13
+ - resonant-projection-field
14
+ ---
15
+ # D‑RNA:Dual‑Helix Resonance Neural Architecture (DRNA)
16
+
17
+ D-RNA は、二重らせん(Dual‑Helix)構造と RoPE による回転場を中核に据えた新しいニューラルアーキテクチャです
18
+
19
+ 本アーキテクチャでは、Attention と MLP を二重らせんとして同期させ、共鳴収縮(Resonant Contraction)により情報をホログラフィックに圧縮します
20
+ これは 疎な表現を密に再配置 し、次元を増やすことなく、深さ方向の構造だけで高い表現力を獲得します
21
+ Transformer の全接続性を維持しつつ、破壊的忘却を抑制し、微細なゆらぎや位相情報を保持できる点が特徴です
22
+
23
+ ---
24
+
25
+ ### D-RNA の主な特徴
26
+
27
+ 高い構造的互換性:標準的な Transformer Block と入出力形状が同一であり、アーキテクチャの心臓部としてスムーズな置き換えが可能です。
28
+ 共鳴収縮 (Resonant Contraction):Attention と MLP を二重らせん状に同期させ、情報を位相場に収束させることで、表現密度を劇的に向上させます。
29
+ 次元の代替としての深さ:螺旋の回転(深さ方向の演算)が次元の不足を補い、パラメータ数を増やさずホログラフィックな情報保持を実現します。
30
+ 優れた学習効率:螺旋構造による情報の引き寄せ(同期)により、Transformer よりも極めて少ないステップ数で驚異的な早期収束を達成します。
31
+ 微細な位相保持:RoPE を活用した回転場により、従来の構造では失われがちな微細なゆらぎや文脈の相対関係を精度高く保持します。
32
+ 知識の再同調が可能:既存の重みを初期値として移植し、低学習率で螺旋の位相に馴染ませることで、既存知能を D-RNA 構造へ進化・上書きできます。
33
+
34
+ ### ご注意ください
35
+
36
+ 学習率(LR)の最適化:D-RNA は共鳴収縮により情報の同期が極めて速いため、標準的な Transformer よりも低い学習率で十分に、かつ高速に収束します。 設定が高すぎると、共鳴が過剰に増幅され振動を招く可能性があるため、控えめな LR からの開始を推奨します。
37
+ 勾配の相乗効果:Attention(回想)と MLP(記憶)が二重らせん状に直列同期しているため、一回の更新による「重みのなじみ」が非常に強く働きます。 これは高速収束の利点であると同時に、慎重な更新が安定化の鍵であることを意味します。
38
+ パラメータの共通性:重みの初期化シードやバッチサイズなどのハイパーパラメータは、通常の Transformer 設定をそのまま継承できます。
39
+
40
+ ---
41
+
42
+ ### 概念図(Conceptual Diagram)
43
+ ```
44
+ 「探す」(Attention)、「知っている」(MLP)この2つを螺旋の位相で同期させる
45
+
46
+ RoPE の回転場 (位相保持)
47
+ 疎を密にするホログラフィック圧縮
48
+
49
+ A M
50
+ \ /
51
+ \ / ← ここが共鳴(Resonance)
52
+ / \ seed により自然に同期が生まれる
53
+ / \ 同期の連鎖で意味などを自然に引き寄せる
54
+ A M
55
+
56
+ 深さ方向へ繰り返すことで二重らせんを形成
57
+ (次元の代替として機能)
58
+ ```
59
+
60
+ ---
61
+
62
+ ### 最小コード(Minimal Block)
63
+
64
+ ```python
65
+ class ResonantBlock(nn.Module):
66
+ def __init__(self, dim, n_heads):
67
+ super().__init__()
68
+ self.qkv = nn.Linear(dim, dim * 3)
69
+ self.out = nn.Linear(dim, dim)
70
+ self.mlp = MLP(dim)
71
+ self.norm1 = nn.LayerNorm(dim)
72
+ self.norm2 = nn.LayerNorm(dim)
73
+ self.n_heads = n_heads
74
+ self.d_head = dim // n_heads
75
+
76
+ def forward(self, x, cos, sin):
77
+ # --- Attention ---
78
+ q, k, v = project_qkv(x, self.qkv, self.n_heads, self.d_head)
79
+ q, k = apply_rope(q, k, cos, sin)
80
+ attn_out = attention(q, k, v)
81
+ x = self.norm1(x + self.out(attn_out))
82
+
83
+ # --- MLP ---
84
+ x = self.norm2(x + self.mlp(x))
85
+ return x
86
+ ```
87
+
88
+ ---
89
+
90
+ ### 例:Transformer のブロックを DRNA ブロックに置き換える
91
+
92
+ ```python
93
+ class DRNA_ResonantBlock(nn.Module):
94
+ """
95
+ 既存の TransformerBlock をこの ResonantBlock に置き換える
96
+ I/O: [Batch, Seq, Dim] -> [Batch, Seq, Dim] (完全互換)
97
+ """
98
+ def __init__(self, dim, n_heads, mlp_dim_forward=None):
99
+ super().__init__()
100
+ self.n_heads = n_heads
101
+ self.d_head = dim // n_heads
102
+
103
+ # 1. 螺旋の射影層 (らせんA)
104
+ self.qkv = nn.Linear(dim, dim * 3)
105
+ self.out = nn.Linear(dim, dim)
106
+
107
+ # 2. 記憶の層 (らせんB)
108
+ mlp_dim = mlp_dim_forward if mlp_dim_forward else dim * 4
109
+ self.mlp = nn.Sequential(
110
+ nn.Linear(dim, mlp_dim),
111
+ nn.GELU(),
112
+ nn.Linear(mlp_dim, dim)
113
+ )
114
+
115
+ # 3. 収縮のための正規化層
116
+ self.norm1 = nn.LayerNorm(dim)
117
+ self.norm2 = nn.LayerNorm(dim)
118
+
119
+ def forward(self, x, cos, sin):
120
+ """
121
+ 引数に RoPE 用の位相情報 (cos, sin) を追加するのが唯一の違い
122
+ """
123
+ # --- らせんA: 文脈の共鳴 (Attention) ---
124
+ # QKV 抽出 -> RoPE 回転 -> 収縮(Norm)
125
+ q, k, v = project_qkv(x, self.qkv, self.n_heads, self.d_head)
126
+ q, k = apply_rope(q, k, cos, sin)
127
+
128
+ attn_out = attention(q, k, v)
129
+ x = self.norm1(x + self.out(attn_out)) # 文脈との同期
130
+
131
+ # --- らせんB: 記憶の共鳴 (MLP) ---
132
+ # 知識の照会 -> 収縮(Norm)
133
+ x = self.norm2(x + self.mlp(x)) # 記憶による確定
134
+
135
+ return x
136
+ ```
137
+
138
+ ### D-RNAへの置き換えと活用について
139
+ そのまま置換(Drop-in replacement)はできませんが「再定義と再同調」による活用は可能です
140
+ なぜそのままではダメなのか・・・
141
+ 標準的なTransformerが「絶対的な住所」(絶対位置)で記憶しているのに対し、D-RNAは「螺旋の位相」(相対位置)で情報を処理するため、座標系が根本から異なります。 重みをそのままコピーしても、位相が合わず共鳴が起きません。
142
+ どう置き換えるのか(実装)
143
+ ネットワークの入出力形状は完全互換です。 既存の層を ResonantBlock に書き換え、位置情報を RoPE の回転場へ移行するだけで、心臓部のアップグレードが完了します。
144
+ どう活用し、なじませるのか(学習)
145
+ 既存モデルの重みを初期値として転写(Transfer)した後、低学習率で継続学習を行います。 止まっていた知識(既存の重み)が螺旋の回転に同期し始め、次第にD-RNAの「共鳴収縮」のプロセスへ溶け込み、元の性能を超えて進化します。
146
+
147
+ ---
148
+
149
+ BPC 比較図
150
+
151
+ <img width="800" alt="bpc_only" src="bpc_only.png" />
152
+
153
+ ---
154
+
155
+ ライセンス:
156
+ 本プロジェクトは Apache License 2.0 の下で公開されています(詳細は LICENSE をご覧ください)
157
+
158
+ ### 謝辞:
159
+ 本研究は Transformer アーキテクチャによって築かれた基盤の上に成り立っています
160
+ Attention 機構、位置エンコーディング、大規模モデル設計に関する研究とオープンソースコミュニティの貢献に深く感謝いたします
bpc_only.png ADDED
drna.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ import math
5
+
6
+ '''
7
+ D‑RNA: Dual‑Helix Resonance Neural Architecture (DRNA) 260420
8
+ Transformerの全接続性を継承しつつ、二重らせん(Dual-Helix)構造による
9
+ 「共鳴収縮」(Resonant Contraction)を物理的に再現したニューラルアーキテクチャです
10
+ 螺旋の同期: Attention(文脈の回想)とMLP(知識の定着)を直列に配置し、情報を一段ずつ絞り込む
11
+ 位相の保持: RoPE(Rotary Positional Embedding)を回転場として利用し、相対位置を保つ
12
+ 高密度圧縮: Post-Norm構造により、各らせんの出力直後に情報を収縮させ、意味を確定させる
13
+ '''
14
+
15
class DRNA_RoPE(nn.Module):
    """Rotary position embedding (RoPE) table generator.

    The inverse frequencies ``base ** (-2i / d_model)`` are computed once at
    construction and stored as the ``inv_freq`` buffer. Each call builds the
    cosine and sine tables for the requested number of positions.

    Args:
        d_model: Width of the vectors that will be rotated (``DRNA_Model``
            passes the per-head width). Must be a positive even integer,
            because the table is two concatenated copies of
            ``d_model // 2`` frequencies.
        base: Base of the geometric frequency progression.

    Raises:
        ValueError: If ``d_model`` is not a positive even integer.
    """

    def __init__(self, d_model, base=10000):
        super().__init__()
        # An odd width would yield (d_model + 1) // 2 frequencies and thus a
        # table one column wider than requested; that only surfaces later as
        # an opaque broadcast error, so reject it here.
        if d_model <= 0 or d_model % 2 != 0:
            raise ValueError(
                f"DRNA_RoPE requires a positive even d_model, got {d_model}"
            )
        inv_freq = 1.0 / (base ** (torch.arange(0, d_model, 2).float() / d_model))
        self.register_buffer("inv_freq", inv_freq)

    def forward(self, x, seq_len):
        """Build the rotation tables for ``seq_len`` positions.

        Args:
            x: Any tensor; only ``x.device`` is read (to create the position
                index). Its values and shape are ignored.
            seq_len: Number of positions to generate, starting at 0.

        Returns:
            A ``(cos, sin)`` tuple, each of shape ``[1, seq_len, d_model]``.
        """
        # type_as casts the positions to the buffer's dtype (and device).
        t = torch.arange(seq_len, device=x.device).type_as(self.inv_freq)
        # Outer product: angle[p, i] = position p * inverse frequency i.
        freqs = torch.einsum("i,j->ij", t, self.inv_freq)
        # Duplicate so both halves of a rotated vector share the same angles.
        emb = torch.cat((freqs, freqs), dim=-1)
        return emb.cos()[None, :, :], emb.sin()[None, :, :]
27
+
28
def apply_drna_rope(q, k, cos, sin):
    """Apply the rotary phase rotation to queries and keys.

    Each tensor ``t`` is mapped to ``t * cos + rotate_half(t) * sin``, where
    ``rotate_half`` splits the last axis into two chunks ``(a, b)`` and
    returns ``(-b, a)``.

    Args:
        q: Query tensor; ``DRNA_Block`` passes ``[batch, heads, seq, d_head]``.
        k: Key tensor, handled exactly like ``q``.
        cos: 3-D cosine table (``DRNA_RoPE`` returns ``[1, seq, d_head]``).
        sin: 3-D sine table matching ``cos``.

    Returns:
        A ``(q_rotated, k_rotated)`` tuple.
    """
    # Insert an axis at position 1 so the tables broadcast across heads.
    cos_b = cos[:, None, :, :]
    sin_b = sin[:, None, :, :]

    def _rotated(t):
        lower, upper = torch.chunk(t, 2, dim=-1)
        swapped = torch.cat((-upper, lower), dim=-1)
        return (t * cos_b) + (swapped * sin_b)

    return _rotated(q), _rotated(k)
35
+
36
class DRNA_Block(nn.Module):
    """DRNA resonance block: attention, then MLP, each followed by LayerNorm.

    The two sub-layers run in series, and each uses a post-norm residual
    (``norm(x + dropout(sublayer(x)))``).

    Args:
        d_model: Model width. Must be divisible by ``n_heads``.
        n_heads: Number of attention heads.
        d_ff: Hidden width of the MLP. Defaults to ``4 * d_model`` when
            ``None`` (or any other falsy value).
        dropout: Dropout probability, used inside the MLP and on both
            residual branches.

    Raises:
        ValueError: If ``n_heads`` is not positive or does not divide
            ``d_model``.
    """

    def __init__(self, d_model, n_heads, d_ff=None, dropout=0.1):
        super().__init__()
        # Without this check a bad configuration only fails later, inside
        # the reshape in forward(), with an unhelpful size-mismatch error.
        if n_heads <= 0 or d_model % n_heads != 0:
            raise ValueError(
                f"d_model ({d_model}) must be divisible by a positive "
                f"n_heads ({n_heads})"
            )
        self.n_heads = n_heads
        self.d_head = d_model // n_heads

        # Helix A: recall path (attention)
        self.qkv = nn.Linear(d_model, d_model * 3)
        self.out_proj = nn.Linear(d_model, d_model)
        self.norm1 = nn.LayerNorm(d_model)

        # Helix B: memory path (MLP)
        d_ff = d_ff or d_model * 4
        self.mlp = nn.Sequential(
            nn.Linear(d_model, d_ff),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(d_ff, d_model),
        )
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, cos, sin, attn_mask=None):
        """Run one attention + MLP step.

        Args:
            x: Input of shape ``[batch, seq, d_model]``.
            cos: RoPE cosine table, forwarded to ``apply_drna_rope``.
            sin: RoPE sine table, forwarded to ``apply_drna_rope``.
            attn_mask: Optional mask broadcastable to the score tensor
                ``[batch, heads, seq, seq]``. A boolean mask marks positions
                that MAY be attended with ``True``; any other dtype is added
                to the scores before the softmax. ``None`` (the default)
                keeps full, unmasked attention. A query row that is masked
                everywhere produces NaN after the softmax.

        Returns:
            Tensor of shape ``[batch, seq, d_model]``.
        """
        b, s, d = x.shape

        # --- Helix A (attention resonance) ---
        # One projection gives q, k, v; reorder to [3, batch, heads, seq, d_head].
        qkv = self.qkv(x).reshape(b, s, 3, self.n_heads, self.d_head).permute(2, 0, 3, 1, 4)
        q, k, v = qkv[0], qkv[1], qkv[2]

        q, k = apply_drna_rope(q, k, cos, sin)

        # Scaled dot-product attention
        attn = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(self.d_head))
        if attn_mask is not None:
            if attn_mask.dtype == torch.bool:
                attn = attn.masked_fill(~attn_mask, float("-inf"))
            else:
                attn = attn + attn_mask
        attn = F.softmax(attn, dim=-1)

        # Merge the heads back into [batch, seq, d_model].
        a_out = (attn @ v).transpose(1, 2).reshape(b, s, d)

        # Contraction 1: residual + post-norm around attention
        x = self.norm1(x + self.dropout(self.out_proj(a_out)))

        # --- Helix B (MLP resonance) ---
        # Contraction 2: residual + post-norm around the MLP
        x = self.norm2(x + self.dropout(self.mlp(x)))

        return x
82
+
83
class DRNA_Model(nn.Module):
    """Generic DRNA model container.

    Pipeline: token embedding -> ``n_layers`` stacked ``DRNA_Block`` ->
    linear projection to vocabulary logits. A single ``DRNA_RoPE`` table,
    sized to the per-head width, is shared by all layers.

    Args:
        vocab_size: Number of embedding rows and of output logits.
        d_model: Model width.
        n_layers: Number of stacked ``DRNA_Block`` layers.
        n_heads: Attention heads per block. Must divide ``d_model``, and the
            resulting head width must be even.
        d_ff: MLP hidden width passed to every block.

    Raises:
        ValueError: If ``d_model`` is not divisible by a positive
            ``n_heads``, or if ``d_model // n_heads`` is odd.
    """

    def __init__(self, vocab_size, d_model=256, n_layers=16, n_heads=8, d_ff=1024):
        super().__init__()
        # Validate up front: otherwise these configurations only fail inside
        # forward() with opaque reshape / broadcast errors.
        if n_heads <= 0 or d_model % n_heads != 0:
            raise ValueError(
                f"d_model ({d_model}) must be divisible by a positive "
                f"n_heads ({n_heads})"
            )
        d_head = d_model // n_heads
        if d_head % 2 != 0:
            raise ValueError(
                f"head width d_model // n_heads = {d_head} must be even for RoPE"
            )

        self.embed = nn.Embedding(vocab_size, d_model)
        # RoPE rotates each head separately, so it is sized to the head width.
        self.rope = DRNA_RoPE(d_head)

        self.layers = nn.ModuleList([
            DRNA_Block(d_model, n_heads, d_ff) for _ in range(n_layers)
        ])

        self.output_head = nn.Linear(d_model, vocab_size)

    def forward(self, x):
        """Map token ids to vocabulary logits.

        Args:
            x: Integer token ids of shape ``[batch, seq]``.

        Returns:
            Logits of shape ``[batch, seq, vocab_size]``.
        """
        # Build the tables before x is rebound to the embeddings; only
        # x.device and the sequence length are needed for them.
        cos, sin = self.rope(x, x.size(1))
        x = self.embed(x)

        for layer in self.layers:
            x = layer(x, cos, sin)

        return self.output_head(x)
104
+
105
+ '''
106
+ 汎用型 D-RNA コード License: Apache License 2.0
107
+ https://github.com/muooon/DRNA
108
+ '''