Upload 6 files
Browse files- .gitignore +30 -0
- LICENSE +201 -0
- README.md +169 -0
- README_JA.md +160 -0
- bpc_only.png +0 -0
- drna.py +108 -0
.gitignore
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Notepad++ の自動保存フォルダ
|
| 2 |
+
nppBackup/
|
| 3 |
+
*.log
|
| 4 |
+
*.bak
|
| 5 |
+
|
| 6 |
+
# Python関連
|
| 7 |
+
__pycache__/
|
| 8 |
+
*.pyc
|
| 9 |
+
*.pyo
|
| 10 |
+
*.log
|
| 11 |
+
*.bak
|
| 12 |
+
|
| 13 |
+
# 機密情報
|
| 14 |
+
.env
|
| 15 |
+
*.env
|
| 16 |
+
settings_local.py
|
| 17 |
+
|
| 18 |
+
# IDE・エディタ
|
| 19 |
+
.vscode/
|
| 20 |
+
.idea/
|
| 21 |
+
*.sublime-project
|
| 22 |
+
*.sublime-workspace
|
| 23 |
+
|
| 24 |
+
# OSごとの不要ファイル
|
| 25 |
+
.DS_Store
|
| 26 |
+
Thumbs.db
|
| 27 |
+
|
| 28 |
+
# その他一時ファイル
|
| 29 |
+
*~
|
| 30 |
+
*.tmp
|
LICENSE
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Apache License
|
| 2 |
+
Version 2.0, January 2004
|
| 3 |
+
http://www.apache.org/licenses/
|
| 4 |
+
|
| 5 |
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
| 6 |
+
|
| 7 |
+
1. Definitions.
|
| 8 |
+
|
| 9 |
+
"License" shall mean the terms and conditions for use, reproduction,
|
| 10 |
+
and distribution as defined by Sections 1 through 9 of this document.
|
| 11 |
+
|
| 12 |
+
"Licensor" shall mean the copyright owner or entity authorized by
|
| 13 |
+
the copyright owner that is granting the License.
|
| 14 |
+
|
| 15 |
+
"Legal Entity" shall mean the union of the acting entity and all
|
| 16 |
+
other entities that control, are controlled by, or are under common
|
| 17 |
+
control with that entity. For the purposes of this definition,
|
| 18 |
+
"control" means (i) the power, direct or indirect, to cause the
|
| 19 |
+
direction or management of such entity, whether by contract or
|
| 20 |
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
| 21 |
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
| 22 |
+
|
| 23 |
+
"You" (or "Your") shall mean an individual or Legal Entity
|
| 24 |
+
exercising permissions granted by this License.
|
| 25 |
+
|
| 26 |
+
"Source" form shall mean the preferred form for making modifications,
|
| 27 |
+
including but not limited to software source code, documentation
|
| 28 |
+
source, and configuration files.
|
| 29 |
+
|
| 30 |
+
"Object" form shall mean any form resulting from mechanical
|
| 31 |
+
transformation or translation of a Source form, including but
|
| 32 |
+
not limited to compiled object code, generated documentation,
|
| 33 |
+
and conversions to other media types.
|
| 34 |
+
|
| 35 |
+
"Work" shall mean the work of authorship, whether in Source or
|
| 36 |
+
Object form, made available under the License, as indicated by a
|
| 37 |
+
copyright notice that is included in or attached to the work
|
| 38 |
+
(an example is provided in the Appendix below).
|
| 39 |
+
|
| 40 |
+
"Derivative Works" shall mean any work, whether in Source or Object
|
| 41 |
+
form, that is based on (or derived from) the Work and for which the
|
| 42 |
+
editorial revisions, annotations, elaborations, or other modifications
|
| 43 |
+
represent, as a whole, an original work of authorship. For the purposes
|
| 44 |
+
of this License, Derivative Works shall not include works that remain
|
| 45 |
+
separable from, or merely link (or bind by name) to the interfaces of,
|
| 46 |
+
the Work and Derivative Works thereof.
|
| 47 |
+
|
| 48 |
+
"Contribution" shall mean any work of authorship, including
|
| 49 |
+
the original version of the Work and any modifications or additions
|
| 50 |
+
to that Work or Derivative Works thereof, that is intentionally
|
| 51 |
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
| 52 |
+
or by an individual or Legal Entity authorized to submit on behalf of
|
| 53 |
+
the copyright owner. For the purposes of this definition, "submitted"
|
| 54 |
+
means any form of electronic, verbal, or written communication sent
|
| 55 |
+
to the Licensor or its representatives, including but not limited to
|
| 56 |
+
communication on electronic mailing lists, source code control systems,
|
| 57 |
+
and issue tracking systems that are managed by, or on behalf of, the
|
| 58 |
+
Licensor for the purpose of discussing and improving the Work, but
|
| 59 |
+
excluding communication that is conspicuously marked or otherwise
|
| 60 |
+
designated in writing by the copyright owner as "Not a Contribution."
|
| 61 |
+
|
| 62 |
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
| 63 |
+
on behalf of whom a Contribution has been received by Licensor and
|
| 64 |
+
subsequently incorporated within the Work.
|
| 65 |
+
|
| 66 |
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
| 67 |
+
this License, each Contributor hereby grants to You a perpetual,
|
| 68 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 69 |
+
copyright license to reproduce, prepare Derivative Works of,
|
| 70 |
+
publicly display, publicly perform, sublicense, and distribute the
|
| 71 |
+
Work and such Derivative Works in Source or Object form.
|
| 72 |
+
|
| 73 |
+
3. Grant of Patent License. Subject to the terms and conditions of
|
| 74 |
+
this License, each Contributor hereby grants to You a perpetual,
|
| 75 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 76 |
+
(except as stated in this section) patent license to make, have made,
|
| 77 |
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
| 78 |
+
where such license applies only to those patent claims licensable
|
| 79 |
+
by such Contributor that are necessarily infringed by their
|
| 80 |
+
Contribution(s) alone or by combination of their Contribution(s)
|
| 81 |
+
with the Work to which such Contribution(s) was submitted. If You
|
| 82 |
+
institute patent litigation against any entity (including a
|
| 83 |
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
| 84 |
+
or a Contribution incorporated within the Work constitutes direct
|
| 85 |
+
or contributory patent infringement, then any patent licenses
|
| 86 |
+
granted to You under this License for that Work shall terminate
|
| 87 |
+
as of the date such litigation is filed.
|
| 88 |
+
|
| 89 |
+
4. Redistribution. You may reproduce and distribute copies of the
|
| 90 |
+
Work or Derivative Works thereof in any medium, with or without
|
| 91 |
+
modifications, and in Source or Object form, provided that You
|
| 92 |
+
meet the following conditions:
|
| 93 |
+
|
| 94 |
+
(a) You must give any other recipients of the Work or
|
| 95 |
+
Derivative Works a copy of this License; and
|
| 96 |
+
|
| 97 |
+
(b) You must cause any modified files to carry prominent notices
|
| 98 |
+
stating that You changed the files; and
|
| 99 |
+
|
| 100 |
+
(c) You must retain, in the Source form of any Derivative Works
|
| 101 |
+
that You distribute, all copyright, patent, trademark, and
|
| 102 |
+
attribution notices from the Source form of the Work,
|
| 103 |
+
excluding those notices that do not pertain to any part of
|
| 104 |
+
the Derivative Works; and
|
| 105 |
+
|
| 106 |
+
(d) If the Work includes a "NOTICE" text file as part of its
|
| 107 |
+
distribution, then any Derivative Works that You distribute must
|
| 108 |
+
include a readable copy of the attribution notices contained
|
| 109 |
+
within such NOTICE file, excluding those notices that do not
|
| 110 |
+
pertain to any part of the Derivative Works, in at least one
|
| 111 |
+
of the following places: within a NOTICE text file distributed
|
| 112 |
+
as part of the Derivative Works; within the Source form or
|
| 113 |
+
documentation, if provided along with the Derivative Works; or,
|
| 114 |
+
within a display generated by the Derivative Works, if and
|
| 115 |
+
wherever such third-party notices normally appear. The contents
|
| 116 |
+
of the NOTICE file are for informational purposes only and
|
| 117 |
+
do not modify the License. You may add Your own attribution
|
| 118 |
+
notices within Derivative Works that You distribute, alongside
|
| 119 |
+
or as an addendum to the NOTICE text from the Work, provided
|
| 120 |
+
that such additional attribution notices cannot be construed
|
| 121 |
+
as modifying the License.
|
| 122 |
+
|
| 123 |
+
You may add Your own copyright statement to Your modifications and
|
| 124 |
+
may provide additional or different license terms and conditions
|
| 125 |
+
for use, reproduction, or distribution of Your modifications, or
|
| 126 |
+
for any such Derivative Works as a whole, provided Your use,
|
| 127 |
+
reproduction, and distribution of the Work otherwise complies with
|
| 128 |
+
the conditions stated in this License.
|
| 129 |
+
|
| 130 |
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
| 131 |
+
any Contribution intentionally submitted for inclusion in the Work
|
| 132 |
+
by You to the Licensor shall be under the terms and conditions of
|
| 133 |
+
this License, without any additional terms or conditions.
|
| 134 |
+
Notwithstanding the above, nothing herein shall supersede or modify
|
| 135 |
+
the terms of any separate license agreement you may have executed
|
| 136 |
+
with Licensor regarding such Contributions.
|
| 137 |
+
|
| 138 |
+
6. Trademarks. This License does not grant permission to use the trade
|
| 139 |
+
names, trademarks, service marks, or product names of the Licensor,
|
| 140 |
+
except as required for reasonable and customary use in describing the
|
| 141 |
+
origin of the Work and reproducing the content of the NOTICE file.
|
| 142 |
+
|
| 143 |
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
| 144 |
+
agreed to in writing, Licensor provides the Work (and each
|
| 145 |
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
| 146 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
| 147 |
+
implied, including, without limitation, any warranties or conditions
|
| 148 |
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
| 149 |
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
| 150 |
+
appropriateness of using or redistributing the Work and assume any
|
| 151 |
+
risks associated with Your exercise of permissions under this License.
|
| 152 |
+
|
| 153 |
+
8. Limitation of Liability. In no event and under no legal theory,
|
| 154 |
+
whether in tort (including negligence), contract, or otherwise,
|
| 155 |
+
unless required by applicable law (such as deliberate and grossly
|
| 156 |
+
negligent acts) or agreed to in writing, shall any Contributor be
|
| 157 |
+
liable to You for damages, including any direct, indirect, special,
|
| 158 |
+
incidental, or consequential damages of any character arising as a
|
| 159 |
+
result of this License or out of the use or inability to use the
|
| 160 |
+
Work (including but not limited to damages for loss of goodwill,
|
| 161 |
+
work stoppage, computer failure or malfunction, or any and all
|
| 162 |
+
other commercial damages or losses), even if such Contributor
|
| 163 |
+
has been advised of the possibility of such damages.
|
| 164 |
+
|
| 165 |
+
9. Accepting Warranty or Additional Liability. While redistributing
|
| 166 |
+
the Work or Derivative Works thereof, You may choose to offer,
|
| 167 |
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
| 168 |
+
or other liability obligations and/or rights consistent with this
|
| 169 |
+
License. However, in accepting such obligations, You may act only
|
| 170 |
+
on Your own behalf and on Your sole responsibility, not on behalf
|
| 171 |
+
of any other Contributor, and only if You agree to indemnify,
|
| 172 |
+
defend, and hold each Contributor harmless for any liability
|
| 173 |
+
incurred by, or claims asserted against, such Contributor by reason
|
| 174 |
+
of your accepting any such warranty or additional liability.
|
| 175 |
+
|
| 176 |
+
END OF TERMS AND CONDITIONS
|
| 177 |
+
|
| 178 |
+
APPENDIX: How to apply the Apache License to your work.
|
| 179 |
+
|
| 180 |
+
To apply the Apache License to your work, attach the following
|
| 181 |
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
| 182 |
+
replaced with your own identifying information. (Don't include
|
| 183 |
+
the brackets!) The text should be enclosed in the appropriate
|
| 184 |
+
comment syntax for the file format. We also recommend that a
|
| 185 |
+
file or class name and description of purpose be included on the
|
| 186 |
+
same "printed page" as the copyright notice for easier
|
| 187 |
+
identification within third-party archives.
|
| 188 |
+
|
| 189 |
+
Copyright [yyyy] [name of copyright owner]
|
| 190 |
+
|
| 191 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
| 192 |
+
you may not use this file except in compliance with the License.
|
| 193 |
+
You may obtain a copy of the License at
|
| 194 |
+
|
| 195 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
| 196 |
+
|
| 197 |
+
Unless required by applicable law or agreed to in writing, software
|
| 198 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
| 199 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 200 |
+
See the License for the specific language governing permissions and
|
| 201 |
+
limitations under the License.
|
README.md
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: apache-2.0
|
| 3 |
+
language:
|
| 4 |
+
- en
|
| 5 |
+
- ja
|
| 6 |
+
tags:
|
| 7 |
+
- machine-learning
|
| 8 |
+
- deep-learning
|
| 9 |
+
- transformer
|
| 10 |
+
- architecture-design
|
| 11 |
+
- adaptive-algorithms
|
| 12 |
+
- resonant-contraction
|
| 13 |
+
- resonant-projection-field
|
| 14 |
+
---
|
| 15 |
+
# D-RNA: Dual‑Helix Resonance Neural Architecture (DRNA)
|
| 16 |
+
|
| 17 |
+
D-RNA is a new neural architecture centered on a dual helix structure and a rotation field produced by RoPE.
|
| 18 |
+
|
| 19 |
+
In this architecture, Attention and MLP are synchronized into a dual helix, and information is holographically compressed through Resonant Contraction.
|
| 20 |
+
This method rearranges sparse representations into dense ones to achieve high expressiveness using the depth‑direction structure alone, without increasing the number of dimensions.
|
| 21 |
+
A key feature of this approach is its ability to preserve the full connectivity of the Transformer architecture while suppressing catastrophic forgetting and retaining subtle fluctuations and phase information.
|
| 22 |
+
|
| 23 |
+
---
|
| 24 |
+
|
| 25 |
+
### Features
|
| 26 |
+
High structural compatibility: It has the exact same input–output shape as a standard Transformer Block, allowing it to be smoothly substituted as the core of an architecture.
|
| 27 |
+
Resonant Contraction: By synchronizing Attention and the MLP in a double‑helix pattern and converging information into a phase field, it dramatically increases representational density.
|
| 28 |
+
Depth as an alternative to dimensionality: The spiral rotation (depth‑wise operations) compensates for limited dimensionality and enables holographic information retention without increasing parameter count.
|
| 29 |
+
Excellent learning efficiency: The spiral‑based information attraction (synchronization) achieves astonishing early convergence with far fewer steps than a Transformer.
|
| 30 |
+
Fine‑grained phase preservation: The rotational field powered by RoPE preserves subtle fluctuations and relative contextual relationships that are often lost in conventional architectures.
|
| 31 |
+
Re‑synchronization of knowledge: Existing weights can be transplanted as initialization and gently adapted to the spiral phase with a low learning rate, allowing existing intelligence to be evolved or overwritten into the D-RNA structure.
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
### Notes
|
| 35 |
+
Optimization of learning rate (LR):
|
| 36 |
+
Because D-RNA synchronizes information extremely quickly through Resonant Contraction, it converges sufficiently — and rapidly — even with a lower learning rate compared to a standard Transformer.
|
| 37 |
+
If the LR is set too high, the resonance may be excessively amplified and cause oscillation, so starting with a modest LR is recommended.
|
| 38 |
+
Synergistic gradient effects:
|
| 39 |
+
Since Attention (recall) and the MLP (memory) are synchronized in a double‑helix sequence, the “settling” of weights from a single update is very strong.
|
| 40 |
+
This is an advantage for fast convergence, but it also means that careful updates are key to stability.
|
| 41 |
+
Parameter commonality:
|
| 42 |
+
Hyperparameters such as weight initialization seeds and batch size can be inherited directly from standard Transformer settings.
|
| 43 |
+
|
| 44 |
+
---
|
| 45 |
+
|
| 46 |
+
### Conceptual Diagram
|
| 47 |
+
|
| 48 |
+
```
|
| 49 |
+
Synchronizing “searching” (Attention) and “knowing” (MLP) in the phase of a spiral.
|
| 50 |
+
|
| 51 |
+
RoPE Rotation Field (Phase-Preserving)
|
| 52 |
+
Holographic Compression: Turning Sparse into Dense
|
| 53 |
+
|
| 54 |
+
A M
|
| 55 |
+
\ /
|
| 56 |
+
\ / ← This is Resonance
|
| 57 |
+
/ \ Synchronization occurs naturally through the seed
|
| 58 |
+
/ \ Naturally, meaning emerges through a chain of synchronicities
|
| 59 |
+
A M
|
| 60 |
+
|
| 61 |
+
Repeats in the depth direction to form a dual helix
|
| 62 |
+
(acts as a substitute for increasing dimensionality)
|
| 63 |
+
```
|
| 64 |
+
---
|
| 65 |
+
|
| 66 |
+
### Minimal Block
|
| 67 |
+
|
| 68 |
+
```python
|
| 69 |
+
class ResonantBlock(nn.Module):
|
| 70 |
+
def __init__(self, dim, n_heads):
|
| 71 |
+
super().__init__()
|
| 72 |
+
self.qkv = nn.Linear(dim, dim * 3)
|
| 73 |
+
self.out = nn.Linear(dim, dim)
|
| 74 |
+
self.mlp = MLP(dim)
|
| 75 |
+
self.norm1 = nn.LayerNorm(dim)
|
| 76 |
+
self.norm2 = nn.LayerNorm(dim)
|
| 77 |
+
self.n_heads = n_heads
|
| 78 |
+
self.d_head = dim // n_heads
|
| 79 |
+
|
| 80 |
+
def forward(self, x, cos, sin):
|
| 81 |
+
# --- Attention ---
|
| 82 |
+
q, k, v = project_qkv(x, self.qkv, self.n_heads, self.d_head)
|
| 83 |
+
q, k = apply_rope(q, k, cos, sin)
|
| 84 |
+
attn_out = attention(q, k, v)
|
| 85 |
+
x = self.norm1(x + self.out(attn_out))
|
| 86 |
+
|
| 87 |
+
# --- MLP ---
|
| 88 |
+
x = self.norm2(x + self.mlp(x))
|
| 89 |
+
return x
|
| 90 |
+
```
|
| 91 |
+
|
| 92 |
+
---
|
| 93 |
+
|
| 94 |
+
### Example: Replacing a Transformer block with a D-RNA block
|
| 95 |
+
|
| 96 |
+
```python
|
| 97 |
+
class DRNA_ResonantBlock(nn.Module):
|
| 98 |
+
"""
|
| 99 |
+
Replace the existing TransformerBlock with this ResonantBlock.
|
| 100 |
+
I/O: [Batch, Seq, Dim] -> [Batch, Seq, Dim] (Fully compatible)
|
| 101 |
+
"""
|
| 102 |
+
def __init__(self, dim, n_heads, mlp_dim_forward=None):
|
| 103 |
+
super().__init__()
|
| 104 |
+
self.n_heads = n_heads
|
| 105 |
+
self.d_head = dim // n_heads
|
| 106 |
+
|
| 107 |
+
# 1. Spiral Projection Layer (A)
|
| 108 |
+
self.qkv = nn.Linear(dim, dim * 3)
|
| 109 |
+
self.out = nn.Linear(dim, dim)
|
| 110 |
+
|
| 111 |
+
# 2. Spiral Memory Layer (B)
|
| 112 |
+
mlp_dim = mlp_dim_forward if mlp_dim_forward else dim * 4
|
| 113 |
+
self.mlp = nn.Sequential(
|
| 114 |
+
nn.Linear(dim, mlp_dim),
|
| 115 |
+
nn.GELU(),
|
| 116 |
+
nn.Linear(mlp_dim, dim)
|
| 117 |
+
)
|
| 118 |
+
|
| 119 |
+
# 3. Normalization layer for compression
|
| 120 |
+
self.norm1 = nn.LayerNorm(dim)
|
| 121 |
+
self.norm2 = nn.LayerNorm(dim)
|
| 122 |
+
|
| 123 |
+
def forward(self, x, cos, sin):
|
| 124 |
+
"""
|
| 125 |
+
The only difference from a standard block's forward is the extra RoPE phase arguments (cos, sin).
|
| 126 |
+
"""
|
| 127 |
+
# Attention:Spiral Projection Layer (A)
|
| 128 |
+
# QKV -> RoPE -> Norm
|
| 129 |
+
q, k, v = project_qkv(x, self.qkv, self.n_heads, self.d_head)
|
| 130 |
+
q, k = apply_rope(q, k, cos, sin)
|
| 131 |
+
|
| 132 |
+
attn_out = attention(q, k, v)
|
| 133 |
+
x = self.norm1(x + self.out(attn_out)) # Synchronization with context
|
| 134 |
+
|
| 135 |
+
# MLP:Spiral Memory Layer (B)
|
| 136 |
+
# MLP -> Norm
|
| 137 |
+
x = self.norm2(x + self.mlp(x)) # Determined by memory
|
| 138 |
+
|
| 139 |
+
return x
|
| 140 |
+
```
|
| 141 |
+
|
| 142 |
+
### Replacement and Utilization of D-RNA
|
| 143 |
+
Although the block is shape‑compatible, the trained weights of an existing Transformer cannot be dropped in as‑is; instead, D-RNA can be adopted through “redefinition and re‑synchronization.”
|
| 144 |
+
Why it cannot be used as‑is:
|
| 145 |
+
While a standard Transformer stores information using an “absolute address” (absolute position), D-RNA processes information using the “phase of a spiral” (relative position), meaning the coordinate systems are fundamentally different.
|
| 146 |
+
Even if the weights are copied directly, the phases do not align and no resonance occurs.
|
| 147 |
+
How to replace it (implementation):
|
| 148 |
+
The network’s input–output shapes are fully compatible.
|
| 149 |
+
By rewriting the existing layers as ResonantBlock and migrating positional information into RoPE’s rotational field, the core upgrade is complete.
|
| 150 |
+
How to utilize and adapt it (training):
|
| 151 |
+
After transferring the existing model’s weights as initialization, continue training with a low learning rate.
|
| 152 |
+
The previously static knowledge (existing weights) begins to synchronize with the spiral rotation, gradually blending into D-RNA’s “Resonant Contraction” process and evolving beyond the original performance.
|
| 153 |
+
|
| 154 |
+
---
|
| 155 |
+
|
| 156 |
+
BPC Comparison Chart
|
| 157 |
+
|
| 158 |
+
<img width="800" alt="bpc_only" src="bpc_only.png" />
|
| 159 |
+
|
| 160 |
+
---
|
| 161 |
+
|
| 162 |
+
License:
|
| 163 |
+
This project is licensed under the Apache License 2.0 (see the LICENSE file for details).
|
| 164 |
+
|
| 165 |
+
#### Acknowledgments:
|
| 166 |
+
This work builds upon the foundation established by the Transformer architecture.
|
| 167 |
+
I would like to express my gratitude to the researchers and open-source communities
|
| 168 |
+
whose contributions to attention mechanisms, positional encoding, and large-scale
|
| 169 |
+
model design made this work possible.
|
README_JA.md
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: apache-2.0
|
| 3 |
+
language:
|
| 4 |
+
- en
|
| 5 |
+
- ja
|
| 6 |
+
tags:
|
| 7 |
+
- machine-learning
|
| 8 |
+
- deep-learning
|
| 9 |
+
- transformer
|
| 10 |
+
- architecture-design
|
| 11 |
+
- adaptive-algorithms
|
| 12 |
+
- resonant-contraction
|
| 13 |
+
- resonant-projection-field
|
| 14 |
+
---
|
| 15 |
+
# D‑RNA:Dual‑Helix Resonance Neural Architecture (DRNA)
|
| 16 |
+
|
| 17 |
+
D-RNA は、二重らせん(Dual‑Helix)構造と RoPE による回転場を中核に据えた新しいニューラルアーキテクチャです
|
| 18 |
+
|
| 19 |
+
本アーキテクチャでは、Attention と MLP を二重らせんとして同期させ、共鳴収縮(Resonant Contraction)により情報をホログラフィックに圧縮します
|
| 20 |
+
これは疎な表現を密に再配置し、次元を増やすことなく、深さ方向の構造だけで高い表現力を獲得します
|
| 21 |
+
Transformer の全接続性を維持しつつ、破壊的忘却を抑制し、微細なゆらぎや位相情報を保持できる点が特徴です
|
| 22 |
+
|
| 23 |
+
---
|
| 24 |
+
|
| 25 |
+
### D-RNA の主な特徴
|
| 26 |
+
|
| 27 |
+
高い構造的互換性:標準的な Transformer Block と入出力形状が同一であり、アーキテクチャの心臓部としてスムーズな置き換えが可能です。
|
| 28 |
+
共鳴収縮 (Resonant Contraction):Attention と MLP を二重らせん状に同期させ、情報を位相場に収束させることで、表現密度を劇的に向上させます。
|
| 29 |
+
次元の代替としての深さ:螺旋の回転(深さ方向の演算)が次元の不足を補い、パラメータ数を増やさずホログラフィックな情報保持を実現します。
|
| 30 |
+
優れた学習効率:螺旋構造による情報の引き寄せ(同期)により、Transformer よりも極めて少ないステップ数で驚異的な早期収束を達成します。
|
| 31 |
+
微細な位相保持:RoPE を活用した回転場により、従来の構造では失われがちな微細なゆらぎや文脈の相対関係を精度高く保持します。
|
| 32 |
+
知識の再同調が可能:既存の重みを初期値として移植し、低学習率で螺旋の位相に馴染ませることで、既存知能を D-RNA 構造へ進化・上書きできます。
|
| 33 |
+
|
| 34 |
+
### ご注意ください
|
| 35 |
+
|
| 36 |
+
学習率(LR)の最適化:D-RNA は共鳴収縮により情報の同期が極めて速いため、標準的な Transformer よりも低い学習率で十分に、かつ高速に収束します。 設定が高すぎると、共鳴が過剰に増幅され振動を招く可能性があるため、控えめな LR からの開始を推奨します。
|
| 37 |
+
勾配の相乗効果:Attention(回想)と MLP(記憶)が二重らせん状に直列同期しているため、一回の更新による「重みのなじみ」が非常に強く働きます。 これは高速収束の利点であると同時に、慎重な更新が安定化の鍵であることを意味します。
|
| 38 |
+
パラメータの共通性:重みの初期化シードやバッチサイズなどのハイパーパラメータは、通常の Transformer 設定をそのまま継承できます。
|
| 39 |
+
|
| 40 |
+
---
|
| 41 |
+
|
| 42 |
+
### 概念図(Conceptual Diagram)
|
| 43 |
+
```
|
| 44 |
+
「探す」(Attention)、「知っている」(MLP)この2つを螺旋の位相で同期させる
|
| 45 |
+
|
| 46 |
+
RoPE の回転場 (位相保持)
|
| 47 |
+
疎を密にするホログラフィック圧縮
|
| 48 |
+
|
| 49 |
+
A M
|
| 50 |
+
\ /
|
| 51 |
+
\ / ← ここが共鳴(Resonance)
|
| 52 |
+
/ \ seed により自然に同期が生まれる
|
| 53 |
+
/ \ 同期の連鎖で意味などを自然に引き寄せる
|
| 54 |
+
A M
|
| 55 |
+
|
| 56 |
+
深さ方向へ繰り返すことで二重らせんを形成
|
| 57 |
+
(次元の代替として機能)
|
| 58 |
+
```
|
| 59 |
+
|
| 60 |
+
---
|
| 61 |
+
|
| 62 |
+
### 最小コード(Minimal Block)
|
| 63 |
+
|
| 64 |
+
```python
|
| 65 |
+
class ResonantBlock(nn.Module):
|
| 66 |
+
def __init__(self, dim, n_heads):
|
| 67 |
+
super().__init__()
|
| 68 |
+
self.qkv = nn.Linear(dim, dim * 3)
|
| 69 |
+
self.out = nn.Linear(dim, dim)
|
| 70 |
+
self.mlp = MLP(dim)
|
| 71 |
+
self.norm1 = nn.LayerNorm(dim)
|
| 72 |
+
self.norm2 = nn.LayerNorm(dim)
|
| 73 |
+
self.n_heads = n_heads
|
| 74 |
+
self.d_head = dim // n_heads
|
| 75 |
+
|
| 76 |
+
def forward(self, x, cos, sin):
|
| 77 |
+
# --- Attention ---
|
| 78 |
+
q, k, v = project_qkv(x, self.qkv, self.n_heads, self.d_head)
|
| 79 |
+
q, k = apply_rope(q, k, cos, sin)
|
| 80 |
+
attn_out = attention(q, k, v)
|
| 81 |
+
x = self.norm1(x + self.out(attn_out))
|
| 82 |
+
|
| 83 |
+
# --- MLP ---
|
| 84 |
+
x = self.norm2(x + self.mlp(x))
|
| 85 |
+
return x
|
| 86 |
+
```
|
| 87 |
+
|
| 88 |
+
---
|
| 89 |
+
|
| 90 |
+
### 例:Transformer のブロックを DRNA ブロックに置き換える
|
| 91 |
+
|
| 92 |
+
```python
|
| 93 |
+
class DRNA_ResonantBlock(nn.Module):
|
| 94 |
+
"""
|
| 95 |
+
既存の TransformerBlock をこの ResonantBlock に置き換える
|
| 96 |
+
I/O: [Batch, Seq, Dim] -> [Batch, Seq, Dim] (完全互換)
|
| 97 |
+
"""
|
| 98 |
+
def __init__(self, dim, n_heads, mlp_dim_forward=None):
|
| 99 |
+
super().__init__()
|
| 100 |
+
self.n_heads = n_heads
|
| 101 |
+
self.d_head = dim // n_heads
|
| 102 |
+
|
| 103 |
+
# 1. 螺旋の射影層 (らせんA)
|
| 104 |
+
self.qkv = nn.Linear(dim, dim * 3)
|
| 105 |
+
self.out = nn.Linear(dim, dim)
|
| 106 |
+
|
| 107 |
+
# 2. 記憶の層 (らせんB)
|
| 108 |
+
mlp_dim = mlp_dim_forward if mlp_dim_forward else dim * 4
|
| 109 |
+
self.mlp = nn.Sequential(
|
| 110 |
+
nn.Linear(dim, mlp_dim),
|
| 111 |
+
nn.GELU(),
|
| 112 |
+
nn.Linear(mlp_dim, dim)
|
| 113 |
+
)
|
| 114 |
+
|
| 115 |
+
# 3. 収縮のための正規化層
|
| 116 |
+
self.norm1 = nn.LayerNorm(dim)
|
| 117 |
+
self.norm2 = nn.LayerNorm(dim)
|
| 118 |
+
|
| 119 |
+
def forward(self, x, cos, sin):
|
| 120 |
+
"""
|
| 121 |
+
引数に RoPE 用の位相情報 (cos, sin) を追加するのが唯一の違い
|
| 122 |
+
"""
|
| 123 |
+
# --- らせんA: 文脈の共鳴 (Attention) ---
|
| 124 |
+
# QKV 抽出 -> RoPE 回転 -> 収縮(Norm)
|
| 125 |
+
q, k, v = project_qkv(x, self.qkv, self.n_heads, self.d_head)
|
| 126 |
+
q, k = apply_rope(q, k, cos, sin)
|
| 127 |
+
|
| 128 |
+
attn_out = attention(q, k, v)
|
| 129 |
+
x = self.norm1(x + self.out(attn_out)) # 文脈との同期
|
| 130 |
+
|
| 131 |
+
# --- らせんB: 記憶の共鳴 (MLP) ---
|
| 132 |
+
# 知識の照会 -> 収縮(Norm)
|
| 133 |
+
x = self.norm2(x + self.mlp(x)) # 記憶による確定
|
| 134 |
+
|
| 135 |
+
return x
|
| 136 |
+
```
|
| 137 |
+
|
| 138 |
+
### D-RNAへの置き換えと活用について
|
| 139 |
+
そのまま置換(Drop-in replacement)はできませんが「再定義と再同調」による活用は可能です
|
| 140 |
+
なぜそのままではダメなのか・・・
|
| 141 |
+
標準的なTransformerが「絶対的な住所」(絶対位置)で記憶しているのに対し、D-RNAは「螺旋の位相」(相対位置)で情報を処理するため、座標系が根本から異なります。 重みをそのままコピーしても、位相が合わず共鳴が起きません。
|
| 142 |
+
どう置き換えるのか(実装)
|
| 143 |
+
ネットワークの入出力形状は完全互換です。 既存の層を ResonantBlock に書き換え、位置情報を RoPE の回転場へ移行するだけで、心臓部のアップグレードが完了します。
|
| 144 |
+
どう活用し、なじませるのか(学習)
|
| 145 |
+
既存モデルの重みを初期値として転写(Transfer)した後、低学習率で継続学習を行います。 止まっていた知識(既存の重み)が螺旋の回転に同期し始め、次第にD-RNAの「共鳴収縮」のプロセスへ溶け込み、元の性能を超えて進化します。
|
| 146 |
+
|
| 147 |
+
---
|
| 148 |
+
|
| 149 |
+
BPC 比較図
|
| 150 |
+
|
| 151 |
+
<img width="800" alt="bpc_only" src="bpc_only.png" />
|
| 152 |
+
|
| 153 |
+
---
|
| 154 |
+
|
| 155 |
+
ライセンス:
|
| 156 |
+
本プロジェクトは Apache License 2.0 の下で公開されています(詳細は LICENSE をご覧ください)
|
| 157 |
+
|
| 158 |
+
### 謝辞:
|
| 159 |
+
本研究は Transformer アーキテクチャによって築かれた基盤の上に成り立っています
|
| 160 |
+
Attention 機構、位置エンコーディング、大規模モデル設計に関する研究とオープンソースコミュニティの貢献に深く感謝いたします
|
bpc_only.png
ADDED
|
drna.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import torch.nn.functional as F
|
| 4 |
+
import math
|
| 5 |
+
|
| 6 |
+
'''
|
| 7 |
+
D‑RNA: Dual‑Helix Resonance Neural Architecture (DRNA) 260420
|
| 8 |
+
Transformerの全接続性を継承しつつ、二重らせん(Dual-Helix)構造による
|
| 9 |
+
「共鳴収縮」(Resonant Contraction)を物理的に再現したニューラルアーキテクチャです
|
| 10 |
+
螺旋の同期: Attention(文脈の回想)とMLP(知識の定着)を直列に配置し、情報を一段ずつ絞り込む
|
| 11 |
+
位相の保持: RoPE(Rotary Positional Embedding)を回転場として利用し、相対位置を保つ
|
| 12 |
+
高密度圧縮: Post-Norm構造により、各らせんの出力直後に情報を収縮させ、意味を確定させる
|
| 13 |
+
'''
|
| 14 |
+
|
| 15 |
+
class DRNA_RoPE(nn.Module):
    """Rotation field that sets the phase of the double helix (RoPE tables).

    Precomputes the inverse frequencies of a rotary positional embedding and,
    on each call, returns the cosine and sine tables for ``seq_len`` positions.

    Args:
        d_model: Width of the last axis of the returned tables. ``DRNA_Model``
            passes the per-head width (``d_model // n_heads``) here, not the
            full model width. Must be a positive even integer because the
            frequencies are duplicated to cover two halves of the vector.
        base: Base of the geometric frequency progression.

    Raises:
        ValueError: If ``d_model`` is not a positive even integer.
    """

    def __init__(self, d_model, base=10000):
        super().__init__()
        # An odd width would yield ceil(d_model / 2) frequencies and therefore
        # tables that are d_model + 1 wide; reject it here instead of letting it
        # surface later as a broadcasting error.
        if d_model <= 0 or d_model % 2 != 0:
            raise ValueError(
                f"DRNA_RoPE requires a positive even dimension, got {d_model}"
            )
        inv_freq = 1.0 / (base ** (torch.arange(0, d_model, 2).float() / d_model))
        self.register_buffer("inv_freq", inv_freq)

    def forward(self, x, seq_len):
        """Build the cosine and sine tables for ``seq_len`` positions.

        Args:
            x: Any tensor; only ``x.device`` is read, to place the position
                index.
            seq_len: Number of positions to generate.

        Returns:
            Tuple ``(cos, sin)``, each of shape ``(1, seq_len, d_model)``.
        """
        # Positions 0..seq_len-1, cast to the type of the frequency buffer.
        t = torch.arange(seq_len, device=x.device).type_as(self.inv_freq)
        # Outer product: one angle per (position, frequency) pair.
        freqs = torch.einsum("i,j->ij", t, self.inv_freq)
        # Duplicate so both halves of a vector share the same angles.
        emb = torch.cat((freqs, freqs), dim=-1)
        return emb.cos()[None, :, :], emb.sin()[None, :, :]
|
| 27 |
+
|
| 28 |
+
def apply_drna_rope(q, k, cos, sin):
    """Apply the rotary phase rotation to the query and key tensors.

    Each input is combined as ``t * cos + rotate(t) * sin``, where ``rotate``
    splits the last axis into two halves ``(a, b)`` and returns ``(-b, a)``.

    Args:
        q: Query tensor; ``DRNA_Block`` passes ``(batch, heads, seq, d_head)``.
        k: Key tensor, handled exactly like ``q``.
        cos: Cosine table; a singleton axis is inserted at position 1 so it
            broadcasts over the head axis.
        sin: Sine table, handled exactly like ``cos``.

    Returns:
        Tuple ``(q_rotated, k_rotated)``.
    """
    # Add the head axis so the tables broadcast against q and k.
    cos_b = cos[:, None, :, :]
    sin_b = sin[:, None, :, :]

    def _rotate(t):
        # Rotate one tensor: swap the two halves and negate the second.
        first, second = t.chunk(2, dim=-1)
        swapped = torch.cat((-second, first), dim=-1)
        return (t * cos_b) + (swapped * sin_b)

    q_rot = _rotate(q)
    k_rot = _rotate(k)
    return q_rot, k_rot
|
| 35 |
+
|
| 36 |
+
class DRNA_Block(nn.Module):
    """DRNA resonance block: attention then MLP in series, each post-normed.

    "Helix A" is multi-head self-attention with rotary position encoding.
    "Helix B" is a two-layer GELU MLP. Each helix is added to its input as a
    residual and immediately followed by LayerNorm (post-norm).

    Args:
        d_model: Model width; must be divisible by ``n_heads``.
        n_heads: Number of attention heads.
        d_ff: Hidden width of the MLP. Falls back to ``4 * d_model`` when
            falsy (``None`` or ``0``).
        dropout: Dropout probability used inside the MLP and on both residual
            branches.

    Raises:
        ValueError: If ``n_heads`` is not positive or does not divide
            ``d_model``.
    """

    def __init__(self, d_model, n_heads, d_ff=None, dropout=0.1):
        super().__init__()
        # Floor division would silently drop channels and make the reshape in
        # forward() fail with an unrelated-looking size error.
        if n_heads <= 0 or d_model % n_heads != 0:
            raise ValueError(
                f"d_model ({d_model}) must be divisible by a positive "
                f"n_heads ({n_heads})"
            )
        self.n_heads = n_heads
        self.d_head = d_model // n_heads

        # Helix A: recall path (attention)
        self.qkv = nn.Linear(d_model, d_model * 3)
        self.out_proj = nn.Linear(d_model, d_model)
        self.norm1 = nn.LayerNorm(d_model)

        # Helix B: memory path (MLP)
        d_ff = d_ff or d_model * 4
        self.mlp = nn.Sequential(
            nn.Linear(d_model, d_ff),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(d_ff, d_model),
        )
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, cos, sin, attn_mask=None):
        """Run one resonance block.

        Args:
            x: Input of shape ``(batch, seq, d_model)``.
            cos: Cosine table passed to ``apply_drna_rope``; ``DRNA_RoPE``
                produces ``(1, seq, d_head)``.
            sin: Sine table, same shape as ``cos``.
            attn_mask: Optional mask broadcastable to
                ``(batch, n_heads, seq, seq)``. A boolean mask marks positions
                that MAY be attended with ``True``; any other dtype is added
                to the attention scores before the softmax. ``None`` (the
                default) leaves attention unmasked, as before. A query row
                whose keys are all masked out yields NaN after the softmax.

        Returns:
            Tensor of shape ``(batch, seq, d_model)``.
        """
        b, s, d = x.shape

        # --- Helix A (attention resonance) ---
        # Project once, then split into (3, batch, heads, seq, d_head).
        qkv = self.qkv(x).reshape(b, s, 3, self.n_heads, self.d_head).permute(2, 0, 3, 1, 4)
        q, k, v = qkv[0], qkv[1], qkv[2]

        # Positions are encoded by rotating q and k only; v is left untouched.
        q, k = apply_drna_rope(q, k, cos, sin)

        # Scaled dot-product attention
        attn = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(self.d_head))
        if attn_mask is not None:
            if attn_mask.dtype == torch.bool:
                attn = attn.masked_fill(~attn_mask, float("-inf"))
            else:
                attn = attn + attn_mask
        attn = F.softmax(attn, dim=-1)

        # Merge the heads back into (batch, seq, d_model).
        a_out = (attn @ v).transpose(1, 2).reshape(b, s, d)

        # Contraction 1: residual add followed by LayerNorm (post-norm).
        x = self.norm1(x + self.dropout(self.out_proj(a_out)))

        # --- Helix B (MLP resonance) ---
        # Contraction 2: residual add followed by LayerNorm (post-norm).
        x = self.norm2(x + self.dropout(self.mlp(x)))

        return x
|
| 82 |
+
|
| 83 |
+
class DRNA_Model(nn.Module):
    """General-purpose DRNA model container.

    Token embedding, a stack of ``DRNA_Block`` layers that share one set of
    rotary tables, and a linear head projecting back to the vocabulary.

    Args:
        vocab_size: Number of token ids; sizes both the embedding and the
            output head.
        d_model: Model width. Must be divisible by ``n_heads``, and the
            resulting per-head width must be even (a rotary-encoding
            requirement).
        n_layers: Number of ``DRNA_Block`` layers.
        n_heads: Attention heads per block.
        d_ff: MLP hidden width handed to every block.

    Raises:
        ValueError: If ``d_model`` is not divisible by a positive ``n_heads``,
            or the per-head width is odd.
    """

    def __init__(self, vocab_size, d_model=256, n_layers=16, n_heads=8, d_ff=1024):
        super().__init__()
        # Validate before building anything: a bad configuration would
        # otherwise only fail inside the first forward pass with a
        # shape-mismatch error.
        if n_heads <= 0 or d_model % n_heads != 0:
            raise ValueError(
                f"d_model ({d_model}) must be divisible by a positive "
                f"n_heads ({n_heads})"
            )
        head_dim = d_model // n_heads
        if head_dim % 2 != 0:
            raise ValueError(
                f"per-head width d_model // n_heads must be even for rotary "
                f"encoding, got {head_dim}"
            )

        self.embed = nn.Embedding(vocab_size, d_model)
        # The rotation is applied per head, so the tables use the head width.
        self.rope = DRNA_RoPE(head_dim)

        # Blocks are built with their default dropout; none is passed here.
        self.layers = nn.ModuleList([
            DRNA_Block(d_model, n_heads, d_ff) for _ in range(n_layers)
        ])

        self.output_head = nn.Linear(d_model, vocab_size)

    def forward(self, x):
        """Map token ids to vocabulary logits.

        Args:
            x: Token ids of shape ``(batch, seq)``.

        Returns:
            Logits of shape ``(batch, seq, vocab_size)``.
        """
        # Build the rotary tables once per call; every layer reuses them.
        # Only x's device and sequence length are read at this point.
        cos, sin = self.rope(x, x.size(1))
        x = self.embed(x)

        for layer in self.layers:
            x = layer(x, cos, sin)

        return self.output_head(x)
|
| 104 |
+
|
| 105 |
+
'''
|
| 106 |
+
汎用型 D-RNA コード License: Apache License 2.0
|
| 107 |
+
https://github.com/muooon/DRNA
|
| 108 |
+
'''
|