Update configuration_neollm.py
Browse files- configuration_neollm.py +1 -11
configuration_neollm.py
CHANGED
|
@@ -279,14 +279,6 @@ class NeoLLMConfig(PretrainedConfig):
|
|
| 279 |
values, such as ``32``, are valid memory-saving variants but are
|
| 280 |
not the full paper-faithful default. Ignored when
|
| 281 |
``use_token_generator=False``.
|
| 282 |
-
generator_khronos_chunk_size (:obj:`int`, *optional*, defaults to 16):
|
| 283 |
-
Number of seed dimensions evaluated at once inside the Leviathan
|
| 284 |
-
KHRONOS tensor product. This does not change the mathematical
|
| 285 |
-
function: it computes the same product by accumulating
|
| 286 |
-
``Σ_d log|phi_d|`` over chunks. The default ``16`` avoids
|
| 287 |
-
materializing the full ``[N, d_seed, krank]`` tensor during
|
| 288 |
-
torch.compile/AOTAutograd while preserving the article's separable
|
| 289 |
-
product.
|
| 290 |
use_jtokm (:obj:`bool`, *optional*, defaults to ``False``):
|
| 291 |
Enable the **Leviathan-JTok-M** token-indexed modulation module
|
| 292 |
(Yang et al., 2026; fused with Leviathan geometry).
|
|
@@ -707,14 +699,13 @@ class NeoLLMConfig(PretrainedConfig):
|
|
| 707 |
# ── Embedding input normalization ─────────────────────────────────
|
| 708 |
use_embedding_input_norm=True,
|
| 709 |
# ── Leviathan continuous token generator ──────────────────────────
|
| 710 |
-
use_token_generator=
|
| 711 |
generator_d_seed=128,
|
| 712 |
generator_num_modes=8,
|
| 713 |
generator_num_knots=32,
|
| 714 |
generator_spline_degree=2,
|
| 715 |
generator_k=3,
|
| 716 |
generator_krank=64,
|
| 717 |
-
generator_khronos_chunk_size=16,
|
| 718 |
# ── Leviathan-JTok-M token-indexed modulation ─────────────────────
|
| 719 |
use_jtokm=False,
|
| 720 |
jtokm_num_experts=4,
|
|
@@ -1047,7 +1038,6 @@ class NeoLLMConfig(PretrainedConfig):
|
|
| 1047 |
self.generator_spline_degree = generator_spline_degree
|
| 1048 |
self.generator_k = generator_k
|
| 1049 |
self.generator_krank = generator_krank
|
| 1050 |
-
self.generator_khronos_chunk_size = generator_khronos_chunk_size
|
| 1051 |
|
| 1052 |
# ── Leviathan-JTok-M ─────────────────────────────────────────────
|
| 1053 |
self.use_jtokm = use_jtokm
|
|
|
|
| 279 |
values, such as ``32``, are valid memory-saving variants but are
|
| 280 |
not the full paper-faithful default. Ignored when
|
| 281 |
``use_token_generator=False``.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 282 |
use_jtokm (:obj:`bool`, *optional*, defaults to ``False``):
|
| 283 |
Enable the **Leviathan-JTok-M** token-indexed modulation module
|
| 284 |
(Yang et al., 2026; fused with Leviathan geometry).
|
|
|
|
| 699 |
# ── Embedding input normalization ─────────────────────────────────
|
| 700 |
use_embedding_input_norm=True,
|
| 701 |
# ── Leviathan continuous token generator ──────────────────────────
|
| 702 |
+
use_token_generator=True,
|
| 703 |
generator_d_seed=128,
|
| 704 |
generator_num_modes=8,
|
| 705 |
generator_num_knots=32,
|
| 706 |
generator_spline_degree=2,
|
| 707 |
generator_k=3,
|
| 708 |
generator_krank=64,
|
|
|
|
| 709 |
# ── Leviathan-JTok-M token-indexed modulation ─────────────────────
|
| 710 |
use_jtokm=False,
|
| 711 |
jtokm_num_experts=4,
|
|
|
|
| 1038 |
self.generator_spline_degree = generator_spline_degree
|
| 1039 |
self.generator_k = generator_k
|
| 1040 |
self.generator_krank = generator_krank
|
|
|
|
| 1041 |
|
| 1042 |
# ── Leviathan-JTok-M ─────────────────────────────────────────────
|
| 1043 |
self.use_jtokm = use_jtokm
|