Epoch 0 - Val loss -0.9678
Browse files- config.json +2 -1
- model.safetensors +2 -2
config.json
CHANGED
|
@@ -25,7 +25,8 @@
|
|
| 25 |
"residual_gate_slot_status_type": "mean",
|
| 26 |
"residual_gate_type": "elementwise",
|
| 27 |
"residual_per_slot_gate": true,
|
| 28 |
-
"
|
|
|
|
| 29 |
"stm_size": 4096,
|
| 30 |
"use_flash_attention": true,
|
| 31 |
"use_gated_residual": true,
|
|
|
|
| 25 |
"residual_gate_slot_status_type": "mean",
|
| 26 |
"residual_gate_type": "elementwise",
|
| 27 |
"residual_per_slot_gate": true,
|
| 28 |
+
"rope_base": 100000,
|
| 29 |
+
"seq_len": 8192,
|
| 30 |
"stm_size": 4096,
|
| 31 |
"use_flash_attention": true,
|
| 32 |
"use_gated_residual": true,
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7d1fac900a7c9256fbc94994934bb0ead9487bf4fbf0fb572fd54027cd04e7b8
|
| 3 |
+
size 133048240
|