stage1 epoch 20
Browse files- stage1/adapter_model.safetensors +1 -1
- stage1/optimizer.pt +1 -1
- stage1/scheduler.pt +1 -1
- stage1/trainable_token_rows.pt +1 -1
- stage1/training_state.json +13 -13
stage1/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 708349400
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f35905a07609ddde747e97be5ceb7e0738908256e38c8d9d7b5857053b31c63e
|
| 3 |
size 708349400
|
stage1/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 166022334
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5fa257722d0a73880a55c43953fdd62220df40ef2db2114e13c54958946e1faa
|
| 3 |
size 166022334
|
stage1/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:389cdf6e6e309dd071b9392da85e2833c0cdb6cda76c36033ccfc332178c7427
|
| 3 |
size 1465
|
stage1/trainable_token_rows.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2117389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5063cbda36e57794780b1c292303700227f7cebec47c76cd6aa20eb797413c11
|
| 3 |
size 2117389
|
stage1/training_state.json
CHANGED
|
@@ -1,11 +1,11 @@
|
|
| 1 |
{
|
| 2 |
"stage": "stage1",
|
| 3 |
"label": "latest",
|
| 4 |
-
"epoch_completed":
|
| 5 |
"total_epochs": 20,
|
| 6 |
-
"avg_loss": 2.
|
| 7 |
-
"val_loss": 85.
|
| 8 |
-
"global_step":
|
| 9 |
"trainable_token_count": 514,
|
| 10 |
"peft_meta": {
|
| 11 |
"peft_strategy": "trainable_token_indices",
|
|
@@ -13,18 +13,18 @@
|
|
| 13 |
"lm_head_rows_enabled": false
|
| 14 |
},
|
| 15 |
"stage_summary": {
|
| 16 |
-
"train_loss": 2.
|
| 17 |
-
"train_sentence_loss": 2.
|
| 18 |
"train_replay_loss": 0.0,
|
| 19 |
"train_denoise_loss": 0.0,
|
| 20 |
-
"train_motion_exact_acc": 0.
|
| 21 |
-
"train_motion_near_acc": 0.
|
| 22 |
"replay_step_fraction": 0.0,
|
| 23 |
-
"val_loss": 85.
|
| 24 |
-
"val_motion_exact_acc": 0.
|
| 25 |
-
"val_motion_near_acc": 0.
|
| 26 |
"val_num_samples": 2989,
|
| 27 |
-
"val_worst_group_loss": 85.
|
| 28 |
},
|
| 29 |
-
"saved_at": "2026-04-
|
| 30 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"stage": "stage1",
|
| 3 |
"label": "latest",
|
| 4 |
+
"epoch_completed": 20,
|
| 5 |
"total_epochs": 20,
|
| 6 |
+
"avg_loss": 2.573339675837022,
|
| 7 |
+
"val_loss": 85.64452281274218,
|
| 8 |
+
"global_step": 14960,
|
| 9 |
"trainable_token_count": 514,
|
| 10 |
"peft_meta": {
|
| 11 |
"peft_strategy": "trainable_token_indices",
|
|
|
|
| 13 |
"lm_head_rows_enabled": false
|
| 14 |
},
|
| 15 |
"stage_summary": {
|
| 16 |
+
"train_loss": 2.573339675837022,
|
| 17 |
+
"train_sentence_loss": 2.573339675837022,
|
| 18 |
"train_replay_loss": 0.0,
|
| 19 |
"train_denoise_loss": 0.0,
|
| 20 |
+
"train_motion_exact_acc": 0.15830803552936024,
|
| 21 |
+
"train_motion_near_acc": 0.19017735092048502,
|
| 22 |
"replay_step_fraction": 0.0,
|
| 23 |
+
"val_loss": 85.64452281274218,
|
| 24 |
+
"val_motion_exact_acc": 0.15793188775914851,
|
| 25 |
+
"val_motion_near_acc": 0.19021144298336057,
|
| 26 |
"val_num_samples": 2989,
|
| 27 |
+
"val_worst_group_loss": 85.64452281274218
|
| 28 |
},
|
| 29 |
+
"saved_at": "2026-04-09T12:27:31.167841Z"
|
| 30 |
}
|