rdz-falcon committed on
Commit
d63e502
·
verified ·
1 Parent(s): 9320e8d

stage1 epoch 20

Browse files
stage1/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b46e5aa585153e1d941e65ee5f2dc1fc9d75cad7feefb94ec68bab07f03f84a5
3
  size 708349400
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f35905a07609ddde747e97be5ceb7e0738908256e38c8d9d7b5857053b31c63e
3
  size 708349400
stage1/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:73ac1ab28dac38b8c4319611ff2f731346a68b381e4e51867c91676e4c25b64d
3
  size 166022334
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fa257722d0a73880a55c43953fdd62220df40ef2db2114e13c54958946e1faa
3
  size 166022334
stage1/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9efe520d4f3a74273c6f8fdaaeb61eef641b06de86d486636ab10996ac09631f
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:389cdf6e6e309dd071b9392da85e2833c0cdb6cda76c36033ccfc332178c7427
3
  size 1465
stage1/trainable_token_rows.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:30cdf5c0b358cb518f1d7f6a7e8bdff0102ff543c560fd7e975dffe90e80a303
3
  size 2117389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5063cbda36e57794780b1c292303700227f7cebec47c76cd6aa20eb797413c11
3
  size 2117389
stage1/training_state.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
  "stage": "stage1",
3
  "label": "latest",
4
- "epoch_completed": 18,
5
  "total_epochs": 20,
6
- "avg_loss": 2.5752096784943563,
7
- "val_loss": 85.74403594353001,
8
- "global_step": 13464,
9
  "trainable_token_count": 514,
10
  "peft_meta": {
11
  "peft_strategy": "trainable_token_indices",
@@ -13,18 +13,18 @@
13
  "lm_head_rows_enabled": false
14
  },
15
  "stage_summary": {
16
- "train_loss": 2.5752096784943563,
17
- "train_sentence_loss": 2.5752096784943563,
18
  "train_replay_loss": 0.0,
19
  "train_denoise_loss": 0.0,
20
- "train_motion_exact_acc": 0.15785435171541795,
21
- "train_motion_near_acc": 0.18983807432918903,
22
  "replay_step_fraction": 0.0,
23
- "val_loss": 85.74403594353001,
24
- "val_motion_exact_acc": 0.15781292133940328,
25
- "val_motion_near_acc": 0.19072696413558998,
26
  "val_num_samples": 2989,
27
- "val_worst_group_loss": 85.74403594353001
28
  },
29
- "saved_at": "2026-04-09T11:20:27.435527Z"
30
  }
 
1
  {
2
  "stage": "stage1",
3
  "label": "latest",
4
+ "epoch_completed": 20,
5
  "total_epochs": 20,
6
+ "avg_loss": 2.573339675837022,
7
+ "val_loss": 85.64452281274218,
8
+ "global_step": 14960,
9
  "trainable_token_count": 514,
10
  "peft_meta": {
11
  "peft_strategy": "trainable_token_indices",
 
13
  "lm_head_rows_enabled": false
14
  },
15
  "stage_summary": {
16
+ "train_loss": 2.573339675837022,
17
+ "train_sentence_loss": 2.573339675837022,
18
  "train_replay_loss": 0.0,
19
  "train_denoise_loss": 0.0,
20
+ "train_motion_exact_acc": 0.15830803552936024,
21
+ "train_motion_near_acc": 0.19017735092048502,
22
  "replay_step_fraction": 0.0,
23
+ "val_loss": 85.64452281274218,
24
+ "val_motion_exact_acc": 0.15793188775914851,
25
+ "val_motion_near_acc": 0.19021144298336057,
26
  "val_num_samples": 2989,
27
+ "val_worst_group_loss": 85.64452281274218
28
  },
29
+ "saved_at": "2026-04-09T12:27:31.167841Z"
30
  }