sergioalves committed on
Commit
bc448c5
·
verified ·
1 Parent(s): c83b1b1

Training in progress, step 30, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:498b3687ee450e2eff26e720212ce6d52064c73b38c0fec60caaedc096c40552
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:daacd75a106553a8fdc57b058edd252f244815f468339f9e09bae292c750a813
3
  size 167832240
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:037a6f497e86b65b8c8b2d14930e65bad5f3ff7a37e25366a8385d20d78a3de5
3
  size 335810490
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07719814aa9994e539f7d4094fab8e942ed9edc78425140c14daf38196388aa3
3
  size 335810490
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f1c23e1e5fb41aca194dd42c51b4d1ef7c5b2c45f66230306d2c0a25ec25f6c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74e1d166bd94c8d5feb6a09f0334c405dc805c0289ac58657df651915eaf6e15
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:46d68217ddf3e899bf1495ee33b9ec14e2c91ed912ffcb460a7bb01192b04b7d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0e9cc72c20ddd925ef39b6005e82a4d8730b1dde32cfcd070d74c83a8a3564a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.2420746088027954,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-20",
4
- "epoch": 0.004927322000492732,
5
  "eval_steps": 5,
6
- "global_step": 20,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -89,6 +89,50 @@
89
  "eval_samples_per_second": 6.133,
90
  "eval_steps_per_second": 3.068,
91
  "step": 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  }
93
  ],
94
  "logging_steps": 3,
@@ -112,12 +156,12 @@
112
  "should_evaluate": false,
113
  "should_log": false,
114
  "should_save": true,
115
- "should_training_stop": false
116
  },
117
  "attributes": {}
118
  }
119
  },
120
- "total_flos": 9459062865395712.0,
121
  "train_batch_size": 2,
122
  "trial_name": null,
123
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.224576473236084,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-30",
4
+ "epoch": 0.007390983000739098,
5
  "eval_steps": 5,
6
+ "global_step": 30,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
89
  "eval_samples_per_second": 6.133,
90
  "eval_steps_per_second": 3.068,
91
  "step": 20
92
+ },
93
+ {
94
+ "epoch": 0.005173688100517369,
95
+ "grad_norm": 0.4509369730949402,
96
+ "learning_rate": 4.2178276747988446e-05,
97
+ "loss": 1.15,
98
+ "step": 21
99
+ },
100
+ {
101
+ "epoch": 0.005912786400591279,
102
+ "grad_norm": 0.4589945375919342,
103
+ "learning_rate": 2.061073738537635e-05,
104
+ "loss": 1.2316,
105
+ "step": 24
106
+ },
107
+ {
108
+ "epoch": 0.006159152500615915,
109
+ "eval_loss": 1.2277593612670898,
110
+ "eval_runtime": 278.7654,
111
+ "eval_samples_per_second": 6.131,
112
+ "eval_steps_per_second": 3.067,
113
+ "step": 25
114
+ },
115
+ {
116
+ "epoch": 0.0066518847006651885,
117
+ "grad_norm": 0.428680419921875,
118
+ "learning_rate": 5.449673790581611e-06,
119
+ "loss": 1.2175,
120
+ "step": 27
121
+ },
122
+ {
123
+ "epoch": 0.007390983000739098,
124
+ "grad_norm": 0.5584115386009216,
125
+ "learning_rate": 0.0,
126
+ "loss": 1.1603,
127
+ "step": 30
128
+ },
129
+ {
130
+ "epoch": 0.007390983000739098,
131
+ "eval_loss": 1.224576473236084,
132
+ "eval_runtime": 278.8171,
133
+ "eval_samples_per_second": 6.129,
134
+ "eval_steps_per_second": 3.067,
135
+ "step": 30
136
  }
137
  ],
138
  "logging_steps": 3,
 
156
  "should_evaluate": false,
157
  "should_log": false,
158
  "should_save": true,
159
+ "should_training_stop": true
160
  },
161
  "attributes": {}
162
  }
163
  },
164
+ "total_flos": 1.437406611898368e+16,
165
  "train_batch_size": 2,
166
  "trial_name": null,
167
  "trial_params": null