error577 commited on
Commit
2379bfb
·
verified ·
1 Parent(s): c4c171b

Training in progress, step 1500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:691f994acfc59c7aca800d00d8b8e367902fb73123ddc0b51f8e84f579ecf237
3
  size 528526760
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff35e84e01f1386ebac587ae0cb8f62443ce9bfbcaf664da61c9fb02006570c0
3
  size 528526760
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9657073a58fefe7c01ef61fb07920f3088ef5ed4b3bf0ebb5157b21f93e50935
3
  size 141172038
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e22ca59118b10b5eb52fb341cd120c1968f04534206e934f766d9a6e8dec217
3
  size 141172038
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c45f92507a90bf7fbfb993e80ae13367813ee0eb21f64eb3e7039d42171134db
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3048ecfad89600ff1c80bedbf89688a52e92cfd26e80e4cb6be8f6aab7986d5
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4609325e348c0ce76168f551d26f091fd1a57160aa92a5ffe7283ff60aa63ba8
3
  size 2080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53869905448aa5dcb85f704ccd358f2990095cda7a0cb40de61a3fe333d56cc8
3
  size 2080
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 2.450434923171997,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-1200",
4
- "epoch": 0.05431567104101803,
5
  "eval_steps": 100,
6
- "global_step": 1400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1107,6 +1107,84 @@
1107
  "eval_samples_per_second": 4.015,
1108
  "eval_steps_per_second": 2.017,
1109
  "step": 1400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1110
  }
1111
  ],
1112
  "logging_steps": 10,
@@ -1121,7 +1199,7 @@
1121
  "early_stopping_threshold": 0.0
1122
  },
1123
  "attributes": {
1124
- "early_stopping_patience_counter": 2
1125
  }
1126
  },
1127
  "TrainerControl": {
@@ -1130,12 +1208,12 @@
1130
  "should_evaluate": false,
1131
  "should_log": false,
1132
  "should_save": true,
1133
- "should_training_stop": false
1134
  },
1135
  "attributes": {}
1136
  }
1137
  },
1138
- "total_flos": 2.0812918834222694e+17,
1139
  "train_batch_size": 2,
1140
  "trial_name": null,
1141
  "trial_params": null
 
1
  {
2
  "best_metric": 2.450434923171997,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-1200",
4
+ "epoch": 0.05819536182966217,
5
  "eval_steps": 100,
6
+ "global_step": 1500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1107
  "eval_samples_per_second": 4.015,
1108
  "eval_steps_per_second": 2.017,
1109
  "step": 1400
1110
+ },
1111
+ {
1112
+ "epoch": 0.05470364011988244,
1113
+ "grad_norm": 4.387486934661865,
1114
+ "learning_rate": 0.00019984273239970207,
1115
+ "loss": 10.1819,
1116
+ "step": 1410
1117
+ },
1118
+ {
1119
+ "epoch": 0.05509160919874686,
1120
+ "grad_norm": 5.688685894012451,
1121
+ "learning_rate": 0.00019984044774901122,
1122
+ "loss": 8.8118,
1123
+ "step": 1420
1124
+ },
1125
+ {
1126
+ "epoch": 0.055479578277611276,
1127
+ "grad_norm": 5.5436835289001465,
1128
+ "learning_rate": 0.00019983814854640514,
1129
+ "loss": 9.8809,
1130
+ "step": 1430
1131
+ },
1132
+ {
1133
+ "epoch": 0.055867547356475686,
1134
+ "grad_norm": 6.585811614990234,
1135
+ "learning_rate": 0.00019983583479188383,
1136
+ "loss": 9.4094,
1137
+ "step": 1440
1138
+ },
1139
+ {
1140
+ "epoch": 0.0562555164353401,
1141
+ "grad_norm": 9.564101219177246,
1142
+ "learning_rate": 0.00019983349193353206,
1143
+ "loss": 10.8473,
1144
+ "step": 1450
1145
+ },
1146
+ {
1147
+ "epoch": 0.05664348551420452,
1148
+ "grad_norm": 3.8079373836517334,
1149
+ "learning_rate": 0.00019983113452326506,
1150
+ "loss": 9.3833,
1151
+ "step": 1460
1152
+ },
1153
+ {
1154
+ "epoch": 0.05703145459306893,
1155
+ "grad_norm": 4.716315269470215,
1156
+ "learning_rate": 0.00019982877711299807,
1157
+ "loss": 9.0148,
1158
+ "step": 1470
1159
+ },
1160
+ {
1161
+ "epoch": 0.057419423671933346,
1162
+ "grad_norm": 5.183149337768555,
1163
+ "learning_rate": 0.0001998263760469854,
1164
+ "loss": 10.1636,
1165
+ "step": 1480
1166
+ },
1167
+ {
1168
+ "epoch": 0.05780739275079776,
1169
+ "grad_norm": 7.416577339172363,
1170
+ "learning_rate": 0.00019982398953288794,
1171
+ "loss": 10.1869,
1172
+ "step": 1490
1173
+ },
1174
+ {
1175
+ "epoch": 0.05819536182966217,
1176
+ "grad_norm": 10.7798433303833,
1177
+ "learning_rate": 0.0001998215593630448,
1178
+ "loss": 10.5461,
1179
+ "step": 1500
1180
+ },
1181
+ {
1182
+ "epoch": 0.05819536182966217,
1183
+ "eval_loss": 2.462498426437378,
1184
+ "eval_runtime": 51.1983,
1185
+ "eval_samples_per_second": 4.043,
1186
+ "eval_steps_per_second": 2.031,
1187
+ "step": 1500
1188
  }
1189
  ],
1190
  "logging_steps": 10,
 
1199
  "early_stopping_threshold": 0.0
1200
  },
1201
  "attributes": {
1202
+ "early_stopping_patience_counter": 3
1203
  }
1204
  },
1205
  "TrainerControl": {
 
1208
  "should_evaluate": false,
1209
  "should_log": false,
1210
  "should_save": true,
1211
+ "should_training_stop": true
1212
  },
1213
  "attributes": {}
1214
  }
1215
  },
1216
+ "total_flos": 2.2296408461785498e+17,
1217
  "train_batch_size": 2,
1218
  "trial_name": null,
1219
  "trial_params": null