irishprancer committed on
Commit
5febd74
·
verified ·
1 Parent(s): 46f9f3f

Training in progress, step 5400, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce8cb0c1636a06b1e76c546c4b0282f02b71ccad43c283d3d33d43185c64edec
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4eaaf03b78bd99375228f4e3780fd0588fc02582773008769bf7177550d4b48
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70afac3958025bed818f692a236ab6bf6b28db45140796294f475309149762a5
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87baf48f45b21dd7d9a1576417255bf546e8558ecd18cd75468fb9ffa32e54f8
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7de188e422eb0da886da3c865f1df00995a0a219ebff0d43a41d74c3b9d38d5
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cde3dd12f91204388f748ef22c42d0af6362a11af96ae2767080c430a3556fd7
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7cc08ef7615af7896731786745ca416272561837649d6bc1ff644d72a48c9b0d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d8a06f6e764a4c806b3b6aa6930ec3c05d14769ecbf5db87f5122a0c04e591e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7166205048561096,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 228.2608695652174,
5
  "eval_steps": 150,
6
- "global_step": 5250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -5082,6 +5082,151 @@
5082
  "EMA_steps_per_second": 25.783,
5083
  "epoch": 228.2608695652174,
5084
  "step": 5250
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5085
  }
5086
  ],
5087
  "logging_steps": 10,
@@ -5101,7 +5246,7 @@
5101
  "attributes": {}
5102
  }
5103
  },
5104
- "total_flos": 1.3495580841170534e+17,
5105
  "train_batch_size": 4,
5106
  "trial_name": null,
5107
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7166205048561096,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 234.7826086956522,
5
  "eval_steps": 150,
6
+ "global_step": 5400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
5082
  "EMA_steps_per_second": 25.783,
5083
  "epoch": 228.2608695652174,
5084
  "step": 5250
5085
+ },
5086
+ {
5087
+ "epoch": 228.69565217391303,
5088
+ "grad_norm": 3.2444028854370117,
5089
+ "learning_rate": 1.085552546442246e-07,
5090
+ "loss": 0.2128,
5091
+ "step": 5260
5092
+ },
5093
+ {
5094
+ "epoch": 229.1304347826087,
5095
+ "grad_norm": 2.6189353466033936,
5096
+ "learning_rate": 2.171105092884492e-07,
5097
+ "loss": 0.2259,
5098
+ "step": 5270
5099
+ },
5100
+ {
5101
+ "epoch": 229.56521739130434,
5102
+ "grad_norm": 2.1571950912475586,
5103
+ "learning_rate": 3.2566576393267376e-07,
5104
+ "loss": 0.2638,
5105
+ "step": 5280
5106
+ },
5107
+ {
5108
+ "epoch": 230.0,
5109
+ "grad_norm": 2.62457013130188,
5110
+ "learning_rate": 4.342210185768984e-07,
5111
+ "loss": 0.2062,
5112
+ "step": 5290
5113
+ },
5114
+ {
5115
+ "epoch": 230.43478260869566,
5116
+ "grad_norm": 1.7154628038406372,
5117
+ "learning_rate": 5.42776273221123e-07,
5118
+ "loss": 0.2114,
5119
+ "step": 5300
5120
+ },
5121
+ {
5122
+ "epoch": 230.8695652173913,
5123
+ "grad_norm": 2.018242835998535,
5124
+ "learning_rate": 6.513315278653475e-07,
5125
+ "loss": 0.24,
5126
+ "step": 5310
5127
+ },
5128
+ {
5129
+ "epoch": 231.30434782608697,
5130
+ "grad_norm": 2.380286693572998,
5131
+ "learning_rate": 7.598867825095721e-07,
5132
+ "loss": 0.214,
5133
+ "step": 5320
5134
+ },
5135
+ {
5136
+ "epoch": 231.7391304347826,
5137
+ "grad_norm": 1.5191930532455444,
5138
+ "learning_rate": 8.684420371537968e-07,
5139
+ "loss": 0.2008,
5140
+ "step": 5330
5141
+ },
5142
+ {
5143
+ "epoch": 232.17391304347825,
5144
+ "grad_norm": 2.8159291744232178,
5145
+ "learning_rate": 9.769972917980214e-07,
5146
+ "loss": 0.2521,
5147
+ "step": 5340
5148
+ },
5149
+ {
5150
+ "epoch": 232.6086956521739,
5151
+ "grad_norm": 2.023869752883911,
5152
+ "learning_rate": 1.085552546442246e-06,
5153
+ "loss": 0.2095,
5154
+ "step": 5350
5155
+ },
5156
+ {
5157
+ "epoch": 233.04347826086956,
5158
+ "grad_norm": 1.9053815603256226,
5159
+ "learning_rate": 1.0855524388726596e-06,
5160
+ "loss": 0.2192,
5161
+ "step": 5360
5162
+ },
5163
+ {
5164
+ "epoch": 233.47826086956522,
5165
+ "grad_norm": 2.135075092315674,
5166
+ "learning_rate": 1.0855521161639428e-06,
5167
+ "loss": 0.2301,
5168
+ "step": 5370
5169
+ },
5170
+ {
5171
+ "epoch": 233.91304347826087,
5172
+ "grad_norm": 2.302385091781616,
5173
+ "learning_rate": 1.0855515783162238e-06,
5174
+ "loss": 0.2402,
5175
+ "step": 5380
5176
+ },
5177
+ {
5178
+ "epoch": 234.34782608695653,
5179
+ "grad_norm": 2.3211662769317627,
5180
+ "learning_rate": 1.0855508253297159e-06,
5181
+ "loss": 0.2156,
5182
+ "step": 5390
5183
+ },
5184
+ {
5185
+ "epoch": 234.7826086956522,
5186
+ "grad_norm": 2.5585744380950928,
5187
+ "learning_rate": 1.0855498572047172e-06,
5188
+ "loss": 0.2164,
5189
+ "step": 5400
5190
+ },
5191
+ {
5192
+ "epoch": 234.7826086956522,
5193
+ "eval_loss": 0.9845991134643555,
5194
+ "eval_runtime": 0.3916,
5195
+ "eval_samples_per_second": 25.539,
5196
+ "eval_steps_per_second": 25.539,
5197
+ "step": 5400
5198
+ },
5199
+ {
5200
+ "Start_State_loss": 0.8609819412231445,
5201
+ "Start_State_runtime": 0.3948,
5202
+ "Start_State_samples_per_second": 25.328,
5203
+ "Start_State_steps_per_second": 25.328,
5204
+ "epoch": 234.7826086956522,
5205
+ "step": 5400
5206
+ },
5207
+ {
5208
+ "Raw_Model_loss": 0.9845991134643555,
5209
+ "Raw_Model_runtime": 0.3961,
5210
+ "Raw_Model_samples_per_second": 25.244,
5211
+ "Raw_Model_steps_per_second": 25.244,
5212
+ "epoch": 234.7826086956522,
5213
+ "step": 5400
5214
+ },
5215
+ {
5216
+ "SWA_loss": 0.8302789926528931,
5217
+ "SWA_runtime": 0.3853,
5218
+ "SWA_samples_per_second": 25.953,
5219
+ "SWA_steps_per_second": 25.953,
5220
+ "epoch": 234.7826086956522,
5221
+ "step": 5400
5222
+ },
5223
+ {
5224
+ "EMA_loss": 0.8594452142715454,
5225
+ "EMA_runtime": 0.3912,
5226
+ "EMA_samples_per_second": 25.563,
5227
+ "EMA_steps_per_second": 25.563,
5228
+ "epoch": 234.7826086956522,
5229
+ "step": 5400
5230
  }
5231
  ],
5232
  "logging_steps": 10,
 
5246
  "attributes": {}
5247
  }
5248
  },
5249
+ "total_flos": 1.3884011525792563e+17,
5250
  "train_batch_size": 4,
5251
  "trial_name": null,
5252
  "trial_params": null