irishprancer commited on
Commit
9931e6f
·
verified ·
1 Parent(s): 793c2fb

Training in progress, step 4350, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:880fd56a3f8de1ea8da94daf1c2a4ae51100a00332912dd6360718788d991f3e
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21408a8c0f804aa69ccdc098361ffb0c38ee5eae2c774310be1c3d5f3d0c89cb
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b0f4a093ed328add3e959161df7d638c8c11def0b0aa41283d1549f7fc07bb9
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bcaa1a30846eab83cb0fb8aeeb387ec463414d9ba20d28e1e9aa81c65bf4680
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8fde804448728c0f3c5740097a588b9bc938edec8f5ff4ab3791e696a0e04dd
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a9ea00016d252bf419fc0794eade190f54eb50118e2e0be5b9c332ef3c36fc0
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65b74f49a3daa98af42d6e544ec1a2f4a5627b7dc9aa14dada3f91ea7451360b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e7ae917a4132ef2fbbbdadfebce9aa687102db21112c728e0ebfe527b807e8a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7166205048561096,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 182.6086956521739,
5
  "eval_steps": 150,
6
- "global_step": 4200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4067,6 +4067,151 @@
4067
  "EMA_steps_per_second": 19.558,
4068
  "epoch": 182.6086956521739,
4069
  "step": 4200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4070
  }
4071
  ],
4072
  "logging_steps": 10,
@@ -4086,7 +4231,7 @@
4086
  "attributes": {}
4087
  }
4088
  },
4089
- "total_flos": 1.0822098760143667e+17,
4090
  "train_batch_size": 4,
4091
  "trial_name": null,
4092
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7166205048561096,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 189.1304347826087,
5
  "eval_steps": 150,
6
+ "global_step": 4350,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4067
  "EMA_steps_per_second": 19.558,
4068
  "epoch": 182.6086956521739,
4069
  "step": 4200
4070
+ },
4071
+ {
4072
+ "epoch": 183.04347826086956,
4073
+ "grad_norm": 1.614182472229004,
4074
+ "learning_rate": 1.5299748734455397e-07,
4075
+ "loss": 0.211,
4076
+ "step": 4210
4077
+ },
4078
+ {
4079
+ "epoch": 183.47826086956522,
4080
+ "grad_norm": 1.6402850151062012,
4081
+ "learning_rate": 3.0599497468910794e-07,
4082
+ "loss": 0.2423,
4083
+ "step": 4220
4084
+ },
4085
+ {
4086
+ "epoch": 183.91304347826087,
4087
+ "grad_norm": 2.002448558807373,
4088
+ "learning_rate": 4.589924620336618e-07,
4089
+ "loss": 0.209,
4090
+ "step": 4230
4091
+ },
4092
+ {
4093
+ "epoch": 184.34782608695653,
4094
+ "grad_norm": 2.5236711502075195,
4095
+ "learning_rate": 6.119899493782159e-07,
4096
+ "loss": 0.2611,
4097
+ "step": 4240
4098
+ },
4099
+ {
4100
+ "epoch": 184.7826086956522,
4101
+ "grad_norm": 1.6767892837524414,
4102
+ "learning_rate": 7.649874367227698e-07,
4103
+ "loss": 0.2195,
4104
+ "step": 4250
4105
+ },
4106
+ {
4107
+ "epoch": 185.2173913043478,
4108
+ "grad_norm": 2.1687870025634766,
4109
+ "learning_rate": 9.179849240673236e-07,
4110
+ "loss": 0.2131,
4111
+ "step": 4260
4112
+ },
4113
+ {
4114
+ "epoch": 185.65217391304347,
4115
+ "grad_norm": 1.5472371578216553,
4116
+ "learning_rate": 1.0709824114118776e-06,
4117
+ "loss": 0.2283,
4118
+ "step": 4270
4119
+ },
4120
+ {
4121
+ "epoch": 186.08695652173913,
4122
+ "grad_norm": 2.4626262187957764,
4123
+ "learning_rate": 1.2239798987564317e-06,
4124
+ "loss": 0.2744,
4125
+ "step": 4280
4126
+ },
4127
+ {
4128
+ "epoch": 186.52173913043478,
4129
+ "grad_norm": 1.382519245147705,
4130
+ "learning_rate": 1.3769773861009856e-06,
4131
+ "loss": 0.2086,
4132
+ "step": 4290
4133
+ },
4134
+ {
4135
+ "epoch": 186.95652173913044,
4136
+ "grad_norm": 1.9738290309906006,
4137
+ "learning_rate": 1.5299748734455395e-06,
4138
+ "loss": 0.2302,
4139
+ "step": 4300
4140
+ },
4141
+ {
4142
+ "epoch": 187.3913043478261,
4143
+ "grad_norm": 2.3739991188049316,
4144
+ "learning_rate": 1.5299747218372504e-06,
4145
+ "loss": 0.2658,
4146
+ "step": 4310
4147
+ },
4148
+ {
4149
+ "epoch": 187.82608695652175,
4150
+ "grad_norm": 1.786908745765686,
4151
+ "learning_rate": 1.529974267012443e-06,
4152
+ "loss": 0.1938,
4153
+ "step": 4320
4154
+ },
4155
+ {
4156
+ "epoch": 188.2608695652174,
4157
+ "grad_norm": 2.060818672180176,
4158
+ "learning_rate": 1.5299735089712976e-06,
4159
+ "loss": 0.2269,
4160
+ "step": 4330
4161
+ },
4162
+ {
4163
+ "epoch": 188.69565217391303,
4164
+ "grad_norm": 1.7497200965881348,
4165
+ "learning_rate": 1.5299724477141145e-06,
4166
+ "loss": 0.2566,
4167
+ "step": 4340
4168
+ },
4169
+ {
4170
+ "epoch": 189.1304347826087,
4171
+ "grad_norm": 1.7452894449234009,
4172
+ "learning_rate": 1.5299710832413148e-06,
4173
+ "loss": 0.2235,
4174
+ "step": 4350
4175
+ },
4176
+ {
4177
+ "epoch": 189.1304347826087,
4178
+ "eval_loss": 0.9762633442878723,
4179
+ "eval_runtime": 0.4891,
4180
+ "eval_samples_per_second": 20.445,
4181
+ "eval_steps_per_second": 20.445,
4182
+ "step": 4350
4183
+ },
4184
+ {
4185
+ "Start_State_loss": 0.8609819412231445,
4186
+ "Start_State_runtime": 0.3959,
4187
+ "Start_State_samples_per_second": 25.26,
4188
+ "Start_State_steps_per_second": 25.26,
4189
+ "epoch": 189.1304347826087,
4190
+ "step": 4350
4191
+ },
4192
+ {
4193
+ "Raw_Model_loss": 0.9762633442878723,
4194
+ "Raw_Model_runtime": 0.3923,
4195
+ "Raw_Model_samples_per_second": 25.488,
4196
+ "Raw_Model_steps_per_second": 25.488,
4197
+ "epoch": 189.1304347826087,
4198
+ "step": 4350
4199
+ },
4200
+ {
4201
+ "SWA_loss": 0.8078572154045105,
4202
+ "SWA_runtime": 0.4156,
4203
+ "SWA_samples_per_second": 24.059,
4204
+ "SWA_steps_per_second": 24.059,
4205
+ "epoch": 189.1304347826087,
4206
+ "step": 4350
4207
+ },
4208
+ {
4209
+ "EMA_loss": 0.8598043322563171,
4210
+ "EMA_runtime": 0.3915,
4211
+ "EMA_samples_per_second": 25.542,
4212
+ "EMA_steps_per_second": 25.542,
4213
+ "epoch": 189.1304347826087,
4214
+ "step": 4350
4215
  }
4216
  ],
4217
  "logging_steps": 10,
 
4231
  "attributes": {}
4232
  }
4233
  },
4234
+ "total_flos": 1.1202954752684851e+17,
4235
  "train_batch_size": 4,
4236
  "trial_name": null,
4237
  "trial_params": null