CocoRoF commited on
Commit
d2d93d9
·
verified ·
1 Parent(s): 702fd2d

Training in progress, step 21340, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2345f93cb689f8d7f41eab40d0cef18241e972878e7fb6948d71f1371719ca8
3
  size 613004648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f542db4dc4bdfef70ff8a6ef414b9efe0e9422b96365de4fab56a2eb00cf0528
3
  size 613004648
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:88aacbb6072c3cfcd0a072fac3b759771484a894347ed77a4b36afa5c1d0bc3b
3
  size 1226096954
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d39973e68bd2ab323de4ccae017a93bc78950b3db9a2f7db5ee5b9e19e93cb2
3
  size 1226096954
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28d77b9708e785984189dd87311c593d951d08be1862b45c82e09f23e0a264bc
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94eef74bedfbfbaf29c17e811e443f9ec61d689a2aeb015aed498e7127930e57
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a12c30886420598486baa82bdd0616396462f1a93af3275146e2f56424c6d27
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:237512d340213f3c8e92a41e3f1546a67ec9ffb4ffbb161bf53b27ba33f0fe09
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.840674789128398,
5
  "eval_steps": 250,
6
- "global_step": 21000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -16051,6 +16051,260 @@
16051
  "eval_spearman_manhattan": 0.7463752489757238,
16052
  "eval_steps_per_second": 35.33,
16053
  "step": 21000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16054
  }
16055
  ],
16056
  "logging_steps": 10,
@@ -16065,7 +16319,7 @@
16065
  "should_evaluate": false,
16066
  "should_log": false,
16067
  "should_save": true,
16068
- "should_training_stop": false
16069
  },
16070
  "attributes": {}
16071
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.0,
5
  "eval_steps": 250,
6
+ "global_step": 21340,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
16051
  "eval_spearman_manhattan": 0.7463752489757238,
16052
  "eval_steps_per_second": 35.33,
16053
  "step": 21000
16054
+ },
16055
+ {
16056
+ "epoch": 9.845360824742269,
16057
+ "grad_norm": 1.0928393602371216,
16058
+ "learning_rate": 8.769329896907216e-06,
16059
+ "loss": 0.0467,
16060
+ "step": 21010
16061
+ },
16062
+ {
16063
+ "epoch": 9.850046860356139,
16064
+ "grad_norm": 1.111713171005249,
16065
+ "learning_rate": 8.768744142455483e-06,
16066
+ "loss": 0.0621,
16067
+ "step": 21020
16068
+ },
16069
+ {
16070
+ "epoch": 9.85473289597001,
16071
+ "grad_norm": 1.0577993392944336,
16072
+ "learning_rate": 8.768158388003749e-06,
16073
+ "loss": 0.0547,
16074
+ "step": 21030
16075
+ },
16076
+ {
16077
+ "epoch": 9.85941893158388,
16078
+ "grad_norm": 1.453430414199829,
16079
+ "learning_rate": 8.767572633552015e-06,
16080
+ "loss": 0.0476,
16081
+ "step": 21040
16082
+ },
16083
+ {
16084
+ "epoch": 9.86410496719775,
16085
+ "grad_norm": 1.527876377105713,
16086
+ "learning_rate": 8.766986879100282e-06,
16087
+ "loss": 0.0476,
16088
+ "step": 21050
16089
+ },
16090
+ {
16091
+ "epoch": 9.868791002811621,
16092
+ "grad_norm": 1.6117839813232422,
16093
+ "learning_rate": 8.766401124648548e-06,
16094
+ "loss": 0.06,
16095
+ "step": 21060
16096
+ },
16097
+ {
16098
+ "epoch": 9.873477038425492,
16099
+ "grad_norm": 1.4520413875579834,
16100
+ "learning_rate": 8.765815370196815e-06,
16101
+ "loss": 0.049,
16102
+ "step": 21070
16103
+ },
16104
+ {
16105
+ "epoch": 9.878163074039362,
16106
+ "grad_norm": 0.7348192930221558,
16107
+ "learning_rate": 8.76522961574508e-06,
16108
+ "loss": 0.0531,
16109
+ "step": 21080
16110
+ },
16111
+ {
16112
+ "epoch": 9.882849109653232,
16113
+ "grad_norm": 2.0447332859039307,
16114
+ "learning_rate": 8.764643861293347e-06,
16115
+ "loss": 0.0499,
16116
+ "step": 21090
16117
+ },
16118
+ {
16119
+ "epoch": 9.887535145267105,
16120
+ "grad_norm": 1.4770324230194092,
16121
+ "learning_rate": 8.764058106841614e-06,
16122
+ "loss": 0.0467,
16123
+ "step": 21100
16124
+ },
16125
+ {
16126
+ "epoch": 9.892221180880975,
16127
+ "grad_norm": 2.313798189163208,
16128
+ "learning_rate": 8.763472352389879e-06,
16129
+ "loss": 0.0553,
16130
+ "step": 21110
16131
+ },
16132
+ {
16133
+ "epoch": 9.896907216494846,
16134
+ "grad_norm": 1.1835334300994873,
16135
+ "learning_rate": 8.762886597938146e-06,
16136
+ "loss": 0.0554,
16137
+ "step": 21120
16138
+ },
16139
+ {
16140
+ "epoch": 9.901593252108716,
16141
+ "grad_norm": 1.580556035041809,
16142
+ "learning_rate": 8.762300843486411e-06,
16143
+ "loss": 0.0507,
16144
+ "step": 21130
16145
+ },
16146
+ {
16147
+ "epoch": 9.906279287722587,
16148
+ "grad_norm": 0.9451290965080261,
16149
+ "learning_rate": 8.761715089034676e-06,
16150
+ "loss": 0.0451,
16151
+ "step": 21140
16152
+ },
16153
+ {
16154
+ "epoch": 9.910965323336457,
16155
+ "grad_norm": 1.0342776775360107,
16156
+ "learning_rate": 8.761129334582945e-06,
16157
+ "loss": 0.0497,
16158
+ "step": 21150
16159
+ },
16160
+ {
16161
+ "epoch": 9.915651358950328,
16162
+ "grad_norm": 1.1484373807907104,
16163
+ "learning_rate": 8.76054358013121e-06,
16164
+ "loss": 0.0466,
16165
+ "step": 21160
16166
+ },
16167
+ {
16168
+ "epoch": 9.920337394564198,
16169
+ "grad_norm": 2.1361167430877686,
16170
+ "learning_rate": 8.759957825679475e-06,
16171
+ "loss": 0.0631,
16172
+ "step": 21170
16173
+ },
16174
+ {
16175
+ "epoch": 9.925023430178069,
16176
+ "grad_norm": 1.0633893013000488,
16177
+ "learning_rate": 8.759372071227742e-06,
16178
+ "loss": 0.0468,
16179
+ "step": 21180
16180
+ },
16181
+ {
16182
+ "epoch": 9.929709465791941,
16183
+ "grad_norm": 2.109058380126953,
16184
+ "learning_rate": 8.758786316776007e-06,
16185
+ "loss": 0.0572,
16186
+ "step": 21190
16187
+ },
16188
+ {
16189
+ "epoch": 9.934395501405811,
16190
+ "grad_norm": 1.6682454347610474,
16191
+ "learning_rate": 8.758200562324274e-06,
16192
+ "loss": 0.0542,
16193
+ "step": 21200
16194
+ },
16195
+ {
16196
+ "epoch": 9.939081537019682,
16197
+ "grad_norm": 1.6142311096191406,
16198
+ "learning_rate": 8.757614807872541e-06,
16199
+ "loss": 0.0508,
16200
+ "step": 21210
16201
+ },
16202
+ {
16203
+ "epoch": 9.943767572633552,
16204
+ "grad_norm": 1.564691424369812,
16205
+ "learning_rate": 8.757029053420806e-06,
16206
+ "loss": 0.0518,
16207
+ "step": 21220
16208
+ },
16209
+ {
16210
+ "epoch": 9.948453608247423,
16211
+ "grad_norm": 1.526147723197937,
16212
+ "learning_rate": 8.756443298969073e-06,
16213
+ "loss": 0.0541,
16214
+ "step": 21230
16215
+ },
16216
+ {
16217
+ "epoch": 9.953139643861293,
16218
+ "grad_norm": 1.049249291419983,
16219
+ "learning_rate": 8.755857544517339e-06,
16220
+ "loss": 0.0564,
16221
+ "step": 21240
16222
+ },
16223
+ {
16224
+ "epoch": 9.957825679475164,
16225
+ "grad_norm": 2.233095407485962,
16226
+ "learning_rate": 8.755271790065605e-06,
16227
+ "loss": 0.0406,
16228
+ "step": 21250
16229
+ },
16230
+ {
16231
+ "epoch": 9.957825679475164,
16232
+ "eval_loss": 0.03874640911817551,
16233
+ "eval_pearson_cosine": 0.7823759183217831,
16234
+ "eval_pearson_dot": 0.6362060503787177,
16235
+ "eval_pearson_euclidean": 0.7337201116603183,
16236
+ "eval_pearson_manhattan": 0.7335170834683709,
16237
+ "eval_runtime": 42.9733,
16238
+ "eval_samples_per_second": 34.905,
16239
+ "eval_spearman_cosine": 0.7845387476571652,
16240
+ "eval_spearman_dot": 0.6531911497158578,
16241
+ "eval_spearman_euclidean": 0.7463049410965222,
16242
+ "eval_spearman_manhattan": 0.7460162771900863,
16243
+ "eval_steps_per_second": 34.905,
16244
+ "step": 21250
16245
+ },
16246
+ {
16247
+ "epoch": 9.962511715089034,
16248
+ "grad_norm": 1.1485975980758667,
16249
+ "learning_rate": 8.754686035613872e-06,
16250
+ "loss": 0.0483,
16251
+ "step": 21260
16252
+ },
16253
+ {
16254
+ "epoch": 9.967197750702905,
16255
+ "grad_norm": 1.0608566999435425,
16256
+ "learning_rate": 8.754100281162138e-06,
16257
+ "loss": 0.0488,
16258
+ "step": 21270
16259
+ },
16260
+ {
16261
+ "epoch": 9.971883786316775,
16262
+ "grad_norm": 2.202392339706421,
16263
+ "learning_rate": 8.753514526710405e-06,
16264
+ "loss": 0.0559,
16265
+ "step": 21280
16266
+ },
16267
+ {
16268
+ "epoch": 9.976569821930646,
16269
+ "grad_norm": 1.6414839029312134,
16270
+ "learning_rate": 8.75292877225867e-06,
16271
+ "loss": 0.064,
16272
+ "step": 21290
16273
+ },
16274
+ {
16275
+ "epoch": 9.981255857544518,
16276
+ "grad_norm": 1.7389216423034668,
16277
+ "learning_rate": 8.752343017806935e-06,
16278
+ "loss": 0.0464,
16279
+ "step": 21300
16280
+ },
16281
+ {
16282
+ "epoch": 9.985941893158389,
16283
+ "grad_norm": 1.7043269872665405,
16284
+ "learning_rate": 8.751757263355202e-06,
16285
+ "loss": 0.0473,
16286
+ "step": 21310
16287
+ },
16288
+ {
16289
+ "epoch": 9.990627928772259,
16290
+ "grad_norm": 1.5085012912750244,
16291
+ "learning_rate": 8.751171508903469e-06,
16292
+ "loss": 0.0496,
16293
+ "step": 21320
16294
+ },
16295
+ {
16296
+ "epoch": 9.99531396438613,
16297
+ "grad_norm": 1.686661720275879,
16298
+ "learning_rate": 8.750585754451734e-06,
16299
+ "loss": 0.051,
16300
+ "step": 21330
16301
+ },
16302
+ {
16303
+ "epoch": 10.0,
16304
+ "grad_norm": 1.0168319940567017,
16305
+ "learning_rate": 8.750000000000001e-06,
16306
+ "loss": 0.0477,
16307
+ "step": 21340
16308
  }
16309
  ],
16310
  "logging_steps": 10,
 
16319
  "should_evaluate": false,
16320
  "should_log": false,
16321
  "should_save": true,
16322
+ "should_training_stop": true
16323
  },
16324
  "attributes": {}
16325
  }