Commit a52b1a3 (verified) · CocoRoF committed · Parent: 791d86a

Training in progress, step 8536, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:14971b6e04cadaa88534f712e721171d13999a95ce9d9ac46c4729800b89e946
+ oid sha256:703b75465ed45e4a47b755f4dbf7613f34e8cb9a9c6557491a46a67bc25a57ef
  size 737580392
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cebce73c6f9897e73bc658e05632b2e976a2c1891e8ef3f6c3c2f8924ba60b4c
+ oid sha256:943fd807c0afa2eb74111f9b3a9e2bfca879f4ad22f91c7601145761a7d127c8
  size 1475248442
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ff2a98b7b58dd272a86869334fd0edf641ab47ceb102b634b242c3ff92151a26
+ oid sha256:774612f2d4811ca7da639cec8d3b509c1f87b5ffc57761546af6466447a0619a
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e7a23627952aa878a89c58e1effd3a883c81420a06e0fccd761ecb8b1539b91f
+ oid sha256:4bece9809bf70ad158471014e9f0407932e462ce7f7864e1800e151872b48ca7
  size 1000
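
All four checkpoint files above are stored via Git LFS, so each diff touches only the pointer: the "oid sha256:" line changes while "size" stays the same, because each file holds the same tensors with updated values. As a minimal sketch of how one might verify a downloaded payload against the oid recorded in its pointer (assuming a local clone with LFS files fetched; the path below is illustrative):

import hashlib
from pathlib import Path

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    # Hash in 1 MiB chunks so a multi-GB checkpoint never sits in memory.
    h = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# Expected oid copied from the model.safetensors pointer diff above.
expected = "703b75465ed45e4a47b755f4dbf7613f34e8cb9a9c6557491a46a67bc25a57ef"
actual = sha256_of(Path("last-checkpoint/model.safetensors"))  # illustrative path
print("ok" if actual == expected else "oid mismatch: " + actual)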
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 3.7488284910965324,
+ "epoch": 4.0,
  "eval_steps": 250,
- "global_step": 8000,
+ "global_step": 8536,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -6119,6 +6119,409 @@
  "eval_spearman_manhattan": 0.8023527525471515,
  "eval_steps_per_second": 29.406,
  "step": 8000
+ },
+ {
+ "epoch": 3.753514526710403,
+ "grad_norm": 1.525901198387146,
+ "learning_rate": 1.7654053420805998e-05,
+ "loss": 0.1211,
+ "step": 8010
+ },
+ {
+ "epoch": 3.758200562324274,
+ "grad_norm": 2.8532297611236572,
+ "learning_rate": 1.765112464854733e-05,
+ "loss": 0.1165,
+ "step": 8020
+ },
+ {
+ "epoch": 3.7628865979381443,
+ "grad_norm": 1.453282117843628,
+ "learning_rate": 1.764819587628866e-05,
+ "loss": 0.1293,
+ "step": 8030
+ },
+ {
+ "epoch": 3.7675726335520148,
+ "grad_norm": 1.6476629972457886,
+ "learning_rate": 1.7645267104029993e-05,
+ "loss": 0.1104,
+ "step": 8040
+ },
+ {
+ "epoch": 3.7722586691658857,
+ "grad_norm": 1.583380103111267,
+ "learning_rate": 1.7642338331771322e-05,
+ "loss": 0.1191,
+ "step": 8050
+ },
+ {
+ "epoch": 3.776944704779756,
+ "grad_norm": 1.4234002828598022,
+ "learning_rate": 1.7639409559512655e-05,
+ "loss": 0.1201,
+ "step": 8060
+ },
+ {
+ "epoch": 3.781630740393627,
+ "grad_norm": 2.0201187133789062,
+ "learning_rate": 1.7636480787253985e-05,
+ "loss": 0.1398,
+ "step": 8070
+ },
+ {
+ "epoch": 3.7863167760074976,
+ "grad_norm": 1.8647639751434326,
+ "learning_rate": 1.7633552014995314e-05,
+ "loss": 0.1344,
+ "step": 8080
+ },
+ {
+ "epoch": 3.791002811621368,
+ "grad_norm": 1.5310838222503662,
+ "learning_rate": 1.7630623242736647e-05,
+ "loss": 0.118,
+ "step": 8090
+ },
+ {
+ "epoch": 3.795688847235239,
+ "grad_norm": 1.740401268005371,
+ "learning_rate": 1.7627694470477977e-05,
+ "loss": 0.1187,
+ "step": 8100
+ },
+ {
+ "epoch": 3.8003748828491095,
+ "grad_norm": 1.0978221893310547,
+ "learning_rate": 1.762476569821931e-05,
+ "loss": 0.1218,
+ "step": 8110
+ },
+ {
+ "epoch": 3.8050609184629804,
+ "grad_norm": 1.8838212490081787,
+ "learning_rate": 1.762183692596064e-05,
+ "loss": 0.1349,
+ "step": 8120
+ },
+ {
+ "epoch": 3.809746954076851,
+ "grad_norm": 1.8535819053649902,
+ "learning_rate": 1.761890815370197e-05,
+ "loss": 0.118,
+ "step": 8130
+ },
+ {
+ "epoch": 3.8144329896907214,
+ "grad_norm": 1.482851266860962,
+ "learning_rate": 1.76159793814433e-05,
+ "loss": 0.1136,
+ "step": 8140
+ },
+ {
+ "epoch": 3.8191190253045924,
+ "grad_norm": 1.4817432165145874,
+ "learning_rate": 1.761305060918463e-05,
+ "loss": 0.1144,
+ "step": 8150
+ },
+ {
+ "epoch": 3.823805060918463,
+ "grad_norm": 1.8795218467712402,
+ "learning_rate": 1.761012183692596e-05,
+ "loss": 0.1337,
+ "step": 8160
+ },
+ {
+ "epoch": 3.8284910965323338,
+ "grad_norm": 1.5762320756912231,
+ "learning_rate": 1.7607193064667293e-05,
+ "loss": 0.1186,
+ "step": 8170
+ },
+ {
+ "epoch": 3.8331771321462043,
+ "grad_norm": 1.3855458498001099,
+ "learning_rate": 1.7604264292408623e-05,
+ "loss": 0.1213,
+ "step": 8180
+ },
+ {
+ "epoch": 3.8378631677600747,
+ "grad_norm": 1.619994044303894,
+ "learning_rate": 1.7601335520149952e-05,
+ "loss": 0.1484,
+ "step": 8190
+ },
+ {
+ "epoch": 3.8425492033739457,
+ "grad_norm": 1.3682477474212646,
+ "learning_rate": 1.7598406747891285e-05,
+ "loss": 0.0907,
+ "step": 8200
+ },
+ {
+ "epoch": 3.847235238987816,
+ "grad_norm": 1.5766955614089966,
+ "learning_rate": 1.7595477975632615e-05,
+ "loss": 0.1081,
+ "step": 8210
+ },
+ {
+ "epoch": 3.851921274601687,
+ "grad_norm": 1.5448287725448608,
+ "learning_rate": 1.7592549203373948e-05,
+ "loss": 0.1296,
+ "step": 8220
+ },
+ {
+ "epoch": 3.8566073102155576,
+ "grad_norm": 1.2130484580993652,
+ "learning_rate": 1.7589620431115277e-05,
+ "loss": 0.1141,
+ "step": 8230
+ },
+ {
+ "epoch": 3.861293345829428,
+ "grad_norm": 1.5601420402526855,
+ "learning_rate": 1.758669165885661e-05,
+ "loss": 0.1303,
+ "step": 8240
+ },
+ {
+ "epoch": 3.865979381443299,
+ "grad_norm": 1.26397705078125,
+ "learning_rate": 1.758376288659794e-05,
+ "loss": 0.1017,
+ "step": 8250
+ },
+ {
+ "epoch": 3.865979381443299,
+ "eval_loss": 0.031061464920639992,
+ "eval_pearson_cosine": 0.8181771715322625,
+ "eval_pearson_dot": 0.753218304404399,
+ "eval_pearson_euclidean": 0.7900298439690836,
+ "eval_pearson_manhattan": 0.792536151039883,
+ "eval_runtime": 3.1857,
+ "eval_samples_per_second": 470.856,
+ "eval_spearman_cosine": 0.8174158550444287,
+ "eval_spearman_dot": 0.7522993308222343,
+ "eval_spearman_euclidean": 0.798562890702385,
+ "eval_spearman_manhattan": 0.8006887717421057,
+ "eval_steps_per_second": 29.507,
+ "step": 8250
+ },
+ {
+ "epoch": 3.8706654170571695,
+ "grad_norm": 1.8911367654800415,
+ "learning_rate": 1.7580834114339272e-05,
+ "loss": 0.1197,
+ "step": 8260
+ },
+ {
+ "epoch": 3.8753514526710404,
+ "grad_norm": 1.5515751838684082,
+ "learning_rate": 1.7577905342080602e-05,
+ "loss": 0.1278,
+ "step": 8270
+ },
+ {
+ "epoch": 3.880037488284911,
+ "grad_norm": 2.1667306423187256,
+ "learning_rate": 1.757497656982193e-05,
+ "loss": 0.1387,
+ "step": 8280
+ },
+ {
+ "epoch": 3.8847235238987814,
+ "grad_norm": 1.149591326713562,
+ "learning_rate": 1.7572047797563264e-05,
+ "loss": 0.1231,
+ "step": 8290
+ },
+ {
+ "epoch": 3.8894095595126523,
+ "grad_norm": 1.848067283630371,
+ "learning_rate": 1.7569119025304594e-05,
+ "loss": 0.1228,
+ "step": 8300
+ },
+ {
+ "epoch": 3.894095595126523,
+ "grad_norm": 1.451674222946167,
+ "learning_rate": 1.7566190253045923e-05,
+ "loss": 0.1161,
+ "step": 8310
+ },
+ {
+ "epoch": 3.8987816307403937,
+ "grad_norm": 1.7280783653259277,
+ "learning_rate": 1.7563261480787256e-05,
+ "loss": 0.1191,
+ "step": 8320
+ },
+ {
+ "epoch": 3.9034676663542642,
+ "grad_norm": 1.5939549207687378,
+ "learning_rate": 1.7560332708528586e-05,
+ "loss": 0.1272,
+ "step": 8330
+ },
+ {
+ "epoch": 3.9081537019681347,
+ "grad_norm": 1.6509348154067993,
+ "learning_rate": 1.7557403936269915e-05,
+ "loss": 0.127,
+ "step": 8340
+ },
+ {
+ "epoch": 3.9128397375820057,
+ "grad_norm": 1.8915349245071411,
+ "learning_rate": 1.7554475164011248e-05,
+ "loss": 0.1352,
+ "step": 8350
+ },
+ {
+ "epoch": 3.917525773195876,
+ "grad_norm": 2.188493490219116,
+ "learning_rate": 1.7551546391752578e-05,
+ "loss": 0.1105,
+ "step": 8360
+ },
+ {
+ "epoch": 3.922211808809747,
+ "grad_norm": 1.8589377403259277,
+ "learning_rate": 1.754861761949391e-05,
+ "loss": 0.1031,
+ "step": 8370
+ },
+ {
+ "epoch": 3.9268978444236176,
+ "grad_norm": 1.7054208517074585,
+ "learning_rate": 1.754568884723524e-05,
+ "loss": 0.1165,
+ "step": 8380
+ },
+ {
+ "epoch": 3.931583880037488,
+ "grad_norm": 1.2826303243637085,
+ "learning_rate": 1.754276007497657e-05,
+ "loss": 0.0994,
+ "step": 8390
+ },
+ {
+ "epoch": 3.936269915651359,
+ "grad_norm": 2.087935209274292,
+ "learning_rate": 1.7539831302717902e-05,
+ "loss": 0.1493,
+ "step": 8400
+ },
+ {
+ "epoch": 3.9409559512652295,
+ "grad_norm": 1.4399867057800293,
+ "learning_rate": 1.7536902530459232e-05,
+ "loss": 0.1126,
+ "step": 8410
+ },
+ {
+ "epoch": 3.9456419868791004,
+ "grad_norm": 2.081295967102051,
+ "learning_rate": 1.7533973758200565e-05,
+ "loss": 0.1149,
+ "step": 8420
+ },
+ {
+ "epoch": 3.950328022492971,
+ "grad_norm": 1.6477272510528564,
+ "learning_rate": 1.7531044985941894e-05,
+ "loss": 0.124,
+ "step": 8430
+ },
+ {
+ "epoch": 3.9550140581068414,
+ "grad_norm": 1.43690025806427,
+ "learning_rate": 1.7528116213683227e-05,
+ "loss": 0.1175,
+ "step": 8440
+ },
+ {
+ "epoch": 3.9597000937207123,
+ "grad_norm": 2.231391429901123,
+ "learning_rate": 1.7525187441424557e-05,
+ "loss": 0.1039,
+ "step": 8450
+ },
+ {
+ "epoch": 3.964386129334583,
+ "grad_norm": 1.699771761894226,
+ "learning_rate": 1.752225866916589e-05,
+ "loss": 0.096,
+ "step": 8460
+ },
+ {
+ "epoch": 3.9690721649484537,
+ "grad_norm": 0.9869770407676697,
+ "learning_rate": 1.751932989690722e-05,
+ "loss": 0.1318,
+ "step": 8470
+ },
+ {
+ "epoch": 3.973758200562324,
+ "grad_norm": 1.2464418411254883,
+ "learning_rate": 1.751640112464855e-05,
+ "loss": 0.1025,
+ "step": 8480
+ },
+ {
+ "epoch": 3.9784442361761947,
+ "grad_norm": 1.8724063634872437,
+ "learning_rate": 1.751347235238988e-05,
+ "loss": 0.1154,
+ "step": 8490
+ },
+ {
+ "epoch": 3.9831302717900656,
+ "grad_norm": 1.6470191478729248,
+ "learning_rate": 1.751054358013121e-05,
+ "loss": 0.1132,
+ "step": 8500
+ },
+ {
+ "epoch": 3.9831302717900656,
+ "eval_loss": 0.03063393384218216,
+ "eval_pearson_cosine": 0.821106317003462,
+ "eval_pearson_dot": 0.7578136492444401,
+ "eval_pearson_euclidean": 0.7882531341441634,
+ "eval_pearson_manhattan": 0.790852878268538,
+ "eval_runtime": 3.174,
+ "eval_samples_per_second": 472.588,
+ "eval_spearman_cosine": 0.8198241690509209,
+ "eval_spearman_dot": 0.7578266102334076,
+ "eval_spearman_euclidean": 0.7967830640080272,
+ "eval_spearman_manhattan": 0.7991467507473939,
+ "eval_steps_per_second": 29.616,
+ "step": 8500
+ },
+ {
+ "epoch": 3.987816307403936,
+ "grad_norm": 1.676721453666687,
+ "learning_rate": 1.750761480787254e-05,
+ "loss": 0.1188,
+ "step": 8510
+ },
+ {
+ "epoch": 3.992502343017807,
+ "grad_norm": 1.187525987625122,
+ "learning_rate": 1.750468603561387e-05,
+ "loss": 0.132,
+ "step": 8520
+ },
+ {
+ "epoch": 3.9971883786316775,
+ "grad_norm": 1.2841644287109375,
+ "learning_rate": 1.7501757263355203e-05,
+ "loss": 0.1379,
+ "step": 8530
  }
  ],
  "logging_steps": 10,
@@ -6133,7 +6536,7 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
  },
  "attributes": {}
  }
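
Although the commit message reads "Training in progress", this trainer_state.json change marks the end of the run: the epoch moves from ~3.749 to exactly 4.0, global_step from 8000 to 8536, and should_training_stop flips to true. A quick sanity check against the saved state (a sketch; the path is illustrative, and the nesting of the TrainerControl flags under "stateful_callbacks" is assumed from recent transformers layouts):

import json

with open("last-checkpoint/trainer_state.json") as f:  # illustrative path
    state = json.load(f)

# Top-level fields shown in the diff above.
print(state["epoch"], state["global_step"])  # expect 4.0 and 8536

# Assumed layout: recent transformers versions keep TrainerControl flags under
# "stateful_callbacks"; .get() keeps this safe if the layout differs.
control = state.get("stateful_callbacks", {}).get("TrainerControl", {})
print(control.get("args", {}).get("should_training_stop"))  # expect True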