Sela223 commited on
Commit
c496c85
·
verified ·
1 Parent(s): e07a256

Training in progress, step 1196, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f9c1aa13c3c5817e2fa598b5b536720d683cfafff224fa37b79f7684a482f0b
3
  size 310152752
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26c57f537c6e8cb1f250dc86e919d048561f018dbb08c57403f8908ff3476437
3
  size 310152752
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3c2d5b3b0bb33cbea232fc3499e4b0f66d0c0e42402b974b2b74cdc33cb50e1
3
  size 133909059
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5efc5ad94f15e851bd1bbc752176792db889009682cb9c361d1d3290ee91066f
3
  size 133909059
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae6c927eb09246f1e597bb2ec85da20567dd614a07f2630055148a5c724de9f7
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:304e4f2a6e2248bf62fc7d1ebac820e06d4e575b12b458ce60adbbc6486b8711
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 1150,
3
  "best_metric": 1.943885087966919,
4
  "best_model_checkpoint": "outputs/checkpoint-1150",
5
- "epoch": 1.9230769230769231,
6
  "eval_steps": 50,
7
- "global_step": 1150,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -8242,6 +8242,328 @@
8242
  "eval_samples_per_second": 22.611,
8243
  "eval_steps_per_second": 2.885,
8244
  "step": 1150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8245
  }
8246
  ],
8247
  "logging_steps": 1,
@@ -8256,12 +8578,12 @@
8256
  "should_evaluate": false,
8257
  "should_log": false,
8258
  "should_save": true,
8259
- "should_training_stop": false
8260
  },
8261
  "attributes": {}
8262
  }
8263
  },
8264
- "total_flos": 1.3491082933875825e+18,
8265
  "train_batch_size": 12,
8266
  "trial_name": null,
8267
  "trial_params": null
 
2
  "best_global_step": 1150,
3
  "best_metric": 1.943885087966919,
4
  "best_model_checkpoint": "outputs/checkpoint-1150",
5
+ "epoch": 2.0,
6
  "eval_steps": 50,
7
+ "global_step": 1196,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
8242
  "eval_samples_per_second": 22.611,
8243
  "eval_steps_per_second": 2.885,
8244
  "step": 1150
8245
+ },
8246
+ {
8247
+ "epoch": 1.9247491638795986,
8248
+ "grad_norm": 0.22156818211078644,
8249
+ "learning_rate": 1.9850918440857958e-07,
8250
+ "loss": 1.7031984329223633,
8251
+ "step": 1151
8252
+ },
8253
+ {
8254
+ "epoch": 1.9264214046822743,
8255
+ "grad_norm": 0.24198277294635773,
8256
+ "learning_rate": 1.899829931495012e-07,
8257
+ "loss": 1.9431957006454468,
8258
+ "step": 1152
8259
+ },
8260
+ {
8261
+ "epoch": 1.92809364548495,
8262
+ "grad_norm": 0.23016783595085144,
8263
+ "learning_rate": 1.8164324970625645e-07,
8264
+ "loss": 1.9043176174163818,
8265
+ "step": 1153
8266
+ },
8267
+ {
8268
+ "epoch": 1.9297658862876255,
8269
+ "grad_norm": 0.23631806671619415,
8270
+ "learning_rate": 1.7349001675219245e-07,
8271
+ "loss": 2.0027687549591064,
8272
+ "step": 1154
8273
+ },
8274
+ {
8275
+ "epoch": 1.931438127090301,
8276
+ "grad_norm": 0.24670401215553284,
8277
+ "learning_rate": 1.6552335555903297e-07,
8278
+ "loss": 2.1321308612823486,
8279
+ "step": 1155
8280
+ },
8281
+ {
8282
+ "epoch": 1.9331103678929766,
8283
+ "grad_norm": 0.2432417869567871,
8284
+ "learning_rate": 1.577433259964123e-07,
8285
+ "loss": 1.8444653749465942,
8286
+ "step": 1156
8287
+ },
8288
+ {
8289
+ "epoch": 1.9347826086956523,
8290
+ "grad_norm": 0.2338031530380249,
8291
+ "learning_rate": 1.501499865314171e-07,
8292
+ "loss": 1.697690486907959,
8293
+ "step": 1157
8294
+ },
8295
+ {
8296
+ "epoch": 1.9364548494983278,
8297
+ "grad_norm": 0.23973986506462097,
8298
+ "learning_rate": 1.4274339422816195e-07,
8299
+ "loss": 1.9107983112335205,
8300
+ "step": 1158
8301
+ },
8302
+ {
8303
+ "epoch": 1.9381270903010033,
8304
+ "grad_norm": 0.2242889702320099,
8305
+ "learning_rate": 1.3552360474734793e-07,
8306
+ "loss": 1.8276660442352295,
8307
+ "step": 1159
8308
+ },
8309
+ {
8310
+ "epoch": 1.939799331103679,
8311
+ "grad_norm": 0.2711448669433594,
8312
+ "learning_rate": 1.284906723458462e-07,
8313
+ "loss": 1.7825736999511719,
8314
+ "step": 1160
8315
+ },
8316
+ {
8317
+ "epoch": 1.9414715719063547,
8318
+ "grad_norm": 0.2267025262117386,
8319
+ "learning_rate": 1.216446498763013e-07,
8320
+ "loss": 1.7734841108322144,
8321
+ "step": 1161
8322
+ },
8323
+ {
8324
+ "epoch": 1.9431438127090301,
8325
+ "grad_norm": 0.22980396449565887,
8326
+ "learning_rate": 1.1498558878672016e-07,
8327
+ "loss": 1.7618581056594849,
8328
+ "step": 1162
8329
+ },
8330
+ {
8331
+ "epoch": 1.9448160535117056,
8332
+ "grad_norm": 0.23873302340507507,
8333
+ "learning_rate": 1.0851353912008644e-07,
8334
+ "loss": 1.9185343980789185,
8335
+ "step": 1163
8336
+ },
8337
+ {
8338
+ "epoch": 1.9464882943143813,
8339
+ "grad_norm": 0.24374531209468842,
8340
+ "learning_rate": 1.0222854951399407e-07,
8341
+ "loss": 1.9559900760650635,
8342
+ "step": 1164
8343
+ },
8344
+ {
8345
+ "epoch": 1.948160535117057,
8346
+ "grad_norm": 0.23170500993728638,
8347
+ "learning_rate": 9.613066720028097e-08,
8348
+ "loss": 2.0244576930999756,
8349
+ "step": 1165
8350
+ },
8351
+ {
8352
+ "epoch": 1.9498327759197325,
8353
+ "grad_norm": 0.2283419668674469,
8354
+ "learning_rate": 9.021993800466256e-08,
8355
+ "loss": 1.7836267948150635,
8356
+ "step": 1166
8357
+ },
8358
+ {
8359
+ "epoch": 1.951505016722408,
8360
+ "grad_norm": 0.25570887327194214,
8361
+ "learning_rate": 8.449640634639878e-08,
8362
+ "loss": 2.202239513397217,
8363
+ "step": 1167
8364
+ },
8365
+ {
8366
+ "epoch": 1.9531772575250836,
8367
+ "grad_norm": 0.23577114939689636,
8368
+ "learning_rate": 7.896011523794988e-08,
8369
+ "loss": 1.9459967613220215,
8370
+ "step": 1168
8371
+ },
8372
+ {
8373
+ "epoch": 1.9548494983277593,
8374
+ "grad_norm": 0.23402273654937744,
8375
+ "learning_rate": 7.361110628466838e-08,
8376
+ "loss": 1.822799801826477,
8377
+ "step": 1169
8378
+ },
8379
+ {
8380
+ "epoch": 1.9565217391304348,
8381
+ "grad_norm": 0.23017290234565735,
8382
+ "learning_rate": 6.84494196844715e-08,
8383
+ "loss": 1.8911821842193604,
8384
+ "step": 1170
8385
+ },
8386
+ {
8387
+ "epoch": 1.9581939799331103,
8388
+ "grad_norm": 0.23453032970428467,
8389
+ "learning_rate": 6.347509422754139e-08,
8390
+ "loss": 1.8932383060455322,
8391
+ "step": 1171
8392
+ },
8393
+ {
8394
+ "epoch": 1.959866220735786,
8395
+ "grad_norm": 0.2325022965669632,
8396
+ "learning_rate": 5.868816729604765e-08,
8397
+ "loss": 1.9721827507019043,
8398
+ "step": 1172
8399
+ },
8400
+ {
8401
+ "epoch": 1.9615384615384617,
8402
+ "grad_norm": 0.2408953160047531,
8403
+ "learning_rate": 5.408867486384472e-08,
8404
+ "loss": 2.094602584838867,
8405
+ "step": 1173
8406
+ },
8407
+ {
8408
+ "epoch": 1.9632107023411371,
8409
+ "grad_norm": 0.23328392207622528,
8410
+ "learning_rate": 4.9676651496222136e-08,
8411
+ "loss": 1.8785374164581299,
8412
+ "step": 1174
8413
+ },
8414
+ {
8415
+ "epoch": 1.9648829431438126,
8416
+ "grad_norm": 0.2375405728816986,
8417
+ "learning_rate": 4.5452130349629694e-08,
8418
+ "loss": 2.090651273727417,
8419
+ "step": 1175
8420
+ },
8421
+ {
8422
+ "epoch": 1.9665551839464883,
8423
+ "grad_norm": 0.2384941726922989,
8424
+ "learning_rate": 4.141514317143602e-08,
8425
+ "loss": 1.932543396949768,
8426
+ "step": 1176
8427
+ },
8428
+ {
8429
+ "epoch": 1.968227424749164,
8430
+ "grad_norm": 0.22377879917621613,
8431
+ "learning_rate": 3.7565720299687076e-08,
8432
+ "loss": 1.8287705183029175,
8433
+ "step": 1177
8434
+ },
8435
+ {
8436
+ "epoch": 1.9698996655518395,
8437
+ "grad_norm": 0.22661490738391876,
8438
+ "learning_rate": 3.3903890662878576e-08,
8439
+ "loss": 1.8487858772277832,
8440
+ "step": 1178
8441
+ },
8442
+ {
8443
+ "epoch": 1.971571906354515,
8444
+ "grad_norm": 0.23505854606628418,
8445
+ "learning_rate": 3.0429681779739484e-08,
8446
+ "loss": 1.9943294525146484,
8447
+ "step": 1179
8448
+ },
8449
+ {
8450
+ "epoch": 1.9732441471571907,
8451
+ "grad_norm": 0.23116961121559143,
8452
+ "learning_rate": 2.7143119759026613e-08,
8453
+ "loss": 1.818049669265747,
8454
+ "step": 1180
8455
+ },
8456
+ {
8457
+ "epoch": 1.9749163879598663,
8458
+ "grad_norm": 0.22798657417297363,
8459
+ "learning_rate": 2.404422929932204e-08,
8460
+ "loss": 1.8310678005218506,
8461
+ "step": 1181
8462
+ },
8463
+ {
8464
+ "epoch": 1.9765886287625418,
8465
+ "grad_norm": 0.24219320714473724,
8466
+ "learning_rate": 2.113303368885822e-08,
8467
+ "loss": 1.975824236869812,
8468
+ "step": 1182
8469
+ },
8470
+ {
8471
+ "epoch": 1.9782608695652173,
8472
+ "grad_norm": 0.244304358959198,
8473
+ "learning_rate": 1.8409554805329245e-08,
8474
+ "loss": 2.0979132652282715,
8475
+ "step": 1183
8476
+ },
8477
+ {
8478
+ "epoch": 1.979933110367893,
8479
+ "grad_norm": 0.22320342063903809,
8480
+ "learning_rate": 1.5873813115740988e-08,
8481
+ "loss": 1.6685010194778442,
8482
+ "step": 1184
8483
+ },
8484
+ {
8485
+ "epoch": 1.9816053511705687,
8486
+ "grad_norm": 0.24459514021873474,
8487
+ "learning_rate": 1.3525827676247326e-08,
8488
+ "loss": 1.8652524948120117,
8489
+ "step": 1185
8490
+ },
8491
+ {
8492
+ "epoch": 1.9832775919732442,
8493
+ "grad_norm": 0.23350538313388824,
8494
+ "learning_rate": 1.1365616132008593e-08,
8495
+ "loss": 1.7653487920761108,
8496
+ "step": 1186
8497
+ },
8498
+ {
8499
+ "epoch": 1.9849498327759196,
8500
+ "grad_norm": 0.23290188610553741,
8501
+ "learning_rate": 9.393194717061127e-09,
8502
+ "loss": 1.9263311624526978,
8503
+ "step": 1187
8504
+ },
8505
+ {
8506
+ "epoch": 1.9866220735785953,
8507
+ "grad_norm": 0.23780138790607452,
8508
+ "learning_rate": 7.608578254195142e-09,
8509
+ "loss": 2.1235318183898926,
8510
+ "step": 1188
8511
+ },
8512
+ {
8513
+ "epoch": 1.988294314381271,
8514
+ "grad_norm": 0.221963033080101,
8515
+ "learning_rate": 6.0117801548437155e-09,
8516
+ "loss": 1.9430090188980103,
8517
+ "step": 1189
8518
+ },
8519
+ {
8520
+ "epoch": 1.9899665551839465,
8521
+ "grad_norm": 0.24497728049755096,
8522
+ "learning_rate": 4.602812418974534e-09,
8523
+ "loss": 1.9605302810668945,
8524
+ "step": 1190
8525
+ },
8526
+ {
8527
+ "epoch": 1.991638795986622,
8528
+ "grad_norm": 0.24507929384708405,
8529
+ "learning_rate": 3.3816856350177284e-09,
8530
+ "loss": 1.8447235822677612,
8531
+ "step": 1191
8532
+ },
8533
+ {
8534
+ "epoch": 1.9933110367892977,
8535
+ "grad_norm": 0.2350272387266159,
8536
+ "learning_rate": 2.348408979760408e-09,
8537
+ "loss": 1.8975446224212646,
8538
+ "step": 1192
8539
+ },
8540
+ {
8541
+ "epoch": 1.9949832775919734,
8542
+ "grad_norm": 0.24266520142555237,
8543
+ "learning_rate": 1.502990218302247e-09,
8544
+ "loss": 1.9446773529052734,
8545
+ "step": 1193
8546
+ },
8547
+ {
8548
+ "epoch": 1.9966555183946488,
8549
+ "grad_norm": 0.23780354857444763,
8550
+ "learning_rate": 8.454357039860971e-10,
8551
+ "loss": 1.9795520305633545,
8552
+ "step": 1194
8553
+ },
8554
+ {
8555
+ "epoch": 1.9983277591973243,
8556
+ "grad_norm": 0.2187688648700714,
8557
+ "learning_rate": 3.757503783424765e-10,
8558
+ "loss": 1.903891682624817,
8559
+ "step": 1195
8560
+ },
8561
+ {
8562
+ "epoch": 2.0,
8563
+ "grad_norm": 0.2732059061527252,
8564
+ "learning_rate": 9.393777107291613e-11,
8565
+ "loss": 2.0153965950012207,
8566
+ "step": 1196
8567
  }
8568
  ],
8569
  "logging_steps": 1,
 
8578
  "should_evaluate": false,
8579
  "should_log": false,
8580
  "should_save": true,
8581
+ "should_training_stop": true
8582
  },
8583
  "attributes": {}
8584
  }
8585
  },
8586
+ "total_flos": 1.4028145166609603e+18,
8587
  "train_batch_size": 12,
8588
  "trial_name": null,
8589
  "trial_params": null