amos1088 commited on
Commit
bc903df
·
verified ·
1 Parent(s): 6b336fa

Training in progress, step 500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db923703ef289ea2b7f0ed89f414100bbacd7933a8175b3546192c70f2a0dbdc
3
  size 35668592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc89b84ff994a416b1fd8a35db8384b6b9a40381f903264827d17ecdff5f45be
3
  size 35668592
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:364bd8ce2b599a8e19aee7f20074b51e7dc5e2b7715f29a114265f76ea1f9d15
3
  size 18257163
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6ffe29712d5eeee8f43f601ab13dd23dc7df7e12fb41454b57276f9b97c3680
3
  size 18257163
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e5049d02e7af5cc4d06887e6c9cc2ef9b2dd211b1bb6f0170770aee213d071d
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a17d91ff6dcca4633791a0e119c48601550130760f9eabb15146d59647aafb1
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b25f828af166f81a33163f34ab272a85139e04116e81b2518c52956ce2a16287
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c680537e123ff05619f00235a6bb4e1115b680be2ad94388dedf9dffc0968a0
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b81a6838d8230cd65883dd5365346eaa99ff0ef44d4f6c714bc03cc2ae120638
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1024392986cac23a001e3e4a426a85b67203cda9404b22609e539557db80bbac
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.12034599473486272,
6
  "eval_steps": 500,
7
- "global_step": 400,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -368,6 +368,96 @@
368
  "mean_token_accuracy": 0.625,
369
  "num_tokens": 1385961.0,
370
  "step": 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
371
  }
372
  ],
373
  "logging_steps": 10,
@@ -387,7 +477,7 @@
387
  "attributes": {}
388
  }
389
  },
390
- "total_flos": 3.103021297234944e+16,
391
  "train_batch_size": 1,
392
  "trial_name": null,
393
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.15043249341857842,
6
  "eval_steps": 500,
7
+ "global_step": 500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
368
  "mean_token_accuracy": 0.625,
369
  "num_tokens": 1385961.0,
370
  "step": 400
371
+ },
372
+ {
373
+ "epoch": 0.1233546446032343,
374
+ "grad_norm": 0.06864658743143082,
375
+ "learning_rate": 2.0290581162324652e-05,
376
+ "loss": 0.0724,
377
+ "mean_token_accuracy": 0.55,
378
+ "num_tokens": 1421542.0,
379
+ "step": 410
380
+ },
381
+ {
382
+ "epoch": 0.12636329447160588,
383
+ "grad_norm": 0.0036715222522616386,
384
+ "learning_rate": 2.079158316633267e-05,
385
+ "loss": 0.007,
386
+ "mean_token_accuracy": 0.55,
387
+ "num_tokens": 1457078.0,
388
+ "step": 420
389
+ },
390
+ {
391
+ "epoch": 0.12937194433997742,
392
+ "grad_norm": 0.00313239055685699,
393
+ "learning_rate": 2.1292585170340683e-05,
394
+ "loss": 0.001,
395
+ "mean_token_accuracy": 0.6375,
396
+ "num_tokens": 1490852.0,
397
+ "step": 430
398
+ },
399
+ {
400
+ "epoch": 0.132380594208349,
401
+ "grad_norm": 0.0015605625230818987,
402
+ "learning_rate": 2.1793587174348697e-05,
403
+ "loss": 0.2283,
404
+ "mean_token_accuracy": 0.5125,
405
+ "num_tokens": 1525241.0,
406
+ "step": 440
407
+ },
408
+ {
409
+ "epoch": 0.13538924407672057,
410
+ "grad_norm": 0.0009020116995088756,
411
+ "learning_rate": 2.2294589178356714e-05,
412
+ "loss": 0.0017,
413
+ "mean_token_accuracy": 0.5875,
414
+ "num_tokens": 1561253.0,
415
+ "step": 450
416
+ },
417
+ {
418
+ "epoch": 0.13839789394509214,
419
+ "grad_norm": 0.0005520946579053998,
420
+ "learning_rate": 2.279559118236473e-05,
421
+ "loss": 0.0005,
422
+ "mean_token_accuracy": 0.6375,
423
+ "num_tokens": 1596990.0,
424
+ "step": 460
425
+ },
426
+ {
427
+ "epoch": 0.1414065438134637,
428
+ "grad_norm": 0.0006775453221052885,
429
+ "learning_rate": 2.3296593186372748e-05,
430
+ "loss": 0.0085,
431
+ "mean_token_accuracy": 0.5,
432
+ "num_tokens": 1633419.0,
433
+ "step": 470
434
+ },
435
+ {
436
+ "epoch": 0.14441519368183528,
437
+ "grad_norm": 0.0002734291192609817,
438
+ "learning_rate": 2.3797595190380762e-05,
439
+ "loss": 0.0014,
440
+ "mean_token_accuracy": 0.575,
441
+ "num_tokens": 1668732.0,
442
+ "step": 480
443
+ },
444
+ {
445
+ "epoch": 0.14742384355020685,
446
+ "grad_norm": 0.0007396186119876802,
447
+ "learning_rate": 2.429859719438878e-05,
448
+ "loss": 0.001,
449
+ "mean_token_accuracy": 0.6375,
450
+ "num_tokens": 1703568.0,
451
+ "step": 490
452
+ },
453
+ {
454
+ "epoch": 0.15043249341857842,
455
+ "grad_norm": 0.0009952335385605693,
456
+ "learning_rate": 2.4799599198396793e-05,
457
+ "loss": 0.0017,
458
+ "mean_token_accuracy": 0.7375,
459
+ "num_tokens": 1736202.0,
460
+ "step": 500
461
  }
462
  ],
463
  "logging_steps": 10,
 
477
  "attributes": {}
478
  }
479
  },
480
+ "total_flos": 3.887174157355008e+16,
481
  "train_batch_size": 1,
482
  "trial_name": null,
483
  "trial_params": null