FormlessAI commited on
Commit
84c832e
·
verified ·
1 Parent(s): fe619fc

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fbd7e7ee0e0b0d24778ae4ae26100b0a8fb2df96c0c8e46068a21bb252a4bf39
3
  size 98088784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0b2192cab039402b759c7755739934206cf2585c7ded8de3eba8e8a4711f0f2
3
  size 98088784
last-checkpoint/global_step1600/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30e6242c13ae98b8a5cbe60e5ad651c403bb13deedea6b758ac3fc066fc1a3ac
3
+ size 73939813
last-checkpoint/global_step1600/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03ed76c8aaa1ef74b40fbc859d853a5c8344f37e789b087826352ff27c85296a
3
+ size 73939813
last-checkpoint/global_step1600/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:384d53dcec29199c1a7bf744018c71f65b4a798b58a2526e0a6f485dc25b1cad
3
+ size 73939877
last-checkpoint/global_step1600/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e006e5a3030d4fc513e5c423de939f5b9cd0faa645e0b0ad363d85078ceccd8e
3
+ size 73939877
last-checkpoint/global_step1600/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11bbc3b21a3d45ce24346133fd20140bab87f069916c01102d188f1090ea0202
3
+ size 564993061
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step1550
 
1
+ global_step1600
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e01ef876a968a28df370a06abf6d57c6d373bd832789aaf63d56fde5afbe7bb
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1eedf98ea9975d3d0f462291ea7b85b1da5289375a42dca5aaa06c6d74ce34c5
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e8b5a3d8a67eeb06d474c91493fbd7ff584a0b5183fd544d54b0c6eb1c4e56c
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1d8118304cff88474c86198c7f5aa76463f040ece3bc21b07aecbb56d4fa64a
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:62c1801ecd5d4bb9dd5fe34a6df0b80f63650a26836f380f43dd909361d68fe1
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:429e5f6db3d40572a6c2433c9bdbbba576389574bcdd6f230c7b20ea60fcad6b
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cca02c741884e18e1696ec0f5091b962de1adf48b6c8c872b9fe8bad0a58b156
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25916e2cde20cc021d3d06b16632aaf89d5216f10390eef1707ef1a019a03407
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:623d7e2a01cfe6bc8edd920eeeb1368be0061931f43bba9f8429c60a7e24e4e0
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5aa753350ab43e2a824d8e89dd96b9a3ddede15e688d15c87d248607ff08488a
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 0.6680008769035339,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.5192896509491733,
6
  "eval_steps": 50,
7
- "global_step": 1550,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2426,6 +2426,84 @@
2426
  "eval_samples_per_second": 126.077,
2427
  "eval_steps_per_second": 15.768,
2428
  "step": 1550
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2429
  }
2430
  ],
2431
  "logging_steps": 5,
@@ -2454,7 +2532,7 @@
2454
  "attributes": {}
2455
  }
2456
  },
2457
- "total_flos": 7.99411319630463e+17,
2458
  "train_batch_size": 2,
2459
  "trial_name": null,
2460
  "trial_params": null
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 0.6650952696800232,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.5682792406613595,
6
  "eval_steps": 50,
7
+ "global_step": 1600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2426
  "eval_samples_per_second": 126.077,
2427
  "eval_steps_per_second": 15.768,
2428
  "step": 1550
2429
+ },
2430
+ {
2431
+ "epoch": 1.524188609920392,
2432
+ "grad_norm": 0.20915450155735016,
2433
+ "learning_rate": 8.841917952432193e-05,
2434
+ "loss": 0.7048,
2435
+ "step": 1555
2436
+ },
2437
+ {
2438
+ "epoch": 1.5290875688916106,
2439
+ "grad_norm": 0.21618790924549103,
2440
+ "learning_rate": 8.827641367989242e-05,
2441
+ "loss": 0.6895,
2442
+ "step": 1560
2443
+ },
2444
+ {
2445
+ "epoch": 1.533986527862829,
2446
+ "grad_norm": 0.20739570260047913,
2447
+ "learning_rate": 8.813330996545632e-05,
2448
+ "loss": 0.712,
2449
+ "step": 1565
2450
+ },
2451
+ {
2452
+ "epoch": 1.5388854868340478,
2453
+ "grad_norm": 0.2209644615650177,
2454
+ "learning_rate": 8.798986985162568e-05,
2455
+ "loss": 0.6987,
2456
+ "step": 1570
2457
+ },
2458
+ {
2459
+ "epoch": 1.5437844458052665,
2460
+ "grad_norm": 0.20145706832408905,
2461
+ "learning_rate": 8.784609481246963e-05,
2462
+ "loss": 0.71,
2463
+ "step": 1575
2464
+ },
2465
+ {
2466
+ "epoch": 1.548683404776485,
2467
+ "grad_norm": 0.20716360211372375,
2468
+ "learning_rate": 8.770198632549912e-05,
2469
+ "loss": 0.6984,
2470
+ "step": 1580
2471
+ },
2472
+ {
2473
+ "epoch": 1.5535823637477035,
2474
+ "grad_norm": 0.1935468167066574,
2475
+ "learning_rate": 8.755754587165184e-05,
2476
+ "loss": 0.701,
2477
+ "step": 1585
2478
+ },
2479
+ {
2480
+ "epoch": 1.5584813227189223,
2481
+ "grad_norm": 0.19698968529701233,
2482
+ "learning_rate": 8.741277493527693e-05,
2483
+ "loss": 0.6829,
2484
+ "step": 1590
2485
+ },
2486
+ {
2487
+ "epoch": 1.563380281690141,
2488
+ "grad_norm": 0.20820119976997375,
2489
+ "learning_rate": 8.726767500411974e-05,
2490
+ "loss": 0.6963,
2491
+ "step": 1595
2492
+ },
2493
+ {
2494
+ "epoch": 1.5682792406613595,
2495
+ "grad_norm": 0.18384264409542084,
2496
+ "learning_rate": 8.712224756930659e-05,
2497
+ "loss": 0.7072,
2498
+ "step": 1600
2499
+ },
2500
+ {
2501
+ "epoch": 1.5682792406613595,
2502
+ "eval_loss": 0.6650952696800232,
2503
+ "eval_runtime": 15.609,
2504
+ "eval_samples_per_second": 125.505,
2505
+ "eval_steps_per_second": 15.696,
2506
+ "step": 1600
2507
  }
2508
  ],
2509
  "logging_steps": 5,
 
2532
  "attributes": {}
2533
  }
2534
  },
2535
+ "total_flos": 8.252213790611866e+17,
2536
  "train_batch_size": 2,
2537
  "trial_name": null,
2538
  "trial_params": null