FormlessAI commited on
Commit
66f1549
·
verified ·
1 Parent(s): ba29524

Training in progress, epoch 11, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ccf2833204f1ebfbdd66d162d15ff1c32727735894636aaced086424b91aabe9
3
  size 892897944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e437eaee19bbd15e83d4a939cae73699cf7c04de79bc02cf69d3835fe0906e8f
3
  size 892897944
last-checkpoint/global_step805/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fe24ef50b1c15624fd2003a91f04fa978ea6c7fc7dae2feffb8609e4e5351bd
3
+ size 673148517
last-checkpoint/global_step805/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b3413c88245fa91d0398f4c5c91539ff255d7617af0bfd0b1ea4941e609b04f
3
+ size 673148581
last-checkpoint/global_step805/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b2c9af2053cea06a16992278b7c3a81170506faeafec1ee9c0d2f18e0e694eb
3
+ size 673148581
last-checkpoint/global_step805/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aac879faa1bf1eb50521a4fd4ad39dd6de95d6fde6bfcb6e12f4a1db23c80d78
3
+ size 673148581
last-checkpoint/global_step805/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0dc19002c5db0266879cb308fc9e0413a78a9370073a901953e11c9950ad380c
3
+ size 893076569
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step705
 
1
+ global_step805
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1937a443d7368c8ec9254650849425295f524b6811196164c9f8145ae9528880
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ff16acfda6bbc5cf50b99aa265031ad0b4a77a4ea996fe62e798c2b245c7d95
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0a996684adc127ea8c51ebb0d616f5ff3e480192cd01de6d293712583e60f2b
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a794a90df9c5b0631ebe2e7987dab57982f6a583f0010d55836d1074ee1a40d
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18dcda78addf690b92cd7056f07582eb468846f2e21bc29981e4ee2c6c66d84b
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd6668393a5bc1b0de5d288b1691ef425f75f4c9702525195c76ff72f801ebb1
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5b1adb37a2fab20cad86ca7dee4e55987e43f200ac69e3c2cd774e08f39674a
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f24d02dcb6989a54f9b8b9339108aafa5df8c99759987bdb33d7a9ef54169400
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d3f6472b6143fc6353ad6362140eebb6ce12ab1f1f8c14125bcce21cd059346
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:311eb9812176f51b258a368822f71b70f80b61465aa3d1733f9b098718c1a5ae
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 0.03448659926652908,
4
  "best_model_checkpoint": null,
5
- "epoch": 10.14336917562724,
6
  "eval_steps": 50,
7
- "global_step": 700,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2332,6 +2332,338 @@
2332
  "eval_samples_per_second": 25.568,
2333
  "eval_steps_per_second": 0.862,
2334
  "step": 700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2335
  }
2336
  ],
2337
  "logging_steps": 5,
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 0.03328302875161171,
4
  "best_model_checkpoint": null,
5
+ "epoch": 11.587813620071685,
6
  "eval_steps": 50,
7
+ "global_step": 800,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2332
  "eval_samples_per_second": 25.568,
2333
  "eval_steps_per_second": 0.862,
2334
  "step": 700
2335
+ },
2336
+ {
2337
+ "epoch": 10.21505376344086,
2338
+ "grad_norm": 3.6100528240203857,
2339
+ "learning_rate": 6.638172079355048e-05,
2340
+ "logits/chosen": -1.1095702648162842,
2341
+ "logits/rejected": -1.202734351158142,
2342
+ "logps/chosen": -385.0,
2343
+ "logps/rejected": -644.2999877929688,
2344
+ "loss": 0.029,
2345
+ "rewards/accuracies": 0.984375,
2346
+ "rewards/chosen": -1.306646704673767,
2347
+ "rewards/margins": 14.65625,
2348
+ "rewards/rejected": -15.978124618530273,
2349
+ "step": 705
2350
+ },
2351
+ {
2352
+ "epoch": 10.28673835125448,
2353
+ "grad_norm": 0.639515221118927,
2354
+ "learning_rate": 6.637155458698307e-05,
2355
+ "logits/chosen": -1.1066405773162842,
2356
+ "logits/rejected": -1.2199218273162842,
2357
+ "logps/chosen": -375.1000061035156,
2358
+ "logps/rejected": -635.5999755859375,
2359
+ "loss": 0.0257,
2360
+ "rewards/accuracies": 0.981249988079071,
2361
+ "rewards/chosen": -1.794921875,
2362
+ "rewards/margins": 14.431249618530273,
2363
+ "rewards/rejected": -16.215625762939453,
2364
+ "step": 710
2365
+ },
2366
+ {
2367
+ "epoch": 10.3584229390681,
2368
+ "grad_norm": 1.197538137435913,
2369
+ "learning_rate": 6.63613056201972e-05,
2370
+ "logits/chosen": -1.065039038658142,
2371
+ "logits/rejected": -1.20703125,
2372
+ "logps/chosen": -403.79998779296875,
2373
+ "logps/rejected": -669.5999755859375,
2374
+ "loss": 0.0463,
2375
+ "rewards/accuracies": 0.9671875238418579,
2376
+ "rewards/chosen": -1.352148413658142,
2377
+ "rewards/margins": 16.912500381469727,
2378
+ "rewards/rejected": -18.253124237060547,
2379
+ "step": 715
2380
+ },
2381
+ {
2382
+ "epoch": 10.43010752688172,
2383
+ "grad_norm": 2.171043872833252,
2384
+ "learning_rate": 6.635097391899463e-05,
2385
+ "logits/chosen": -1.103515625,
2386
+ "logits/rejected": -1.211328148841858,
2387
+ "logps/chosen": -386.6000061035156,
2388
+ "logps/rejected": -628.5,
2389
+ "loss": 0.0346,
2390
+ "rewards/accuracies": 0.981249988079071,
2391
+ "rewards/chosen": -1.0,
2392
+ "rewards/margins": 16.143749237060547,
2393
+ "rewards/rejected": -17.115625381469727,
2394
+ "step": 720
2395
+ },
2396
+ {
2397
+ "epoch": 10.501792114695341,
2398
+ "grad_norm": 2.131438732147217,
2399
+ "learning_rate": 6.63405595093854e-05,
2400
+ "logits/chosen": -1.1662108898162842,
2401
+ "logits/rejected": -1.2726562023162842,
2402
+ "logps/chosen": -396.3999938964844,
2403
+ "logps/rejected": -647.2000122070312,
2404
+ "loss": 0.0233,
2405
+ "rewards/accuracies": 0.989062488079071,
2406
+ "rewards/chosen": -1.1624664068222046,
2407
+ "rewards/margins": 16.649999618530273,
2408
+ "rewards/rejected": -17.818750381469727,
2409
+ "step": 725
2410
+ },
2411
+ {
2412
+ "epoch": 10.57347670250896,
2413
+ "grad_norm": 1.5541068315505981,
2414
+ "learning_rate": 6.633006241758778e-05,
2415
+ "logits/chosen": -1.228124976158142,
2416
+ "logits/rejected": -1.3742187023162842,
2417
+ "logps/chosen": -400.70001220703125,
2418
+ "logps/rejected": -670.2999877929688,
2419
+ "loss": 0.0487,
2420
+ "rewards/accuracies": 0.965624988079071,
2421
+ "rewards/chosen": -2.663867235183716,
2422
+ "rewards/margins": 18.240625381469727,
2423
+ "rewards/rejected": -20.918750762939453,
2424
+ "step": 730
2425
+ },
2426
+ {
2427
+ "epoch": 10.64516129032258,
2428
+ "grad_norm": 4.768660545349121,
2429
+ "learning_rate": 6.631948267002819e-05,
2430
+ "logits/chosen": -1.2703125476837158,
2431
+ "logits/rejected": -1.368749976158142,
2432
+ "logps/chosen": -423.8999938964844,
2433
+ "logps/rejected": -721.2000122070312,
2434
+ "loss": 0.0192,
2435
+ "rewards/accuracies": 0.9906250238418579,
2436
+ "rewards/chosen": -3.223828077316284,
2437
+ "rewards/margins": 20.174999237060547,
2438
+ "rewards/rejected": -23.399999618530273,
2439
+ "step": 735
2440
+ },
2441
+ {
2442
+ "epoch": 10.716845878136201,
2443
+ "grad_norm": 1.4146541357040405,
2444
+ "learning_rate": 6.630882029334112e-05,
2445
+ "logits/chosen": -1.2531249523162842,
2446
+ "logits/rejected": -1.345312476158142,
2447
+ "logps/chosen": -412.0,
2448
+ "logps/rejected": -667.5,
2449
+ "loss": 0.0345,
2450
+ "rewards/accuracies": 0.973437488079071,
2451
+ "rewards/chosen": -2.1131834983825684,
2452
+ "rewards/margins": 17.715625762939453,
2453
+ "rewards/rejected": -19.828125,
2454
+ "step": 740
2455
+ },
2456
+ {
2457
+ "epoch": 10.78853046594982,
2458
+ "grad_norm": 10.958137512207031,
2459
+ "learning_rate": 6.62980753143691e-05,
2460
+ "logits/chosen": -1.258203148841858,
2461
+ "logits/rejected": -1.382421851158142,
2462
+ "logps/chosen": -396.1000061035156,
2463
+ "logps/rejected": -648.0999755859375,
2464
+ "loss": 0.5085,
2465
+ "rewards/accuracies": 0.9859374761581421,
2466
+ "rewards/chosen": -2.490673780441284,
2467
+ "rewards/margins": 16.428125381469727,
2468
+ "rewards/rejected": -18.918750762939453,
2469
+ "step": 745
2470
+ },
2471
+ {
2472
+ "epoch": 10.86021505376344,
2473
+ "grad_norm": 0.7889600396156311,
2474
+ "learning_rate": 6.62872477601626e-05,
2475
+ "logits/chosen": -1.3039062023162842,
2476
+ "logits/rejected": -1.4226562976837158,
2477
+ "logps/chosen": -410.3999938964844,
2478
+ "logps/rejected": -664.2000122070312,
2479
+ "loss": 0.2331,
2480
+ "rewards/accuracies": 0.987500011920929,
2481
+ "rewards/chosen": -2.001757860183716,
2482
+ "rewards/margins": 18.571874618530273,
2483
+ "rewards/rejected": -20.575000762939453,
2484
+ "step": 750
2485
+ },
2486
+ {
2487
+ "epoch": 10.86021505376344,
2488
+ "eval_logits/chosen": -1.3402777910232544,
2489
+ "eval_logits/rejected": -1.4592013359069824,
2490
+ "eval_logps/chosen": -408.4444580078125,
2491
+ "eval_logps/rejected": -681.3333129882812,
2492
+ "eval_loss": 0.04453478381037712,
2493
+ "eval_rewards/accuracies": 0.9725378751754761,
2494
+ "eval_rewards/chosen": -2.062066078186035,
2495
+ "eval_rewards/margins": 18.46527862548828,
2496
+ "eval_rewards/rejected": -20.52083396911621,
2497
+ "eval_runtime": 10.2105,
2498
+ "eval_samples_per_second": 26.149,
2499
+ "eval_steps_per_second": 0.881,
2500
+ "step": 750
2501
+ },
2502
+ {
2503
+ "epoch": 10.931899641577061,
2504
+ "grad_norm": 1.6667836904525757,
2505
+ "learning_rate": 6.627633765797999e-05,
2506
+ "logits/chosen": -1.3240234851837158,
2507
+ "logits/rejected": -1.429296851158142,
2508
+ "logps/chosen": -408.3999938964844,
2509
+ "logps/rejected": -681.4000244140625,
2510
+ "loss": 0.1956,
2511
+ "rewards/accuracies": 0.965624988079071,
2512
+ "rewards/chosen": -2.8973631858825684,
2513
+ "rewards/margins": 18.996875762939453,
2514
+ "rewards/rejected": -21.881250381469727,
2515
+ "step": 755
2516
+ },
2517
+ {
2518
+ "epoch": 11.014336917562725,
2519
+ "grad_norm": 2.1559181213378906,
2520
+ "learning_rate": 6.62653450352874e-05,
2521
+ "logits/chosen": -1.3860085010528564,
2522
+ "logits/rejected": -1.4886363744735718,
2523
+ "logps/chosen": -414.18182373046875,
2524
+ "logps/rejected": -683.6363525390625,
2525
+ "loss": 0.0331,
2526
+ "rewards/accuracies": 0.984375,
2527
+ "rewards/chosen": -3.476029872894287,
2528
+ "rewards/margins": 19.136363983154297,
2529
+ "rewards/rejected": -22.625,
2530
+ "step": 760
2531
+ },
2532
+ {
2533
+ "epoch": 11.086021505376344,
2534
+ "grad_norm": 0.5421841740608215,
2535
+ "learning_rate": 6.625426991975878e-05,
2536
+ "logits/chosen": -1.2683594226837158,
2537
+ "logits/rejected": -1.3828125,
2538
+ "logps/chosen": -449.20001220703125,
2539
+ "logps/rejected": -716.2000122070312,
2540
+ "loss": 0.0245,
2541
+ "rewards/accuracies": 0.981249988079071,
2542
+ "rewards/chosen": -3.512500047683716,
2543
+ "rewards/margins": 18.740625381469727,
2544
+ "rewards/rejected": -22.262500762939453,
2545
+ "step": 765
2546
+ },
2547
+ {
2548
+ "epoch": 11.157706093189963,
2549
+ "grad_norm": 0.014763603918254375,
2550
+ "learning_rate": 6.624311233927571e-05,
2551
+ "logits/chosen": -1.2734375,
2552
+ "logits/rejected": -1.381250023841858,
2553
+ "logps/chosen": -408.20001220703125,
2554
+ "logps/rejected": -683.7999877929688,
2555
+ "loss": 0.0215,
2556
+ "rewards/accuracies": 0.979687511920929,
2557
+ "rewards/chosen": -3.837890625,
2558
+ "rewards/margins": 17.071874618530273,
2559
+ "rewards/rejected": -20.912500381469727,
2560
+ "step": 770
2561
+ },
2562
+ {
2563
+ "epoch": 11.229390681003585,
2564
+ "grad_norm": 2.519441843032837,
2565
+ "learning_rate": 6.623187232192738e-05,
2566
+ "logits/chosen": -1.279687523841858,
2567
+ "logits/rejected": -1.396875023841858,
2568
+ "logps/chosen": -404.1000061035156,
2569
+ "logps/rejected": -662.2999877929688,
2570
+ "loss": 0.0389,
2571
+ "rewards/accuracies": 0.965624988079071,
2572
+ "rewards/chosen": -4.133593559265137,
2573
+ "rewards/margins": 17.243749618530273,
2574
+ "rewards/rejected": -21.375,
2575
+ "step": 775
2576
+ },
2577
+ {
2578
+ "epoch": 11.301075268817204,
2579
+ "grad_norm": 1.973528504371643,
2580
+ "learning_rate": 6.622054989601051e-05,
2581
+ "logits/chosen": -1.212890625,
2582
+ "logits/rejected": -1.360742211341858,
2583
+ "logps/chosen": -410.8999938964844,
2584
+ "logps/rejected": -691.4000244140625,
2585
+ "loss": 0.0222,
2586
+ "rewards/accuracies": 0.979687511920929,
2587
+ "rewards/chosen": -4.862500190734863,
2588
+ "rewards/margins": 16.665624618530273,
2589
+ "rewards/rejected": -21.518749237060547,
2590
+ "step": 780
2591
+ },
2592
+ {
2593
+ "epoch": 11.372759856630825,
2594
+ "grad_norm": 0.8458139896392822,
2595
+ "learning_rate": 6.620914509002932e-05,
2596
+ "logits/chosen": -1.215234398841858,
2597
+ "logits/rejected": -1.349609375,
2598
+ "logps/chosen": -403.29998779296875,
2599
+ "logps/rejected": -650.2999877929688,
2600
+ "loss": 0.0473,
2601
+ "rewards/accuracies": 0.973437488079071,
2602
+ "rewards/chosen": -3.1806640625,
2603
+ "rewards/margins": 16.887500762939453,
2604
+ "rewards/rejected": -20.071874618530273,
2605
+ "step": 785
2606
+ },
2607
+ {
2608
+ "epoch": 11.444444444444445,
2609
+ "grad_norm": 0.09396322816610336,
2610
+ "learning_rate": 6.619765793269539e-05,
2611
+ "logits/chosen": -1.1103515625,
2612
+ "logits/rejected": -1.233984351158142,
2613
+ "logps/chosen": -384.79998779296875,
2614
+ "logps/rejected": -670.5999755859375,
2615
+ "loss": 0.0192,
2616
+ "rewards/accuracies": 0.9859374761581421,
2617
+ "rewards/chosen": -2.9014649391174316,
2618
+ "rewards/margins": 16.671875,
2619
+ "rewards/rejected": -19.559375762939453,
2620
+ "step": 790
2621
+ },
2622
+ {
2623
+ "epoch": 11.516129032258064,
2624
+ "grad_norm": 0.5156263709068298,
2625
+ "learning_rate": 6.618608845292762e-05,
2626
+ "logits/chosen": -1.0205078125,
2627
+ "logits/rejected": -1.146875023841858,
2628
+ "logps/chosen": -389.8999938964844,
2629
+ "logps/rejected": -652.2000122070312,
2630
+ "loss": 0.0317,
2631
+ "rewards/accuracies": 0.9828125238418579,
2632
+ "rewards/chosen": -1.9188964366912842,
2633
+ "rewards/margins": 16.628124237060547,
2634
+ "rewards/rejected": -18.540624618530273,
2635
+ "step": 795
2636
+ },
2637
+ {
2638
+ "epoch": 11.587813620071685,
2639
+ "grad_norm": 2.2448863983154297,
2640
+ "learning_rate": 6.617443667985216e-05,
2641
+ "logits/chosen": -1.002539038658142,
2642
+ "logits/rejected": -1.141210913658142,
2643
+ "logps/chosen": -406.0,
2644
+ "logps/rejected": -656.4000244140625,
2645
+ "loss": 0.0225,
2646
+ "rewards/accuracies": 0.989062488079071,
2647
+ "rewards/chosen": -1.7773040533065796,
2648
+ "rewards/margins": 16.728124618530273,
2649
+ "rewards/rejected": -18.512500762939453,
2650
+ "step": 800
2651
+ },
2652
+ {
2653
+ "epoch": 11.587813620071685,
2654
+ "eval_logits/chosen": -1.0394965410232544,
2655
+ "eval_logits/rejected": -1.1527777910232544,
2656
+ "eval_logps/chosen": -404.0,
2657
+ "eval_logps/rejected": -672.888916015625,
2658
+ "eval_loss": 0.03328302875161171,
2659
+ "eval_rewards/accuracies": 0.9760100841522217,
2660
+ "eval_rewards/chosen": -1.6126302480697632,
2661
+ "eval_rewards/margins": 18.16666603088379,
2662
+ "eval_rewards/rejected": -19.8125,
2663
+ "eval_runtime": 10.2022,
2664
+ "eval_samples_per_second": 26.171,
2665
+ "eval_steps_per_second": 0.882,
2666
+ "step": 800
2667
  }
2668
  ],
2669
  "logging_steps": 5,