irishprancer committed on
Commit
c1bffca
·
verified ·
1 Parent(s): 3940de3

Training in progress, step 2700, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16a48e299ffea4a1305480644e0252c4aab5e02d9191b6e951f282025c021c36
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f33daf140fd585464eee41484de04e2474d8e47d63abe817dceaf9b00888e076
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ccb7656d0058945a544e068c4c181cbcbae03cf7066df76778cb4579b9066242
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6902f4a187e909e2a61310381b65d1586200f826c3f0696581929350cf563d6
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:33335d8e454fa636f80bfcf35b73daf17e2d9682ef2741d0ad1097e25ee4742d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43e4de9332de0a821c86e7798fc265e5a2e1afe5f2f6b38a27a1659456e57185
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f98b757d648be3e63607e2156858bad579d6a12d490a3cb2c8748d9ae2cce45
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7ceceb1b964f59d3125423a0c5ef9d004a1142d04972a62dc4fa7ae95e8eed4
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7177689671516418,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 110.8695652173913,
5
  "eval_steps": 150,
6
- "global_step": 2550,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2472,6 +2472,151 @@
2472
  "EMA_steps_per_second": 24.098,
2473
  "epoch": 110.8695652173913,
2474
  "step": 2550
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2475
  }
2476
  ],
2477
  "logging_steps": 10,
@@ -2491,7 +2636,7 @@
2491
  "attributes": {}
2492
  }
2493
  },
2494
- "total_flos": 6.568246037407334e+16,
2495
  "train_batch_size": 4,
2496
  "trial_name": null,
2497
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7177689671516418,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 117.3913043478261,
5
  "eval_steps": 150,
6
+ "global_step": 2700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2472
  "EMA_steps_per_second": 24.098,
2473
  "epoch": 110.8695652173913,
2474
  "step": 2550
2475
+ },
2476
+ {
2477
+ "epoch": 111.30434782608695,
2478
+ "grad_norm": 2.409362554550171,
2479
+ "learning_rate": 7.485945469179237e-06,
2480
+ "loss": 0.2816,
2481
+ "step": 2560
2482
+ },
2483
+ {
2484
+ "epoch": 111.73913043478261,
2485
+ "grad_norm": 2.021090030670166,
2486
+ "learning_rate": 7.485869057739486e-06,
2487
+ "loss": 0.228,
2488
+ "step": 2570
2489
+ },
2490
+ {
2491
+ "epoch": 112.17391304347827,
2492
+ "grad_norm": 2.0017611980438232,
2493
+ "learning_rate": 7.485791163117665e-06,
2494
+ "loss": 0.2461,
2495
+ "step": 2580
2496
+ },
2497
+ {
2498
+ "epoch": 112.6086956521739,
2499
+ "grad_norm": 1.6572258472442627,
2500
+ "learning_rate": 7.485711785344648e-06,
2501
+ "loss": 0.2461,
2502
+ "step": 2590
2503
+ },
2504
+ {
2505
+ "epoch": 113.04347826086956,
2506
+ "grad_norm": 2.1028172969818115,
2507
+ "learning_rate": 7.485630924451897e-06,
2508
+ "loss": 0.2658,
2509
+ "step": 2600
2510
+ },
2511
+ {
2512
+ "epoch": 113.47826086956522,
2513
+ "grad_norm": 1.8281258344650269,
2514
+ "learning_rate": 7.485548580471464e-06,
2515
+ "loss": 0.2257,
2516
+ "step": 2610
2517
+ },
2518
+ {
2519
+ "epoch": 113.91304347826087,
2520
+ "grad_norm": 2.1749765872955322,
2521
+ "learning_rate": 7.485464753435987e-06,
2522
+ "loss": 0.2756,
2523
+ "step": 2620
2524
+ },
2525
+ {
2526
+ "epoch": 114.34782608695652,
2527
+ "grad_norm": 2.009671688079834,
2528
+ "learning_rate": 7.485379443378693e-06,
2529
+ "loss": 0.2447,
2530
+ "step": 2630
2531
+ },
2532
+ {
2533
+ "epoch": 114.78260869565217,
2534
+ "grad_norm": 2.52178955078125,
2535
+ "learning_rate": 7.485292650333394e-06,
2536
+ "loss": 0.2289,
2537
+ "step": 2640
2538
+ },
2539
+ {
2540
+ "epoch": 115.21739130434783,
2541
+ "grad_norm": 1.7975748777389526,
2542
+ "learning_rate": 7.485204374334494e-06,
2543
+ "loss": 0.2551,
2544
+ "step": 2650
2545
+ },
2546
+ {
2547
+ "epoch": 115.65217391304348,
2548
+ "grad_norm": 1.9182255268096924,
2549
+ "learning_rate": 7.485114615416982e-06,
2550
+ "loss": 0.2721,
2551
+ "step": 2660
2552
+ },
2553
+ {
2554
+ "epoch": 116.08695652173913,
2555
+ "grad_norm": 2.3910796642303467,
2556
+ "learning_rate": 7.485023373616437e-06,
2557
+ "loss": 0.2156,
2558
+ "step": 2670
2559
+ },
2560
+ {
2561
+ "epoch": 116.52173913043478,
2562
+ "grad_norm": 2.55471134185791,
2563
+ "learning_rate": 7.484930648969023e-06,
2564
+ "loss": 0.2447,
2565
+ "step": 2680
2566
+ },
2567
+ {
2568
+ "epoch": 116.95652173913044,
2569
+ "grad_norm": 1.5849785804748535,
2570
+ "learning_rate": 7.484836441511492e-06,
2571
+ "loss": 0.2441,
2572
+ "step": 2690
2573
+ },
2574
+ {
2575
+ "epoch": 117.3913043478261,
2576
+ "grad_norm": 1.6347429752349854,
2577
+ "learning_rate": 7.484740751281187e-06,
2578
+ "loss": 0.2362,
2579
+ "step": 2700
2580
+ },
2581
+ {
2582
+ "epoch": 117.3913043478261,
2583
+ "eval_loss": 0.9330541491508484,
2584
+ "eval_runtime": 0.4063,
2585
+ "eval_samples_per_second": 24.614,
2586
+ "eval_steps_per_second": 24.614,
2587
+ "step": 2700
2588
+ },
2589
+ {
2590
+ "Start_State_loss": 0.861186683177948,
2591
+ "Start_State_runtime": 0.4124,
2592
+ "Start_State_samples_per_second": 24.246,
2593
+ "Start_State_steps_per_second": 24.246,
2594
+ "epoch": 117.3913043478261,
2595
+ "step": 2700
2596
+ },
2597
+ {
2598
+ "Raw_Model_loss": 0.9330541491508484,
2599
+ "Raw_Model_runtime": 0.4024,
2600
+ "Raw_Model_samples_per_second": 24.852,
2601
+ "Raw_Model_steps_per_second": 24.852,
2602
+ "epoch": 117.3913043478261,
2603
+ "step": 2700
2604
+ },
2605
+ {
2606
+ "SWA_loss": 0.7649438977241516,
2607
+ "SWA_runtime": 0.4267,
2608
+ "SWA_samples_per_second": 23.437,
2609
+ "SWA_steps_per_second": 23.437,
2610
+ "epoch": 117.3913043478261,
2611
+ "step": 2700
2612
+ },
2613
+ {
2614
+ "EMA_loss": 0.8608808517456055,
2615
+ "EMA_runtime": 0.4102,
2616
+ "EMA_samples_per_second": 24.381,
2617
+ "EMA_steps_per_second": 24.381,
2618
+ "epoch": 117.3913043478261,
2619
+ "step": 2700
2620
  }
2621
  ],
2622
  "logging_steps": 10,
 
2636
  "attributes": {}
2637
  }
2638
  },
2639
+ "total_flos": 6.95112550820905e+16,
2640
  "train_batch_size": 4,
2641
  "trial_name": null,
2642
  "trial_params": null