TweedleDeepLearnings commited on
Commit
1d9f758
·
verified ·
1 Parent(s): 230a18c

Training in progress, step 3450, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a32a7f2a18fd02b5a784b7de9c35953dc8eed4dec1cdb5ae4e32e720e9314ce3
3
  size 1439199808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f1bcd8236a991dd015bc000d8db154dfb1ab38aa55bbc13be7fc78bd1888b7d
3
  size 1439199808
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2541760ad8140d78334f44ffdbdde5fa426cfe8d489fee70ae2942932b1fbecf
3
  size 2746271162
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5be81d5f3a8e665425eeafba440a547e141c9f7fd542ab92f841751e1f2a8d4
3
  size 2746271162
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:06eacf4beac2f87634932b7784d96f580eb7673fd7aa2ddeca9382968e9ad7a2
3
  size 14180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b94f628e4c41aea4f7b0ab86aa7e78fa8398f6aeb08d3f097d04ff5471c1532
3
  size 14180
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48383a39b3b26acc46de2d6a247f92cd1459e51652dd3ef90c9ec324e3a206b4
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbc64161a220321c09f919ba59daaf2e36f5d33bd28e64db671f20f3cea3cf35
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.059717416763305664,
3
  "best_model_checkpoint": "./output/checkpoint-2550",
4
- "epoch": 6.088560885608856,
5
  "eval_steps": 150,
6
- "global_step": 3300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2493,6 +2493,119 @@
2493
  "eval_samples_per_second": 10.6,
2494
  "eval_steps_per_second": 10.6,
2495
  "step": 3300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2496
  }
2497
  ],
2498
  "logging_steps": 10,
@@ -2512,7 +2625,7 @@
2512
  "attributes": {}
2513
  }
2514
  },
2515
- "total_flos": 1.6990246793751552e+17,
2516
  "train_batch_size": 8,
2517
  "trial_name": null,
2518
  "trial_params": null
 
1
  {
2
  "best_metric": 0.059717416763305664,
3
  "best_model_checkpoint": "./output/checkpoint-2550",
4
+ "epoch": 6.365313653136531,
5
  "eval_steps": 150,
6
+ "global_step": 3450,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2493
  "eval_samples_per_second": 10.6,
2494
  "eval_steps_per_second": 10.6,
2495
  "step": 3300
2496
+ },
2497
+ {
2498
+ "epoch": 6.107011070110701,
2499
+ "grad_norm": 0.5109656453132629,
2500
+ "learning_rate": 2.127147601759018e-05,
2501
+ "loss": 0.011,
2502
+ "step": 3310
2503
+ },
2504
+ {
2505
+ "epoch": 6.125461254612546,
2506
+ "grad_norm": 0.3670618236064911,
2507
+ "learning_rate": 2.1045253501080058e-05,
2508
+ "loss": 0.0115,
2509
+ "step": 3320
2510
+ },
2511
+ {
2512
+ "epoch": 6.143911439114391,
2513
+ "grad_norm": 0.21240659058094025,
2514
+ "learning_rate": 2.081981013995785e-05,
2515
+ "loss": 0.0071,
2516
+ "step": 3330
2517
+ },
2518
+ {
2519
+ "epoch": 6.162361623616236,
2520
+ "grad_norm": 0.25418519973754883,
2521
+ "learning_rate": 2.0595155201317115e-05,
2522
+ "loss": 0.0143,
2523
+ "step": 3340
2524
+ },
2525
+ {
2526
+ "epoch": 6.180811808118081,
2527
+ "grad_norm": 0.8638357520103455,
2528
+ "learning_rate": 2.03712979198425e-05,
2529
+ "loss": 0.009,
2530
+ "step": 3350
2531
+ },
2532
+ {
2533
+ "epoch": 6.199261992619927,
2534
+ "grad_norm": 0.047012124210596085,
2535
+ "learning_rate": 2.0148247497430012e-05,
2536
+ "loss": 0.0109,
2537
+ "step": 3360
2538
+ },
2539
+ {
2540
+ "epoch": 6.217712177121771,
2541
+ "grad_norm": 0.6433148384094238,
2542
+ "learning_rate": 1.992601310280891e-05,
2543
+ "loss": 0.014,
2544
+ "step": 3370
2545
+ },
2546
+ {
2547
+ "epoch": 6.236162361623617,
2548
+ "grad_norm": 0.10094081610441208,
2549
+ "learning_rate": 1.970460387116472e-05,
2550
+ "loss": 0.009,
2551
+ "step": 3380
2552
+ },
2553
+ {
2554
+ "epoch": 6.254612546125461,
2555
+ "grad_norm": 0.28598272800445557,
2556
+ "learning_rate": 1.948402890376376e-05,
2557
+ "loss": 0.0089,
2558
+ "step": 3390
2559
+ },
2560
+ {
2561
+ "epoch": 6.273062730627307,
2562
+ "grad_norm": 0.1534859538078308,
2563
+ "learning_rate": 1.9264297267579e-05,
2564
+ "loss": 0.0179,
2565
+ "step": 3400
2566
+ },
2567
+ {
2568
+ "epoch": 6.291512915129151,
2569
+ "grad_norm": 0.00930574256926775,
2570
+ "learning_rate": 1.904541799491738e-05,
2571
+ "loss": 0.0067,
2572
+ "step": 3410
2573
+ },
2574
+ {
2575
+ "epoch": 6.3099630996309966,
2576
+ "grad_norm": 0.23130927979946136,
2577
+ "learning_rate": 1.8827400083048503e-05,
2578
+ "loss": 0.0143,
2579
+ "step": 3420
2580
+ },
2581
+ {
2582
+ "epoch": 6.328413284132841,
2583
+ "grad_norm": 0.19890816509723663,
2584
+ "learning_rate": 1.8610252493834814e-05,
2585
+ "loss": 0.0249,
2586
+ "step": 3430
2587
+ },
2588
+ {
2589
+ "epoch": 6.3468634686346865,
2590
+ "grad_norm": 0.34883108735084534,
2591
+ "learning_rate": 1.8393984153363203e-05,
2592
+ "loss": 0.0127,
2593
+ "step": 3440
2594
+ },
2595
+ {
2596
+ "epoch": 6.365313653136531,
2597
+ "grad_norm": 0.6582930088043213,
2598
+ "learning_rate": 1.8178603951578057e-05,
2599
+ "loss": 0.0132,
2600
+ "step": 3450
2601
+ },
2602
+ {
2603
+ "epoch": 6.365313653136531,
2604
+ "eval_loss": 0.07403679937124252,
2605
+ "eval_runtime": 44.152,
2606
+ "eval_samples_per_second": 10.917,
2607
+ "eval_steps_per_second": 10.917,
2608
+ "step": 3450
2609
  }
2610
  ],
2611
  "logging_steps": 10,
 
2625
  "attributes": {}
2626
  }
2627
  },
2628
+ "total_flos": 1.7764962206435942e+17,
2629
  "train_batch_size": 8,
2630
  "trial_name": null,
2631
  "trial_params": null