Training in progress, step 16500, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:47aa7b6dff74362f082b035925e2a5f7cb54a6d412fcc30408f989b971b037c6
|
| 3 |
size 36730224
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac0c8a0f8ae72f5f1c6ba696fc1b1a47bee4f015c1aa8caebecb7d165365a472
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d28d5f3a49ada942ec355f276b33d6e4bff345075b872b6e2b651f5666a06f0a
|
| 3 |
+
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ac02e2eb311b0a2b3525f61fc41905e20d307e2a487c095f2b84052384c6ffa
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb9a109782b96ee7b703894b485d97bae6299b8517060ebdc76bb5c7072fef6b
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 1.
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-15500",
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -2760,6 +2760,92 @@
|
|
| 2760 |
"eval_samples_per_second": 55.548,
|
| 2761 |
"eval_steps_per_second": 13.887,
|
| 2762 |
"step": 16000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2763 |
}
|
| 2764 |
],
|
| 2765 |
"logging_steps": 50,
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 16250,
|
| 3 |
+
"best_metric": 1.4850120544433594,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-15500",
|
| 5 |
+
"epoch": 1.2691331436043383,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 16500,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 2760 |
"eval_samples_per_second": 55.548,
|
| 2761 |
"eval_steps_per_second": 13.887,
|
| 2762 |
"step": 16000
|
| 2763 |
+
},
|
| 2764 |
+
{
|
| 2765 |
+
"epoch": 1.234520421506038,
|
| 2766 |
+
"grad_norm": 1.1019631624221802,
|
| 2767 |
+
"learning_rate": 5.796404543950589e-05,
|
| 2768 |
+
"loss": 1.3918,
|
| 2769 |
+
"step": 16050
|
| 2770 |
+
},
|
| 2771 |
+
{
|
| 2772 |
+
"epoch": 1.2383662795169603,
|
| 2773 |
+
"grad_norm": 1.7206593751907349,
|
| 2774 |
+
"learning_rate": 5.770670195948678e-05,
|
| 2775 |
+
"loss": 1.4726,
|
| 2776 |
+
"step": 16100
|
| 2777 |
+
},
|
| 2778 |
+
{
|
| 2779 |
+
"epoch": 1.2422121375278825,
|
| 2780 |
+
"grad_norm": 1.9747880697250366,
|
| 2781 |
+
"learning_rate": 5.7449358479467666e-05,
|
| 2782 |
+
"loss": 1.4829,
|
| 2783 |
+
"step": 16150
|
| 2784 |
+
},
|
| 2785 |
+
{
|
| 2786 |
+
"epoch": 1.2460579955388047,
|
| 2787 |
+
"grad_norm": 1.605573058128357,
|
| 2788 |
+
"learning_rate": 5.719201499944854e-05,
|
| 2789 |
+
"loss": 1.4476,
|
| 2790 |
+
"step": 16200
|
| 2791 |
+
},
|
| 2792 |
+
{
|
| 2793 |
+
"epoch": 1.2499038535497269,
|
| 2794 |
+
"grad_norm": 1.180405616760254,
|
| 2795 |
+
"learning_rate": 5.6934671519429426e-05,
|
| 2796 |
+
"loss": 1.3904,
|
| 2797 |
+
"step": 16250
|
| 2798 |
+
},
|
| 2799 |
+
{
|
| 2800 |
+
"epoch": 1.2499038535497269,
|
| 2801 |
+
"eval_loss": 1.4850120544433594,
|
| 2802 |
+
"eval_runtime": 18.0422,
|
| 2803 |
+
"eval_samples_per_second": 55.426,
|
| 2804 |
+
"eval_steps_per_second": 13.856,
|
| 2805 |
+
"step": 16250
|
| 2806 |
+
},
|
| 2807 |
+
{
|
| 2808 |
+
"epoch": 1.253749711560649,
|
| 2809 |
+
"grad_norm": 1.9959101676940918,
|
| 2810 |
+
"learning_rate": 5.667732803941031e-05,
|
| 2811 |
+
"loss": 1.4512,
|
| 2812 |
+
"step": 16300
|
| 2813 |
+
},
|
| 2814 |
+
{
|
| 2815 |
+
"epoch": 1.2575955695715715,
|
| 2816 |
+
"grad_norm": 1.8853541612625122,
|
| 2817 |
+
"learning_rate": 5.641998455939119e-05,
|
| 2818 |
+
"loss": 1.458,
|
| 2819 |
+
"step": 16350
|
| 2820 |
+
},
|
| 2821 |
+
{
|
| 2822 |
+
"epoch": 1.2614414275824937,
|
| 2823 |
+
"grad_norm": 1.4618902206420898,
|
| 2824 |
+
"learning_rate": 5.616264107937208e-05,
|
| 2825 |
+
"loss": 1.4968,
|
| 2826 |
+
"step": 16400
|
| 2827 |
+
},
|
| 2828 |
+
{
|
| 2829 |
+
"epoch": 1.2652872855934159,
|
| 2830 |
+
"grad_norm": 1.4913650751113892,
|
| 2831 |
+
"learning_rate": 5.5905297599352965e-05,
|
| 2832 |
+
"loss": 1.3966,
|
| 2833 |
+
"step": 16450
|
| 2834 |
+
},
|
| 2835 |
+
{
|
| 2836 |
+
"epoch": 1.2691331436043383,
|
| 2837 |
+
"grad_norm": 1.3095403909683228,
|
| 2838 |
+
"learning_rate": 5.564795411933384e-05,
|
| 2839 |
+
"loss": 1.4484,
|
| 2840 |
+
"step": 16500
|
| 2841 |
+
},
|
| 2842 |
+
{
|
| 2843 |
+
"epoch": 1.2691331436043383,
|
| 2844 |
+
"eval_loss": 1.4897910356521606,
|
| 2845 |
+
"eval_runtime": 18.0248,
|
| 2846 |
+
"eval_samples_per_second": 55.479,
|
| 2847 |
+
"eval_steps_per_second": 13.87,
|
| 2848 |
+
"step": 16500
|
| 2849 |
}
|
| 2850 |
],
|
| 2851 |
"logging_steps": 50,
|