Training in progress, step 34000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:183bead4ca9a0d8a9ad0402f8298de3e2bfdf1c6bc3c98f0a8a4be0ee1e31d4d
|
| 3 |
size 36730224
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1544
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6b47889f113a05e0ff3862fe30181a9dc731902d20018b65b9702f60ad52946b
|
| 3 |
size 1544
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e318b97ed9819790ac140e0dd6c53dca0b92f84a57cf1dedca58f5c9fdf217c4
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a8f7000d6d396338bd8027a37093a36e3ad9a9fa3dd5e939b7cf2fd1b06a270
|
| 3 |
+
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d185966dc7e40240f30d39f221c00702b5a813416a3b53099b3469fe07e391c8
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2206e3ea9caa91bbe357a176c4a03573c2b47177cf241fe9772382f8b2e0ca8f
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 0.
|
| 4 |
-
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-
|
| 5 |
-
"epoch": 2.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -5770,6 +5770,92 @@
|
|
| 5770 |
"eval_samples_per_second": 58.033,
|
| 5771 |
"eval_steps_per_second": 14.508,
|
| 5772 |
"step": 33500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5773 |
}
|
| 5774 |
],
|
| 5775 |
"logging_steps": 50,
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 34000,
|
| 3 |
+
"best_metric": 0.987713634967804,
|
| 4 |
+
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-34000",
|
| 5 |
+
"epoch": 2.615183447427121,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 34000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 5770 |
"eval_samples_per_second": 58.033,
|
| 5771 |
"eval_steps_per_second": 14.508,
|
| 5772 |
"step": 33500
|
| 5773 |
+
},
|
| 5774 |
+
{
|
| 5775 |
+
"epoch": 2.580570725328821,
|
| 5776 |
+
"grad_norm": 0.5430648922920227,
|
| 5777 |
+
"learning_rate": 2.8402981585850453e-05,
|
| 5778 |
+
"loss": 0.9658,
|
| 5779 |
+
"step": 33550
|
| 5780 |
+
},
|
| 5781 |
+
{
|
| 5782 |
+
"epoch": 2.5844165833397432,
|
| 5783 |
+
"grad_norm": 0.6678454279899597,
|
| 5784 |
+
"learning_rate": 2.8143261564033973e-05,
|
| 5785 |
+
"loss": 1.0292,
|
| 5786 |
+
"step": 33600
|
| 5787 |
+
},
|
| 5788 |
+
{
|
| 5789 |
+
"epoch": 2.588262441350665,
|
| 5790 |
+
"grad_norm": 0.7208724021911621,
|
| 5791 |
+
"learning_rate": 2.788354154221749e-05,
|
| 5792 |
+
"loss": 0.9505,
|
| 5793 |
+
"step": 33650
|
| 5794 |
+
},
|
| 5795 |
+
{
|
| 5796 |
+
"epoch": 2.5921082993615876,
|
| 5797 |
+
"grad_norm": 1.2248526811599731,
|
| 5798 |
+
"learning_rate": 2.7623821520401012e-05,
|
| 5799 |
+
"loss": 0.9728,
|
| 5800 |
+
"step": 33700
|
| 5801 |
+
},
|
| 5802 |
+
{
|
| 5803 |
+
"epoch": 2.5959541573725096,
|
| 5804 |
+
"grad_norm": 1.0026588439941406,
|
| 5805 |
+
"learning_rate": 2.7364101498584525e-05,
|
| 5806 |
+
"loss": 0.9783,
|
| 5807 |
+
"step": 33750
|
| 5808 |
+
},
|
| 5809 |
+
{
|
| 5810 |
+
"epoch": 2.5959541573725096,
|
| 5811 |
+
"eval_loss": 0.9881900548934937,
|
| 5812 |
+
"eval_runtime": 17.2742,
|
| 5813 |
+
"eval_samples_per_second": 57.89,
|
| 5814 |
+
"eval_steps_per_second": 14.472,
|
| 5815 |
+
"step": 33750
|
| 5816 |
+
},
|
| 5817 |
+
{
|
| 5818 |
+
"epoch": 2.599800015383432,
|
| 5819 |
+
"grad_norm": 0.9579987525939941,
|
| 5820 |
+
"learning_rate": 2.710438147676804e-05,
|
| 5821 |
+
"loss": 0.9882,
|
| 5822 |
+
"step": 33800
|
| 5823 |
+
},
|
| 5824 |
+
{
|
| 5825 |
+
"epoch": 2.6036458733943544,
|
| 5826 |
+
"grad_norm": 1.0152076482772827,
|
| 5827 |
+
"learning_rate": 2.6844661454951564e-05,
|
| 5828 |
+
"loss": 1.041,
|
| 5829 |
+
"step": 33850
|
| 5830 |
+
},
|
| 5831 |
+
{
|
| 5832 |
+
"epoch": 2.6074917314052763,
|
| 5833 |
+
"grad_norm": 1.1370351314544678,
|
| 5834 |
+
"learning_rate": 2.658494143313508e-05,
|
| 5835 |
+
"loss": 0.9518,
|
| 5836 |
+
"step": 33900
|
| 5837 |
+
},
|
| 5838 |
+
{
|
| 5839 |
+
"epoch": 2.6113375894161988,
|
| 5840 |
+
"grad_norm": 0.9851937890052795,
|
| 5841 |
+
"learning_rate": 2.63252214113186e-05,
|
| 5842 |
+
"loss": 0.9125,
|
| 5843 |
+
"step": 33950
|
| 5844 |
+
},
|
| 5845 |
+
{
|
| 5846 |
+
"epoch": 2.615183447427121,
|
| 5847 |
+
"grad_norm": 0.8480270504951477,
|
| 5848 |
+
"learning_rate": 2.6065501389502116e-05,
|
| 5849 |
+
"loss": 0.9736,
|
| 5850 |
+
"step": 34000
|
| 5851 |
+
},
|
| 5852 |
+
{
|
| 5853 |
+
"epoch": 2.615183447427121,
|
| 5854 |
+
"eval_loss": 0.987713634967804,
|
| 5855 |
+
"eval_runtime": 17.2436,
|
| 5856 |
+
"eval_samples_per_second": 57.993,
|
| 5857 |
+
"eval_steps_per_second": 14.498,
|
| 5858 |
+
"step": 34000
|
| 5859 |
}
|
| 5860 |
],
|
| 5861 |
"logging_steps": 50,
|