rootxhacker commited on
Commit
a283365
·
verified ·
1 Parent(s): b90dbe4

Training in progress, step 23000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48890795503039966e7c3f8690ea5e69f170d6d775e60a7c19814730a3ff98f0
3
  size 36730224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43ec5a5e069d7a9559066b015901f6cd9d1e5e11a494d63a1a5a07d7cc56ce83
3
  size 36730224
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:323d035573abc68ea1fb28a0dbd169b42c919244458b9d48c24a8196ef4f1fbd
3
  size 73588346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5655b365a65633fda63eb12cb6b41fa6fff3c01576ce3df81bb8d871bd860169
3
  size 73588346
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0edeec917b285b0e88139f206de61b3e301e9b677aa51d87d86f36aba6dd8e7e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f21304ce44e93f8f86da1b431eb1e188b0a7d5ce22c8a8c84f5d679245daaffa
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b66cb5271b1546b480a6c7621e1addbb2863a83f6bfe9ced4a812a79ec729e68
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21cd7c7ff5865cbb23888889c7201c6bfb6882d80652a0ade03d69cb9ad78253
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee960841273de5fa43863b36a2f8d901df0b6ff85b61e6b4992f108a85a45d9c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fdd8c08bac54a46207946491aba470a601d04858deabd8ec1d66f5286fc9a46
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 22250,
3
  "best_metric": 1.457463264465332,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-22000",
5
- "epoch": 1.7306361049150065,
6
  "eval_steps": 250,
7
- "global_step": 22500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3878,6 +3878,92 @@
3878
  "eval_samples_per_second": 54.921,
3879
  "eval_steps_per_second": 13.73,
3880
  "step": 22500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3881
  }
3882
  ],
3883
  "logging_steps": 50,
 
2
  "best_global_step": 22250,
3
  "best_metric": 1.457463264465332,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-22000",
5
+ "epoch": 1.7690946850242288,
6
  "eval_steps": 250,
7
+ "global_step": 23000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3878
  "eval_samples_per_second": 54.921,
3879
  "eval_steps_per_second": 13.73,
3880
  "step": 22500
3881
+ },
3882
+ {
3883
+ "epoch": 1.734481962925929,
3884
+ "grad_norm": 0.9285104870796204,
3885
+ "learning_rate": 2.4514539906621075e-05,
3886
+ "loss": 1.4243,
3887
+ "step": 22550
3888
+ },
3889
+ {
3890
+ "epoch": 1.738327820936851,
3891
+ "grad_norm": 1.2848355770111084,
3892
+ "learning_rate": 2.4257196426601962e-05,
3893
+ "loss": 1.4596,
3894
+ "step": 22600
3895
+ },
3896
+ {
3897
+ "epoch": 1.7421736789477733,
3898
+ "grad_norm": 1.4614371061325073,
3899
+ "learning_rate": 2.3999852946582845e-05,
3900
+ "loss": 1.3918,
3901
+ "step": 22650
3902
+ },
3903
+ {
3904
+ "epoch": 1.7460195369586955,
3905
+ "grad_norm": 0.9543781876564026,
3906
+ "learning_rate": 2.3742509466563724e-05,
3907
+ "loss": 1.4044,
3908
+ "step": 22700
3909
+ },
3910
+ {
3911
+ "epoch": 1.7498653949696177,
3912
+ "grad_norm": 1.602250099182129,
3913
+ "learning_rate": 2.348516598654461e-05,
3914
+ "loss": 1.4607,
3915
+ "step": 22750
3916
+ },
3917
+ {
3918
+ "epoch": 1.7498653949696177,
3919
+ "eval_loss": 1.4677520990371704,
3920
+ "eval_runtime": 18.158,
3921
+ "eval_samples_per_second": 55.072,
3922
+ "eval_steps_per_second": 13.768,
3923
+ "step": 22750
3924
+ },
3925
+ {
3926
+ "epoch": 1.75371125298054,
3927
+ "grad_norm": 1.1664291620254517,
3928
+ "learning_rate": 2.3227822506525494e-05,
3929
+ "loss": 1.5153,
3930
+ "step": 22800
3931
+ },
3932
+ {
3933
+ "epoch": 1.757557110991462,
3934
+ "grad_norm": 1.472679853439331,
3935
+ "learning_rate": 2.2970479026506374e-05,
3936
+ "loss": 1.4774,
3937
+ "step": 22850
3938
+ },
3939
+ {
3940
+ "epoch": 1.7614029690023845,
3941
+ "grad_norm": 1.7927029132843018,
3942
+ "learning_rate": 2.271313554648726e-05,
3943
+ "loss": 1.4551,
3944
+ "step": 22900
3945
+ },
3946
+ {
3947
+ "epoch": 1.7652488270133067,
3948
+ "grad_norm": 2.9085824489593506,
3949
+ "learning_rate": 2.2455792066468143e-05,
3950
+ "loss": 1.4474,
3951
+ "step": 22950
3952
+ },
3953
+ {
3954
+ "epoch": 1.7690946850242288,
3955
+ "grad_norm": 1.8322957754135132,
3956
+ "learning_rate": 2.2198448586449026e-05,
3957
+ "loss": 1.4642,
3958
+ "step": 23000
3959
+ },
3960
+ {
3961
+ "epoch": 1.7690946850242288,
3962
+ "eval_loss": 1.4676103591918945,
3963
+ "eval_runtime": 17.9158,
3964
+ "eval_samples_per_second": 55.817,
3965
+ "eval_steps_per_second": 13.954,
3966
+ "step": 23000
3967
  }
3968
  ],
3969
  "logging_steps": 50,