rootxhacker commited on
Commit
6c55d78
·
verified ·
1 Parent(s): 4d48040

Training in progress, step 18000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5fd64b4fff9be5f0c3e2f6b4d9919b9e6375df4b9f578e7ba32211a7c5285de9
3
  size 36730224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f56b521d1a393817f17047a20ecf2bb1e83ee9f43692fbe30db28020b9d79b36
3
  size 36730224
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d27c5a577064cfc46e35c17df10152b71102230382c1124b730922de85a7cde
3
  size 73588346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af75d0581fbdc90030a2d5f4ec75948b6c9a777f25105f2635a11a66e19a8991
3
  size 73588346
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c6f63d896b1e09539b72ac15eabafd681bf824376794bc90d0981fb00940917
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aec74914111c893531d77390ed26a8e62f47c5fd368563e3e2ea395d9971bdd0
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a3d47a33fe410d7c39aed7512ee64bb9c07ee05e603db567ade52fed1bf5077
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb36538bca0e8f82a230d205fcda169c895fd4416ff12f18badea8938ea79e34
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3fefc995528f5d0eb5c6c7d885b8cddef853e490bf4df60e860a20196ce82f7a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04fb24a5e33627ec28e9d8236874cf729cbb5859a1714c36c632683bea019abf
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 16750,
3
  "best_metric": 1.4708431959152222,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-15500",
5
- "epoch": 1.346050303822783,
6
  "eval_steps": 250,
7
- "global_step": 17500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3018,6 +3018,92 @@
3018
  "eval_samples_per_second": 55.814,
3019
  "eval_steps_per_second": 13.954,
3020
  "step": 17500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3021
  }
3022
  ],
3023
  "logging_steps": 50,
 
2
  "best_global_step": 16750,
3
  "best_metric": 1.4708431959152222,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-15500",
5
+ "epoch": 1.3845088839320052,
6
  "eval_steps": 250,
7
+ "global_step": 18000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3018
  "eval_samples_per_second": 55.814,
3019
  "eval_steps_per_second": 13.954,
3020
  "step": 17500
3021
+ },
3022
+ {
3023
+ "epoch": 1.349896161833705,
3024
+ "grad_norm": 1.6970813274383545,
3025
+ "learning_rate": 5.0243741038932386e-05,
3026
+ "loss": 1.4442,
3027
+ "step": 17550
3028
+ },
3029
+ {
3030
+ "epoch": 1.3537420198446273,
3031
+ "grad_norm": 1.0942292213439941,
3032
+ "learning_rate": 4.998639755891327e-05,
3033
+ "loss": 1.4769,
3034
+ "step": 17600
3035
+ },
3036
+ {
3037
+ "epoch": 1.3575878778555497,
3038
+ "grad_norm": 1.720035195350647,
3039
+ "learning_rate": 4.972905407889416e-05,
3040
+ "loss": 1.4519,
3041
+ "step": 17650
3042
+ },
3043
+ {
3044
+ "epoch": 1.3614337358664719,
3045
+ "grad_norm": 0.8887185454368591,
3046
+ "learning_rate": 4.947171059887503e-05,
3047
+ "loss": 1.4201,
3048
+ "step": 17700
3049
+ },
3050
+ {
3051
+ "epoch": 1.365279593877394,
3052
+ "grad_norm": 1.9557030200958252,
3053
+ "learning_rate": 4.921436711885592e-05,
3054
+ "loss": 1.4848,
3055
+ "step": 17750
3056
+ },
3057
+ {
3058
+ "epoch": 1.365279593877394,
3059
+ "eval_loss": 1.476893424987793,
3060
+ "eval_runtime": 17.9988,
3061
+ "eval_samples_per_second": 55.559,
3062
+ "eval_steps_per_second": 13.89,
3063
+ "step": 17750
3064
+ },
3065
+ {
3066
+ "epoch": 1.3691254518883162,
3067
+ "grad_norm": 1.471414566040039,
3068
+ "learning_rate": 4.8957023638836804e-05,
3069
+ "loss": 1.4541,
3070
+ "step": 17800
3071
+ },
3072
+ {
3073
+ "epoch": 1.3729713098992384,
3074
+ "grad_norm": 1.350690484046936,
3075
+ "learning_rate": 4.8699680158817684e-05,
3076
+ "loss": 1.3954,
3077
+ "step": 17850
3078
+ },
3079
+ {
3080
+ "epoch": 1.3768171679101608,
3081
+ "grad_norm": 0.7363431453704834,
3082
+ "learning_rate": 4.844233667879857e-05,
3083
+ "loss": 1.4919,
3084
+ "step": 17900
3085
+ },
3086
+ {
3087
+ "epoch": 1.380663025921083,
3088
+ "grad_norm": 1.8820909261703491,
3089
+ "learning_rate": 4.818499319877946e-05,
3090
+ "loss": 1.4177,
3091
+ "step": 17950
3092
+ },
3093
+ {
3094
+ "epoch": 1.3845088839320052,
3095
+ "grad_norm": 0.8440986275672913,
3096
+ "learning_rate": 4.792764971876033e-05,
3097
+ "loss": 1.3995,
3098
+ "step": 18000
3099
+ },
3100
+ {
3101
+ "epoch": 1.3845088839320052,
3102
+ "eval_loss": 1.4794726371765137,
3103
+ "eval_runtime": 17.9989,
3104
+ "eval_samples_per_second": 55.559,
3105
+ "eval_steps_per_second": 13.89,
3106
+ "step": 18000
3107
  }
3108
  ],
3109
  "logging_steps": 50,