rootxhacker commited on
Commit
4ba582f
·
verified ·
1 Parent(s): 8aba67d

Training in progress, step 7000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ecef08c15acbbcafdbd18353f8fac26c3a385caa8d1e0800b9015c88eca7688
3
  size 36730224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3903c059a7b663d5bfd566efc9974ae82cc96ac9d5b539705078e742f56c337d
3
  size 36730224
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ff134a511b0525e97d92ca6b0df4a29b3c192a8c38ede989f92d5a4599a5387
3
  size 73588346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4c871c606d134fb7ce2c803972d6df79093d0f6a67161c6b1b4060e6eb55be6
3
  size 73588346
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44c02dc0791054ba042a754ce4b193305b8c58f3c6ec32c6d59f64ae1e19274a
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f4acfd5fa4964d8f20ab734945eda1f469ecfbbe259a3fd74f731ec4baabed1
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df1b3ae13e2a70e8f07dd9d7c74ac5ab9a7fad8c5d805908d310a8c82d08cad1
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6777a8c32c870836be295ac0aa7f4cc3d40129a0ebc96e1b375fa98ce8275f00
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4505e6ebc4cb5953ed365ab7de48d1737346198587960aabd24a26dc3d267d0c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61aefcf58c021497ade44ee93d84da1168bb6593c3269faeaeb7e7cace70ca34
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 5000,
3
- "best_metric": 4.414160251617432,
4
- "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-5000",
5
- "epoch": 0.4999615414198908,
6
  "eval_steps": 250,
7
- "global_step": 6500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1126,6 +1126,92 @@
1126
  "eval_samples_per_second": 53.008,
1127
  "eval_steps_per_second": 13.252,
1128
  "step": 6500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1129
  }
1130
  ],
1131
  "logging_steps": 50,
 
1
  {
2
+ "best_global_step": 7000,
3
+ "best_metric": 4.411437034606934,
4
+ "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-7000",
5
+ "epoch": 0.5384201215291131,
6
  "eval_steps": 250,
7
+ "global_step": 7000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1126
  "eval_samples_per_second": 53.008,
1127
  "eval_steps_per_second": 13.252,
1128
  "step": 6500
1129
+ },
1130
+ {
1131
+ "epoch": 0.503807399430813,
1132
+ "grad_norm": 4.299264430999756,
1133
+ "learning_rate": 0.00010684386603433697,
1134
+ "loss": 4.6103,
1135
+ "step": 6550
1136
+ },
1137
+ {
1138
+ "epoch": 0.5076532574417353,
1139
+ "grad_norm": 4.186795234680176,
1140
+ "learning_rate": 0.00010659166942391823,
1141
+ "loss": 4.5303,
1142
+ "step": 6600
1143
+ },
1144
+ {
1145
+ "epoch": 0.5114991154526575,
1146
+ "grad_norm": 2.925708293914795,
1147
+ "learning_rate": 0.00010633432594389911,
1148
+ "loss": 4.4265,
1149
+ "step": 6650
1150
+ },
1151
+ {
1152
+ "epoch": 0.5153449734635798,
1153
+ "grad_norm": 6.368393421173096,
1154
+ "learning_rate": 0.00010607698246388,
1155
+ "loss": 4.3358,
1156
+ "step": 6700
1157
+ },
1158
+ {
1159
+ "epoch": 0.519190831474502,
1160
+ "grad_norm": 4.947482585906982,
1161
+ "learning_rate": 0.00010581963898386088,
1162
+ "loss": 4.5812,
1163
+ "step": 6750
1164
+ },
1165
+ {
1166
+ "epoch": 0.519190831474502,
1167
+ "eval_loss": 4.466405868530273,
1168
+ "eval_runtime": 18.8333,
1169
+ "eval_samples_per_second": 53.097,
1170
+ "eval_steps_per_second": 13.274,
1171
+ "step": 6750
1172
+ },
1173
+ {
1174
+ "epoch": 0.5230366894854241,
1175
+ "grad_norm": 2.469914674758911,
1176
+ "learning_rate": 0.00010556229550384175,
1177
+ "loss": 4.3623,
1178
+ "step": 6800
1179
+ },
1180
+ {
1181
+ "epoch": 0.5268825474963464,
1182
+ "grad_norm": 5.027404308319092,
1183
+ "learning_rate": 0.00010530495202382264,
1184
+ "loss": 4.5466,
1185
+ "step": 6850
1186
+ },
1187
+ {
1188
+ "epoch": 0.5307284055072686,
1189
+ "grad_norm": 4.797220706939697,
1190
+ "learning_rate": 0.00010504760854380352,
1191
+ "loss": 4.4486,
1192
+ "step": 6900
1193
+ },
1194
+ {
1195
+ "epoch": 0.5345742635181909,
1196
+ "grad_norm": 5.403319358825684,
1197
+ "learning_rate": 0.00010479026506378442,
1198
+ "loss": 4.4919,
1199
+ "step": 6950
1200
+ },
1201
+ {
1202
+ "epoch": 0.5384201215291131,
1203
+ "grad_norm": 4.601899147033691,
1204
+ "learning_rate": 0.0001045329215837653,
1205
+ "loss": 4.4703,
1206
+ "step": 7000
1207
+ },
1208
+ {
1209
+ "epoch": 0.5384201215291131,
1210
+ "eval_loss": 4.411437034606934,
1211
+ "eval_runtime": 18.8691,
1212
+ "eval_samples_per_second": 52.997,
1213
+ "eval_steps_per_second": 13.249,
1214
+ "step": 7000
1215
  }
1216
  ],
1217
  "logging_steps": 50,