rootxhacker committed on
Commit
2aa5c0e
·
verified ·
1 Parent(s): 9b579ae

Training in progress, step 13000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd49fdfe9bdd857be5419dddbdbaa6f93701dd16afd12b8202d9abb0f608e217
3
  size 36730224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:211303fda3172e98417fefcf4ae565442875c8e0001f83fa9b613f42eb68e416
3
  size 36730224
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8066ffe6e1fa6681b38c047067ca29a6eed18da1b1acf88431943e295d78e330
3
  size 73588346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bf6caeca7f6bd6acaeee1391b627a2e314559d2e523a534ad583a26de79d19e
3
  size 73588346
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a5ae118ec673d57a7f62a6284956ef83acf5113a9420dcd7ba5fd941799b59c
3
- size 14180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbf8948534182fde8a4da31776a53cc78337e1e18ed4b526e2ee8ca99eff5731
3
+ size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63eee2f78c948ec330b424961c4bdfb29608b15bd25064fec9d957a2f7fcbcd2
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:501e0348ebe2e7da1549a0a032d61d9c5a8a3fbf8f3846e0f360ce065e6e05b7
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:334ae361472f93791190a5614ebfa0a90c8bcb9632490f070b08594d58ab788b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da01fa338433614d7203b84552679acf4a96484a8201c92d575f7a8bdc7e698e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 12250,
3
- "best_metric": 1.5108226537704468,
4
- "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-12000",
5
- "epoch": 0.9614645027305592,
6
  "eval_steps": 250,
7
- "global_step": 12500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2158,6 +2158,92 @@
2158
  "eval_samples_per_second": 55.797,
2159
  "eval_steps_per_second": 13.949,
2160
  "step": 12500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2161
  }
2162
  ],
2163
  "logging_steps": 50,
 
1
  {
2
+ "best_global_step": 13000,
3
+ "best_metric": 1.5033278465270996,
4
+ "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-13000",
5
+ "epoch": 0.9999230828397816,
6
  "eval_steps": 250,
7
+ "global_step": 13000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2158
  "eval_samples_per_second": 55.797,
2159
  "eval_steps_per_second": 13.949,
2160
  "step": 12500
2161
+ },
2162
+ {
2163
+ "epoch": 0.9653103607414815,
2164
+ "grad_norm": 7.388810634613037,
2165
+ "learning_rate": 7.597808904084407e-05,
2166
+ "loss": 1.5683,
2167
+ "step": 12550
2168
+ },
2169
+ {
2170
+ "epoch": 0.9691562187524037,
2171
+ "grad_norm": 1.5548075437545776,
2172
+ "learning_rate": 7.572074556082496e-05,
2173
+ "loss": 1.4956,
2174
+ "step": 12600
2175
+ },
2176
+ {
2177
+ "epoch": 0.973002076763326,
2178
+ "grad_norm": 1.5935887098312378,
2179
+ "learning_rate": 7.546340208080584e-05,
2180
+ "loss": 1.5363,
2181
+ "step": 12650
2182
+ },
2183
+ {
2184
+ "epoch": 0.9768479347742481,
2185
+ "grad_norm": 1.985238790512085,
2186
+ "learning_rate": 7.520605860078672e-05,
2187
+ "loss": 1.5314,
2188
+ "step": 12700
2189
+ },
2190
+ {
2191
+ "epoch": 0.9806937927851703,
2192
+ "grad_norm": 1.5040565729141235,
2193
+ "learning_rate": 7.494871512076762e-05,
2194
+ "loss": 1.5108,
2195
+ "step": 12750
2196
+ },
2197
+ {
2198
+ "epoch": 0.9806937927851703,
2199
+ "eval_loss": 1.5085468292236328,
2200
+ "eval_runtime": 18.0531,
2201
+ "eval_samples_per_second": 55.392,
2202
+ "eval_steps_per_second": 13.848,
2203
+ "step": 12750
2204
+ },
2205
+ {
2206
+ "epoch": 0.9845396507960926,
2207
+ "grad_norm": 1.2956914901733398,
2208
+ "learning_rate": 7.46913716407485e-05,
2209
+ "loss": 1.4287,
2210
+ "step": 12800
2211
+ },
2212
+ {
2213
+ "epoch": 0.9883855088070148,
2214
+ "grad_norm": 1.1903409957885742,
2215
+ "learning_rate": 7.443402816072938e-05,
2216
+ "loss": 1.5583,
2217
+ "step": 12850
2218
+ },
2219
+ {
2220
+ "epoch": 0.9922313668179371,
2221
+ "grad_norm": 1.9069184064865112,
2222
+ "learning_rate": 7.417668468071026e-05,
2223
+ "loss": 1.5214,
2224
+ "step": 12900
2225
+ },
2226
+ {
2227
+ "epoch": 0.9960772248288593,
2228
+ "grad_norm": 1.7362926006317139,
2229
+ "learning_rate": 7.391934120069114e-05,
2230
+ "loss": 1.55,
2231
+ "step": 12950
2232
+ },
2233
+ {
2234
+ "epoch": 0.9999230828397816,
2235
+ "grad_norm": 1.2136348485946655,
2236
+ "learning_rate": 7.366199772067203e-05,
2237
+ "loss": 1.5035,
2238
+ "step": 13000
2239
+ },
2240
+ {
2241
+ "epoch": 0.9999230828397816,
2242
+ "eval_loss": 1.5033278465270996,
2243
+ "eval_runtime": 18.189,
2244
+ "eval_samples_per_second": 54.978,
2245
+ "eval_steps_per_second": 13.745,
2246
+ "step": 13000
2247
  }
2248
  ],
2249
  "logging_steps": 50,