rootxhacker commited on
Commit
2c6eaba
·
verified ·
1 Parent(s): ffc8410

Training in progress, step 29500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:afb1f994dba522602e482c6617e68b2ec24254fe04ad07399654bbd51f70ee28
3
  size 36730224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79c28d4b4840c8a771d27b1f27c6ac5c00a9f2991d7394f3a6733892a09080ab
3
  size 36730224
last-checkpoint/ar_diffusion_info.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8076f6cd20d7f4c66417fcba7f62699920c2ac2bc88d4cc342541ab812336f0a
3
  size 1544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c30265894652137003ce0d462067739a1e1f87e1fef05727cce7c9bc27d9969d
3
  size 1544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3531171f249076e214e0e84ae3500c8a6ba16b1c8f7d014a2f1b1fe43cc09834
3
  size 73588346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35ee565d832de94c600e576ee117f0978437f0823600766c050ee216a94c8988
3
  size 73588346
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5f1da1040925b2052aee2dca0c3fa273b607e957cbf18770059a4080553de3f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:734e3f7dc3063a42733f6397bd808730560212803e7502d677f32c1adc811a33
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ac450af05edcacc287976d7cce0a697b2e2b1346a00fb8804e2fa8c2427dff3
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4379a058e70e2b699ffe8db7ee723ac96ffd7adc9d62248596ac9aebf5e54f3
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:01b7c9c1894540a3bb43365814662cd05e754a3caa17602ed4c796362bf454cf
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f444f08db434c1664e05b94a04b805dac6e17ed393cf9fcf28a82949e586c98
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 29000,
3
  "best_metric": 0.9999537467956543,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-29000",
5
- "epoch": 2.230597646334897,
6
  "eval_steps": 250,
7
- "global_step": 29000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -4996,6 +4996,92 @@
4996
  "eval_samples_per_second": 57.999,
4997
  "eval_steps_per_second": 14.5,
4998
  "step": 29000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4999
  }
5000
  ],
5001
  "logging_steps": 50,
 
2
  "best_global_step": 29000,
3
  "best_metric": 0.9999537467956543,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-29000",
5
+ "epoch": 2.26905622644412,
6
  "eval_steps": 250,
7
+ "global_step": 29500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
4996
  "eval_samples_per_second": 57.999,
4997
  "eval_steps_per_second": 14.5,
4998
  "step": 29000
4999
+ },
5000
+ {
5001
+ "epoch": 2.2344435043458195,
5002
+ "grad_norm": 0.5559306740760803,
5003
+ "learning_rate": 5.1772589148897485e-05,
5004
+ "loss": 0.9749,
5005
+ "step": 29050
5006
+ },
5007
+ {
5008
+ "epoch": 2.238289362356742,
5009
+ "grad_norm": 0.6720598936080933,
5010
+ "learning_rate": 5.151286912708101e-05,
5011
+ "loss": 0.9662,
5012
+ "step": 29100
5013
+ },
5014
+ {
5015
+ "epoch": 2.242135220367664,
5016
+ "grad_norm": 1.117200493812561,
5017
+ "learning_rate": 5.125314910526453e-05,
5018
+ "loss": 0.9552,
5019
+ "step": 29150
5020
+ },
5021
+ {
5022
+ "epoch": 2.2459810783785863,
5023
+ "grad_norm": 0.6818645000457764,
5024
+ "learning_rate": 5.0993429083448044e-05,
5025
+ "loss": 0.9032,
5026
+ "step": 29200
5027
+ },
5028
+ {
5029
+ "epoch": 2.2498269363895087,
5030
+ "grad_norm": 0.9796412587165833,
5031
+ "learning_rate": 5.0733709061631564e-05,
5032
+ "loss": 0.9429,
5033
+ "step": 29250
5034
+ },
5035
+ {
5036
+ "epoch": 2.2498269363895087,
5037
+ "eval_loss": 1.0121312141418457,
5038
+ "eval_runtime": 17.3211,
5039
+ "eval_samples_per_second": 57.733,
5040
+ "eval_steps_per_second": 14.433,
5041
+ "step": 29250
5042
+ },
5043
+ {
5044
+ "epoch": 2.2536727944004307,
5045
+ "grad_norm": 1.021713137626648,
5046
+ "learning_rate": 5.047398903981508e-05,
5047
+ "loss": 0.979,
5048
+ "step": 29300
5049
+ },
5050
+ {
5051
+ "epoch": 2.257518652411353,
5052
+ "grad_norm": 1.1321250200271606,
5053
+ "learning_rate": 5.02142690179986e-05,
5054
+ "loss": 1.0327,
5055
+ "step": 29350
5056
+ },
5057
+ {
5058
+ "epoch": 2.261364510422275,
5059
+ "grad_norm": 0.7670277953147888,
5060
+ "learning_rate": 4.9954548996182116e-05,
5061
+ "loss": 0.9668,
5062
+ "step": 29400
5063
+ },
5064
+ {
5065
+ "epoch": 2.2652103684331975,
5066
+ "grad_norm": 1.447698712348938,
5067
+ "learning_rate": 4.9694828974365635e-05,
5068
+ "loss": 0.9288,
5069
+ "step": 29450
5070
+ },
5071
+ {
5072
+ "epoch": 2.26905622644412,
5073
+ "grad_norm": 1.0438776016235352,
5074
+ "learning_rate": 4.9435108952549155e-05,
5075
+ "loss": 1.0154,
5076
+ "step": 29500
5077
+ },
5078
+ {
5079
+ "epoch": 2.26905622644412,
5080
+ "eval_loss": 1.0100510120391846,
5081
+ "eval_runtime": 17.4293,
5082
+ "eval_samples_per_second": 57.375,
5083
+ "eval_steps_per_second": 14.344,
5084
+ "step": 29500
5085
  }
5086
  ],
5087
  "logging_steps": 50,