rootxhacker commited on
Commit
55f8f4c
·
verified ·
1 Parent(s): 09edc38

Training in progress, step 6500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a554f35b6a17be128fce6b6dde18077ae472e710857689bc82a3a107a5064b70
3
  size 36730224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ecef08c15acbbcafdbd18353f8fac26c3a385caa8d1e0800b9015c88eca7688
3
  size 36730224
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7489e1764b16dc1b56ae149e381b65054da7550c9735ed9c2fdc8fc794efba0e
3
  size 73588346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ff134a511b0525e97d92ca6b0df4a29b3c192a8c38ede989f92d5a4599a5387
3
  size 73588346
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12c22be93200b6ea29a5a43fdc896fd16b0e2c374bfcbb275fb907bb85c40cd3
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44c02dc0791054ba042a754ce4b193305b8c58f3c6ec32c6d59f64ae1e19274a
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6914f08d038d2fd5583fe9d1dfb74316a740319f66641c1d86781bc3fc4b8381
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df1b3ae13e2a70e8f07dd9d7c74ac5ab9a7fad8c5d805908d310a8c82d08cad1
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f20cc39b1ac70cfd962157cd5b033ae7e76e9f02816bf84f00fd90791d769ab
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4505e6ebc4cb5953ed365ab7de48d1737346198587960aabd24a26dc3d267d0c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 5000,
3
  "best_metric": 4.414160251617432,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-5000",
5
- "epoch": 0.4615029613106684,
6
  "eval_steps": 250,
7
- "global_step": 6000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1040,6 +1040,92 @@
1040
  "eval_samples_per_second": 53.084,
1041
  "eval_steps_per_second": 13.271,
1042
  "step": 6000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1043
  }
1044
  ],
1045
  "logging_steps": 50,
 
2
  "best_global_step": 5000,
3
  "best_metric": 4.414160251617432,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-5000",
5
+ "epoch": 0.4999615414198908,
6
  "eval_steps": 250,
7
+ "global_step": 6500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1040
  "eval_samples_per_second": 53.084,
1041
  "eval_steps_per_second": 13.271,
1042
  "step": 6000
1043
+ },
1044
+ {
1045
+ "epoch": 0.46534881932159067,
1046
+ "grad_norm": 3.6767919063568115,
1047
+ "learning_rate": 0.00010941730083452813,
1048
+ "loss": 4.5798,
1049
+ "step": 6050
1050
+ },
1051
+ {
1052
+ "epoch": 0.4691946773325129,
1053
+ "grad_norm": 3.8597254753112793,
1054
+ "learning_rate": 0.00010915995735450901,
1055
+ "loss": 4.5867,
1056
+ "step": 6100
1057
+ },
1058
+ {
1059
+ "epoch": 0.4730405353434351,
1060
+ "grad_norm": 2.8041980266571045,
1061
+ "learning_rate": 0.0001089026138744899,
1062
+ "loss": 4.4825,
1063
+ "step": 6150
1064
+ },
1065
+ {
1066
+ "epoch": 0.47688639335435734,
1067
+ "grad_norm": 3.3872950077056885,
1068
+ "learning_rate": 0.00010864527039447078,
1069
+ "loss": 4.5624,
1070
+ "step": 6200
1071
+ },
1072
+ {
1073
+ "epoch": 0.4807322513652796,
1074
+ "grad_norm": 3.698118209838867,
1075
+ "learning_rate": 0.00010838792691445166,
1076
+ "loss": 4.4889,
1077
+ "step": 6250
1078
+ },
1079
+ {
1080
+ "epoch": 0.4807322513652796,
1081
+ "eval_loss": 4.451441287994385,
1082
+ "eval_runtime": 19.2349,
1083
+ "eval_samples_per_second": 51.989,
1084
+ "eval_steps_per_second": 12.997,
1085
+ "step": 6250
1086
+ },
1087
+ {
1088
+ "epoch": 0.4845781093762018,
1089
+ "grad_norm": 3.7140421867370605,
1090
+ "learning_rate": 0.00010813058343443254,
1091
+ "loss": 4.4654,
1092
+ "step": 6300
1093
+ },
1094
+ {
1095
+ "epoch": 0.48842396738712407,
1096
+ "grad_norm": 3.095348834991455,
1097
+ "learning_rate": 0.00010787323995441342,
1098
+ "loss": 4.4761,
1099
+ "step": 6350
1100
+ },
1101
+ {
1102
+ "epoch": 0.4922698253980463,
1103
+ "grad_norm": 3.289018392562866,
1104
+ "learning_rate": 0.00010761589647439432,
1105
+ "loss": 4.5459,
1106
+ "step": 6400
1107
+ },
1108
+ {
1109
+ "epoch": 0.49611568340896856,
1110
+ "grad_norm": 3.9891817569732666,
1111
+ "learning_rate": 0.0001073585529943752,
1112
+ "loss": 4.3685,
1113
+ "step": 6450
1114
+ },
1115
+ {
1116
+ "epoch": 0.4999615414198908,
1117
+ "grad_norm": 4.315449237823486,
1118
+ "learning_rate": 0.00010710120951435608,
1119
+ "loss": 4.4197,
1120
+ "step": 6500
1121
+ },
1122
+ {
1123
+ "epoch": 0.4999615414198908,
1124
+ "eval_loss": 4.4507598876953125,
1125
+ "eval_runtime": 18.8652,
1126
+ "eval_samples_per_second": 53.008,
1127
+ "eval_steps_per_second": 13.252,
1128
+ "step": 6500
1129
  }
1130
  ],
1131
  "logging_steps": 50,