rootxhacker commited on
Commit
f012efc
·
verified ·
1 Parent(s): c0132ba

Training in progress, step 6500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d3519cd9f54111b61d5c7bda32b6503a50d9a901f0867b18c77e1c8775d34cc
3
  size 36730224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e4fa545226f8a1bc971fe1a1680b858ab3cba84884085de4d7bb95fce3027ee
3
  size 36730224
last-checkpoint/ar_diffusion_info.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc8be5680951e9b7e427f6500eddc8c10ce019d345c9c9f83581fc8bb331d518
3
  size 1736
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b20c2693c849c11648664638444158f688e791753d1fd5b0a3839f3bf439279b
3
  size 1736
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fead462933224306a9b42a0eb2433085db43c13976438f5d79faccec086ccd4f
3
  size 73588346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b38307c83b8af70cb28809299297b128e7799c2bcec4e16e0bf925a16820657
3
  size 73588346
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f460ca4efaa2c47b577474097b3561bee10506073f07d444922fddb74fb1bc7b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3dc41260bd3c4c88915ea4a89c036f2cf6425407a322ee64551935a201e505c
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:faa0523a11b32ac61978a5b91bfe471f27b6d880dfaedd29cb6e6516dad00d0b
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76773cd6ef78ee26a2f9b89a5cdc92391f18eaca0e15a3c1b947462d6c8a47c1
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ee9289020d6b3ecd17f692689ed65a2fe5f50143b40afcd424248034cb8eabc
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63a3451545c9d3c80af7346ccecc86c947b582a85ff2768518fe0c4acdf7dcd7
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 6000,
3
  "best_metric": 1.4524279832839966,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints-progressive-attention/checkpoint-6000",
5
- "epoch": 0.4615029613106684,
6
  "eval_steps": 250,
7
- "global_step": 6000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1040,6 +1040,92 @@
1040
  "eval_samples_per_second": 59.22,
1041
  "eval_steps_per_second": 14.805,
1042
  "step": 6000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1043
  }
1044
  ],
1045
  "logging_steps": 50,
 
2
  "best_global_step": 6000,
3
  "best_metric": 1.4524279832839966,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints-progressive-attention/checkpoint-6000",
5
+ "epoch": 0.4999615414198908,
6
  "eval_steps": 250,
7
+ "global_step": 6500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1040
  "eval_samples_per_second": 59.22,
1041
  "eval_steps_per_second": 14.805,
1042
  "step": 6000
1043
+ },
1044
+ {
1045
+ "epoch": 0.46534881932159067,
1046
+ "grad_norm": 1.1764196157455444,
1047
+ "learning_rate": 0.00017119704958055217,
1048
+ "loss": 1.4821,
1049
+ "step": 6050
1050
+ },
1051
+ {
1052
+ "epoch": 0.4691946773325129,
1053
+ "grad_norm": 0.9580527544021606,
1054
+ "learning_rate": 0.00017093732955873568,
1055
+ "loss": 1.4367,
1056
+ "step": 6100
1057
+ },
1058
+ {
1059
+ "epoch": 0.4730405353434351,
1060
+ "grad_norm": 0.8218849301338196,
1061
+ "learning_rate": 0.0001706776095369192,
1062
+ "loss": 1.4738,
1063
+ "step": 6150
1064
+ },
1065
+ {
1066
+ "epoch": 0.47688639335435734,
1067
+ "grad_norm": 1.0671401023864746,
1068
+ "learning_rate": 0.00017041788951510273,
1069
+ "loss": 1.4951,
1070
+ "step": 6200
1071
+ },
1072
+ {
1073
+ "epoch": 0.4807322513652796,
1074
+ "grad_norm": 1.223479986190796,
1075
+ "learning_rate": 0.00017015816949328625,
1076
+ "loss": 1.4481,
1077
+ "step": 6250
1078
+ },
1079
+ {
1080
+ "epoch": 0.4807322513652796,
1081
+ "eval_loss": 1.4635798931121826,
1082
+ "eval_runtime": 17.0372,
1083
+ "eval_samples_per_second": 58.695,
1084
+ "eval_steps_per_second": 14.674,
1085
+ "step": 6250
1086
+ },
1087
+ {
1088
+ "epoch": 0.4845781093762018,
1089
+ "grad_norm": 1.6794487237930298,
1090
+ "learning_rate": 0.00016989844947146976,
1091
+ "loss": 1.4217,
1092
+ "step": 6300
1093
+ },
1094
+ {
1095
+ "epoch": 0.48842396738712407,
1096
+ "grad_norm": 0.9655230045318604,
1097
+ "learning_rate": 0.0001696387294496533,
1098
+ "loss": 1.4969,
1099
+ "step": 6350
1100
+ },
1101
+ {
1102
+ "epoch": 0.4922698253980463,
1103
+ "grad_norm": 1.2234684228897095,
1104
+ "learning_rate": 0.00016937900942783679,
1105
+ "loss": 1.5139,
1106
+ "step": 6400
1107
+ },
1108
+ {
1109
+ "epoch": 0.49611568340896856,
1110
+ "grad_norm": 2.1821775436401367,
1111
+ "learning_rate": 0.0001691192894060203,
1112
+ "loss": 1.4661,
1113
+ "step": 6450
1114
+ },
1115
+ {
1116
+ "epoch": 0.4999615414198908,
1117
+ "grad_norm": 1.1690566539764404,
1118
+ "learning_rate": 0.00016885956938420384,
1119
+ "loss": 1.4417,
1120
+ "step": 6500
1121
+ },
1122
+ {
1123
+ "epoch": 0.4999615414198908,
1124
+ "eval_loss": 1.452785611152649,
1125
+ "eval_runtime": 16.7494,
1126
+ "eval_samples_per_second": 59.703,
1127
+ "eval_steps_per_second": 14.926,
1128
+ "step": 6500
1129
  }
1130
  ],
1131
  "logging_steps": 50,