ToastyPigeon committed on
Commit
3c361a4
·
verified ·
1 Parent(s): 3218b2e

Training in progress, step 160, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:918768fd81ec1f6d0b6a14800750fff3bd0659bef1c4646e80aa92d97afde2e2
3
  size 1824599104
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b940ff3c10efda3484d8154d06829b625f4661e41ccae9b4f19346cb07b313a
3
  size 1824599104
last-checkpoint/optimizer.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51c893d47ba51df78c0e8e0a151e2c27326009267c048825c44f824560f32fbb
3
  size 3649546931
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:daa1c395c04b88c3560659b77a7658fbb3720dd025b9258a7d325ce415d84fe3
3
  size 3649546931
last-checkpoint/pytorch_model_fsdp.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f364fd837ca5ff32dc0301544a5dbae56e1fc6f2c04c5d52ab320e7be8eb302
3
  size 1824732017
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99127e94c2effa149a5652f46296157ec1072a023025a99704d613e9107afb0b
3
  size 1824732017
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1fe494caee29827121ede25c140b98d732abcef295793011a9da3e63f9f787f1
3
  size 14917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:328fe9c5ce5de7cfc64522510573505020717e04aec0eb1a953b29b716e65835
3
  size 14917
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d9f54950f1d5722d2dfd26fbe8ca22a21f30a3202910f114a3f83d950b2f3659
3
  size 14917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3e386176596a4a6fe816025b7392c8664cea5fdfad2019e05665976ee0b3112
3
  size 14917
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b6455e29f08e0afb47c24dca5da42d6d008549a09137c0332f3f89df0df981b
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d034473a6f1844782ea448b09a1d7345e766bf381ca00de81b2eda370185c5a
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.7115628970775095,
6
  "eval_steps": 20,
7
- "global_step": 140,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1052,6 +1052,154 @@
1052
  "eval_samples_per_second": 0.261,
1053
  "eval_steps_per_second": 0.138,
1054
  "step": 140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1055
  }
1056
  ],
1057
  "logging_steps": 1,
@@ -1071,7 +1219,7 @@
1071
  "attributes": {}
1072
  }
1073
  },
1074
- "total_flos": 1.3758826801004544e+18,
1075
  "train_batch_size": 1,
1076
  "trial_name": null,
1077
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.8132147395171537,
6
  "eval_steps": 20,
7
+ "global_step": 160,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1052
  "eval_samples_per_second": 0.261,
1053
  "eval_steps_per_second": 0.138,
1054
  "step": 140
1055
+ },
1056
+ {
1057
+ "epoch": 0.7166454891994918,
1058
+ "grad_norm": 0.2736265957355499,
1059
+ "learning_rate": 8.017492711370262e-06,
1060
+ "loss": 2.4641,
1061
+ "step": 141
1062
+ },
1063
+ {
1064
+ "epoch": 0.7217280813214739,
1065
+ "grad_norm": 0.3174920678138733,
1066
+ "learning_rate": 7.976366322008863e-06,
1067
+ "loss": 2.4185,
1068
+ "step": 142
1069
+ },
1070
+ {
1071
+ "epoch": 0.7268106734434562,
1072
+ "grad_norm": 0.37216174602508545,
1073
+ "learning_rate": 7.934131736526946e-06,
1074
+ "loss": 2.3133,
1075
+ "step": 143
1076
+ },
1077
+ {
1078
+ "epoch": 0.7318932655654383,
1079
+ "grad_norm": 0.36393406987190247,
1080
+ "learning_rate": 7.890743550834599e-06,
1081
+ "loss": 2.5207,
1082
+ "step": 144
1083
+ },
1084
+ {
1085
+ "epoch": 0.7369758576874206,
1086
+ "grad_norm": 0.5867409706115723,
1087
+ "learning_rate": 7.846153846153847e-06,
1088
+ "loss": 2.328,
1089
+ "step": 145
1090
+ },
1091
+ {
1092
+ "epoch": 0.7420584498094028,
1093
+ "grad_norm": 0.4749656915664673,
1094
+ "learning_rate": 7.8003120124805e-06,
1095
+ "loss": 2.4006,
1096
+ "step": 146
1097
+ },
1098
+ {
1099
+ "epoch": 0.747141041931385,
1100
+ "grad_norm": 0.3379077613353729,
1101
+ "learning_rate": 7.753164556962026e-06,
1102
+ "loss": 2.3301,
1103
+ "step": 147
1104
+ },
1105
+ {
1106
+ "epoch": 0.7522236340533672,
1107
+ "grad_norm": 0.30944380164146423,
1108
+ "learning_rate": 7.704654895666132e-06,
1109
+ "loss": 2.3985,
1110
+ "step": 148
1111
+ },
1112
+ {
1113
+ "epoch": 0.7573062261753494,
1114
+ "grad_norm": 0.3790503442287445,
1115
+ "learning_rate": 7.65472312703583e-06,
1116
+ "loss": 2.3055,
1117
+ "step": 149
1118
+ },
1119
+ {
1120
+ "epoch": 0.7623888182973316,
1121
+ "grad_norm": 0.34668728709220886,
1122
+ "learning_rate": 7.603305785123969e-06,
1123
+ "loss": 2.3896,
1124
+ "step": 150
1125
+ },
1126
+ {
1127
+ "epoch": 0.7674714104193139,
1128
+ "grad_norm": 0.3420522212982178,
1129
+ "learning_rate": 7.550335570469799e-06,
1130
+ "loss": 2.4232,
1131
+ "step": 151
1132
+ },
1133
+ {
1134
+ "epoch": 0.772554002541296,
1135
+ "grad_norm": 0.3033508062362671,
1136
+ "learning_rate": 7.4957410562180576e-06,
1137
+ "loss": 2.2478,
1138
+ "step": 152
1139
+ },
1140
+ {
1141
+ "epoch": 0.7776365946632783,
1142
+ "grad_norm": 0.2843773663043976,
1143
+ "learning_rate": 7.439446366782007e-06,
1144
+ "loss": 2.3415,
1145
+ "step": 153
1146
+ },
1147
+ {
1148
+ "epoch": 0.7827191867852605,
1149
+ "grad_norm": 0.3108879327774048,
1150
+ "learning_rate": 7.3813708260105456e-06,
1151
+ "loss": 2.3628,
1152
+ "step": 154
1153
+ },
1154
+ {
1155
+ "epoch": 0.7878017789072427,
1156
+ "grad_norm": 0.3458247184753418,
1157
+ "learning_rate": 7.321428571428573e-06,
1158
+ "loss": 2.3096,
1159
+ "step": 155
1160
+ },
1161
+ {
1162
+ "epoch": 0.7928843710292249,
1163
+ "grad_norm": 0.32452043890953064,
1164
+ "learning_rate": 7.259528130671507e-06,
1165
+ "loss": 2.2148,
1166
+ "step": 156
1167
+ },
1168
+ {
1169
+ "epoch": 0.7979669631512071,
1170
+ "grad_norm": 0.3931266665458679,
1171
+ "learning_rate": 7.195571955719557e-06,
1172
+ "loss": 2.3311,
1173
+ "step": 157
1174
+ },
1175
+ {
1176
+ "epoch": 0.8030495552731893,
1177
+ "grad_norm": 0.341151624917984,
1178
+ "learning_rate": 7.129455909943714e-06,
1179
+ "loss": 2.4496,
1180
+ "step": 158
1181
+ },
1182
+ {
1183
+ "epoch": 0.8081321473951716,
1184
+ "grad_norm": 0.35213160514831543,
1185
+ "learning_rate": 7.061068702290077e-06,
1186
+ "loss": 2.3066,
1187
+ "step": 159
1188
+ },
1189
+ {
1190
+ "epoch": 0.8132147395171537,
1191
+ "grad_norm": 0.35280901193618774,
1192
+ "learning_rate": 6.990291262135923e-06,
1193
+ "loss": 2.365,
1194
+ "step": 160
1195
+ },
1196
+ {
1197
+ "epoch": 0.8132147395171537,
1198
+ "eval_loss": 2.241875410079956,
1199
+ "eval_runtime": 64.1026,
1200
+ "eval_samples_per_second": 0.265,
1201
+ "eval_steps_per_second": 0.14,
1202
+ "step": 160
1203
  }
1204
  ],
1205
  "logging_steps": 1,
 
1219
  "attributes": {}
1220
  }
1221
  },
1222
+ "total_flos": 1.5724373486862336e+18,
1223
  "train_batch_size": 1,
1224
  "trial_name": null,
1225
  "trial_params": null