YazoPi commited on
Commit
9032eb0
·
verified ·
1 Parent(s): b80299b

Training in progress, step 1794, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a01d5cb64f0c537a59a0cdfcf1bc3a560ff1761f3abd4cf499bed94982d2222b
3
  size 389074464
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:442d270b50645e8920121f8bcb1e3642bfd469619b17f2c672139a95fd23af56
3
  size 389074464
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e7686aa5ab2d44d69c5f4408fe4db2b21f4858bcadc7a6fc0ce0e75707e1a34
3
  size 198016005
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:186f1f434e7d3e152023d123ef21c94c4a0d6ce9718d70926a1817fd851c08fe
3
  size 198016005
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4a99307857adad062a03c20182e56acb1143f23b345fb9eaeaec2099e02e31a
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4453f8a4437d3cd9972effeb19da458043df1075458e51d17d709e5b85678e59
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 2.3411371237458196,
6
  "eval_steps": 500,
7
- "global_step": 1400,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -988,6 +988,279 @@
988
  "learning_rate": 2.3107863556921237e-05,
989
  "loss": 3.587228012084961,
990
  "step": 1400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
991
  }
992
  ],
993
  "logging_steps": 10,
@@ -1002,12 +1275,12 @@
1002
  "should_evaluate": false,
1003
  "should_log": false,
1004
  "should_save": true,
1005
- "should_training_stop": false
1006
  },
1007
  "attributes": {}
1008
  }
1009
  },
1010
- "total_flos": 1.6052200791938826e+18,
1011
  "train_batch_size": 42,
1012
  "trial_name": null,
1013
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 3.0,
6
  "eval_steps": 500,
7
+ "global_step": 1794,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
988
  "learning_rate": 2.3107863556921237e-05,
989
  "loss": 3.587228012084961,
990
  "step": 1400
991
+ },
992
+ {
993
+ "epoch": 2.3578595317725752,
994
+ "grad_norm": 0.21534579992294312,
995
+ "learning_rate": 2.199705098358459e-05,
996
+ "loss": 3.6128841400146485,
997
+ "step": 1410
998
+ },
999
+ {
1000
+ "epoch": 2.374581939799331,
1001
+ "grad_norm": 0.19469194114208221,
1002
+ "learning_rate": 2.091029194607431e-05,
1003
+ "loss": 3.612522506713867,
1004
+ "step": 1420
1005
+ },
1006
+ {
1007
+ "epoch": 2.391304347826087,
1008
+ "grad_norm": 0.19594796001911163,
1009
+ "learning_rate": 1.984792156502072e-05,
1010
+ "loss": 3.612636184692383,
1011
+ "step": 1430
1012
+ },
1013
+ {
1014
+ "epoch": 2.408026755852843,
1015
+ "grad_norm": 0.19416235387325287,
1016
+ "learning_rate": 1.8810267440397246e-05,
1017
+ "loss": 3.611737823486328,
1018
+ "step": 1440
1019
+ },
1020
+ {
1021
+ "epoch": 2.4247491638795986,
1022
+ "grad_norm": 0.20568251609802246,
1023
+ "learning_rate": 1.779764955049925e-05,
1024
+ "loss": 3.6048515319824217,
1025
+ "step": 1450
1026
+ },
1027
+ {
1028
+ "epoch": 2.4414715719063547,
1029
+ "grad_norm": 0.21181504428386688,
1030
+ "learning_rate": 1.6810380153273362e-05,
1031
+ "loss": 3.60155029296875,
1032
+ "step": 1460
1033
+ },
1034
+ {
1035
+ "epoch": 2.4581939799331103,
1036
+ "grad_norm": 0.20696775615215302,
1037
+ "learning_rate": 1.584876369002751e-05,
1038
+ "loss": 3.623727035522461,
1039
+ "step": 1470
1040
+ },
1041
+ {
1042
+ "epoch": 2.4749163879598663,
1043
+ "grad_norm": 0.20903262495994568,
1044
+ "learning_rate": 1.4913096691551077e-05,
1045
+ "loss": 3.623518371582031,
1046
+ "step": 1480
1047
+ },
1048
+ {
1049
+ "epoch": 2.491638795986622,
1050
+ "grad_norm": 0.22404134273529053,
1051
+ "learning_rate": 1.4003667686674793e-05,
1052
+ "loss": 3.6152099609375,
1053
+ "step": 1490
1054
+ },
1055
+ {
1056
+ "epoch": 2.508361204013378,
1057
+ "grad_norm": 0.2037034034729004,
1058
+ "learning_rate": 1.3120757113297777e-05,
1059
+ "loss": 3.612331771850586,
1060
+ "step": 1500
1061
+ },
1062
+ {
1063
+ "epoch": 2.5250836120401337,
1064
+ "grad_norm": 0.21633440256118774,
1065
+ "learning_rate": 1.226463723190987e-05,
1066
+ "loss": 3.601060485839844,
1067
+ "step": 1510
1068
+ },
1069
+ {
1070
+ "epoch": 2.5418060200668897,
1071
+ "grad_norm": 0.20832663774490356,
1072
+ "learning_rate": 1.1435572041635489e-05,
1073
+ "loss": 3.6438526153564452,
1074
+ "step": 1520
1075
+ },
1076
+ {
1077
+ "epoch": 2.5585284280936453,
1078
+ "grad_norm": 0.21630828082561493,
1079
+ "learning_rate": 1.0633817198824858e-05,
1080
+ "loss": 3.6141563415527345,
1081
+ "step": 1530
1082
+ },
1083
+ {
1084
+ "epoch": 2.5752508361204014,
1085
+ "grad_norm": 0.21327731013298035,
1086
+ "learning_rate": 9.859619938218222e-06,
1087
+ "loss": 3.5744644165039063,
1088
+ "step": 1540
1089
+ },
1090
+ {
1091
+ "epoch": 2.591973244147157,
1092
+ "grad_norm": 0.20320357382297516,
1093
+ "learning_rate": 9.113218996706651e-06,
1094
+ "loss": 3.6112804412841797,
1095
+ "step": 1550
1096
+ },
1097
+ {
1098
+ "epoch": 2.608695652173913,
1099
+ "grad_norm": 0.20723755657672882,
1100
+ "learning_rate": 8.394844539713587e-06,
1101
+ "loss": 3.6093166351318358,
1102
+ "step": 1560
1103
+ },
1104
+ {
1105
+ "epoch": 2.625418060200669,
1106
+ "grad_norm": 0.20720350742340088,
1107
+ "learning_rate": 7.704718090219299e-06,
1108
+ "loss": 3.6185359954833984,
1109
+ "step": 1570
1110
+ },
1111
+ {
1112
+ "epoch": 2.6421404682274248,
1113
+ "grad_norm": 0.20689421892166138,
1114
+ "learning_rate": 7.043052460450595e-06,
1115
+ "loss": 3.5807472229003907,
1116
+ "step": 1580
1117
+ },
1118
+ {
1119
+ "epoch": 2.6588628762541804,
1120
+ "grad_norm": 0.1947193443775177,
1121
+ "learning_rate": 6.410051686256524e-06,
1122
+ "loss": 3.63294677734375,
1123
+ "step": 1590
1124
+ },
1125
+ {
1126
+ "epoch": 2.6755852842809364,
1127
+ "grad_norm": 0.20829661190509796,
1128
+ "learning_rate": 5.805910964190464e-06,
1129
+ "loss": 3.5655914306640626,
1130
+ "step": 1600
1131
+ },
1132
+ {
1133
+ "epoch": 2.6923076923076925,
1134
+ "grad_norm": 0.21496719121932983,
1135
+ "learning_rate": 5.2308165913179e-06,
1136
+ "loss": 3.6056419372558595,
1137
+ "step": 1610
1138
+ },
1139
+ {
1140
+ "epoch": 2.709030100334448,
1141
+ "grad_norm": 0.21569029986858368,
1142
+ "learning_rate": 4.684945907768623e-06,
1143
+ "loss": 3.63220329284668,
1144
+ "step": 1620
1145
+ },
1146
+ {
1147
+ "epoch": 2.7257525083612038,
1148
+ "grad_norm": 0.21249784529209137,
1149
+ "learning_rate": 4.168467242050822e-06,
1150
+ "loss": 3.6161312103271483,
1151
+ "step": 1630
1152
+ },
1153
+ {
1154
+ "epoch": 2.74247491638796,
1155
+ "grad_norm": 0.19498836994171143,
1156
+ "learning_rate": 3.6815398591441676e-06,
1157
+ "loss": 3.6304805755615233,
1158
+ "step": 1640
1159
+ },
1160
+ {
1161
+ "epoch": 2.759197324414716,
1162
+ "grad_norm": 0.19185300171375275,
1163
+ "learning_rate": 3.224313911387755e-06,
1164
+ "loss": 3.610300064086914,
1165
+ "step": 1650
1166
+ },
1167
+ {
1168
+ "epoch": 2.7759197324414715,
1169
+ "grad_norm": 0.2097301483154297,
1170
+ "learning_rate": 2.79693039217801e-06,
1171
+ "loss": 3.6425819396972656,
1172
+ "step": 1660
1173
+ },
1174
+ {
1175
+ "epoch": 2.7926421404682276,
1176
+ "grad_norm": 0.2074955701828003,
1177
+ "learning_rate": 2.399521092491075e-06,
1178
+ "loss": 3.5936614990234377,
1179
+ "step": 1670
1180
+ },
1181
+ {
1182
+ "epoch": 2.809364548494983,
1183
+ "grad_norm": 0.20003236830234528,
1184
+ "learning_rate": 2.032208560242732e-06,
1185
+ "loss": 3.5973114013671874,
1186
+ "step": 1680
1187
+ },
1188
+ {
1189
+ "epoch": 2.8260869565217392,
1190
+ "grad_norm": 0.20352588593959808,
1191
+ "learning_rate": 1.695106062498708e-06,
1192
+ "loss": 3.6302867889404298,
1193
+ "step": 1690
1194
+ },
1195
+ {
1196
+ "epoch": 2.842809364548495,
1197
+ "grad_norm": 0.20454245805740356,
1198
+ "learning_rate": 1.3883175505468693e-06,
1199
+ "loss": 3.614506149291992,
1200
+ "step": 1700
1201
+ },
1202
+ {
1203
+ "epoch": 2.859531772575251,
1204
+ "grad_norm": 0.20201674103736877,
1205
+ "learning_rate": 1.11193762784203e-06,
1206
+ "loss": 3.5982948303222657,
1207
+ "step": 1710
1208
+ },
1209
+ {
1210
+ "epoch": 2.8762541806020065,
1211
+ "grad_norm": 0.20037053525447845,
1212
+ "learning_rate": 8.660515208334108e-07,
1213
+ "loss": 3.6015445709228517,
1214
+ "step": 1720
1215
+ },
1216
+ {
1217
+ "epoch": 2.8929765886287626,
1218
+ "grad_norm": 0.21147583425045013,
1219
+ "learning_rate": 6.507350526835709e-07,
1220
+ "loss": 3.5722988128662108,
1221
+ "step": 1730
1222
+ },
1223
+ {
1224
+ "epoch": 2.9096989966555182,
1225
+ "grad_norm": 0.20878112316131592,
1226
+ "learning_rate": 4.6605461988707965e-07,
1227
+ "loss": 3.6185012817382813,
1228
+ "step": 1740
1229
+ },
1230
+ {
1231
+ "epoch": 2.9264214046822743,
1232
+ "grad_norm": 0.19872544705867767,
1233
+ "learning_rate": 3.1206717179601554e-07,
1234
+ "loss": 3.6195068359375,
1235
+ "step": 1750
1236
+ },
1237
+ {
1238
+ "epoch": 2.94314381270903,
1239
+ "grad_norm": 0.21033529937267303,
1240
+ "learning_rate": 1.8882019305866972e-07,
1241
+ "loss": 3.6103542327880858,
1242
+ "step": 1760
1243
+ },
1244
+ {
1245
+ "epoch": 2.959866220735786,
1246
+ "grad_norm": 0.20153765380382538,
1247
+ "learning_rate": 9.635168897684787e-08,
1248
+ "loss": 3.585107421875,
1249
+ "step": 1770
1250
+ },
1251
+ {
1252
+ "epoch": 2.976588628762542,
1253
+ "grad_norm": 0.21875017881393433,
1254
+ "learning_rate": 3.4690173786255945e-08,
1255
+ "loss": 3.6015293121337892,
1256
+ "step": 1780
1257
+ },
1258
+ {
1259
+ "epoch": 2.9933110367892977,
1260
+ "grad_norm": 0.2030608206987381,
1261
+ "learning_rate": 3.8546618637225196e-09,
1262
+ "loss": 3.6415565490722654,
1263
+ "step": 1790
1264
  }
1265
  ],
1266
  "logging_steps": 10,
 
1275
  "should_evaluate": false,
1276
  "should_log": false,
1277
  "should_save": true,
1278
+ "should_training_stop": true
1279
  },
1280
  "attributes": {}
1281
  }
1282
  },
1283
+ "total_flos": 2.058402188770345e+18,
1284
  "train_batch_size": 42,
1285
  "trial_name": null,
1286
  "trial_params": null