Plaaasma committed on
Commit
b56aac9
·
1 Parent(s): 6b1717c

Upload 8 files

Browse files
Files changed (5) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +105 -3
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c427e293dfb675cd0055b4b8837887686d3c202f2c4b1c3c08cd0bcd69883cab
3
  size 6192502837
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c84df6dad1bbebe536721901b19df5684b5b61bd6504a7973c7784e2cd3d6906
3
  size 6192502837
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b5075518047ef7374143dc014b6c925009e3aa96d714cacbb77e66c8aae26e3f
3
  size 3134031497
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e6f1358759d56ec7dd838051a9897376d5783b670a8aaa910a055f074e80ab8
3
  size 3134031497
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cbe49cba5b4bbea74ca7ad0a69f65d2951ebd36b2fdee0ef08b9ae2994877aae
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2ced2b38eecc35ed03eeaeb5c0a49167957bb55459f4813d83c507af091fc81
3
  size 14503
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba3a5d1c69a9427a53e2ab87233696d8956a8613a03782eb9c2342a7fb49829d
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa9c592a1519044a351c3d8ea017a6bc5f46ea0bc295892e6ac3489a0ae5a94b
3
  size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.170676932948348,
5
- "global_step": 6500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -84,11 +84,113 @@
84
  "learning_rate": 4.791399229781772e-05,
85
  "loss": 0.1452,
86
  "step": 6500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  }
88
  ],
89
  "max_steps": 155800,
90
  "num_train_epochs": 100,
91
- "total_flos": 2.260838219092992e+17,
92
  "trial_name": null,
93
  "trial_params": null
94
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.624639076034649,
5
+ "global_step": 15000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
84
  "learning_rate": 4.791399229781772e-05,
85
  "loss": 0.1452,
86
  "step": 6500
87
+ },
88
+ {
89
+ "epoch": 4.49,
90
+ "learning_rate": 4.775353016688062e-05,
91
+ "loss": 0.1324,
92
+ "step": 7000
93
+ },
94
+ {
95
+ "epoch": 4.81,
96
+ "learning_rate": 4.759306803594352e-05,
97
+ "loss": 0.1362,
98
+ "step": 7500
99
+ },
100
+ {
101
+ "epoch": 5.13,
102
+ "learning_rate": 4.743260590500642e-05,
103
+ "loss": 0.1275,
104
+ "step": 8000
105
+ },
106
+ {
107
+ "epoch": 5.45,
108
+ "learning_rate": 4.727214377406932e-05,
109
+ "loss": 0.114,
110
+ "step": 8500
111
+ },
112
+ {
113
+ "epoch": 5.77,
114
+ "learning_rate": 4.711168164313222e-05,
115
+ "loss": 0.1171,
116
+ "step": 9000
117
+ },
118
+ {
119
+ "epoch": 6.1,
120
+ "learning_rate": 4.695121951219512e-05,
121
+ "loss": 0.1104,
122
+ "step": 9500
123
+ },
124
+ {
125
+ "epoch": 6.42,
126
+ "learning_rate": 4.679075738125803e-05,
127
+ "loss": 0.0965,
128
+ "step": 10000
129
+ },
130
+ {
131
+ "epoch": 6.74,
132
+ "learning_rate": 4.663029525032093e-05,
133
+ "loss": 0.0998,
134
+ "step": 10500
135
+ },
136
+ {
137
+ "epoch": 7.06,
138
+ "learning_rate": 4.646983311938383e-05,
139
+ "loss": 0.0983,
140
+ "step": 11000
141
+ },
142
+ {
143
+ "epoch": 7.38,
144
+ "learning_rate": 4.630937098844673e-05,
145
+ "loss": 0.0817,
146
+ "step": 11500
147
+ },
148
+ {
149
+ "epoch": 7.7,
150
+ "learning_rate": 4.614890885750963e-05,
151
+ "loss": 0.0854,
152
+ "step": 12000
153
+ },
154
+ {
155
+ "epoch": 8.02,
156
+ "learning_rate": 4.598844672657253e-05,
157
+ "loss": 0.0867,
158
+ "step": 12500
159
+ },
160
+ {
161
+ "epoch": 8.34,
162
+ "learning_rate": 4.582798459563543e-05,
163
+ "loss": 0.0698,
164
+ "step": 13000
165
+ },
166
+ {
167
+ "epoch": 8.66,
168
+ "learning_rate": 4.566752246469833e-05,
169
+ "loss": 0.0722,
170
+ "step": 13500
171
+ },
172
+ {
173
+ "epoch": 8.98,
174
+ "learning_rate": 4.550706033376124e-05,
175
+ "loss": 0.0752,
176
+ "step": 14000
177
+ },
178
+ {
179
+ "epoch": 9.3,
180
+ "learning_rate": 4.534659820282414e-05,
181
+ "loss": 0.0593,
182
+ "step": 14500
183
+ },
184
+ {
185
+ "epoch": 9.62,
186
+ "learning_rate": 4.518613607188704e-05,
187
+ "loss": 0.0617,
188
+ "step": 15000
189
  }
190
  ],
191
  "max_steps": 155800,
192
  "num_train_epochs": 100,
193
+ "total_flos": 5.217329001215232e+17,
194
  "trial_name": null,
195
  "trial_params": null
196
  }