houck2040 committed on
Commit
38a6a61
·
1 Parent(s): b1730f3

Upload 8 files

Browse files
Files changed (5) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +83 -3
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a226fcc947eec5a7ecf7755fe07a0e2398ca9bfd23c98493eb22879f4c702c1d
3
  size 655342981
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae132a4a670d70c2365439b1b32f73578bbe513edd2e0299f01ea1302af81f0c
3
  size 655342981
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3f5a6d7915f9dee44fe610451a7f8d7525d5e60d700f17af21240683111649c
3
  size 333968953
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95508259f48f41c3e02cf1556a1f27bbe48a513f6d862c8a7b3910ae72acee38
3
  size 333968953
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e758cb98894568f912ef81c5d40385e49e7d85140a2098a83844399fed337b4
3
  size 13553
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:708c0633fc2806448a36d54172c18b6fbe465f892fb67e669ae170a4b5034cdf
3
  size 13553
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:82c10e76378cf039e5e8914bbf970cc36e6d097abfa79ea4180870a8d3b18ccc
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bf45a7cece26021363c348414b2495d0d387d9868ac402ae5067b6d6efc43b5
3
  size 627
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.6775005242189138,
5
- "global_step": 8000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -110,11 +110,91 @@
110
  "learning_rate": 8.816663171873909e-06,
111
  "loss": 1.2822,
112
  "step": 8000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  }
114
  ],
115
  "max_steps": 14307,
116
  "num_train_epochs": 3,
117
- "total_flos": 2090210694266880.0,
118
  "trial_name": null,
119
  "trial_params": null
120
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.935625917383099,
5
+ "global_step": 14000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
110
  "learning_rate": 8.816663171873909e-06,
111
  "loss": 1.2822,
112
  "step": 8000
113
+ },
114
+ {
115
+ "epoch": 1.78,
116
+ "learning_rate": 8.117704620116029e-06,
117
+ "loss": 1.2721,
118
+ "step": 8500
119
+ },
120
+ {
121
+ "epoch": 1.89,
122
+ "learning_rate": 7.418746068358147e-06,
123
+ "loss": 1.2596,
124
+ "step": 9000
125
+ },
126
+ {
127
+ "epoch": 1.99,
128
+ "learning_rate": 6.719787516600266e-06,
129
+ "loss": 1.2635,
130
+ "step": 9500
131
+ },
132
+ {
133
+ "epoch": 2.0,
134
+ "eval_loss": 1.3475877046585083,
135
+ "eval_runtime": 2879.1327,
136
+ "eval_samples_per_second": 12.22,
137
+ "eval_steps_per_second": 1.528,
138
+ "step": 9538
139
+ },
140
+ {
141
+ "epoch": 2.1,
142
+ "learning_rate": 6.020828964842386e-06,
143
+ "loss": 1.2592,
144
+ "step": 10000
145
+ },
146
+ {
147
+ "epoch": 2.2,
148
+ "learning_rate": 5.321870413084504e-06,
149
+ "loss": 1.2459,
150
+ "step": 10500
151
+ },
152
+ {
153
+ "epoch": 2.31,
154
+ "learning_rate": 4.6229118613266235e-06,
155
+ "loss": 1.2477,
156
+ "step": 11000
157
+ },
158
+ {
159
+ "epoch": 2.41,
160
+ "learning_rate": 3.923953309568743e-06,
161
+ "loss": 1.249,
162
+ "step": 11500
163
+ },
164
+ {
165
+ "epoch": 2.52,
166
+ "learning_rate": 3.224994757810862e-06,
167
+ "loss": 1.2376,
168
+ "step": 12000
169
+ },
170
+ {
171
+ "epoch": 2.62,
172
+ "learning_rate": 2.5260362060529814e-06,
173
+ "loss": 1.238,
174
+ "step": 12500
175
+ },
176
+ {
177
+ "epoch": 2.73,
178
+ "learning_rate": 1.8270776542951004e-06,
179
+ "loss": 1.2409,
180
+ "step": 13000
181
+ },
182
+ {
183
+ "epoch": 2.83,
184
+ "learning_rate": 1.1281191025372195e-06,
185
+ "loss": 1.2365,
186
+ "step": 13500
187
+ },
188
+ {
189
+ "epoch": 2.94,
190
+ "learning_rate": 4.291605507793388e-07,
191
+ "loss": 1.237,
192
+ "step": 14000
193
  }
194
  ],
195
  "max_steps": 14307,
196
  "num_train_epochs": 3,
197
+ "total_flos": 3657827887349760.0,
198
  "trial_name": null,
199
  "trial_params": null
200
  }