Abdullah104 committed on
Commit
0a42f8f
·
verified ·
1 Parent(s): b4d3bd4

Training in progress, step 1000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5dac5437f6ae84224977f75f9084512e2f5a204352865d41726f02a807c53cd1
3
  size 1315238776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73ae4eb11ccd76a846a2f705bf4d4ff76860942937dceb88845f097d06d43b4d
3
  size 1315238776
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f2d7876b2f76fb6a691661f937b240671a73c43c0ffaac97f498978f741b277
3
  size 2630727050
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5a174fb5366b53f40cfe85c697b0f93cae31a6d5b91f246c623d1cd21969a79
3
  size 2630727050
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:834cd886e0eeedcea906fed8c169bdb850f647da76f2202362f0874098e69d0e
3
- size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8a536803ce62f22b8221a8dd984affe7a0f8f81305e773e07a8fafbee4b6869
3
+ size 14372
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7aade0a5cf98168c48557061a263a0ce94127833f42effbb1795ad03b5bd88a5
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa5974852ecb19394b20e36e43d8a03c1cb27cf510d3afdab7c91e54ecd54439
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.723589001447178,
6
  "eval_steps": 200,
7
- "global_step": 500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -94,6 +94,100 @@
94
  "learning_rate": 0.00029939999999999996,
95
  "loss": 624.1802,
96
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  }
98
  ],
99
  "logging_steps": 50,
@@ -113,7 +207,7 @@
113
  "attributes": {}
114
  }
115
  },
116
- "total_flos": 3.82481936005718e+18,
117
  "train_batch_size": 16,
118
  "trial_name": null,
119
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.447178002894356,
6
  "eval_steps": 200,
7
+ "global_step": 1000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
94
  "learning_rate": 0.00029939999999999996,
95
  "loss": 624.1802,
96
  "step": 500
97
+ },
98
+ {
99
+ "epoch": 0.7959479015918958,
100
+ "grad_norm": 5244923.0,
101
+ "learning_rate": 0.0002977067082683307,
102
+ "loss": 617.4588,
103
+ "step": 550
104
+ },
105
+ {
106
+ "epoch": 0.8683068017366136,
107
+ "grad_norm": 5422852.5,
108
+ "learning_rate": 0.00029536661466458657,
109
+ "loss": 610.4225,
110
+ "step": 600
111
+ },
112
+ {
113
+ "epoch": 0.8683068017366136,
114
+ "eval_loss": 599.3718872070312,
115
+ "eval_runtime": 240.859,
116
+ "eval_samples_per_second": 10.197,
117
+ "eval_steps_per_second": 0.639,
118
+ "step": 600
119
+ },
120
+ {
121
+ "epoch": 0.9406657018813314,
122
+ "grad_norm": 15222385.0,
123
+ "learning_rate": 0.0002930265210608424,
124
+ "loss": 605.7685,
125
+ "step": 650
126
+ },
127
+ {
128
+ "epoch": 1.0130246020260492,
129
+ "grad_norm": 10619589.0,
130
+ "learning_rate": 0.00029068642745709827,
131
+ "loss": 593.7761,
132
+ "step": 700
133
+ },
134
+ {
135
+ "epoch": 1.085383502170767,
136
+ "grad_norm": 5425615.5,
137
+ "learning_rate": 0.0002883463338533541,
138
+ "loss": 581.7033,
139
+ "step": 750
140
+ },
141
+ {
142
+ "epoch": 1.1577424023154848,
143
+ "grad_norm": 7700796.0,
144
+ "learning_rate": 0.00028600624024961,
145
+ "loss": 575.6007,
146
+ "step": 800
147
+ },
148
+ {
149
+ "epoch": 1.1577424023154848,
150
+ "eval_loss": 552.0042114257812,
151
+ "eval_runtime": 239.4632,
152
+ "eval_samples_per_second": 10.256,
153
+ "eval_steps_per_second": 0.643,
154
+ "step": 800
155
+ },
156
+ {
157
+ "epoch": 1.2301013024602026,
158
+ "grad_norm": 5580448.0,
159
+ "learning_rate": 0.0002836661466458658,
160
+ "loss": 557.8052,
161
+ "step": 850
162
+ },
163
+ {
164
+ "epoch": 1.3024602026049203,
165
+ "grad_norm": 5330764.0,
166
+ "learning_rate": 0.0002813260530421217,
167
+ "loss": 522.3477,
168
+ "step": 900
169
+ },
170
+ {
171
+ "epoch": 1.3748191027496381,
172
+ "grad_norm": 7907119.5,
173
+ "learning_rate": 0.00027898595943837753,
174
+ "loss": 499.9348,
175
+ "step": 950
176
+ },
177
+ {
178
+ "epoch": 1.447178002894356,
179
+ "grad_norm": 8895919.0,
180
+ "learning_rate": 0.0002766458658346334,
181
+ "loss": 470.3078,
182
+ "step": 1000
183
+ },
184
+ {
185
+ "epoch": 1.447178002894356,
186
+ "eval_loss": 434.424560546875,
187
+ "eval_runtime": 239.2094,
188
+ "eval_samples_per_second": 10.267,
189
+ "eval_steps_per_second": 0.644,
190
+ "step": 1000
191
  }
192
  ],
193
  "logging_steps": 50,
 
207
  "attributes": {}
208
  }
209
  },
210
+ "total_flos": 7.654320361699615e+18,
211
  "train_batch_size": 16,
212
  "trial_name": null,
213
  "trial_params": null