SystemAdmin123 commited on
Commit
d08164f
·
verified ·
1 Parent(s): 0ece737

Training in progress, step 20, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:85b17fb7f911aac944c07f7d1e60d4aff773114a73adcd1c432996250386e07e
3
  size 4972163696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f3d812570002e67e4c524bc9167715869351666b679db2c2113d31ed4a9130f
3
  size 4972163696
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:52ffa09e7423c38be4314cbbc1311ec46c75e31ecc5a2d4f0df329edce91a7de
3
  size 2669366920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71f5715344e5f551de03351627f6738c6b66ba1ac393a2f90e551e5e4e2de1cf
3
  size 2669366920
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3370ea74be7a6019e3b178abb23d9154cad8f6ede71322ae3b44f18537ff33c8
3
  size 7762295162
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6d8e5d7ab5b6813cfdaaee212fdefc8fafe46fd7ea65d8db8ccc0bcbea79b75
3
  size 7762295162
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:02b8d06323d7782eba280757f4398dbd280b072f40187b91d87fe4f49d7e6828
3
- size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f60241cb6cb86cf9966e8cfe2248be00bce643b5808e2c3b78c9cb618eea253
3
+ size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6fe37d9f1cbca235c339a9316b5b88784b3b45f0c10ef4f8070517c1de6878f5
3
- size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e76feebe960d60536ad1ed0bcaee2e12a3f8432f33b7ee3b0cae559b12130c0
3
+ size 15024
last-checkpoint/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f5988c600823ef881ed3900c9909420e69870efab70abf3dca0673a3c88b057
3
+ size 15024
last-checkpoint/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7090a3c6759641db81e3ee589636615551bb1b7ce0948f2fd4ab7d7beb35de9c
3
+ size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f07ce69a2abeb161a8250a6ab5003f92f86de92b90dabd232da31b8c613fe6b7
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3d7496cd5cad5fcd343f51f06f864ca525a833da3ba71e9c4d90915510423ac
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,199 +1,49 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.8867924528301887,
5
- "eval_steps": 50,
6
- "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.009433962264150943,
13
- "eval_loss": 1.9632965326309204,
14
- "eval_runtime": 38.8614,
15
- "eval_samples_per_second": 38.624,
16
- "eval_steps_per_second": 4.838,
17
  "step": 1
18
  },
19
  {
20
- "epoch": 0.09433962264150944,
21
- "grad_norm": 1.8203125,
22
- "learning_rate": 3.0769230769230774e-05,
23
- "loss": 1.7057,
24
  "step": 10
25
  },
26
  {
27
- "epoch": 0.18867924528301888,
28
- "grad_norm": 1.3515625,
29
- "learning_rate": 6.153846153846155e-05,
30
- "loss": 1.6334,
31
  "step": 20
32
  },
33
- {
34
- "epoch": 0.2830188679245283,
35
- "grad_norm": 1.84375,
36
- "learning_rate": 9.230769230769232e-05,
37
- "loss": 1.5965,
38
- "step": 30
39
- },
40
  {
41
  "epoch": 0.37735849056603776,
42
- "grad_norm": 1.34375,
43
- "learning_rate": 0.0001230769230769231,
44
- "loss": 1.6454,
45
- "step": 40
46
- },
47
- {
48
- "epoch": 0.4716981132075472,
49
- "grad_norm": 1.375,
50
- "learning_rate": 0.00015384615384615385,
51
- "loss": 1.8336,
52
- "step": 50
53
- },
54
- {
55
- "epoch": 0.4716981132075472,
56
- "eval_loss": 1.5319143533706665,
57
- "eval_runtime": 37.268,
58
- "eval_samples_per_second": 40.276,
59
- "eval_steps_per_second": 5.045,
60
- "step": 50
61
- },
62
- {
63
- "epoch": 0.5660377358490566,
64
- "grad_norm": 1.390625,
65
- "learning_rate": 0.00018461538461538463,
66
- "loss": 1.6412,
67
- "step": 60
68
- },
69
- {
70
- "epoch": 0.660377358490566,
71
- "grad_norm": 1.1796875,
72
- "learning_rate": 0.0001999919114627769,
73
- "loss": 1.6797,
74
- "step": 70
75
- },
76
- {
77
- "epoch": 0.7547169811320755,
78
- "grad_norm": 0.94140625,
79
- "learning_rate": 0.00019992721101571236,
80
- "loss": 1.7777,
81
- "step": 80
82
- },
83
- {
84
- "epoch": 0.8490566037735849,
85
- "grad_norm": 1.1640625,
86
- "learning_rate": 0.00019979785198644806,
87
- "loss": 1.7622,
88
- "step": 90
89
- },
90
- {
91
- "epoch": 0.9433962264150944,
92
- "grad_norm": 1.5078125,
93
- "learning_rate": 0.00019960391807762463,
94
- "loss": 1.7494,
95
- "step": 100
96
- },
97
- {
98
- "epoch": 0.9433962264150944,
99
- "eval_loss": 1.6423826217651367,
100
- "eval_runtime": 38.0472,
101
- "eval_samples_per_second": 39.451,
102
- "eval_steps_per_second": 4.941,
103
- "step": 100
104
- },
105
- {
106
- "epoch": 1.0377358490566038,
107
- "grad_norm": 1.6875,
108
- "learning_rate": 0.00019934553477549794,
109
- "loss": 1.5189,
110
- "step": 110
111
- },
112
- {
113
- "epoch": 1.1320754716981132,
114
- "grad_norm": 1.21875,
115
- "learning_rate": 0.0001990228692687429,
116
- "loss": 1.117,
117
- "step": 120
118
- },
119
- {
120
- "epoch": 1.2264150943396226,
121
- "grad_norm": 1.03125,
122
- "learning_rate": 0.00019863613034027224,
123
- "loss": 1.3558,
124
- "step": 130
125
- },
126
- {
127
- "epoch": 1.320754716981132,
128
- "grad_norm": 1.1484375,
129
- "learning_rate": 0.00019818556823214268,
130
- "loss": 1.1093,
131
- "step": 140
132
- },
133
- {
134
- "epoch": 1.4150943396226414,
135
- "grad_norm": 1.109375,
136
- "learning_rate": 0.00019767147448363366,
137
- "loss": 1.0954,
138
- "step": 150
139
- },
140
- {
141
- "epoch": 1.4150943396226414,
142
- "eval_loss": 1.7661364078521729,
143
- "eval_runtime": 37.4546,
144
- "eval_samples_per_second": 40.075,
145
- "eval_steps_per_second": 5.019,
146
- "step": 150
147
- },
148
- {
149
- "epoch": 1.509433962264151,
150
- "grad_norm": 1.109375,
151
- "learning_rate": 0.0001970941817426052,
152
- "loss": 1.1547,
153
- "step": 160
154
- },
155
- {
156
- "epoch": 1.6037735849056602,
157
- "grad_norm": 1.0625,
158
- "learning_rate": 0.00019645406355025565,
159
- "loss": 1.1128,
160
- "step": 170
161
- },
162
- {
163
- "epoch": 1.6981132075471699,
164
- "grad_norm": 1.359375,
165
- "learning_rate": 0.0001957515340994193,
166
- "loss": 1.1186,
167
- "step": 180
168
- },
169
- {
170
- "epoch": 1.7924528301886793,
171
- "grad_norm": 1.15625,
172
- "learning_rate": 0.00019498704796656018,
173
- "loss": 1.1258,
174
- "step": 190
175
- },
176
- {
177
- "epoch": 1.8867924528301887,
178
- "grad_norm": 1.046875,
179
- "learning_rate": 0.00019416109981763526,
180
- "loss": 1.3996,
181
- "step": 200
182
- },
183
- {
184
- "epoch": 1.8867924528301887,
185
- "eval_loss": 1.7602568864822388,
186
- "eval_runtime": 36.8589,
187
- "eval_samples_per_second": 40.723,
188
- "eval_steps_per_second": 5.101,
189
- "step": 200
190
  }
191
  ],
192
  "logging_steps": 10,
193
- "max_steps": 1300,
194
  "num_input_tokens_seen": 0,
195
- "num_train_epochs": 13,
196
- "save_steps": 50,
197
  "stateful_callbacks": {
198
  "TrainerControl": {
199
  "args": {
@@ -206,7 +56,7 @@
206
  "attributes": {}
207
  }
208
  },
209
- "total_flos": 7.313993787611546e+16,
210
  "train_batch_size": 4,
211
  "trial_name": null,
212
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.37735849056603776,
5
+ "eval_steps": 20,
6
+ "global_step": 20,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.018867924528301886,
13
+ "eval_loss": 1.9572468996047974,
14
+ "eval_runtime": 19.4145,
15
+ "eval_samples_per_second": 77.313,
16
+ "eval_steps_per_second": 4.842,
17
  "step": 1
18
  },
19
  {
20
+ "epoch": 0.18867924528301888,
21
+ "grad_norm": 1.59375,
22
+ "learning_rate": 6.666666666666667e-05,
23
+ "loss": 1.6912,
24
  "step": 10
25
  },
26
  {
27
+ "epoch": 0.37735849056603776,
28
+ "grad_norm": 1.1015625,
29
+ "learning_rate": 0.00013333333333333334,
30
+ "loss": 1.6237,
31
  "step": 20
32
  },
 
 
 
 
 
 
 
33
  {
34
  "epoch": 0.37735849056603776,
35
+ "eval_loss": 1.5020643472671509,
36
+ "eval_runtime": 18.3981,
37
+ "eval_samples_per_second": 81.584,
38
+ "eval_steps_per_second": 5.109,
39
+ "step": 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  }
41
  ],
42
  "logging_steps": 10,
43
+ "max_steps": 600,
44
  "num_input_tokens_seen": 0,
45
+ "num_train_epochs": 12,
46
+ "save_steps": 20,
47
  "stateful_callbacks": {
48
  "TrainerControl": {
49
  "args": {
 
56
  "attributes": {}
57
  }
58
  },
59
+ "total_flos": 1.463713580187648e+16,
60
  "train_batch_size": 4,
61
  "trial_name": null,
62
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:899fb22d8728c672dc9aaedc431359fdbdbfbd5b7a9873d1f61a5375b7b7faef
3
  size 7032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6fc8e85610e9c1d7355072f44fd8ceaef90645fb06533af173045356dc826ea
3
  size 7032