Yuvrajg2107 commited on
Commit
f376907
·
verified ·
1 Parent(s): b78636a

Training in progress, step 1000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7993fcf68be828d73fa2ee6b4a5dc32e8cace8dc797569a99953ce1a3ae5d83
3
  size 498612824
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42690867e778f4db141a39c02caa1a4a181fb1838de64648f76992024bb090d6
3
  size 498612824
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b02915a79788add5d714c8aaeec8c9d2926b4de85c51dc0ca45b65e8d73f63ed
3
  size 997345931
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cee156cc7324a335bd0cf2e1fa7a6de4b52ebb3d6ee59259f88882100a16a0a
3
  size 997345931
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3106ce26d8876d17e74b10716d47cadecb00ff3a2233d08bdb01a7952809d554
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41afff64258c100464839a9401c63f65038ccc9dbd2dd2c5f61f68f3b93f2b79
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:00b4eb0c27b559cfd649f7c80a6d3394a55880d92b3ee2ca98f71b67e7b2333a
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14ae2a2128444abab378aa06c09a61a84665f758fcc19fc46f5789b0bc1b5665
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe28f7d29e7f98f2600c3c6c7c0177a5ef0e8009ada6a2b943e31e01e5a4324e
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed290419ddbbaf4c4f46d92ab98980c1b33e3e9b93e34d7bc94d8d13097423b4
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -1,220 +1,36 @@
1
  {
2
- "best_global_step": 9000,
3
- "best_metric": 0.035129010677337646,
4
- "best_model_checkpoint": "./training_output/checkpoint-9000",
5
- "epoch": 0.45,
6
  "eval_steps": 1000,
7
- "global_step": 9000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 0.025,
14
- "grad_norm": 0.034924205392599106,
15
  "learning_rate": 1.9501000000000002e-05,
16
- "loss": 0.1359,
17
  "step": 500
18
  },
19
  {
20
  "epoch": 0.05,
21
- "grad_norm": 11.011366844177246,
22
  "learning_rate": 1.9001e-05,
23
- "loss": 0.1002,
24
  "step": 1000
25
  },
26
  {
27
  "epoch": 0.05,
28
- "eval_accuracy": 0.91145,
29
- "eval_loss": 0.12729284167289734,
30
- "eval_runtime": 363.6281,
31
- "eval_samples_per_second": 55.001,
32
- "eval_steps_per_second": 3.438,
33
  "step": 1000
34
- },
35
- {
36
- "epoch": 0.075,
37
- "grad_norm": 0.017511729151010513,
38
- "learning_rate": 1.8501e-05,
39
- "loss": 0.0956,
40
- "step": 1500
41
- },
42
- {
43
- "epoch": 0.1,
44
- "grad_norm": 0.022739391773939133,
45
- "learning_rate": 1.8001000000000003e-05,
46
- "loss": 0.0721,
47
- "step": 2000
48
- },
49
- {
50
- "epoch": 0.1,
51
- "eval_accuracy": 0.88555,
52
- "eval_loss": 0.21215863525867462,
53
- "eval_runtime": 363.0614,
54
- "eval_samples_per_second": 55.087,
55
- "eval_steps_per_second": 3.443,
56
- "step": 2000
57
- },
58
- {
59
- "epoch": 0.125,
60
- "grad_norm": 0.02591518871486187,
61
- "learning_rate": 1.7501e-05,
62
- "loss": 0.0668,
63
- "step": 2500
64
- },
65
- {
66
- "epoch": 0.15,
67
- "grad_norm": 0.07041072845458984,
68
- "learning_rate": 1.7001000000000002e-05,
69
- "loss": 0.0606,
70
- "step": 3000
71
- },
72
- {
73
- "epoch": 0.15,
74
- "eval_accuracy": 0.9291,
75
- "eval_loss": 0.12173645943403244,
76
- "eval_runtime": 362.8269,
77
- "eval_samples_per_second": 55.123,
78
- "eval_steps_per_second": 3.445,
79
- "step": 3000
80
- },
81
- {
82
- "epoch": 0.175,
83
- "grad_norm": 37.91666793823242,
84
- "learning_rate": 1.6501e-05,
85
- "loss": 0.0647,
86
- "step": 3500
87
- },
88
- {
89
- "epoch": 0.2,
90
- "grad_norm": 0.18077421188354492,
91
- "learning_rate": 1.6001e-05,
92
- "loss": 0.0587,
93
- "step": 4000
94
- },
95
- {
96
- "epoch": 0.2,
97
- "eval_accuracy": 0.9575,
98
- "eval_loss": 0.04343162104487419,
99
- "eval_runtime": 361.6433,
100
- "eval_samples_per_second": 55.303,
101
- "eval_steps_per_second": 3.456,
102
- "step": 4000
103
- },
104
- {
105
- "epoch": 0.225,
106
- "grad_norm": 0.002397062722593546,
107
- "learning_rate": 1.5501000000000003e-05,
108
- "loss": 0.0491,
109
- "step": 4500
110
- },
111
- {
112
- "epoch": 0.25,
113
- "grad_norm": 0.02433067187666893,
114
- "learning_rate": 1.5001000000000001e-05,
115
- "loss": 0.0511,
116
- "step": 5000
117
- },
118
- {
119
- "epoch": 0.25,
120
- "eval_accuracy": 0.9532,
121
- "eval_loss": 0.07725899666547775,
122
- "eval_runtime": 363.254,
123
- "eval_samples_per_second": 55.058,
124
- "eval_steps_per_second": 3.441,
125
- "step": 5000
126
- },
127
- {
128
- "epoch": 0.275,
129
- "grad_norm": 0.2668807804584503,
130
- "learning_rate": 1.4501e-05,
131
- "loss": 0.0442,
132
- "step": 5500
133
- },
134
- {
135
- "epoch": 0.3,
136
- "grad_norm": 0.0010613143676891923,
137
- "learning_rate": 1.4001e-05,
138
- "loss": 0.042,
139
- "step": 6000
140
- },
141
- {
142
- "epoch": 0.3,
143
- "eval_accuracy": 0.93495,
144
- "eval_loss": 0.15857931971549988,
145
- "eval_runtime": 363.6803,
146
- "eval_samples_per_second": 54.993,
147
- "eval_steps_per_second": 3.437,
148
- "step": 6000
149
- },
150
- {
151
- "epoch": 0.325,
152
- "grad_norm": 0.002066701650619507,
153
- "learning_rate": 1.3501000000000002e-05,
154
- "loss": 0.039,
155
- "step": 6500
156
- },
157
- {
158
- "epoch": 0.35,
159
- "grad_norm": 0.06389991194009781,
160
- "learning_rate": 1.3001000000000001e-05,
161
- "loss": 0.0416,
162
- "step": 7000
163
- },
164
- {
165
- "epoch": 0.35,
166
- "eval_accuracy": 0.9366,
167
- "eval_loss": 0.11077161878347397,
168
- "eval_runtime": 363.1946,
169
- "eval_samples_per_second": 55.067,
170
- "eval_steps_per_second": 3.442,
171
- "step": 7000
172
- },
173
- {
174
- "epoch": 0.375,
175
- "grad_norm": 0.01402213703840971,
176
- "learning_rate": 1.2501000000000001e-05,
177
- "loss": 0.0418,
178
- "step": 7500
179
- },
180
- {
181
- "epoch": 0.4,
182
- "grad_norm": 0.30465877056121826,
183
- "learning_rate": 1.2001e-05,
184
- "loss": 0.0428,
185
- "step": 8000
186
- },
187
- {
188
- "epoch": 0.4,
189
- "eval_accuracy": 0.92455,
190
- "eval_loss": 0.11909274756908417,
191
- "eval_runtime": 362.4651,
192
- "eval_samples_per_second": 55.178,
193
- "eval_steps_per_second": 3.449,
194
- "step": 8000
195
- },
196
- {
197
- "epoch": 0.425,
198
- "grad_norm": 0.002914857817813754,
199
- "learning_rate": 1.1501e-05,
200
- "loss": 0.0383,
201
- "step": 8500
202
- },
203
- {
204
- "epoch": 0.45,
205
- "grad_norm": 40.46672821044922,
206
- "learning_rate": 1.1001000000000002e-05,
207
- "loss": 0.0387,
208
- "step": 9000
209
- },
210
- {
211
- "epoch": 0.45,
212
- "eval_accuracy": 0.9744,
213
- "eval_loss": 0.035129010677337646,
214
- "eval_runtime": 362.2389,
215
- "eval_samples_per_second": 55.212,
216
- "eval_steps_per_second": 3.451,
217
- "step": 9000
218
  }
219
  ],
220
  "logging_steps": 500,
@@ -234,7 +50,7 @@
234
  "attributes": {}
235
  }
236
  },
237
- "total_flos": 9471997992960000.0,
238
  "train_batch_size": 2,
239
  "trial_name": null,
240
  "trial_params": null
 
1
  {
2
+ "best_global_step": 1000,
3
+ "best_metric": 0.10802757740020752,
4
+ "best_model_checkpoint": "./training_output/checkpoint-1000",
5
+ "epoch": 0.05,
6
  "eval_steps": 1000,
7
+ "global_step": 1000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 0.025,
14
+ "grad_norm": 0.8834348917007446,
15
  "learning_rate": 1.9501000000000002e-05,
16
+ "loss": 0.1437,
17
  "step": 500
18
  },
19
  {
20
  "epoch": 0.05,
21
+ "grad_norm": 9.513919830322266,
22
  "learning_rate": 1.9001e-05,
23
+ "loss": 0.1085,
24
  "step": 1000
25
  },
26
  {
27
  "epoch": 0.05,
28
+ "eval_accuracy": 0.91835,
29
+ "eval_loss": 0.10802757740020752,
30
+ "eval_runtime": 381.1922,
31
+ "eval_samples_per_second": 52.467,
32
+ "eval_steps_per_second": 3.279,
33
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  }
35
  ],
36
  "logging_steps": 500,
 
50
  "attributes": {}
51
  }
52
  },
53
+ "total_flos": 1052444221440000.0,
54
  "train_batch_size": 2,
55
  "trial_name": null,
56
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:058d9b420a19f886dad1393fb6d59bbf96d0b7d2e42eee2aba9c86caa3a2da81
3
  size 5841
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4672bef08597eaac4166af409003080f023f24990f8f58c185b2b990119dc93e
3
  size 5841