krishna-exe commited on
Commit
01dba73
·
verified ·
1 Parent(s): 776135f

Training in progress, epoch 0

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 4.938271604938271,
3
- "eval_accuracy": 0.9616724738675958,
4
- "eval_loss": 0.11273417621850967,
5
- "eval_runtime": 2.4706,
6
- "eval_samples_per_second": 116.166,
7
- "eval_steps_per_second": 7.286,
8
  "total_flos": 3.1727957353537536e+17,
9
- "train_loss": 0.06341908916831017,
10
- "train_runtime": 245.9692,
11
- "train_samples_per_second": 52.507,
12
- "train_steps_per_second": 0.813
13
  }
 
1
  {
2
  "epoch": 4.938271604938271,
3
+ "eval_accuracy": 0.9156020131629887,
4
+ "eval_loss": 0.22556395828723907,
5
+ "eval_runtime": 22.7546,
6
+ "eval_samples_per_second": 113.516,
7
+ "eval_steps_per_second": 3.56,
8
  "total_flos": 3.1727957353537536e+17,
9
+ "train_loss": 0.4883076953887939,
10
+ "train_runtime": 265.3223,
11
+ "train_samples_per_second": 48.677,
12
+ "train_steps_per_second": 0.377
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.938271604938271,
3
- "eval_accuracy": 0.9616724738675958,
4
- "eval_loss": 0.11273417621850967,
5
- "eval_runtime": 2.4706,
6
- "eval_samples_per_second": 116.166,
7
- "eval_steps_per_second": 7.286
8
  }
 
1
  {
2
  "epoch": 4.938271604938271,
3
+ "eval_accuracy": 0.9372822299651568,
4
+ "eval_loss": 0.1715514212846756,
5
+ "eval_runtime": 2.7579,
6
+ "eval_samples_per_second": 104.066,
7
+ "eval_steps_per_second": 3.263
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:035707ba7def84500aaa8a6501235bc2da59c7aa8f09dcb90e5701c17922b7e4
3
  size 110348984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c78aab4276c550f24351312fae389bc05b9ce88326f8ed0162336967cb32961e
3
  size 110348984
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.938271604938271,
3
- "total_flos": 3.1727957353537536e+17,
4
- "train_loss": 0.06341908916831017,
5
- "train_runtime": 245.9692,
6
- "train_samples_per_second": 52.507,
7
- "train_steps_per_second": 0.813
8
  }
 
1
  {
2
  "epoch": 4.938271604938271,
3
+ "eval_accuracy": 0.9156020131629887,
4
+ "eval_loss": 0.22556395828723907,
5
+ "eval_runtime": 22.7546,
6
+ "eval_samples_per_second": 113.516,
7
+ "eval_steps_per_second": 3.56
8
  }
trainer_state.json CHANGED
@@ -1,210 +1,158 @@
1
  {
2
- "best_metric": 0.9651567944250871,
3
- "best_model_checkpoint": "brain-tumor-classification/checkpoint-200",
4
  "epoch": 4.938271604938271,
5
  "eval_steps": 500,
6
- "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
- {
12
- "epoch": 0.24691358024691357,
13
- "grad_norm": 9.583415985107422,
14
- "learning_rate": 2.5e-05,
15
- "loss": 0.0798,
16
- "step": 10
17
- },
18
  {
19
  "epoch": 0.49382716049382713,
20
- "grad_norm": 4.763514518737793,
21
  "learning_rate": 5e-05,
22
- "loss": 0.0516,
23
- "step": 20
24
- },
25
- {
26
- "epoch": 0.7407407407407407,
27
- "grad_norm": 3.712924003601074,
28
- "learning_rate": 4.722222222222222e-05,
29
- "loss": 0.0708,
30
- "step": 30
31
  },
32
  {
33
  "epoch": 0.9876543209876543,
34
- "grad_norm": 3.5341458320617676,
35
  "learning_rate": 4.4444444444444447e-05,
36
- "loss": 0.0628,
37
- "step": 40
38
  },
39
  {
40
  "epoch": 0.9876543209876543,
41
- "eval_accuracy": 0.9512195121951219,
42
- "eval_loss": 0.14935272932052612,
43
- "eval_runtime": 2.3845,
44
- "eval_samples_per_second": 120.36,
45
- "eval_steps_per_second": 7.549,
46
- "step": 40
47
- },
48
- {
49
- "epoch": 1.2345679012345678,
50
- "grad_norm": 2.3918871879577637,
51
- "learning_rate": 4.166666666666667e-05,
52
- "loss": 0.0562,
53
- "step": 50
54
  },
55
  {
56
  "epoch": 1.4814814814814814,
57
- "grad_norm": 7.054160118103027,
58
  "learning_rate": 3.888888888888889e-05,
59
- "loss": 0.0587,
60
- "step": 60
61
- },
62
- {
63
- "epoch": 1.7283950617283952,
64
- "grad_norm": 16.58973503112793,
65
- "learning_rate": 3.611111111111111e-05,
66
- "loss": 0.0679,
67
- "step": 70
68
  },
69
  {
70
  "epoch": 1.9753086419753085,
71
- "grad_norm": 11.970051765441895,
72
  "learning_rate": 3.3333333333333335e-05,
73
- "loss": 0.0499,
74
- "step": 80
75
- },
76
- {
77
- "epoch": 2.0,
78
- "eval_accuracy": 0.9372822299651568,
79
- "eval_loss": 0.18301770091056824,
80
- "eval_runtime": 2.3848,
81
- "eval_samples_per_second": 120.346,
82
- "eval_steps_per_second": 7.548,
83
- "step": 81
84
  },
85
  {
86
- "epoch": 2.2222222222222223,
87
- "grad_norm": 6.739134788513184,
88
- "learning_rate": 3.055555555555556e-05,
89
- "loss": 0.0352,
90
- "step": 90
 
 
91
  },
92
  {
93
  "epoch": 2.4691358024691357,
94
- "grad_norm": 23.700721740722656,
95
  "learning_rate": 2.777777777777778e-05,
96
- "loss": 0.1,
97
- "step": 100
98
- },
99
- {
100
- "epoch": 2.7160493827160495,
101
- "grad_norm": 6.7289581298828125,
102
- "learning_rate": 2.5e-05,
103
- "loss": 0.0504,
104
- "step": 110
105
  },
106
  {
107
  "epoch": 2.962962962962963,
108
- "grad_norm": 11.606633186340332,
109
  "learning_rate": 2.2222222222222223e-05,
110
- "loss": 0.0697,
111
- "step": 120
112
- },
113
- {
114
- "epoch": 2.9876543209876543,
115
- "eval_accuracy": 0.9547038327526133,
116
- "eval_loss": 0.10937974601984024,
117
- "eval_runtime": 2.9179,
118
- "eval_samples_per_second": 98.36,
119
- "eval_steps_per_second": 6.169,
120
- "step": 121
121
  },
122
  {
123
- "epoch": 3.2098765432098766,
124
- "grad_norm": 17.90212059020996,
125
- "learning_rate": 1.9444444444444445e-05,
126
- "loss": 0.0546,
127
- "step": 130
 
 
128
  },
129
  {
130
  "epoch": 3.45679012345679,
131
- "grad_norm": 8.192621231079102,
132
  "learning_rate": 1.6666666666666667e-05,
133
- "loss": 0.0601,
134
- "step": 140
135
- },
136
- {
137
- "epoch": 3.7037037037037037,
138
- "grad_norm": 3.0072553157806396,
139
- "learning_rate": 1.388888888888889e-05,
140
- "loss": 0.07,
141
- "step": 150
142
  },
143
  {
144
  "epoch": 3.950617283950617,
145
- "grad_norm": 5.344573974609375,
146
  "learning_rate": 1.1111111111111112e-05,
147
- "loss": 0.0646,
148
- "step": 160
149
  },
150
  {
151
  "epoch": 4.0,
152
- "eval_accuracy": 0.9581881533101045,
153
- "eval_loss": 0.13176245987415314,
154
- "eval_runtime": 2.8306,
155
- "eval_samples_per_second": 101.391,
156
- "eval_steps_per_second": 6.359,
157
- "step": 162
158
- },
159
- {
160
- "epoch": 4.197530864197531,
161
- "grad_norm": 12.885249137878418,
162
- "learning_rate": 8.333333333333334e-06,
163
- "loss": 0.0515,
164
- "step": 170
165
  },
166
  {
167
  "epoch": 4.444444444444445,
168
- "grad_norm": 13.122868537902832,
169
  "learning_rate": 5.555555555555556e-06,
170
- "loss": 0.0784,
171
- "step": 180
172
- },
173
- {
174
- "epoch": 4.6913580246913575,
175
- "grad_norm": 8.814871788024902,
176
- "learning_rate": 2.777777777777778e-06,
177
- "loss": 0.088,
178
- "step": 190
179
  },
180
  {
181
  "epoch": 4.938271604938271,
182
- "grad_norm": 1.9182705879211426,
183
  "learning_rate": 0.0,
184
- "loss": 0.0481,
185
- "step": 200
186
  },
187
  {
188
  "epoch": 4.938271604938271,
189
- "eval_accuracy": 0.9651567944250871,
190
- "eval_loss": 0.11668162047863007,
191
- "eval_runtime": 2.6735,
192
- "eval_samples_per_second": 107.351,
193
- "eval_steps_per_second": 6.733,
194
- "step": 200
195
  },
196
  {
197
  "epoch": 4.938271604938271,
198
- "step": 200,
199
  "total_flos": 3.1727957353537536e+17,
200
- "train_loss": 0.06341908916831017,
201
- "train_runtime": 245.9692,
202
- "train_samples_per_second": 52.507,
203
- "train_steps_per_second": 0.813
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
  }
205
  ],
206
  "logging_steps": 10,
207
- "max_steps": 200,
208
  "num_input_tokens_seen": 0,
209
  "num_train_epochs": 5,
210
  "save_steps": 500,
@@ -221,7 +169,7 @@
221
  }
222
  },
223
  "total_flos": 3.1727957353537536e+17,
224
- "train_batch_size": 16,
225
  "trial_name": null,
226
  "trial_params": null
227
  }
 
1
  {
2
+ "best_metric": 0.9372822299651568,
3
+ "best_model_checkpoint": "brain-tumor-classification/checkpoint-81",
4
  "epoch": 4.938271604938271,
5
  "eval_steps": 500,
6
+ "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
 
 
 
 
 
 
 
11
  {
12
  "epoch": 0.49382716049382713,
13
+ "grad_norm": 11.951680183410645,
14
  "learning_rate": 5e-05,
15
+ "loss": 1.2238,
16
+ "step": 10
 
 
 
 
 
 
 
17
  },
18
  {
19
  "epoch": 0.9876543209876543,
20
+ "grad_norm": 11.546021461486816,
21
  "learning_rate": 4.4444444444444447e-05,
22
+ "loss": 0.8509,
23
+ "step": 20
24
  },
25
  {
26
  "epoch": 0.9876543209876543,
27
+ "eval_accuracy": 0.8466898954703833,
28
+ "eval_loss": 0.5305207967758179,
29
+ "eval_runtime": 2.7821,
30
+ "eval_samples_per_second": 103.16,
31
+ "eval_steps_per_second": 3.235,
32
+ "step": 20
 
 
 
 
 
 
 
33
  },
34
  {
35
  "epoch": 1.4814814814814814,
36
+ "grad_norm": 13.172475814819336,
37
  "learning_rate": 3.888888888888889e-05,
38
+ "loss": 0.5721,
39
+ "step": 30
 
 
 
 
 
 
 
40
  },
41
  {
42
  "epoch": 1.9753086419753085,
43
+ "grad_norm": 7.819632530212402,
44
  "learning_rate": 3.3333333333333335e-05,
45
+ "loss": 0.4478,
46
+ "step": 40
 
 
 
 
 
 
 
 
 
47
  },
48
  {
49
+ "epoch": 1.9753086419753085,
50
+ "eval_accuracy": 0.9094076655052264,
51
+ "eval_loss": 0.3092304468154907,
52
+ "eval_runtime": 2.5037,
53
+ "eval_samples_per_second": 114.631,
54
+ "eval_steps_per_second": 3.595,
55
+ "step": 40
56
  },
57
  {
58
  "epoch": 2.4691358024691357,
59
+ "grad_norm": 10.437914848327637,
60
  "learning_rate": 2.777777777777778e-05,
61
+ "loss": 0.3668,
62
+ "step": 50
 
 
 
 
 
 
 
63
  },
64
  {
65
  "epoch": 2.962962962962963,
66
+ "grad_norm": 9.35677433013916,
67
  "learning_rate": 2.2222222222222223e-05,
68
+ "loss": 0.3313,
69
+ "step": 60
 
 
 
 
 
 
 
 
 
70
  },
71
  {
72
+ "epoch": 2.962962962962963,
73
+ "eval_accuracy": 0.9233449477351916,
74
+ "eval_loss": 0.24220147728919983,
75
+ "eval_runtime": 2.4542,
76
+ "eval_samples_per_second": 116.944,
77
+ "eval_steps_per_second": 3.667,
78
+ "step": 60
79
  },
80
  {
81
  "epoch": 3.45679012345679,
82
+ "grad_norm": 8.183242797851562,
83
  "learning_rate": 1.6666666666666667e-05,
84
+ "loss": 0.2979,
85
+ "step": 70
 
 
 
 
 
 
 
86
  },
87
  {
88
  "epoch": 3.950617283950617,
89
+ "grad_norm": 8.208118438720703,
90
  "learning_rate": 1.1111111111111112e-05,
91
+ "loss": 0.2777,
92
+ "step": 80
93
  },
94
  {
95
  "epoch": 4.0,
96
+ "eval_accuracy": 0.9372822299651568,
97
+ "eval_loss": 0.1715514212846756,
98
+ "eval_runtime": 2.7133,
99
+ "eval_samples_per_second": 105.776,
100
+ "eval_steps_per_second": 3.317,
101
+ "step": 81
 
 
 
 
 
 
 
102
  },
103
  {
104
  "epoch": 4.444444444444445,
105
+ "grad_norm": 8.672185897827148,
106
  "learning_rate": 5.555555555555556e-06,
107
+ "loss": 0.2684,
108
+ "step": 90
 
 
 
 
 
 
 
109
  },
110
  {
111
  "epoch": 4.938271604938271,
112
+ "grad_norm": 10.231292724609375,
113
  "learning_rate": 0.0,
114
+ "loss": 0.2465,
115
+ "step": 100
116
  },
117
  {
118
  "epoch": 4.938271604938271,
119
+ "eval_accuracy": 0.9372822299651568,
120
+ "eval_loss": 0.16363371908664703,
121
+ "eval_runtime": 2.7577,
122
+ "eval_samples_per_second": 104.074,
123
+ "eval_steps_per_second": 3.264,
124
+ "step": 100
125
  },
126
  {
127
  "epoch": 4.938271604938271,
128
+ "step": 100,
129
  "total_flos": 3.1727957353537536e+17,
130
+ "train_loss": 0.4883076953887939,
131
+ "train_runtime": 265.3223,
132
+ "train_samples_per_second": 48.677,
133
+ "train_steps_per_second": 0.377
134
+ },
135
+ {
136
+ "epoch": 4.938271604938271,
137
+ "eval_accuracy": 0.9372822299651568,
138
+ "eval_loss": 0.1715514212846756,
139
+ "eval_runtime": 2.7579,
140
+ "eval_samples_per_second": 104.066,
141
+ "eval_steps_per_second": 3.263,
142
+ "step": 100
143
+ },
144
+ {
145
+ "epoch": 4.938271604938271,
146
+ "eval_accuracy": 0.9156020131629887,
147
+ "eval_loss": 0.22556395828723907,
148
+ "eval_runtime": 22.7546,
149
+ "eval_samples_per_second": 113.516,
150
+ "eval_steps_per_second": 3.56,
151
+ "step": 100
152
  }
153
  ],
154
  "logging_steps": 10,
155
+ "max_steps": 100,
156
  "num_input_tokens_seen": 0,
157
  "num_train_epochs": 5,
158
  "save_steps": 500,
 
169
  }
170
  },
171
  "total_flos": 3.1727957353537536e+17,
172
+ "train_batch_size": 32,
173
  "trial_name": null,
174
  "trial_params": null
175
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ac1b4cd2d8159af1fc8fd51f45445f24aa9bf4ee3eaf6c19dbdcdb4ce9f1e54
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9df4ade6f59d89ee284f128277b27cd59499314a50c762b40744a848779e24e7
3
  size 5240