bombshelll commited on
Commit
63657ef
·
verified ·
1 Parent(s): 2996774

Training in progress, epoch 0

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 9.0,
3
- "eval_accuracy": 0.9851851851851852,
4
- "eval_loss": 0.063043974339962,
5
- "eval_runtime": 0.7297,
6
- "eval_samples_per_second": 185.014,
7
- "eval_steps_per_second": 6.852,
8
- "total_flos": 1.4093739557001216e+17,
9
- "train_loss": 0.3985253996319241,
10
- "train_runtime": 73.599,
11
- "train_samples_per_second": 171.198,
12
- "train_steps_per_second": 1.359
13
  }
 
1
  {
2
+ "epoch": 8.0,
3
+ "eval_accuracy": 0.8484848484848485,
4
+ "eval_loss": 0.5320981740951538,
5
+ "eval_runtime": 0.1987,
6
+ "eval_samples_per_second": 166.048,
7
+ "eval_steps_per_second": 10.064,
8
+ "total_flos": 3.121999450369229e+16,
9
+ "train_loss": 0.7512132167816162,
10
+ "train_runtime": 20.6283,
11
+ "train_samples_per_second": 76.109,
12
+ "train_steps_per_second": 0.485
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 9.0,
3
- "eval_accuracy": 0.9851851851851852,
4
- "eval_loss": 0.063043974339962,
5
- "eval_runtime": 0.7297,
6
- "eval_samples_per_second": 185.014,
7
- "eval_steps_per_second": 6.852
8
  }
 
1
  {
2
+ "epoch": 8.0,
3
+ "eval_accuracy": 0.9090909090909091,
4
+ "eval_loss": 0.5003632307052612,
5
+ "eval_runtime": 0.1947,
6
+ "eval_samples_per_second": 169.491,
7
+ "eval_steps_per_second": 10.272
8
  }
test_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 8.0,
3
+ "eval_accuracy": 0.8484848484848485,
4
+ "eval_loss": 0.5320981740951538,
5
+ "eval_runtime": 0.1987,
6
+ "eval_samples_per_second": 166.048,
7
+ "eval_steps_per_second": 10.064
8
+ }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 9.0,
3
- "total_flos": 1.4093739557001216e+17,
4
- "train_loss": 0.3985253996319241,
5
- "train_runtime": 73.599,
6
- "train_samples_per_second": 171.198,
7
- "train_steps_per_second": 1.359
8
  }
 
1
  {
2
+ "epoch": 8.0,
3
+ "total_flos": 3.121999450369229e+16,
4
+ "train_loss": 0.7512132167816162,
5
+ "train_runtime": 20.6283,
6
+ "train_samples_per_second": 76.109,
7
+ "train_steps_per_second": 0.485
8
  }
trainer_state.json CHANGED
@@ -1,182 +1,157 @@
1
  {
2
- "best_metric": 0.9851851851851852,
3
- "best_model_checkpoint": "/kaggle/working/swin-brain-plane-classification/checkpoint-45",
4
- "epoch": 9.0,
5
  "eval_steps": 500,
6
- "global_step": 45,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 1.0,
13
- "grad_norm": 5.971311092376709,
14
- "learning_rate": 2.5e-05,
15
- "loss": 1.0732,
16
- "step": 5
17
  },
18
  {
19
- "epoch": 1.0,
20
- "eval_accuracy": 0.6666666666666666,
21
- "eval_loss": 0.9284645318984985,
22
- "eval_runtime": 0.7118,
23
- "eval_samples_per_second": 189.658,
24
- "eval_steps_per_second": 7.024,
25
- "step": 5
26
  },
27
  {
28
- "epoch": 2.0,
29
- "grad_norm": 10.538399696350098,
30
- "learning_rate": 5e-05,
31
- "loss": 0.829,
32
- "step": 10
33
  },
34
  {
35
- "epoch": 2.0,
36
- "eval_accuracy": 0.8592592592592593,
37
- "eval_loss": 0.527397871017456,
38
- "eval_runtime": 0.7119,
39
- "eval_samples_per_second": 189.625,
40
- "eval_steps_per_second": 7.023,
41
- "step": 10
42
  },
43
  {
44
- "epoch": 3.0,
45
- "grad_norm": 6.285182476043701,
46
- "learning_rate": 4.722222222222222e-05,
47
- "loss": 0.5189,
48
- "step": 15
49
  },
50
  {
51
- "epoch": 3.0,
52
- "eval_accuracy": 0.9333333333333333,
53
- "eval_loss": 0.24730348587036133,
54
- "eval_runtime": 0.6998,
55
- "eval_samples_per_second": 192.917,
56
- "eval_steps_per_second": 7.145,
57
- "step": 15
58
  },
59
  {
60
  "epoch": 4.0,
61
- "grad_norm": 9.056550979614258,
62
- "learning_rate": 4.4444444444444447e-05,
63
- "loss": 0.3517,
64
- "step": 20
65
  },
66
  {
67
  "epoch": 4.0,
68
- "eval_accuracy": 0.9629629629629629,
69
- "eval_loss": 0.1404803842306137,
70
- "eval_runtime": 0.7065,
71
- "eval_samples_per_second": 191.084,
72
- "eval_steps_per_second": 7.077,
73
- "step": 20
74
  },
75
  {
76
- "epoch": 5.0,
77
- "grad_norm": 7.212975025177002,
78
- "learning_rate": 4.166666666666667e-05,
79
- "loss": 0.2075,
80
- "step": 25
81
  },
82
  {
83
- "epoch": 5.0,
84
- "eval_accuracy": 0.9333333333333333,
85
- "eval_loss": 0.17972101271152496,
86
- "eval_runtime": 0.703,
87
- "eval_samples_per_second": 192.032,
88
- "eval_steps_per_second": 7.112,
89
- "step": 25
90
  },
91
  {
92
- "epoch": 6.0,
93
- "grad_norm": 5.206702709197998,
94
- "learning_rate": 3.888888888888889e-05,
95
- "loss": 0.1871,
96
- "step": 30
97
  },
98
  {
99
- "epoch": 6.0,
100
- "eval_accuracy": 0.9703703703703703,
101
- "eval_loss": 0.09362433105707169,
102
- "eval_runtime": 0.759,
103
- "eval_samples_per_second": 177.874,
104
- "eval_steps_per_second": 6.588,
105
- "step": 30
106
  },
107
  {
108
- "epoch": 7.0,
109
- "grad_norm": 5.035118579864502,
110
- "learning_rate": 3.611111111111111e-05,
111
- "loss": 0.1567,
112
- "step": 35
113
  },
114
  {
115
- "epoch": 7.0,
116
- "eval_accuracy": 0.9703703703703703,
117
- "eval_loss": 0.1113051250576973,
118
- "eval_runtime": 0.7175,
119
- "eval_samples_per_second": 188.151,
120
- "eval_steps_per_second": 6.969,
121
- "step": 35
 
 
 
 
 
 
 
122
  },
123
  {
124
  "epoch": 8.0,
125
- "grad_norm": 4.850553035736084,
126
- "learning_rate": 3.3333333333333335e-05,
127
- "loss": 0.1482,
128
- "step": 40
 
 
129
  },
130
  {
131
  "epoch": 8.0,
132
- "eval_accuracy": 0.9777777777777777,
133
- "eval_loss": 0.0790533721446991,
134
- "eval_runtime": 0.7241,
135
- "eval_samples_per_second": 186.438,
136
- "eval_steps_per_second": 6.905,
137
- "step": 40
138
- },
139
- {
140
- "epoch": 9.0,
141
- "grad_norm": 4.634683132171631,
142
- "learning_rate": 3.055555555555556e-05,
143
- "loss": 0.1143,
144
- "step": 45
145
- },
146
- {
147
- "epoch": 9.0,
148
- "eval_accuracy": 0.9851851851851852,
149
- "eval_loss": 0.063043974339962,
150
- "eval_runtime": 0.7126,
151
- "eval_samples_per_second": 189.438,
152
- "eval_steps_per_second": 7.016,
153
- "step": 45
154
- },
155
- {
156
- "epoch": 9.0,
157
- "step": 45,
158
- "total_flos": 1.4093739557001216e+17,
159
- "train_loss": 0.3985253996319241,
160
- "train_runtime": 73.599,
161
- "train_samples_per_second": 171.198,
162
- "train_steps_per_second": 1.359
163
  }
164
  ],
165
- "logging_steps": 500,
166
- "max_steps": 100,
167
  "num_input_tokens_seen": 0,
168
- "num_train_epochs": 20,
169
  "save_steps": 500,
170
  "stateful_callbacks": {
171
- "CustomEarlyStoppingCallback": {
172
- "args": {
173
- "early_stopping_patience": 1,
174
- "early_stopping_threshold": 0.0
175
- },
176
- "attributes": {
177
- "early_stopping_patience_counter": 0
178
- }
179
- },
180
  "TrainerControl": {
181
  "args": {
182
  "should_epoch_stop": false,
@@ -188,7 +163,7 @@
188
  "attributes": {}
189
  }
190
  },
191
- "total_flos": 1.4093739557001216e+17,
192
  "train_batch_size": 32,
193
  "trial_name": null,
194
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.9090909090909091,
3
+ "best_model_checkpoint": "/kaggle/working/swin-brain-plane-classification/checkpoint-6",
4
+ "epoch": 8.0,
5
  "eval_steps": 500,
6
+ "global_step": 10,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.8,
13
+ "grad_norm": 7.547826766967773,
14
+ "learning_rate": 5e-05,
15
+ "loss": 1.3313,
16
+ "step": 1
17
  },
18
  {
19
+ "epoch": 0.8,
20
+ "eval_accuracy": 0.30303030303030304,
21
+ "eval_loss": 1.0681976079940796,
22
+ "eval_runtime": 0.1993,
23
+ "eval_samples_per_second": 165.581,
24
+ "eval_steps_per_second": 10.035,
25
+ "step": 1
26
  },
27
  {
28
+ "epoch": 1.6,
29
+ "grad_norm": 4.482707500457764,
30
+ "learning_rate": 4.4444444444444447e-05,
31
+ "loss": 1.2576,
32
+ "step": 2
33
  },
34
  {
35
+ "epoch": 1.6,
36
+ "eval_accuracy": 0.7878787878787878,
37
+ "eval_loss": 0.8504449129104614,
38
+ "eval_runtime": 0.202,
39
+ "eval_samples_per_second": 163.354,
40
+ "eval_steps_per_second": 9.9,
41
+ "step": 2
42
  },
43
  {
44
+ "epoch": 2.4,
45
+ "grad_norm": 4.672352313995361,
46
+ "learning_rate": 3.888888888888889e-05,
47
+ "loss": 1.0651,
48
+ "step": 3
49
  },
50
  {
51
+ "epoch": 2.4,
52
+ "eval_accuracy": 0.7575757575757576,
53
+ "eval_loss": 0.7269154191017151,
54
+ "eval_runtime": 0.202,
55
+ "eval_samples_per_second": 163.337,
56
+ "eval_steps_per_second": 9.899,
57
+ "step": 3
58
  },
59
  {
60
  "epoch": 4.0,
61
+ "grad_norm": 6.733062267303467,
62
+ "learning_rate": 2.777777777777778e-05,
63
+ "loss": 0.4774,
64
+ "step": 5
65
  },
66
  {
67
  "epoch": 4.0,
68
+ "eval_accuracy": 0.8484848484848485,
69
+ "eval_loss": 0.5601949691772461,
70
+ "eval_runtime": 0.1975,
71
+ "eval_samples_per_second": 167.092,
72
+ "eval_steps_per_second": 10.127,
73
+ "step": 5
74
  },
75
  {
76
+ "epoch": 4.8,
77
+ "grad_norm": 5.887814521789551,
78
+ "learning_rate": 2.2222222222222223e-05,
79
+ "loss": 0.8084,
80
+ "step": 6
81
  },
82
  {
83
+ "epoch": 4.8,
84
+ "eval_accuracy": 0.9090909090909091,
85
+ "eval_loss": 0.5003632307052612,
86
+ "eval_runtime": 0.2283,
87
+ "eval_samples_per_second": 144.536,
88
+ "eval_steps_per_second": 8.76,
89
+ "step": 6
90
  },
91
  {
92
+ "epoch": 5.6,
93
+ "grad_norm": 5.614295959472656,
94
+ "learning_rate": 1.6666666666666667e-05,
95
+ "loss": 0.7194,
96
+ "step": 7
97
  },
98
  {
99
+ "epoch": 5.6,
100
+ "eval_accuracy": 0.9090909090909091,
101
+ "eval_loss": 0.4589892327785492,
102
+ "eval_runtime": 0.1953,
103
+ "eval_samples_per_second": 168.965,
104
+ "eval_steps_per_second": 10.24,
105
+ "step": 7
106
  },
107
  {
108
+ "epoch": 6.4,
109
+ "grad_norm": 7.968162536621094,
110
+ "learning_rate": 1.1111111111111112e-05,
111
+ "loss": 0.6712,
112
+ "step": 8
113
  },
114
  {
115
+ "epoch": 6.4,
116
+ "eval_accuracy": 0.9090909090909091,
117
+ "eval_loss": 0.4304216802120209,
118
+ "eval_runtime": 0.2015,
119
+ "eval_samples_per_second": 163.754,
120
+ "eval_steps_per_second": 9.924,
121
+ "step": 8
122
+ },
123
+ {
124
+ "epoch": 8.0,
125
+ "grad_norm": 6.553189754486084,
126
+ "learning_rate": 0.0,
127
+ "loss": 0.3522,
128
+ "step": 10
129
  },
130
  {
131
  "epoch": 8.0,
132
+ "eval_accuracy": 0.9090909090909091,
133
+ "eval_loss": 0.4053419828414917,
134
+ "eval_runtime": 0.2196,
135
+ "eval_samples_per_second": 150.303,
136
+ "eval_steps_per_second": 9.109,
137
+ "step": 10
138
  },
139
  {
140
  "epoch": 8.0,
141
+ "step": 10,
142
+ "total_flos": 3.121999450369229e+16,
143
+ "train_loss": 0.7512132167816162,
144
+ "train_runtime": 20.6283,
145
+ "train_samples_per_second": 76.109,
146
+ "train_steps_per_second": 0.485
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  }
148
  ],
149
+ "logging_steps": 1,
150
+ "max_steps": 10,
151
  "num_input_tokens_seen": 0,
152
+ "num_train_epochs": 10,
153
  "save_steps": 500,
154
  "stateful_callbacks": {
 
 
 
 
 
 
 
 
 
155
  "TrainerControl": {
156
  "args": {
157
  "should_epoch_stop": false,
 
163
  "attributes": {}
164
  }
165
  },
166
+ "total_flos": 3.121999450369229e+16,
167
  "train_batch_size": 32,
168
  "trial_name": null,
169
  "trial_params": null