bombshelll committed on
Commit
79a2d9f
·
verified ·
1 Parent(s): 0d137d1

Training in progress, epoch 0

Browse files
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 8.0,
3
  "total_flos": 3.121999450369229e+16,
4
- "train_loss": 0.9097571969032288,
5
- "train_runtime": 20.8735,
6
- "train_samples_per_second": 75.215,
7
- "train_steps_per_second": 0.479
8
  }
 
1
  {
2
  "epoch": 8.0,
3
  "total_flos": 3.121999450369229e+16,
4
+ "train_loss": 0.3916594386100769,
5
+ "train_runtime": 22.6465,
6
+ "train_samples_per_second": 69.326,
7
+ "train_steps_per_second": 0.442
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:618448ea8b09686f397ca2cfc5bd546a7d1d7dda2e9f27f2444db833e16103ce
3
  size 110345908
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c9b7c846af195b3ff69a581a04af42f33487a5e009c549bf8cc81dcb93fdb43
3
  size 110345908
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 8.0,
3
  "total_flos": 3.121999450369229e+16,
4
- "train_loss": 0.9097571969032288,
5
- "train_runtime": 20.8735,
6
- "train_samples_per_second": 75.215,
7
- "train_steps_per_second": 0.479
8
  }
 
1
  {
2
  "epoch": 8.0,
3
  "total_flos": 3.121999450369229e+16,
4
+ "train_loss": 0.3916594386100769,
5
+ "train_runtime": 22.6465,
6
+ "train_samples_per_second": 69.326,
7
+ "train_steps_per_second": 0.442
8
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.8484848484848485,
3
- "best_model_checkpoint": "/kaggle/working/swin-brain-plane-classification/checkpoint-10",
4
  "epoch": 8.0,
5
  "eval_steps": 500,
6
  "global_step": 10,
@@ -10,140 +10,140 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.8,
13
- "grad_norm": 5.878930568695068,
14
  "learning_rate": 5e-05,
15
- "loss": 1.5198,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.8,
20
- "eval_accuracy": 0.24242424242424243,
21
- "eval_loss": 1.2785723209381104,
22
- "eval_runtime": 0.1966,
23
- "eval_samples_per_second": 167.856,
24
- "eval_steps_per_second": 10.173,
25
  "step": 1
26
  },
27
  {
28
  "epoch": 1.6,
29
- "grad_norm": 7.667544841766357,
30
  "learning_rate": 4.4444444444444447e-05,
31
- "loss": 1.4251,
32
  "step": 2
33
  },
34
  {
35
  "epoch": 1.6,
36
- "eval_accuracy": 0.5151515151515151,
37
- "eval_loss": 1.027370572090149,
38
- "eval_runtime": 0.2035,
39
- "eval_samples_per_second": 162.152,
40
- "eval_steps_per_second": 9.827,
41
  "step": 2
42
  },
43
  {
44
  "epoch": 2.4,
45
- "grad_norm": 7.181507587432861,
46
  "learning_rate": 3.888888888888889e-05,
47
- "loss": 1.2808,
48
  "step": 3
49
  },
50
  {
51
  "epoch": 2.4,
52
- "eval_accuracy": 0.5454545454545454,
53
- "eval_loss": 0.9328542351722717,
54
- "eval_runtime": 0.1968,
55
- "eval_samples_per_second": 167.67,
56
- "eval_steps_per_second": 10.162,
57
  "step": 3
58
  },
59
  {
60
  "epoch": 4.0,
61
- "grad_norm": 6.0301384925842285,
62
  "learning_rate": 2.777777777777778e-05,
63
- "loss": 0.5728,
64
  "step": 5
65
  },
66
  {
67
  "epoch": 4.0,
68
- "eval_accuracy": 0.696969696969697,
69
- "eval_loss": 0.7996734976768494,
70
- "eval_runtime": 0.2395,
71
- "eval_samples_per_second": 137.809,
72
- "eval_steps_per_second": 8.352,
73
  "step": 5
74
  },
75
  {
76
  "epoch": 4.8,
77
- "grad_norm": 5.295766353607178,
78
  "learning_rate": 2.2222222222222223e-05,
79
- "loss": 1.0285,
80
  "step": 6
81
  },
82
  {
83
  "epoch": 4.8,
84
- "eval_accuracy": 0.8181818181818182,
85
- "eval_loss": 0.7359862327575684,
86
- "eval_runtime": 0.1935,
87
- "eval_samples_per_second": 170.512,
88
- "eval_steps_per_second": 10.334,
89
  "step": 6
90
  },
91
  {
92
  "epoch": 5.6,
93
- "grad_norm": 5.034783363342285,
94
  "learning_rate": 1.6666666666666667e-05,
95
- "loss": 0.9435,
96
  "step": 7
97
  },
98
  {
99
  "epoch": 5.6,
100
- "eval_accuracy": 0.8181818181818182,
101
- "eval_loss": 0.6815890669822693,
102
- "eval_runtime": 0.2079,
103
- "eval_samples_per_second": 158.746,
104
- "eval_steps_per_second": 9.621,
105
  "step": 7
106
  },
107
  {
108
  "epoch": 6.4,
109
- "grad_norm": 4.618497848510742,
110
  "learning_rate": 1.1111111111111112e-05,
111
- "loss": 0.8821,
112
  "step": 8
113
  },
114
  {
115
  "epoch": 6.4,
116
- "eval_accuracy": 0.8181818181818182,
117
- "eval_loss": 0.6379525065422058,
118
- "eval_runtime": 0.1974,
119
- "eval_samples_per_second": 167.211,
120
- "eval_steps_per_second": 10.134,
121
  "step": 8
122
  },
123
  {
124
  "epoch": 8.0,
125
- "grad_norm": 4.643581390380859,
126
  "learning_rate": 0.0,
127
- "loss": 0.4361,
128
  "step": 10
129
  },
130
  {
131
  "epoch": 8.0,
132
- "eval_accuracy": 0.8484848484848485,
133
- "eval_loss": 0.5965157151222229,
134
- "eval_runtime": 0.1934,
135
- "eval_samples_per_second": 170.622,
136
- "eval_steps_per_second": 10.341,
137
  "step": 10
138
  },
139
  {
140
  "epoch": 8.0,
141
  "step": 10,
142
  "total_flos": 3.121999450369229e+16,
143
- "train_loss": 0.9097571969032288,
144
- "train_runtime": 20.8735,
145
- "train_samples_per_second": 75.215,
146
- "train_steps_per_second": 0.479
147
  }
148
  ],
149
  "logging_steps": 1,
 
1
  {
2
+ "best_metric": 0.9696969696969697,
3
+ "best_model_checkpoint": "/kaggle/working/swin-brain-plane-classification/checkpoint-6",
4
  "epoch": 8.0,
5
  "eval_steps": 500,
6
  "global_step": 10,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.8,
13
+ "grad_norm": 4.3462605476379395,
14
  "learning_rate": 5e-05,
15
+ "loss": 0.7819,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.8,
20
+ "eval_accuracy": 0.8484848484848485,
21
+ "eval_loss": 0.5965157151222229,
22
+ "eval_runtime": 0.2012,
23
+ "eval_samples_per_second": 164.047,
24
+ "eval_steps_per_second": 9.942,
25
  "step": 1
26
  },
27
  {
28
  "epoch": 1.6,
29
+ "grad_norm": 4.70327091217041,
30
  "learning_rate": 4.4444444444444447e-05,
31
+ "loss": 0.689,
32
  "step": 2
33
  },
34
  {
35
  "epoch": 1.6,
36
+ "eval_accuracy": 0.8484848484848485,
37
+ "eval_loss": 0.5047040581703186,
38
+ "eval_runtime": 0.1988,
39
+ "eval_samples_per_second": 165.985,
40
+ "eval_steps_per_second": 10.06,
41
  "step": 2
42
  },
43
  {
44
  "epoch": 2.4,
45
+ "grad_norm": 5.389795780181885,
46
  "learning_rate": 3.888888888888889e-05,
47
+ "loss": 0.58,
48
  "step": 3
49
  },
50
  {
51
  "epoch": 2.4,
52
+ "eval_accuracy": 0.9090909090909091,
53
+ "eval_loss": 0.3954509198665619,
54
+ "eval_runtime": 0.196,
55
+ "eval_samples_per_second": 168.338,
56
+ "eval_steps_per_second": 10.202,
57
  "step": 3
58
  },
59
  {
60
  "epoch": 4.0,
61
+ "grad_norm": 4.907708644866943,
62
  "learning_rate": 2.777777777777778e-05,
63
+ "loss": 0.2252,
64
  "step": 5
65
  },
66
  {
67
  "epoch": 4.0,
68
+ "eval_accuracy": 0.9393939393939394,
69
+ "eval_loss": 0.2680495083332062,
70
+ "eval_runtime": 0.2061,
71
+ "eval_samples_per_second": 160.109,
72
+ "eval_steps_per_second": 9.704,
73
  "step": 5
74
  },
75
  {
76
  "epoch": 4.8,
77
+ "grad_norm": 6.3704328536987305,
78
  "learning_rate": 2.2222222222222223e-05,
79
+ "loss": 0.3826,
80
  "step": 6
81
  },
82
  {
83
  "epoch": 4.8,
84
+ "eval_accuracy": 0.9696969696969697,
85
+ "eval_loss": 0.2426709532737732,
86
+ "eval_runtime": 0.2047,
87
+ "eval_samples_per_second": 161.236,
88
+ "eval_steps_per_second": 9.772,
89
  "step": 6
90
  },
91
  {
92
  "epoch": 5.6,
93
+ "grad_norm": 5.068804740905762,
94
  "learning_rate": 1.6666666666666667e-05,
95
+ "loss": 0.3551,
96
  "step": 7
97
  },
98
  {
99
  "epoch": 5.6,
100
+ "eval_accuracy": 0.9090909090909091,
101
+ "eval_loss": 0.22610554099082947,
102
+ "eval_runtime": 0.2073,
103
+ "eval_samples_per_second": 159.187,
104
+ "eval_steps_per_second": 9.648,
105
  "step": 7
106
  },
107
  {
108
  "epoch": 6.4,
109
+ "grad_norm": 3.142571449279785,
110
  "learning_rate": 1.1111111111111112e-05,
111
+ "loss": 0.3171,
112
  "step": 8
113
  },
114
  {
115
  "epoch": 6.4,
116
+ "eval_accuracy": 0.9393939393939394,
117
+ "eval_loss": 0.21792414784431458,
118
+ "eval_runtime": 0.1962,
119
+ "eval_samples_per_second": 168.154,
120
+ "eval_steps_per_second": 10.191,
121
  "step": 8
122
  },
123
  {
124
  "epoch": 8.0,
125
+ "grad_norm": 4.538871765136719,
126
  "learning_rate": 0.0,
127
+ "loss": 0.1802,
128
  "step": 10
129
  },
130
  {
131
  "epoch": 8.0,
132
+ "eval_accuracy": 0.9393939393939394,
133
+ "eval_loss": 0.21566466987133026,
134
+ "eval_runtime": 0.219,
135
+ "eval_samples_per_second": 150.655,
136
+ "eval_steps_per_second": 9.131,
137
  "step": 10
138
  },
139
  {
140
  "epoch": 8.0,
141
  "step": 10,
142
  "total_flos": 3.121999450369229e+16,
143
+ "train_loss": 0.3916594386100769,
144
+ "train_runtime": 22.6465,
145
+ "train_samples_per_second": 69.326,
146
+ "train_steps_per_second": 0.442
147
  }
148
  ],
149
  "logging_steps": 1,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:160d2ff7f7aba7fbd1a945d5919ab9a2b355aa60da4419b36050aa28fc4acb0f
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18d7d09a2564e2fe2dc5fecbaf35fd9a2f15b628751efe8e90ee4aa98a26317d
3
  size 5240