galthran committed on
Commit
04bd873
·
1 Parent(s): bb702ea

Segformer

Browse files
Files changed (7) hide show
  1. config.json +80 -0
  2. optimizer.pt +3 -0
  3. pytorch_model.bin +3 -0
  4. rng_state.pth +3 -0
  5. scheduler.pt +3 -0
  6. trainer_state.json +259 -0
  7. training_args.bin +3 -0
config.json ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "nvidia/segformer-b3-finetuned-ade-512-512",
3
+ "architectures": [
4
+ "SegformerForSemanticSegmentation"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "classifier_dropout_prob": 0.1,
8
+ "decoder_hidden_size": 768,
9
+ "depths": [
10
+ 3,
11
+ 4,
12
+ 18,
13
+ 3
14
+ ],
15
+ "downsampling_rates": [
16
+ 1,
17
+ 4,
18
+ 8,
19
+ 16
20
+ ],
21
+ "drop_path_rate": 0.1,
22
+ "hidden_act": "gelu",
23
+ "hidden_dropout_prob": 0.0,
24
+ "hidden_sizes": [
25
+ 64,
26
+ 128,
27
+ 320,
28
+ 512
29
+ ],
30
+ "id2label": {
31
+ "0": "building",
32
+ "1": "door",
33
+ "2": "window"
34
+ },
35
+ "image_size": 224,
36
+ "initializer_range": 0.02,
37
+ "label2id": {
38
+ "building": 0,
39
+ "door": 1,
40
+ "window": 2
41
+ },
42
+ "layer_norm_eps": 1e-06,
43
+ "mlp_ratios": [
44
+ 4,
45
+ 4,
46
+ 4,
47
+ 4
48
+ ],
49
+ "model_type": "segformer",
50
+ "num_attention_heads": [
51
+ 1,
52
+ 2,
53
+ 5,
54
+ 8
55
+ ],
56
+ "num_channels": 3,
57
+ "num_encoder_blocks": 4,
58
+ "patch_sizes": [
59
+ 7,
60
+ 3,
61
+ 3,
62
+ 3
63
+ ],
64
+ "reshape_last_stage": true,
65
+ "semantic_loss_ignore_index": 255,
66
+ "sr_ratios": [
67
+ 8,
68
+ 4,
69
+ 2,
70
+ 1
71
+ ],
72
+ "strides": [
73
+ 4,
74
+ 2,
75
+ 2,
76
+ 2
77
+ ],
78
+ "torch_dtype": "float32",
79
+ "transformers_version": "4.27.2"
80
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:383a91adaeafe63d0faac95af8a998a3c40c630c1ac5f97effee36d22d542acb
3
+ size 378175711
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:856c5309063362ee0729503113c384c0e6f35ce6ebc798980a2e2e980c1d7ad1
3
+ size 189128349
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18e44d71690af658199ec19d1a46c14d400b81c55874b2088e85f13140705043
3
+ size 14575
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:779be1a3bafdb7db1f4511b02816c8bf0a7868c7281d127b3e09c53223034706
3
+ size 627
trainer_state.json ADDED
@@ -0,0 +1,259 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 3.5169988276670576,
5
+ "global_step": 3000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.12,
12
+ "learning_rate": 5e-05,
13
+ "loss": 0.5593,
14
+ "step": 100
15
+ },
16
+ {
17
+ "epoch": 0.23,
18
+ "learning_rate": 5e-05,
19
+ "loss": 0.3226,
20
+ "step": 200
21
+ },
22
+ {
23
+ "epoch": 0.35,
24
+ "learning_rate": 5e-05,
25
+ "loss": 0.2389,
26
+ "step": 300
27
+ },
28
+ {
29
+ "epoch": 0.35,
30
+ "eval_loss": 0.22972644865512848,
31
+ "eval_mean_iou": 0.5079903689346261,
32
+ "eval_runtime": 224.7647,
33
+ "eval_samples_per_second": 0.948,
34
+ "eval_steps_per_second": 0.948,
35
+ "step": 300
36
+ },
37
+ {
38
+ "epoch": 0.47,
39
+ "learning_rate": 5e-05,
40
+ "loss": 0.2187,
41
+ "step": 400
42
+ },
43
+ {
44
+ "epoch": 0.59,
45
+ "learning_rate": 5e-05,
46
+ "loss": 0.1961,
47
+ "step": 500
48
+ },
49
+ {
50
+ "epoch": 0.7,
51
+ "learning_rate": 5e-05,
52
+ "loss": 0.1873,
53
+ "step": 600
54
+ },
55
+ {
56
+ "epoch": 0.7,
57
+ "eval_loss": 0.18838702142238617,
58
+ "eval_mean_iou": 0.6565456095704835,
59
+ "eval_runtime": 223.134,
60
+ "eval_samples_per_second": 0.955,
61
+ "eval_steps_per_second": 0.955,
62
+ "step": 600
63
+ },
64
+ {
65
+ "epoch": 0.82,
66
+ "learning_rate": 5e-05,
67
+ "loss": 0.172,
68
+ "step": 700
69
+ },
70
+ {
71
+ "epoch": 0.94,
72
+ "learning_rate": 5e-05,
73
+ "loss": 0.2013,
74
+ "step": 800
75
+ },
76
+ {
77
+ "epoch": 1.06,
78
+ "learning_rate": 5e-05,
79
+ "loss": 0.1549,
80
+ "step": 900
81
+ },
82
+ {
83
+ "epoch": 1.06,
84
+ "eval_loss": 0.1733293980360031,
85
+ "eval_mean_iou": 0.6822287052114181,
86
+ "eval_runtime": 226.0674,
87
+ "eval_samples_per_second": 0.942,
88
+ "eval_steps_per_second": 0.942,
89
+ "step": 900
90
+ },
91
+ {
92
+ "epoch": 1.17,
93
+ "learning_rate": 5e-05,
94
+ "loss": 0.1401,
95
+ "step": 1000
96
+ },
97
+ {
98
+ "epoch": 1.29,
99
+ "learning_rate": 5e-05,
100
+ "loss": 0.1365,
101
+ "step": 1100
102
+ },
103
+ {
104
+ "epoch": 1.41,
105
+ "learning_rate": 5e-05,
106
+ "loss": 0.151,
107
+ "step": 1200
108
+ },
109
+ {
110
+ "epoch": 1.41,
111
+ "eval_loss": 0.17005586624145508,
112
+ "eval_mean_iou": 0.6851204106483545,
113
+ "eval_runtime": 217.995,
114
+ "eval_samples_per_second": 0.977,
115
+ "eval_steps_per_second": 0.977,
116
+ "step": 1200
117
+ },
118
+ {
119
+ "epoch": 1.52,
120
+ "learning_rate": 5e-05,
121
+ "loss": 0.1461,
122
+ "step": 1300
123
+ },
124
+ {
125
+ "epoch": 1.64,
126
+ "learning_rate": 5e-05,
127
+ "loss": 0.1385,
128
+ "step": 1400
129
+ },
130
+ {
131
+ "epoch": 1.76,
132
+ "learning_rate": 5e-05,
133
+ "loss": 0.1737,
134
+ "step": 1500
135
+ },
136
+ {
137
+ "epoch": 1.76,
138
+ "eval_loss": 0.16983915865421295,
139
+ "eval_mean_iou": 0.6635217840293423,
140
+ "eval_runtime": 232.534,
141
+ "eval_samples_per_second": 0.916,
142
+ "eval_steps_per_second": 0.916,
143
+ "step": 1500
144
+ },
145
+ {
146
+ "epoch": 1.88,
147
+ "learning_rate": 5e-05,
148
+ "loss": 0.1277,
149
+ "step": 1600
150
+ },
151
+ {
152
+ "epoch": 1.99,
153
+ "learning_rate": 5e-05,
154
+ "loss": 0.1366,
155
+ "step": 1700
156
+ },
157
+ {
158
+ "epoch": 2.0,
159
+ "eval_loss": 0.13025489449501038,
160
+ "eval_mean_iou": 0.7721716611246382,
161
+ "eval_runtime": 227.8304,
162
+ "eval_samples_per_second": 0.935,
163
+ "eval_steps_per_second": 0.935,
164
+ "step": 1706
165
+ },
166
+ {
167
+ "epoch": 2.11,
168
+ "learning_rate": 5e-05,
169
+ "loss": 0.1235,
170
+ "step": 1800
171
+ },
172
+ {
173
+ "epoch": 2.23,
174
+ "learning_rate": 5e-05,
175
+ "loss": 0.1202,
176
+ "step": 1900
177
+ },
178
+ {
179
+ "epoch": 2.34,
180
+ "learning_rate": 5e-05,
181
+ "loss": 0.1199,
182
+ "step": 2000
183
+ },
184
+ {
185
+ "epoch": 2.46,
186
+ "learning_rate": 5e-05,
187
+ "loss": 0.1254,
188
+ "step": 2100
189
+ },
190
+ {
191
+ "epoch": 2.58,
192
+ "learning_rate": 5e-05,
193
+ "loss": 0.1214,
194
+ "step": 2200
195
+ },
196
+ {
197
+ "epoch": 2.7,
198
+ "learning_rate": 5e-05,
199
+ "loss": 0.1228,
200
+ "step": 2300
201
+ },
202
+ {
203
+ "epoch": 2.81,
204
+ "learning_rate": 5e-05,
205
+ "loss": 0.123,
206
+ "step": 2400
207
+ },
208
+ {
209
+ "epoch": 2.93,
210
+ "learning_rate": 5e-05,
211
+ "loss": 0.1113,
212
+ "step": 2500
213
+ },
214
+ {
215
+ "epoch": 3.0,
216
+ "eval_loss": 0.1386958658695221,
217
+ "eval_mean_iou": 0.7781228705976458,
218
+ "eval_runtime": 228.3799,
219
+ "eval_samples_per_second": 0.933,
220
+ "eval_steps_per_second": 0.933,
221
+ "step": 2559
222
+ },
223
+ {
224
+ "epoch": 3.05,
225
+ "learning_rate": 5e-05,
226
+ "loss": 0.1162,
227
+ "step": 2600
228
+ },
229
+ {
230
+ "epoch": 3.17,
231
+ "learning_rate": 5e-05,
232
+ "loss": 0.0971,
233
+ "step": 2700
234
+ },
235
+ {
236
+ "epoch": 3.28,
237
+ "learning_rate": 5e-05,
238
+ "loss": 0.097,
239
+ "step": 2800
240
+ },
241
+ {
242
+ "epoch": 3.4,
243
+ "learning_rate": 5e-05,
244
+ "loss": 0.091,
245
+ "step": 2900
246
+ },
247
+ {
248
+ "epoch": 3.52,
249
+ "learning_rate": 5e-05,
250
+ "loss": 0.0887,
251
+ "step": 3000
252
+ }
253
+ ],
254
+ "max_steps": 170600,
255
+ "num_train_epochs": 200,
256
+ "total_flos": 6.68503279927296e+17,
257
+ "trial_name": null,
258
+ "trial_params": null
259
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e02bb2def3ec1407bed61a388338418c1e79cc077cbb8f28e3ade86d34c1666b
3
+ size 3579