Rolv-Arild committed on
Commit
03ae956
·
verified ·
1 Parent(s): cf75b18

Training in progress, epoch 1

Browse files
README.md ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: other
4
+ base_model: nvidia/mit-b0
5
+ tags:
6
+ - image-segmentation
7
+ - vision
8
+ - generated_from_trainer
9
+ datasets:
10
+ - generator
11
+ model-index:
12
+ - name: autocrop-test
13
+ results: []
14
+ ---
15
+
16
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
17
+ should probably proofread and complete it, then remove this comment. -->
18
+
19
+ # autocrop-test
20
+
21
+ This model is a fine-tuned version of [nvidia/mit-b0](https://huggingface.co/nvidia/mit-b0) on a local dataset loaded from `/mnt/disk1/autocrop-data/datasets/tekst/` (a filesystem path on the training machine, not a Hub dataset).
22
+ It achieves the following results on the evaluation set:
23
+ - Loss: 0.0196
24
+ - Mean Iou: 0.4964 (unweighted average over the two classes: background IoU 0.0 and crop IoU 0.9928)
25
+ - Mean Accuracy: 0.9928
26
+ - Overall Accuracy: 0.9928
27
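+ - Accuracy Background: nan (undefined; presumably no evaluation pixels are labelled as background — verify the label encoding of the evaluation masks)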
28
+ - Accuracy Crop: 0.9928
29
+ - Iou Background: 0.0
30
+ - Iou Crop: 0.9928
31
+
32
+ ## Model description
33
+
34
+ More information needed
35
+
36
+ ## Intended uses & limitations
37
+
38
+ More information needed
39
+
40
+ ## Training and evaluation data
41
+
42
+ More information needed
43
+
44
+ ## Training procedure
45
+
46
+ ### Training hyperparameters
47
+
48
+ The following hyperparameters were used during training:
49
+ - learning_rate: 6e-05
50
+ - train_batch_size: 8
51
+ - eval_batch_size: 8
52
+ - seed: 42
53
+ - optimizer: Use OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
54
+ - lr_scheduler_type: cosine
55
+ - lr_scheduler_warmup_steps: 0.1 (interpreted as a fraction of the total training steps, i.e. 3125 of the planned 31250 steps)
56
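+ - num_epochs: 50.0 (configured maximum; training was stopped after epoch 25 by early stopping with patience 3 on the validation loss)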
57
+ - mixed_precision_training: Native AMP
58
+
59
+ ### Training results
60
+
61
+ | Training Loss | Epoch | Step | Validation Loss | Mean Iou | Mean Accuracy | Overall Accuracy | Accuracy Background | Accuracy Crop | Iou Background | Iou Crop |
62
+ |:-------------:|:-----:|:-----:|:---------------:|:--------:|:-------------:|:----------------:|:-------------------:|:-------------:|:--------------:|:--------:|
63
+ | 0.4334 | 1.0 | 625 | 0.1014 | 0.4884 | 0.9768 | 0.9768 | nan | 0.9768 | 0.0 | 0.9768 |
64
+ | 0.1136 | 2.0 | 1250 | 0.0503 | 0.4942 | 0.9883 | 0.9883 | nan | 0.9883 | 0.0 | 0.9883 |
65
+ | 0.0626 | 3.0 | 1875 | 0.0365 | 0.4952 | 0.9903 | 0.9903 | nan | 0.9903 | 0.0 | 0.9903 |
66
+ | 0.0376 | 4.0 | 2500 | 0.0304 | 0.4957 | 0.9913 | 0.9913 | nan | 0.9913 | 0.0 | 0.9913 |
67
+ | 0.0331 | 5.0 | 3125 | 0.0277 | 0.4945 | 0.9890 | 0.9890 | nan | 0.9890 | 0.0 | 0.9890 |
68
+ | 0.0312 | 6.0 | 3750 | 0.0258 | 0.4961 | 0.9922 | 0.9922 | nan | 0.9922 | 0.0 | 0.9922 |
69
+ | 0.0290 | 7.0 | 4375 | 0.0252 | 0.4970 | 0.9941 | 0.9941 | nan | 0.9941 | 0.0 | 0.9941 |
70
+ | 0.0272 | 8.0 | 5000 | 0.0247 | 0.4950 | 0.9900 | 0.9900 | nan | 0.9900 | 0.0 | 0.9900 |
71
+ | 0.0264 | 9.0 | 5625 | 0.0232 | 0.4963 | 0.9925 | 0.9925 | nan | 0.9925 | 0.0 | 0.9925 |
72
+ | 0.0260 | 10.0 | 6250 | 0.0230 | 0.4964 | 0.9927 | 0.9927 | nan | 0.9927 | 0.0 | 0.9927 |
73
+ | 0.0257 | 11.0 | 6875 | 0.0223 | 0.4969 | 0.9937 | 0.9937 | nan | 0.9937 | 0.0 | 0.9937 |
74
+ | 0.0244 | 12.0 | 7500 | 0.0217 | 0.4966 | 0.9932 | 0.9932 | nan | 0.9932 | 0.0 | 0.9932 |
75
+ | 0.0240 | 13.0 | 8125 | 0.0223 | 0.4960 | 0.9920 | 0.9920 | nan | 0.9920 | 0.0 | 0.9920 |
76
+ | 0.0230 | 14.0 | 8750 | 0.0220 | 0.4972 | 0.9943 | 0.9943 | nan | 0.9943 | 0.0 | 0.9943 |
77
+ | 0.0230 | 15.0 | 9375 | 0.0213 | 0.4963 | 0.9926 | 0.9926 | nan | 0.9926 | 0.0 | 0.9926 |
78
+ | 0.0228 | 16.0 | 10000 | 0.0208 | 0.4964 | 0.9928 | 0.9928 | nan | 0.9928 | 0.0 | 0.9928 |
79
+ | 0.0220 | 17.0 | 10625 | 0.0206 | 0.4965 | 0.9930 | 0.9930 | nan | 0.9930 | 0.0 | 0.9930 |
80
+ | 0.0217 | 18.0 | 11250 | 0.0205 | 0.4960 | 0.9921 | 0.9921 | nan | 0.9921 | 0.0 | 0.9921 |
81
+ | 0.0212 | 19.0 | 11875 | 0.0207 | 0.4958 | 0.9915 | 0.9915 | nan | 0.9915 | 0.0 | 0.9915 |
82
+ | 0.0209 | 20.0 | 12500 | 0.0204 | 0.4973 | 0.9946 | 0.9946 | nan | 0.9946 | 0.0 | 0.9946 |
83
+ | 0.0203 | 21.0 | 13125 | 0.0198 | 0.4969 | 0.9937 | 0.9937 | nan | 0.9937 | 0.0 | 0.9937 |
84
+ | 0.0202 | 22.0 | 13750 | 0.0196 | 0.4964 | 0.9928 | 0.9928 | nan | 0.9928 | 0.0 | 0.9928 |
85
+ | 0.0202 | 23.0 | 14375 | 0.0203 | 0.4971 | 0.9942 | 0.9942 | nan | 0.9942 | 0.0 | 0.9942 |
86
+ | 0.0198 | 24.0 | 15000 | 0.0196 | 0.4966 | 0.9932 | 0.9932 | nan | 0.9932 | 0.0 | 0.9932 |
87
+ | 0.0194 | 25.0 | 15625 | 0.0197 | 0.4966 | 0.9932 | 0.9932 | nan | 0.9932 | 0.0 | 0.9932 |
88
+
89
+
90
+ ### Framework versions
91
+
92
+ - Transformers 5.8.0
93
+ - Pytorch 2.11.0+cu130
94
+ - Datasets 4.8.5
95
+ - Tokenizers 0.22.2
all_results.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 25.0,
3
+ "eval_accuracy_background": NaN,
4
+ "eval_accuracy_crop": 0.9927992276007593,
5
+ "eval_iou_background": 0.0,
6
+ "eval_iou_crop": 0.9927992276007593,
7
+ "eval_loss": 0.01961207203567028,
8
+ "eval_mean_accuracy": 0.9927992276007593,
9
+ "eval_mean_iou": 0.49639961380037967,
10
+ "eval_overall_accuracy": 0.9927992276007593,
11
+ "eval_runtime": 14.9481,
12
+ "eval_samples_per_second": 59.004,
13
+ "eval_steps_per_second": 7.426,
14
+ "total_flos": 2.1901180424159232e+18,
15
+ "train_loss": 0.0420909201965332,
16
+ "train_runtime": 1897.2835,
17
+ "train_samples_per_second": 131.715,
18
+ "train_steps_per_second": 16.471
19
+ }
config.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SegformerForSemanticSegmentation"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.0,
6
+ "classifier_dropout_prob": 0.1,
7
+ "decoder_hidden_size": 256,
8
+ "depths": [
9
+ 2,
10
+ 2,
11
+ 2,
12
+ 2
13
+ ],
14
+ "downsampling_rates": [
15
+ 1,
16
+ 4,
17
+ 8,
18
+ 16
19
+ ],
20
+ "drop_path_rate": 0.1,
21
+ "dtype": "float32",
22
+ "hidden_act": "gelu",
23
+ "hidden_dropout_prob": 0.0,
24
+ "hidden_sizes": [
25
+ 32,
26
+ 64,
27
+ 160,
28
+ 256
29
+ ],
30
+ "id2label": {
31
+ "0": "background",
32
+ "1": "crop"
33
+ },
34
+ "image_size": 224,
35
+ "initializer_range": 0.02,
36
+ "label2id": {
37
+ "background": "0",
38
+ "crop": "1"
39
+ },
40
+ "layer_norm_eps": 1e-06,
41
+ "mlp_ratios": [
42
+ 4,
43
+ 4,
44
+ 4,
45
+ 4
46
+ ],
47
+ "model_type": "segformer",
48
+ "num_attention_heads": [
49
+ 1,
50
+ 2,
51
+ 5,
52
+ 8
53
+ ],
54
+ "num_channels": 3,
55
+ "num_encoder_blocks": 4,
56
+ "patch_sizes": [
57
+ 7,
58
+ 3,
59
+ 3,
60
+ 3
61
+ ],
62
+ "reshape_last_stage": true,
63
+ "semantic_loss_ignore_index": 255,
64
+ "sr_ratios": [
65
+ 8,
66
+ 4,
67
+ 2,
68
+ 1
69
+ ],
70
+ "strides": [
71
+ 4,
72
+ 2,
73
+ 2,
74
+ 2
75
+ ],
76
+ "transformers_version": "5.8.0",
77
+ "use_cache": false
78
+ }
eval_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 25.0,
3
+ "eval_accuracy_background": NaN,
4
+ "eval_accuracy_crop": 0.9927992276007593,
5
+ "eval_iou_background": 0.0,
6
+ "eval_iou_crop": 0.9927992276007593,
7
+ "eval_loss": 0.01961207203567028,
8
+ "eval_mean_accuracy": 0.9927992276007593,
9
+ "eval_mean_iou": 0.49639961380037967,
10
+ "eval_overall_accuracy": 0.9927992276007593,
11
+ "eval_runtime": 14.9481,
12
+ "eval_samples_per_second": 59.004,
13
+ "eval_steps_per_second": 7.426
14
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:473d2385ca7d9427c1d16561c1e568474a8f44be033485d987c7599eb9cc6190
3
+ size 14884776
preprocessor_config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_reduce_labels": false,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.485,
8
+ 0.456,
9
+ 0.406
10
+ ],
11
+ "image_processor_type": "SegformerImageProcessor",
12
+ "image_std": [
13
+ 0.229,
14
+ 0.224,
15
+ 0.225
16
+ ],
17
+ "reduce_labels": false,
18
+ "resample": 2,
19
+ "rescale_factor": 0.00392156862745098,
20
+ "size": {
21
+ "height": 512,
22
+ "width": 512
23
+ }
24
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 25.0,
3
+ "total_flos": 2.1901180424159232e+18,
4
+ "train_loss": 0.0420909201965332,
5
+ "train_runtime": 1897.2835,
6
+ "train_samples_per_second": 131.715,
7
+ "train_steps_per_second": 16.471
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,644 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 13750,
3
+ "best_metric": 0.01961207203567028,
4
+ "best_model_checkpoint": "trainer_output/checkpoint-13750",
5
+ "epoch": 25.0,
6
+ "eval_steps": 500,
7
+ "global_step": 15625,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.8,
14
+ "grad_norm": 1.1524319648742676,
15
+ "learning_rate": 9.5808e-06,
16
+ "loss": 0.43339532470703124,
17
+ "step": 500
18
+ },
19
+ {
20
+ "epoch": 1.0,
21
+ "eval_accuracy_background": NaN,
22
+ "eval_accuracy_crop": 0.9768196565895517,
23
+ "eval_iou_background": 0.0,
24
+ "eval_iou_crop": 0.9768196565895517,
25
+ "eval_loss": 0.10140044242143631,
26
+ "eval_mean_accuracy": 0.9768196565895517,
27
+ "eval_mean_iou": 0.48840982829477586,
28
+ "eval_overall_accuracy": 0.9768196565895517,
29
+ "eval_runtime": 16.2848,
30
+ "eval_samples_per_second": 54.161,
31
+ "eval_steps_per_second": 6.816,
32
+ "step": 625
33
+ },
34
+ {
35
+ "epoch": 1.6,
36
+ "grad_norm": 0.8539880514144897,
37
+ "learning_rate": 1.91808e-05,
38
+ "loss": 0.11356404113769532,
39
+ "step": 1000
40
+ },
41
+ {
42
+ "epoch": 2.0,
43
+ "eval_accuracy_background": NaN,
44
+ "eval_accuracy_crop": 0.988345679319867,
45
+ "eval_iou_background": 0.0,
46
+ "eval_iou_crop": 0.988345679319867,
47
+ "eval_loss": 0.05029028654098511,
48
+ "eval_mean_accuracy": 0.988345679319867,
49
+ "eval_mean_iou": 0.4941728396599335,
50
+ "eval_overall_accuracy": 0.988345679319867,
51
+ "eval_runtime": 14.8084,
52
+ "eval_samples_per_second": 59.561,
53
+ "eval_steps_per_second": 7.496,
54
+ "step": 1250
55
+ },
56
+ {
57
+ "epoch": 2.4,
58
+ "grad_norm": 0.45036041736602783,
59
+ "learning_rate": 2.87808e-05,
60
+ "loss": 0.06263476181030274,
61
+ "step": 1500
62
+ },
63
+ {
64
+ "epoch": 3.0,
65
+ "eval_accuracy_background": NaN,
66
+ "eval_accuracy_crop": 0.9903214677470595,
67
+ "eval_iou_background": 0.0,
68
+ "eval_iou_crop": 0.9903214677470595,
69
+ "eval_loss": 0.03654526174068451,
70
+ "eval_mean_accuracy": 0.9903214677470595,
71
+ "eval_mean_iou": 0.49516073387352977,
72
+ "eval_overall_accuracy": 0.9903214677470595,
73
+ "eval_runtime": 15.7287,
74
+ "eval_samples_per_second": 56.076,
75
+ "eval_steps_per_second": 7.057,
76
+ "step": 1875
77
+ },
78
+ {
79
+ "epoch": 3.2,
80
+ "grad_norm": 0.3090341091156006,
81
+ "learning_rate": 3.8380800000000004e-05,
82
+ "loss": 0.04462788009643555,
83
+ "step": 2000
84
+ },
85
+ {
86
+ "epoch": 4.0,
87
+ "grad_norm": 0.24643999338150024,
88
+ "learning_rate": 4.79808e-05,
89
+ "loss": 0.03756232452392578,
90
+ "step": 2500
91
+ },
92
+ {
93
+ "epoch": 4.0,
94
+ "eval_accuracy_background": NaN,
95
+ "eval_accuracy_crop": 0.9913295270743318,
96
+ "eval_iou_background": 0.0,
97
+ "eval_iou_crop": 0.9913295270743318,
98
+ "eval_loss": 0.030365047976374626,
99
+ "eval_mean_accuracy": 0.9913295270743318,
100
+ "eval_mean_iou": 0.4956647635371659,
101
+ "eval_overall_accuracy": 0.9913295270743318,
102
+ "eval_runtime": 13.4769,
103
+ "eval_samples_per_second": 65.445,
104
+ "eval_steps_per_second": 8.236,
105
+ "step": 2500
106
+ },
107
+ {
108
+ "epoch": 4.8,
109
+ "grad_norm": 0.5337187647819519,
110
+ "learning_rate": 5.75808e-05,
111
+ "loss": 0.03314236831665039,
112
+ "step": 3000
113
+ },
114
+ {
115
+ "epoch": 5.0,
116
+ "eval_accuracy_background": NaN,
117
+ "eval_accuracy_crop": 0.9890027583743638,
118
+ "eval_iou_background": 0.0,
119
+ "eval_iou_crop": 0.9890027583743638,
120
+ "eval_loss": 0.027659796178340912,
121
+ "eval_mean_accuracy": 0.9890027583743638,
122
+ "eval_mean_iou": 0.4945013791871819,
123
+ "eval_overall_accuracy": 0.9890027583743638,
124
+ "eval_runtime": 14.1791,
125
+ "eval_samples_per_second": 62.204,
126
+ "eval_steps_per_second": 7.828,
127
+ "step": 3125
128
+ },
129
+ {
130
+ "epoch": 5.6,
131
+ "grad_norm": 0.49186381697654724,
132
+ "learning_rate": 5.9973825042635214e-05,
133
+ "loss": 0.031177324295043947,
134
+ "step": 3500
135
+ },
136
+ {
137
+ "epoch": 6.0,
138
+ "eval_accuracy_background": NaN,
139
+ "eval_accuracy_crop": 0.9922120865322441,
140
+ "eval_iou_background": 0.0,
141
+ "eval_iou_crop": 0.9922120865322441,
142
+ "eval_loss": 0.02575760707259178,
143
+ "eval_mean_accuracy": 0.9922120865322441,
144
+ "eval_mean_iou": 0.49610604326612207,
145
+ "eval_overall_accuracy": 0.9922120865322441,
146
+ "eval_runtime": 15.0604,
147
+ "eval_samples_per_second": 58.564,
148
+ "eval_steps_per_second": 7.37,
149
+ "step": 3750
150
+ },
151
+ {
152
+ "epoch": 6.4,
153
+ "grad_norm": 0.26261016726493835,
154
+ "learning_rate": 5.985714881662627e-05,
155
+ "loss": 0.028994924545288087,
156
+ "step": 4000
157
+ },
158
+ {
159
+ "epoch": 7.0,
160
+ "eval_accuracy_background": NaN,
161
+ "eval_accuracy_crop": 0.994066035736691,
162
+ "eval_iou_background": 0.0,
163
+ "eval_iou_crop": 0.994066035736691,
164
+ "eval_loss": 0.025171734392642975,
165
+ "eval_mean_accuracy": 0.994066035736691,
166
+ "eval_mean_iou": 0.4970330178683455,
167
+ "eval_overall_accuracy": 0.994066035736691,
168
+ "eval_runtime": 13.3769,
169
+ "eval_samples_per_second": 65.935,
170
+ "eval_steps_per_second": 8.298,
171
+ "step": 4375
172
+ },
173
+ {
174
+ "epoch": 7.2,
175
+ "grad_norm": 0.3545992374420166,
176
+ "learning_rate": 5.9647363918899715e-05,
177
+ "loss": 0.028564287185668946,
178
+ "step": 4500
179
+ },
180
+ {
181
+ "epoch": 8.0,
182
+ "grad_norm": 0.3758007884025574,
183
+ "learning_rate": 5.9345124557708774e-05,
184
+ "loss": 0.027177539825439453,
185
+ "step": 5000
186
+ },
187
+ {
188
+ "epoch": 8.0,
189
+ "eval_accuracy_background": NaN,
190
+ "eval_accuracy_crop": 0.9899967291210848,
191
+ "eval_iou_background": 0.0,
192
+ "eval_iou_crop": 0.9899967291210848,
193
+ "eval_loss": 0.024666031822562218,
194
+ "eval_mean_accuracy": 0.9899967291210848,
195
+ "eval_mean_iou": 0.4949983645605424,
196
+ "eval_overall_accuracy": 0.9899967291210848,
197
+ "eval_runtime": 12.936,
198
+ "eval_samples_per_second": 68.182,
199
+ "eval_steps_per_second": 8.581,
200
+ "step": 5000
201
+ },
202
+ {
203
+ "epoch": 8.8,
204
+ "grad_norm": 0.22928953170776367,
205
+ "learning_rate": 5.895137325792861e-05,
206
+ "loss": 0.026362817764282228,
207
+ "step": 5500
208
+ },
209
+ {
210
+ "epoch": 9.0,
211
+ "eval_accuracy_background": NaN,
212
+ "eval_accuracy_crop": 0.9925482629257046,
213
+ "eval_iou_background": 0.0,
214
+ "eval_iou_crop": 0.9925482629257046,
215
+ "eval_loss": 0.023219820111989975,
216
+ "eval_mean_accuracy": 0.9925482629257046,
217
+ "eval_mean_iou": 0.4962741314628523,
218
+ "eval_overall_accuracy": 0.9925482629257046,
219
+ "eval_runtime": 12.8673,
220
+ "eval_samples_per_second": 68.546,
221
+ "eval_steps_per_second": 8.627,
222
+ "step": 5625
223
+ },
224
+ {
225
+ "epoch": 9.6,
226
+ "grad_norm": 0.3915350139141083,
227
+ "learning_rate": 5.846733792181926e-05,
228
+ "loss": 0.02600261688232422,
229
+ "step": 6000
230
+ },
231
+ {
232
+ "epoch": 10.0,
233
+ "eval_accuracy_background": NaN,
234
+ "eval_accuracy_crop": 0.9927480206245599,
235
+ "eval_iou_background": 0.0,
236
+ "eval_iou_crop": 0.9927480206245599,
237
+ "eval_loss": 0.02302992343902588,
238
+ "eval_mean_accuracy": 0.9927480206245599,
239
+ "eval_mean_iou": 0.49637401031227996,
240
+ "eval_overall_accuracy": 0.9927480206245599,
241
+ "eval_runtime": 14.5996,
242
+ "eval_samples_per_second": 60.412,
243
+ "eval_steps_per_second": 7.603,
244
+ "step": 6250
245
+ },
246
+ {
247
+ "epoch": 10.4,
248
+ "grad_norm": 0.26642245054244995,
249
+ "learning_rate": 5.78945279998472e-05,
250
+ "loss": 0.025672937393188478,
251
+ "step": 6500
252
+ },
253
+ {
254
+ "epoch": 11.0,
255
+ "eval_accuracy_background": NaN,
256
+ "eval_accuracy_crop": 0.9937102307036272,
257
+ "eval_iou_background": 0.0,
258
+ "eval_iou_crop": 0.9937102307036272,
259
+ "eval_loss": 0.022344686090946198,
260
+ "eval_mean_accuracy": 0.9937102307036272,
261
+ "eval_mean_iou": 0.4968551153518136,
262
+ "eval_overall_accuracy": 0.9937102307036272,
263
+ "eval_runtime": 14.6595,
264
+ "eval_samples_per_second": 60.166,
265
+ "eval_steps_per_second": 7.572,
266
+ "step": 6875
267
+ },
268
+ {
269
+ "epoch": 11.2,
270
+ "grad_norm": 0.5744491815567017,
271
+ "learning_rate": 5.723472978350694e-05,
272
+ "loss": 0.024709026336669923,
273
+ "step": 7000
274
+ },
275
+ {
276
+ "epoch": 12.0,
277
+ "grad_norm": 0.16804039478302002,
278
+ "learning_rate": 5.649000083482167e-05,
279
+ "loss": 0.024350887298583986,
280
+ "step": 7500
281
+ },
282
+ {
283
+ "epoch": 12.0,
284
+ "eval_accuracy_background": NaN,
285
+ "eval_accuracy_crop": 0.9931893459362804,
286
+ "eval_iou_background": 0.0,
287
+ "eval_iou_crop": 0.9931893459362804,
288
+ "eval_loss": 0.021655429154634476,
289
+ "eval_mean_accuracy": 0.9931893459362804,
290
+ "eval_mean_iou": 0.4965946729681402,
291
+ "eval_overall_accuracy": 0.9931893459362804,
292
+ "eval_runtime": 14.9256,
293
+ "eval_samples_per_second": 59.093,
294
+ "eval_steps_per_second": 7.437,
295
+ "step": 7500
296
+ },
297
+ {
298
+ "epoch": 12.8,
299
+ "grad_norm": 0.2182096242904663,
300
+ "learning_rate": 5.566266356989437e-05,
301
+ "loss": 0.02396395683288574,
302
+ "step": 8000
303
+ },
304
+ {
305
+ "epoch": 13.0,
306
+ "eval_accuracy_background": NaN,
307
+ "eval_accuracy_crop": 0.9920321818804473,
308
+ "eval_iou_background": 0.0,
309
+ "eval_iou_crop": 0.9920321818804473,
310
+ "eval_loss": 0.022262275218963623,
311
+ "eval_mean_accuracy": 0.9920321818804473,
312
+ "eval_mean_iou": 0.49601609094022364,
313
+ "eval_overall_accuracy": 0.9920321818804473,
314
+ "eval_runtime": 14.7545,
315
+ "eval_samples_per_second": 59.778,
316
+ "eval_steps_per_second": 7.523,
317
+ "step": 8125
318
+ },
319
+ {
320
+ "epoch": 13.6,
321
+ "grad_norm": 0.2837521731853485,
322
+ "learning_rate": 5.4755298016519184e-05,
323
+ "loss": 0.023011560440063476,
324
+ "step": 8500
325
+ },
326
+ {
327
+ "epoch": 14.0,
328
+ "eval_accuracy_background": NaN,
329
+ "eval_accuracy_crop": 0.9943180523216922,
330
+ "eval_iou_background": 0.0,
331
+ "eval_iou_crop": 0.9943180523216922,
332
+ "eval_loss": 0.022013485431671143,
333
+ "eval_mean_accuracy": 0.9943180523216922,
334
+ "eval_mean_iou": 0.4971590261608461,
335
+ "eval_overall_accuracy": 0.9943180523216922,
336
+ "eval_runtime": 14.3463,
337
+ "eval_samples_per_second": 61.479,
338
+ "eval_steps_per_second": 7.737,
339
+ "step": 8750
340
+ },
341
+ {
342
+ "epoch": 14.4,
343
+ "grad_norm": 0.2068459540605545,
344
+ "learning_rate": 5.3770733768437804e-05,
345
+ "loss": 0.022984485626220703,
346
+ "step": 9000
347
+ },
348
+ {
349
+ "epoch": 15.0,
350
+ "eval_accuracy_background": NaN,
351
+ "eval_accuracy_crop": 0.9925804513700699,
352
+ "eval_iou_background": 0.0,
353
+ "eval_iou_crop": 0.9925804513700699,
354
+ "eval_loss": 0.021278690546751022,
355
+ "eval_mean_accuracy": 0.9925804513700699,
356
+ "eval_mean_iou": 0.49629022568503495,
357
+ "eval_overall_accuracy": 0.9925804513700699,
358
+ "eval_runtime": 15.0289,
359
+ "eval_samples_per_second": 58.687,
360
+ "eval_steps_per_second": 7.386,
361
+ "step": 9375
362
+ },
363
+ {
364
+ "epoch": 15.2,
365
+ "grad_norm": 0.28135085105895996,
366
+ "learning_rate": 5.271204116133174e-05,
367
+ "loss": 0.022900413513183594,
368
+ "step": 9500
369
+ },
370
+ {
371
+ "epoch": 16.0,
372
+ "grad_norm": 0.3188186287879944,
373
+ "learning_rate": 5.158252169806754e-05,
374
+ "loss": 0.02281988716125488,
375
+ "step": 10000
376
+ },
377
+ {
378
+ "epoch": 16.0,
379
+ "eval_accuracy_background": NaN,
380
+ "eval_accuracy_crop": 0.9927684136520576,
381
+ "eval_iou_background": 0.0,
382
+ "eval_iou_crop": 0.9927684136520576,
383
+ "eval_loss": 0.02082865871489048,
384
+ "eval_mean_accuracy": 0.9927684136520576,
385
+ "eval_mean_iou": 0.4963842068260288,
386
+ "eval_overall_accuracy": 0.9927684136520576,
387
+ "eval_runtime": 14.8833,
388
+ "eval_samples_per_second": 59.261,
389
+ "eval_steps_per_second": 7.458,
390
+ "step": 10000
391
+ },
392
+ {
393
+ "epoch": 16.8,
394
+ "grad_norm": 0.2763194143772125,
395
+ "learning_rate": 5.038569775305373e-05,
396
+ "loss": 0.02196409034729004,
397
+ "step": 10500
398
+ },
399
+ {
400
+ "epoch": 17.0,
401
+ "eval_accuracy_background": NaN,
402
+ "eval_accuracy_crop": 0.993036650688435,
403
+ "eval_iou_background": 0.0,
404
+ "eval_iou_crop": 0.993036650688435,
405
+ "eval_loss": 0.020577579736709595,
406
+ "eval_mean_accuracy": 0.993036650688435,
407
+ "eval_mean_iou": 0.4965183253442175,
408
+ "eval_overall_accuracy": 0.993036650688435,
409
+ "eval_runtime": 14.9738,
410
+ "eval_samples_per_second": 58.903,
411
+ "eval_steps_per_second": 7.413,
412
+ "step": 10625
413
+ },
414
+ {
415
+ "epoch": 17.6,
416
+ "grad_norm": 0.20888900756835938,
417
+ "learning_rate": 4.912530158781619e-05,
418
+ "loss": 0.02165799331665039,
419
+ "step": 11000
420
+ },
421
+ {
422
+ "epoch": 18.0,
423
+ "eval_accuracy_background": NaN,
424
+ "eval_accuracy_crop": 0.9920524486787513,
425
+ "eval_iou_background": 0.0,
426
+ "eval_iou_crop": 0.9920524486787513,
427
+ "eval_loss": 0.020513063296675682,
428
+ "eval_mean_accuracy": 0.9920524486787513,
429
+ "eval_mean_iou": 0.49602622433937565,
430
+ "eval_overall_accuracy": 0.9920524486787513,
431
+ "eval_runtime": 14.9673,
432
+ "eval_samples_per_second": 58.929,
433
+ "eval_steps_per_second": 7.416,
434
+ "step": 11250
435
+ },
436
+ {
437
+ "epoch": 18.4,
438
+ "grad_norm": 0.23501233756542206,
439
+ "learning_rate": 4.780526371204638e-05,
440
+ "loss": 0.021172378540039064,
441
+ "step": 11500
442
+ },
443
+ {
444
+ "epoch": 19.0,
445
+ "eval_accuracy_background": NaN,
446
+ "eval_accuracy_crop": 0.9915265427945928,
447
+ "eval_iou_background": 0.0,
448
+ "eval_iou_crop": 0.9915265427945928,
449
+ "eval_loss": 0.020704658702015877,
450
+ "eval_mean_accuracy": 0.9915265427945928,
451
+ "eval_mean_iou": 0.4957632713972964,
452
+ "eval_overall_accuracy": 0.9915265427945928,
453
+ "eval_runtime": 14.982,
454
+ "eval_samples_per_second": 58.871,
455
+ "eval_steps_per_second": 7.409,
456
+ "step": 11875
457
+ },
458
+ {
459
+ "epoch": 19.2,
460
+ "grad_norm": 1.4721801280975342,
461
+ "learning_rate": 4.642970062641836e-05,
462
+ "loss": 0.02130653762817383,
463
+ "step": 12000
464
+ },
465
+ {
466
+ "epoch": 20.0,
467
+ "grad_norm": 0.5824402570724487,
468
+ "learning_rate": 4.5002901985398264e-05,
469
+ "loss": 0.02092706298828125,
470
+ "step": 12500
471
+ },
472
+ {
473
+ "epoch": 20.0,
474
+ "eval_accuracy_background": NaN,
475
+ "eval_accuracy_crop": 0.9945885825217533,
476
+ "eval_iou_background": 0.0,
477
+ "eval_iou_crop": 0.9945885825217533,
478
+ "eval_loss": 0.02044781483709812,
479
+ "eval_mean_accuracy": 0.9945885825217533,
480
+ "eval_mean_iou": 0.49729429126087665,
481
+ "eval_overall_accuracy": 0.9945885825217533,
482
+ "eval_runtime": 14.9609,
483
+ "eval_samples_per_second": 58.954,
484
+ "eval_steps_per_second": 7.419,
485
+ "step": 12500
486
+ },
487
+ {
488
+ "epoch": 20.8,
489
+ "grad_norm": 0.17007386684417725,
490
+ "learning_rate": 4.3529317220078455e-05,
491
+ "loss": 0.020286674499511718,
492
+ "step": 13000
493
+ },
494
+ {
495
+ "epoch": 21.0,
496
+ "eval_accuracy_background": NaN,
497
+ "eval_accuracy_crop": 0.9937273277466255,
498
+ "eval_iou_background": 0.0,
499
+ "eval_iou_crop": 0.9937273277466255,
500
+ "eval_loss": 0.019753679633140564,
501
+ "eval_mean_accuracy": 0.9937273277466255,
502
+ "eval_mean_iou": 0.49686366387331277,
503
+ "eval_overall_accuracy": 0.9937273277466255,
504
+ "eval_runtime": 14.9415,
505
+ "eval_samples_per_second": 59.03,
506
+ "eval_steps_per_second": 7.429,
507
+ "step": 13125
508
+ },
509
+ {
510
+ "epoch": 21.6,
511
+ "grad_norm": 0.4324241876602173,
512
+ "learning_rate": 4.2013541662752865e-05,
513
+ "loss": 0.020156242370605468,
514
+ "step": 13500
515
+ },
516
+ {
517
+ "epoch": 22.0,
518
+ "eval_accuracy_background": NaN,
519
+ "eval_accuracy_crop": 0.9927992276007593,
520
+ "eval_iou_background": 0.0,
521
+ "eval_iou_crop": 0.9927992276007593,
522
+ "eval_loss": 0.01961207203567028,
523
+ "eval_mean_accuracy": 0.9927992276007593,
524
+ "eval_mean_iou": 0.49639961380037967,
525
+ "eval_overall_accuracy": 0.9927992276007593,
526
+ "eval_runtime": 15.1325,
527
+ "eval_samples_per_second": 58.285,
528
+ "eval_steps_per_second": 7.335,
529
+ "step": 13750
530
+ },
531
+ {
532
+ "epoch": 22.4,
533
+ "grad_norm": 0.1799526959657669,
534
+ "learning_rate": 4.0460302216503615e-05,
535
+ "loss": 0.020241693496704103,
536
+ "step": 14000
537
+ },
538
+ {
539
+ "epoch": 23.0,
540
+ "eval_accuracy_background": NaN,
541
+ "eval_accuracy_crop": 0.9942146776248755,
542
+ "eval_iou_background": 0.0,
543
+ "eval_iou_crop": 0.9942146776248755,
544
+ "eval_loss": 0.020346596837043762,
545
+ "eval_mean_accuracy": 0.9942146776248755,
546
+ "eval_mean_iou": 0.49710733881243774,
547
+ "eval_overall_accuracy": 0.9942146776248755,
548
+ "eval_runtime": 15.1085,
549
+ "eval_samples_per_second": 58.378,
550
+ "eval_steps_per_second": 7.347,
551
+ "step": 14375
552
+ },
553
+ {
554
+ "epoch": 23.2,
555
+ "grad_norm": 0.19619110226631165,
556
+ "learning_rate": 3.887444261448782e-05,
557
+ "loss": 0.019864578247070313,
558
+ "step": 14500
559
+ },
560
+ {
561
+ "epoch": 24.0,
562
+ "grad_norm": 0.24014325439929962,
563
+ "learning_rate": 3.726090831489309e-05,
564
+ "loss": 0.01975071907043457,
565
+ "step": 15000
566
+ },
567
+ {
568
+ "epoch": 24.0,
569
+ "eval_accuracy_background": NaN,
570
+ "eval_accuracy_crop": 0.9932115552616192,
571
+ "eval_iou_background": 0.0,
572
+ "eval_iou_crop": 0.9932115552616192,
573
+ "eval_loss": 0.01964355632662773,
574
+ "eval_mean_accuracy": 0.9932115552616192,
575
+ "eval_mean_iou": 0.4966057776308096,
576
+ "eval_overall_accuracy": 0.9932115552616192,
577
+ "eval_runtime": 14.9831,
578
+ "eval_samples_per_second": 58.866,
579
+ "eval_steps_per_second": 7.408,
580
+ "step": 15000
581
+ },
582
+ {
583
+ "epoch": 24.8,
584
+ "grad_norm": 0.17266370356082916,
585
+ "learning_rate": 3.5624731078666494e-05,
586
+ "loss": 0.01938687515258789,
587
+ "step": 15500
588
+ },
589
+ {
590
+ "epoch": 25.0,
591
+ "eval_accuracy_background": NaN,
592
+ "eval_accuracy_crop": 0.9932365837056147,
593
+ "eval_iou_background": 0.0,
594
+ "eval_iou_crop": 0.9932365837056147,
595
+ "eval_loss": 0.019678112119436264,
596
+ "eval_mean_accuracy": 0.9932365837056147,
597
+ "eval_mean_iou": 0.49661829185280737,
598
+ "eval_overall_accuracy": 0.9932365837056147,
599
+ "eval_runtime": 15.0672,
600
+ "eval_samples_per_second": 58.538,
601
+ "eval_steps_per_second": 7.367,
602
+ "step": 15625
603
+ },
604
+ {
605
+ "epoch": 25.0,
606
+ "step": 15625,
607
+ "total_flos": 2.1901180424159232e+18,
608
+ "train_loss": 0.0420909201965332,
609
+ "train_runtime": 1897.2835,
610
+ "train_samples_per_second": 131.715,
611
+ "train_steps_per_second": 16.471
612
+ }
613
+ ],
614
+ "logging_steps": 500,
615
+ "max_steps": 31250,
616
+ "num_input_tokens_seen": 0,
617
+ "num_train_epochs": 50,
618
+ "save_steps": 500,
619
+ "stateful_callbacks": {
620
+ "EarlyStoppingCallback": {
621
+ "args": {
622
+ "early_stopping_patience": 3,
623
+ "early_stopping_threshold": 0.0
624
+ },
625
+ "attributes": {
626
+ "early_stopping_patience_counter": 3
627
+ }
628
+ },
629
+ "TrainerControl": {
630
+ "args": {
631
+ "should_epoch_stop": false,
632
+ "should_evaluate": false,
633
+ "should_log": false,
634
+ "should_save": true,
635
+ "should_training_stop": true
636
+ },
637
+ "attributes": {}
638
+ }
639
+ },
640
+ "total_flos": 2.1901180424159232e+18,
641
+ "train_batch_size": 8,
642
+ "trial_name": null,
643
+ "trial_params": null
644
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f90cac500d7f39f5a5166c24c0652bb66ac9ab0b7692e80bab038d7b46f8fd6
3
+ size 5329