PheniX-Lab committed
Commit ef3d793 · verified · 1 Parent(s): 96e488d

Delete FoMo4Wheat/configs/distill_default_config_large.yaml

FoMo4Wheat/configs/distill_default_config_large.yaml DELETED
@@ -1,132 +0,0 @@
-MODEL:
-  WEIGHTS: ''
-compute_precision:
-  grad_scaler: true
-  teacher:
-    backbone:
-      sharding_strategy: SHARD_GRAD_OP
-      mixed_precision:
-        param_dtype: fp16
-        reduce_dtype: fp16
-        buffer_dtype: fp32
-    dino_head:
-      sharding_strategy: SHARD_GRAD_OP
-      mixed_precision:
-        param_dtype: fp16
-        reduce_dtype: fp16
-        buffer_dtype: fp32
-    ibot_head:
-      sharding_strategy: SHARD_GRAD_OP
-      mixed_precision:
-        param_dtype: fp16
-        reduce_dtype: fp16
-        buffer_dtype: fp32
-  student:
-    backbone:
-      sharding_strategy: SHARD_GRAD_OP
-      mixed_precision:
-        param_dtype: bf16
-        reduce_dtype: bf16
-        buffer_dtype: fp32
-    dino_head:
-      sharding_strategy: SHARD_GRAD_OP
-      mixed_precision:
-        param_dtype: bf16
-        reduce_dtype: fp32
-        buffer_dtype: fp32
-    ibot_head:
-      sharding_strategy: SHARD_GRAD_OP
-      mixed_precision:
-        param_dtype: bf16
-        reduce_dtype: fp32
-        buffer_dtype: fp32
-dino:
-  loss_weight: 1.0
-  head_n_prototypes: 131072
-  head_bottleneck_dim: 384
-  head_nlayers: 3
-  head_hidden_dim: 2048
-  koleo_loss_weight: -1
-ibot:
-  loss_weight: 1.0
-  mask_sample_probability: 0.5
-  mask_ratio_min_max:
-  - 0.1
-  - 0.5
-  separate_head: True
-  head_n_prototypes: 131072
-  head_bottleneck_dim: 256
-  head_nlayers: 3
-  head_hidden_dim: 2048
-train:
-  batch_size_per_gpu: 16
-  dataset_path: ImageNet:split=TRAIN
-  output_dir: .
-  saveckp_freq: 20
-  seed: 0
-  num_workers: 16
-  OFFICIAL_EPOCH_LENGTH: 1250
-  cache_dataset: true
-  centering: sinkhorn_knopp
-student:
-  arch: vit_large
-  patch_size: 14
-  drop_path_rate: 0.0
-  layerscale: 1.0e-05
-  drop_path_uniform: true
-  pretrained_weights: ''
-  ffn_layer: "mlp"
-  block_chunks: 4
-  qkv_bias: true
-  proj_bias: true
-  ffn_bias: true
-  num_register_tokens: 4
-  interpolate_offset: 0.1
-  interpolate_antialias: false
-teacher:
-  arch: vit_giant2
-  patch_size: 14
-  drop_path_rate: 0.4
-  layerscale: 1.0e-05
-  drop_path_uniform: true
-  pretrained_weights: '/hpc/home/2023222003/Phenix/wheat/foundation_model/distill_pretrain/518_vitg/teacher_checkpoint.pth'
-  ffn_layer: "swiglufused"
-  block_chunks: 4
-  qkv_bias: true
-  proj_bias: true
-  ffn_bias: true
-  momentum_teacher: 0.994
-  final_momentum_teacher: 1
-  warmup_teacher_temp: 0.04
-  teacher_temp: 0.07
-  warmup_teacher_temp_epochs: 30
-  num_register_tokens: 4
-  interpolate_offset: 0.1
-  interpolate_antialias: false
-optim:
-  epochs: 100
-  weight_decay: 0.04
-  weight_decay_end: 0.2
-  base_lr: 1e-04 # learning rate for a batch size of 1024
-  lr: 0. # will be set after applying scaling rule
-  warmup_epochs: 10
-  min_lr: 1.0e-06
-  clip_grad: 3.0
-  freeze_last_layer_epochs: 0
-  scaling_rule: sqrt_wrt_1024
-  patch_embed_lr_mult: 0.2
-  layerwise_decay: 1
-  adamw_beta1: 0.9
-  adamw_beta2: 0.999
-crops:
-  global_crops_scale:
-  - 0.32
-  - 1.0
-  local_crops_number: 8
-  local_crops_scale:
-  - 0.05
-  - 0.32
-  global_crops_size: 518
-  local_crops_size: 98
-evaluation:
-  eval_period_iterations: 2500
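Note on the deleted file's `compute_precision` block: it follows the DINOv2-style config schema, where each entry maps onto PyTorch FSDP's sharding and mixed-precision settings. A minimal sketch of that mapping, assuming the standard `torch.distributed.fsdp` API (the `wrap_module` helper and the shape of `cfg` are illustrative, not the project's actual code):

```python
import torch
from torch.distributed.fsdp import (
    FullyShardedDataParallel as FSDP,
    MixedPrecision,
    ShardingStrategy,
)

# dtype names as spelled in the YAML -> torch dtypes
DTYPE = {"fp16": torch.float16, "bf16": torch.bfloat16, "fp32": torch.float32}

def wrap_module(module: torch.nn.Module, cfg: dict) -> FSDP:
    """Wrap one submodule (e.g. the student backbone) as described by one
    compute_precision entry, e.g.:
    {"sharding_strategy": "SHARD_GRAD_OP",
     "mixed_precision": {"param_dtype": "bf16",
                         "reduce_dtype": "bf16",
                         "buffer_dtype": "fp32"}}
    Requires an initialized process group to actually run."""
    mp = cfg["mixed_precision"]
    return FSDP(
        module,
        sharding_strategy=ShardingStrategy[cfg["sharding_strategy"]],
        mixed_precision=MixedPrecision(
            param_dtype=DTYPE[mp["param_dtype"]],
            reduce_dtype=DTYPE[mp["reduce_dtype"]],
            buffer_dtype=DTYPE[mp["buffer_dtype"]],
        ),
    )
```

Reading the config through this lens: the frozen fp16 teacher branch trades precision for speed, while the student (the branch that receives gradients) runs bf16 with fp32 gradient reduction on its heads, which is the numerically safer choice for the parts being trained.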
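Similarly, `optim.lr` is deliberately left at 0 and filled in at runtime from `base_lr` via `scaling_rule: sqrt_wrt_1024`, i.e. square-root learning-rate scaling with respect to a reference global batch size of 1024, as in DINOv2-style trainers. A sketch of the arithmetic (the helper name and the 32-GPU figure are illustrative):

```python
import math

def apply_sqrt_wrt_1024(base_lr: float, batch_size_per_gpu: int, num_gpus: int) -> float:
    """lr = base_lr * sqrt(global_batch_size / 1024)."""
    global_batch = batch_size_per_gpu * num_gpus
    return base_lr * math.sqrt(global_batch / 1024.0)

# With this config's batch_size_per_gpu: 16 on, say, 32 GPUs:
# global batch = 512, lr = 1e-4 * sqrt(0.5) ~= 7.07e-5
print(apply_sqrt_wrt_1024(1e-4, 16, 32))
```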
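Finally, the crop sizes are consistent with the 14-pixel patch size: both 518 and 98 divide evenly by 14, giving 37 and 7 patches per side for global and local crops respectively. A quick check:

```python
patch = 14
for name, size in {"global_crops_size": 518, "local_crops_size": 98}.items():
    assert size % patch == 0, name
    print(name, size, "->", size // patch, "patches per side")
# global_crops_size 518 -> 37 patches per side
# local_crops_size 98 -> 7 patches per side
```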