PheniX-Lab
/

FoMo4Wheat

Model card Files and versions

xet

Community

PheniX-Lab committed on Aug 19, 2025

Commit

ef3d793

verified ·

1 Parent(s): 96e488d

Delete FoMo4Wheat/configs/distill_default_config_large.yaml

Browse files

Files changed (1) hide show

FoMo4Wheat/configs/distill_default_config_large.yaml +0 -132

FoMo4Wheat/configs/distill_default_config_large.yaml DELETED Viewed

@@ -1,132 +0,0 @@
-MODEL:
-  WEIGHTS: ''
-compute_precision:
-  grad_scaler: true
-  teacher:
-    backbone:
-      sharding_strategy: SHARD_GRAD_OP
-      mixed_precision:
-        param_dtype: fp16
-        reduce_dtype: fp16
-        buffer_dtype: fp32
-    dino_head:
-      sharding_strategy: SHARD_GRAD_OP
-      mixed_precision:
-        param_dtype: fp16
-        reduce_dtype: fp16
-        buffer_dtype: fp32
-    ibot_head:
-      sharding_strategy: SHARD_GRAD_OP
-      mixed_precision:
-        param_dtype: fp16
-        reduce_dtype: fp16
-        buffer_dtype: fp32
-  student:
-    backbone:
-      sharding_strategy: SHARD_GRAD_OP
-      mixed_precision:
-        param_dtype: bf16
-        reduce_dtype: bf16
-        buffer_dtype: fp32
-    dino_head:
-      sharding_strategy: SHARD_GRAD_OP
-      mixed_precision:
-        param_dtype: bf16
-        reduce_dtype: fp32
-        buffer_dtype: fp32
-    ibot_head:
-      sharding_strategy: SHARD_GRAD_OP
-      mixed_precision:
-        param_dtype: bf16
-        reduce_dtype: fp32
-        buffer_dtype: fp32
-dino:
-  loss_weight: 1.0
-  head_n_prototypes: 131072
-  head_bottleneck_dim: 384
-  head_nlayers: 3
-  head_hidden_dim: 2048
-  koleo_loss_weight: -1
-ibot:
-  loss_weight: 1.0
-  mask_sample_probability: 0.5
-  mask_ratio_min_max:
-  - 0.1
-  - 0.5
-  separate_head: True
-  head_n_prototypes: 131072
-  head_bottleneck_dim: 256
-  head_nlayers: 3
-  head_hidden_dim: 2048
-train:
-  batch_size_per_gpu: 16
-  dataset_path: ImageNet:split=TRAIN
-  output_dir: .
-  saveckp_freq: 20
-  seed: 0
-  num_workers: 16
-  OFFICIAL_EPOCH_LENGTH: 1250
-  cache_dataset: true
-  centering: sinkhorn_knopp
-student:
-  arch: vit_large
-  patch_size: 14
-  drop_path_rate: 0.0
-  layerscale: 1.0e-05
-  drop_path_uniform: true
-  pretrained_weights: ''
-  ffn_layer: "mlp"
-  block_chunks: 4
-  qkv_bias: true
-  proj_bias: true
-  ffn_bias: true
-  num_register_tokens: 4
-  interpolate_offset: 0.1
-  interpolate_antialias : false
-teacher:
-  arch: vit_giant2
-  patch_size: 14
-  drop_path_rate: 0.4
-  layerscale: 1.0e-05
-  drop_path_uniform: true
-  pretrained_weights: '/hpc/home/2023222003/Phenix/wheat/foundation_model/distill_pretrain/518_vitg/teacher_checkpoint.pth'
-  ffn_layer: "swiglufused"
-  block_chunks: 4
-  qkv_bias: true
-  proj_bias: true
-  ffn_bias: true
-  momentum_teacher: 0.994
-  final_momentum_teacher: 1
-  warmup_teacher_temp: 0.04
-  teacher_temp: 0.07
-  warmup_teacher_temp_epochs: 30
-  num_register_tokens: 4
-  interpolate_offset: 0.1
-  interpolate_antialias : false
-optim:
-  epochs: 100
-  weight_decay: 0.04
-  weight_decay_end: 0.2
-  base_lr: 1e-04 # learning rate for a batch size of 1024
-  lr: 0.  # will be set after applying scaling rule
-  warmup_epochs: 10
-  min_lr: 1.0e-06
-  clip_grad: 3.0
-  freeze_last_layer_epochs: 0
-  scaling_rule: sqrt_wrt_1024
-  patch_embed_lr_mult: 0.2
-  layerwise_decay: 1
-  adamw_beta1: 0.9
-  adamw_beta2: 0.999
-crops:
-  global_crops_scale:
-  - 0.32
-  - 1.0
-  local_crops_number: 8
-  local_crops_scale:
-  - 0.05
-  - 0.32
-  global_crops_size: 518
-  local_crops_size: 98
-evaluation:
-  eval_period_iterations: 2500