# hyper3-clip-beta / config.yaml
# mnm-matin's picture
# Replace with Hyper3-CLIP beta hier-beta scratch checkpoint
# b341bc3 verified
# Raw
# History Blame Contribute Delete
# 2.7 kB
# Run identity: project/experiment names, RNG seed, and output location.
# NOTE(review): this copy had lost its leading indentation; the keys below are
# nested under `project` on the assumption that they belong to this section —
# confirm against the consumer's schema.
project:
  name: hyper3-clip
  experiment: hyper3_vitb_clip_uncha_hier_beta_argent_mp5_paper_scratch_8x500k_s31
  seed: 31
  # NOTE(review): absolute, user-specific path; override when running elsewhere.
  output_dir: /sc/projects/sci-aisc/matin.mahmood/runs/hyper3_vitb_hierbeta_argent_mp5_paper_scratch_500k_v1/hyper3_vitb_clip_uncha_hier_beta_argent_mp5_paper_scratch_8x500k_s31
# Model and loss settings.
# NOTE(review): this copy had lost its leading indentation; every key up to the
# next section header is nested under `model` here — confirm against the
# consumer's schema. Grouping comments below follow key prefixes only.
model:
  objective: uncha

  # Vision encoder (vision_* keys). vision_pretrained is false.
  vision_backbone: vit_base_patch16_224
  vision_pretrained: false
  vision_global_pool: token
  vision_use_sincos2d_pos: true
  vision_timm_norm_layer: layer_norm

  # Text encoder (text_* keys). text_pretrained is false.
  text_model_name: openai/clip-vit-base-patch32
  text_pretrained: false
  text_pooling: auto

  # Embedding size, curvature, and entailment/aperture settings.
  # NOTE(review): presumably hyperbolic-space parameters, given
  # uncha_entailment_geometry is `lorentz` — confirm.
  embed_dim: 512
  curv_init: 1.0
  learn_curv: true
  entail_weight: 0.2
  inter_aperture_scale: 0.7
  intra_aperture_scale: 1.2

  # Options for the `uncha` objective (uncha_* keys).
  uncha_piecewise_factor: 0.1
  uncha_calibration_alpha: 10.0
  uncha_stop_grad_calibration: true
  uncha_entailment_geometry: lorentz
  uncha_aggregate_weight: 0.0
  uncha_entailment_loss: hier_beta_argent
  uncha_argent_beta: 1.0
  uncha_argent_norm_weight: 0.1
  uncha_argent_aux_weight: 0.5
  uncha_argent_aggregation: uncha
  uncha_part_weight_power: 0.0
  uncha_contrastive_loss: ce
  uncha_sigmoid_bias_init: -10.0
  uncha_sigmoid_negative_weight: 1.0
  # Quoted so it reads unambiguously as the string "none", not a null value.
  uncha_part_quality_mode: 'none'
  uncha_part_quality_topk: 5
  uncha_part_quality_temperature: 4.0
  uncha_entailment_warmup_steps: 0
  uncha_global_local_mode: repeat

  # beta_clip_* options.
  # NOTE(review): both beta_clip weights are 0.0 here while the separate
  # `beta_clip` section of this file sets `enabled: true` — confirm that this
  # combination is intended.
  beta_clip_global_weight: 0.0
  beta_clip_weight: 0.0
  beta_clip_beta: 0.5
  beta_clip_similarity: dot
  beta_clip_num_heads: 8
  beta_clip_mlp_ratio: 4.0
  beta_clip_drop_cls_token: true
  fuse_beta_query_encoder_forwards: true
  group_beta_query_pooling: true
  beta_clip_variant: ce

  phyclip_product_metric: l1
# Training-loop settings: schedule length, batch size, optimizer
# hyperparameters, logging/checkpoint cadence, and resume options.
# NOTE(review): this copy had lost its leading indentation; the keys below are
# nested under `training` on the assumption that they belong to this section —
# confirm against the consumer's schema.
training:
  total_steps: 500000
  global_batch_size: 768
  grad_accum_steps: 1
  # NOTE(review): `num_workers: 8` also appears in the `data` section of this
  # file; confirm which one the data loader actually reads.
  num_workers: 8
  lr: 0.0005
  weight_decay: 0.2
  betas:
    - 0.9
    - 0.98
  warmup_steps: 4000
  log_interval: 20
  ckpt_interval: 10000
  amp: true
  max_grad_norm: 1.0
  # resume is on but resume_from is null.
  # NOTE(review): presumably the checkpoint path is then taken from the
  # environment variable named by resume_from_env — confirm.
  resume: true
  resume_from: null
  resume_from_env: RESUME_FROM_CHECKPOINT
  find_unused_parameters: true
# Parameter names listed under no_decay_params.
# NOTE(review): presumably parameters matching these names are excluded from
# weight decay — confirm against the optimizer builder.
# NOTE(review): this copy had lost its leading indentation, so it is unclear
# whether `optimizer` is a top-level section or a child of `training`. It is
# kept at top level here; confirm against the consumer's schema.
optimizer:
  no_decay_params:
    - logit_scale
    - global_logit_scale
    - local_logit_scale
    - global_local_logit_scale
    - visual_alpha
    - textual_alpha
    - log_curv
    - global_logit_bias
    - local_logit_bias
    - global_local_logit_bias
# Dataset and input-pipeline settings.
# NOTE(review): this copy had lost its leading indentation; the keys below are
# nested under `data` on the assumption that they belong to this section —
# confirm against the consumer's schema.
data:
  type: processed_grit
  part_sampling: all
  max_parts: 5
  train_transform: tight_crop_color_jitter_gray
  # NOTE(review): absolute, user-specific path containing a `*` wildcard;
  # presumably expanded as a glob by the loader — confirm, and override when
  # running elsewhere.
  tarfiles:
    - /sc/projects/sci-aisc/matin.mahmood/datasets/hycoclip/train/GRIT/processed/*.tar
  shuffle_buffer: 4000
  image_size: 224
  max_text_length: 77
  # NOTE(review): `num_workers: 8` also appears in the `training` section of
  # this file; confirm which one the data loader actually reads.
  num_workers: 8
  image_normalization: imagenet
# beta_clip settings: an enable flag plus caps on sentences, phrases, and
# queries per image.
# NOTE(review): this copy had lost its leading indentation, so it is unclear
# whether `beta_clip` is a top-level section or a child of `data`. It is kept
# at top level here; confirm against the consumer's schema.
beta_clip:
  enabled: true
  max_sentences: 5
  max_phrases: 30
  max_queries_per_image: 6
  use_part_texts: true