---
# Experiment configuration for the `hyper3-clip` project, run
# `hyper3_vitb_clip_uncha_hier_beta_argent_mp5_paper_scratch_8x500k_s31`.
#
# NOTE(review): this file arrived with all line breaks and indentation
# collapsed, which is not parseable YAML. The block structure below was
# reconstructed from key order alone. Every key and value is unchanged, but
# the nesting must be confirmed against the consumer's schema. In particular:
#   - `seed` / `output_dir` are placed under `project`;
#   - `optimizer` is placed at top level (it may belong under `training`);
#   - `beta_clip` is placed at top level (it may belong under `data`).

# Run identity and output location.
project:
  name: hyper3-clip
  experiment: hyper3_vitb_clip_uncha_hier_beta_argent_mp5_paper_scratch_8x500k_s31
  seed: 31
  output_dir: /sc/projects/sci-aisc/matin.mahmood/runs/hyper3_vitb_hierbeta_argent_mp5_paper_scratch_500k_v1/hyper3_vitb_clip_uncha_hier_beta_argent_mp5_paper_scratch_8x500k_s31

# Model and loss settings.
model:
  objective: uncha

  # Vision encoder.
  vision_backbone: vit_base_patch16_224
  vision_pretrained: false
  vision_global_pool: token
  vision_use_sincos2d_pos: true
  vision_timm_norm_layer: layer_norm

  # Text encoder.
  text_model_name: openai/clip-vit-base-patch32
  text_pretrained: false
  text_pooling: auto

  # Embedding and curvature.
  embed_dim: 512
  curv_init: 1.0
  learn_curv: true

  # Entailment settings.
  entail_weight: 0.2
  inter_aperture_scale: 0.7
  intra_aperture_scale: 1.2

  # `uncha_*` options.
  uncha_piecewise_factor: 0.1
  uncha_calibration_alpha: 10.0
  uncha_stop_grad_calibration: true
  uncha_entailment_geometry: lorentz
  uncha_aggregate_weight: 0.0
  uncha_entailment_loss: hier_beta_argent
  uncha_argent_beta: 1.0
  uncha_argent_norm_weight: 0.1
  uncha_argent_aux_weight: 0.5
  uncha_argent_aggregation: uncha
  uncha_part_weight_power: 0.0
  uncha_contrastive_loss: ce
  uncha_sigmoid_bias_init: -10.0
  uncha_sigmoid_negative_weight: 1.0
  # Plain `none` is loaded as the string "none", not as YAML null.
  uncha_part_quality_mode: none
  uncha_part_quality_topk: 5
  uncha_part_quality_temperature: 4.0
  uncha_entailment_warmup_steps: 0
  uncha_global_local_mode: repeat

  # `beta_clip_*` options (both weights are 0.0 in this run).
  beta_clip_global_weight: 0.0
  beta_clip_weight: 0.0
  beta_clip_beta: 0.5
  beta_clip_similarity: dot
  beta_clip_num_heads: 8
  beta_clip_mlp_ratio: 4.0
  beta_clip_drop_cls_token: true
  fuse_beta_query_encoder_forwards: true
  group_beta_query_pooling: true
  beta_clip_variant: ce
  phyclip_product_metric: l1

# Training schedule, optimisation hyperparameters, and checkpoint/resume.
training:
  total_steps: 500000
  global_batch_size: 768
  grad_accum_steps: 1
  num_workers: 8
  lr: 0.0005
  weight_decay: 0.2
  betas:
    - 0.9
    - 0.98
  warmup_steps: 4000
  log_interval: 20
  ckpt_interval: 10000
  amp: true
  max_grad_norm: 1.0
  resume: true
  resume_from: null
  # Presumably the name of an environment variable holding a checkpoint
  # path - verify against the training script.
  resume_from_env: RESUME_FROM_CHECKPOINT
  find_unused_parameters: true

# NOTE(review): may belong under `training` - confirm against the loader.
optimizer:
  # Presumably parameter names excluded from weight decay - confirm.
  no_decay_params:
    - logit_scale
    - global_logit_scale
    - local_logit_scale
    - global_local_logit_scale
    - visual_alpha
    - textual_alpha
    - log_curv
    - global_logit_bias
    - local_logit_bias
    - global_local_logit_bias

# Dataset and input-pipeline settings.
data:
  type: processed_grit
  part_sampling: all
  max_parts: 5
  train_transform: tight_crop_color_jitter_gray
  tarfiles:
    # Quoted defensively because the value contains a `*` glob.
    - '/sc/projects/sci-aisc/matin.mahmood/datasets/hycoclip/train/GRIT/processed/*.tar'
  shuffle_buffer: 4000
  image_size: 224
  max_text_length: 77
  num_workers: 8
  image_normalization: imagenet

# NOTE(review): may belong under `data` - confirm against the loader.
beta_clip:
  enabled: true
  max_sentences: 5
  max_phrases: 30
  max_queries_per_image: 6
  use_part_texts: true