| project: |
| name: hyper3-clip |
| experiment: hyper3_vitb_clip_uncha_hier_beta_argent_mp5_paper_scratch_8x500k_s31 |
| seed: 31 |
| output_dir: /sc/projects/sci-aisc/matin.mahmood/runs/hyper3_vitb_hierbeta_argent_mp5_paper_scratch_500k_v1/hyper3_vitb_clip_uncha_hier_beta_argent_mp5_paper_scratch_8x500k_s31 |
| model: |
| objective: uncha |
| vision_backbone: vit_base_patch16_224 |
| vision_pretrained: false |
| vision_global_pool: token |
| vision_use_sincos2d_pos: true |
| vision_timm_norm_layer: layer_norm |
| text_model_name: openai/clip-vit-base-patch32 |
| text_pretrained: false |
| text_pooling: auto |
| embed_dim: 512 |
| curv_init: 1.0 |
| learn_curv: true |
| entail_weight: 0.2 |
| inter_aperture_scale: 0.7 |
| intra_aperture_scale: 1.2 |
| uncha_piecewise_factor: 0.1 |
| uncha_calibration_alpha: 10.0 |
| uncha_stop_grad_calibration: true |
| uncha_entailment_geometry: lorentz |
| uncha_aggregate_weight: 0.0 |
| uncha_entailment_loss: hier_beta_argent |
| uncha_argent_beta: 1.0 |
| uncha_argent_norm_weight: 0.1 |
| uncha_argent_aux_weight: 0.5 |
| uncha_argent_aggregation: uncha |
| uncha_part_weight_power: 0.0 |
| uncha_contrastive_loss: ce |
| uncha_sigmoid_bias_init: -10.0 |
| uncha_sigmoid_negative_weight: 1.0 |
| uncha_part_quality_mode: none |
| uncha_part_quality_topk: 5 |
| uncha_part_quality_temperature: 4.0 |
| uncha_entailment_warmup_steps: 0 |
| uncha_global_local_mode: repeat |
| beta_clip_global_weight: 0.0 |
| beta_clip_weight: 0.0 |
| beta_clip_beta: 0.5 |
| beta_clip_similarity: dot |
| beta_clip_num_heads: 8 |
| beta_clip_mlp_ratio: 4.0 |
| beta_clip_drop_cls_token: true |
| fuse_beta_query_encoder_forwards: true |
| group_beta_query_pooling: true |
| beta_clip_variant: ce |
| phyclip_product_metric: l1 |
| training: |
| total_steps: 500000 |
| global_batch_size: 768 |
| grad_accum_steps: 1 |
| num_workers: 8 |
| lr: 0.0005 |
| weight_decay: 0.2 |
| betas: |
| - 0.9 |
| - 0.98 |
| warmup_steps: 4000 |
| log_interval: 20 |
| ckpt_interval: 10000 |
| amp: true |
| max_grad_norm: 1.0 |
| resume: true |
| resume_from: null |
| resume_from_env: RESUME_FROM_CHECKPOINT |
| find_unused_parameters: true |
| optimizer: |
| no_decay_params: |
| - logit_scale |
| - global_logit_scale |
| - local_logit_scale |
| - global_local_logit_scale |
| - visual_alpha |
| - textual_alpha |
| - log_curv |
| - global_logit_bias |
| - local_logit_bias |
| - global_local_logit_bias |
| data: |
| type: processed_grit |
| part_sampling: all |
| max_parts: 5 |
| train_transform: tight_crop_color_jitter_gray |
| tarfiles: |
| - /sc/projects/sci-aisc/matin.mahmood/datasets/hycoclip/train/GRIT/processed/*.tar |
| shuffle_buffer: 4000 |
| image_size: 224 |
| max_text_length: 77 |
| num_workers: 8 |
| image_normalization: imagenet |
| beta_clip: |
| enabled: true |
| max_sentences: 5 |
| max_phrases: 30 |
| max_queries_per_image: 6 |
| use_part_texts: true |
|
|