# Replace with Hyper3-CLIP beta hier-beta scratch checkpoint
#
# b341bc3 verified 17 days ago
#
# 2.7 kB

	project:
	name: hyper3-clip
	experiment: hyper3_vitb_clip_uncha_hier_beta_argent_mp5_paper_scratch_8x500k_s31
	seed: 31
	output_dir: /sc/projects/sci-aisc/matin.mahmood/runs/hyper3_vitb_hierbeta_argent_mp5_paper_scratch_500k_v1/hyper3_vitb_clip_uncha_hier_beta_argent_mp5_paper_scratch_8x500k_s31
	model:
	objective: uncha
	vision_backbone: vit_base_patch16_224
	vision_pretrained: false
	vision_global_pool: token
	vision_use_sincos2d_pos: true
	vision_timm_norm_layer: layer_norm
	text_model_name: openai/clip-vit-base-patch32
	text_pretrained: false
	text_pooling: auto
	embed_dim: 512
	curv_init: 1.0
	learn_curv: true
	entail_weight: 0.2
	inter_aperture_scale: 0.7
	intra_aperture_scale: 1.2
	uncha_piecewise_factor: 0.1
	uncha_calibration_alpha: 10.0
	uncha_stop_grad_calibration: true
	uncha_entailment_geometry: lorentz
	uncha_aggregate_weight: 0.0
	uncha_entailment_loss: hier_beta_argent
	uncha_argent_beta: 1.0
	uncha_argent_norm_weight: 0.1
	uncha_argent_aux_weight: 0.5
	uncha_argent_aggregation: uncha
	uncha_part_weight_power: 0.0
	uncha_contrastive_loss: ce
	uncha_sigmoid_bias_init: -10.0
	uncha_sigmoid_negative_weight: 1.0
	uncha_part_quality_mode: none
	uncha_part_quality_topk: 5
	uncha_part_quality_temperature: 4.0
	uncha_entailment_warmup_steps: 0
	uncha_global_local_mode: repeat
	beta_clip_global_weight: 0.0
	beta_clip_weight: 0.0
	beta_clip_beta: 0.5
	beta_clip_similarity: dot
	beta_clip_num_heads: 8
	beta_clip_mlp_ratio: 4.0
	beta_clip_drop_cls_token: true
	fuse_beta_query_encoder_forwards: true
	group_beta_query_pooling: true
	beta_clip_variant: ce
	phyclip_product_metric: l1
	training:
	total_steps: 500000
	global_batch_size: 768
	grad_accum_steps: 1
	num_workers: 8
	lr: 0.0005
	weight_decay: 0.2
	betas:
	- 0.9
	- 0.98
	warmup_steps: 4000
	log_interval: 20
	ckpt_interval: 10000
	amp: true
	max_grad_norm: 1.0
	resume: true
	resume_from: null
	resume_from_env: RESUME_FROM_CHECKPOINT
	find_unused_parameters: true
	optimizer:
	no_decay_params:
	- logit_scale
	- global_logit_scale
	- local_logit_scale
	- global_local_logit_scale
	- visual_alpha
	- textual_alpha
	- log_curv
	- global_logit_bias
	- local_logit_bias
	- global_local_logit_bias
	data:
	type: processed_grit
	part_sampling: all
	max_parts: 5
	train_transform: tight_crop_color_jitter_gray
	tarfiles:
	- /sc/projects/sci-aisc/matin.mahmood/datasets/hycoclip/train/GRIT/processed/*.tar
	shuffle_buffer: 4000
	image_size: 224
	max_text_length: 77
	num_workers: 8
	image_normalization: imagenet
	beta_clip:
	enabled: true
	max_sentences: 5
	max_phrases: 30
	max_queries_per_image: 6
	use_part_texts: true