iblfe
/

h2o3test

Model card Files Files and versions

Metrics Training metrics Community

h2o3test / config.yaml

iblfe's picture

Upload 12 files

e60d8d2 over 2 years ago

history blame contribute delete

3.03 kB

	model:
	names:
	- fusion_mlp
	- hf_text
	- numerical_mlp
	- timm_image
	numerical_mlp:
	hidden_size: 128
	activation: leaky_relu
	num_layers: 1
	drop_rate: 0.1
	normalization: layer_norm
	d_token: 8
	embedding_arch: null
	data_types:
	- numerical
	merge: concat
	hf_text:
	checkpoint_name: local://hf_text
	gradient_checkpointing: false
	pooling_mode: cls
	data_types:
	- text
	tokenizer_name: hf_auto
	max_text_len: 512
	insert_sep: true
	low_cpu_mem_usage: false
	text_segment_num: 2
	stochastic_chunk: false
	text_aug_detect_length: 10
	text_trivial_aug_maxscale: 0.0
	text_train_augment_types: null
	timm_image:
	checkpoint_name: swin_base_patch4_window7_224
	mix_choice: all_logits
	data_types:
	- image
	train_transforms:
	- resize_shorter_side
	- center_crop
	- trivial_augment
	val_transforms:
	- resize_shorter_side
	- center_crop
	image_norm: imagenet
	image_size: null
	max_img_num_per_col: 2
	fusion_mlp:
	weight: 0.1
	adapt_in_features: max
	hidden_sizes:
	- 128
	activation: leaky_relu
	drop_rate: 0.1
	normalization: layer_norm
	data_types: null
	data:
	image:
	missing_value_strategy: zero
	text:
	normalize_text: false
	categorical:
	minimum_cat_count: 100
	maximum_num_cat: 20
	convert_to_text: true
	numerical:
	convert_to_text: false
	scaler_with_mean: true
	scaler_with_std: true
	document:
	missing_value_strategy: zero
	label:
	numerical_label_preprocessing: standardscaler
	pos_label: null
	mixup:
	turn_on: false
	mixup_alpha: 0.8
	cutmix_alpha: 1.0
	cutmix_minmax: null
	prob: 1.0
	switch_prob: 0.5
	mode: batch
	turn_off_epoch: 5
	label_smoothing: 0.1
	templates:
	turn_on: false
	num_templates: 30
	template_length: 2048
	preset_templates:
	- super_glue
	- rte
	custom_templates: null
	optimization:
	optim_type: adamw
	learning_rate: 0.0001
	weight_decay: 0.001
	lr_choice: layerwise_decay
	lr_decay: 0.9
	lr_schedule: cosine_decay
	max_epochs: 10
	max_steps: -1
	warmup_steps: 0.1
	end_lr: 0
	lr_mult: 1
	patience: 10
	val_check_interval: 0.5
	check_val_every_n_epoch: 1
	skip_final_val: false
	gradient_clip_val: 1
	gradient_clip_algorithm: norm
	track_grad_norm: -1
	log_every_n_steps: 10
	top_k: 3
	top_k_average_method: greedy_soup
	efficient_finetune: null
	lora:
	module_filter: null
	filter:
	- query
	- value
	- ^q$
	- ^v$
	- ^k$
	- ^o$
	r: 8
	alpha: 8
	loss_function: auto
	focal_loss:
	alpha: null
	gamma: 2.0
	reduction: mean
	env:
	num_gpus: 1
	num_nodes: 1
	batch_size: 128
	per_gpu_batch_size: 8
	eval_batch_size_ratio: 4
	per_gpu_batch_size_evaluation: null
	precision: 16
	num_workers: 2
	num_workers_evaluation: 2
	fast_dev_run: false
	deterministic: false
	auto_select_gpus: true
	strategy: null
	deepspeed_allgather_size: 1000000000.0
	deepspeed_allreduce_size: 1000000000.0