freeze: true
# 24*24 (resolution) + 2 (<img> and <\img>); must agree with
# model_config.max_vision_token_length and dataset_config.image_size
max_vision_token_length: 578
params:
  embed_dim: 1024  # debug
  ckpt_path: vqgan.ckpt
  codebook_size: 512
  num_codebook: 2
  ddconfig:
    # only_auto_encoder: true
    encoder_name: openai-clip-vit-large-patch14-336
    select_layer: [2, 10, 18, 22]
    double_z: false
    z_channels: 1024
    resolution: 336
    in_channels: 3
    out_ch: 3
    ch: 128
    ch_mult: [1, 1, 2, 4, 8]  # num_down = len(ch_mult)-1
    num_res_blocks: 2
    attn_resolutions: [24]
    dropout: 0.0
    initial_resolution: 24
    num_attn_head: 8