PLAN-Lab
/

CALICO

Image Segmentation

text-generation

computer-vision

semantic-segmentation

co-segmentation

part-segmentation

multi-image-reasoning

vision-language

Model card | Files and versions

CALICO / config.json

kanguyen-vn's picture

Upload folder using huggingface_hub

8295c96 verified 3 days ago

history blame contribute delete

1.62 kB

	{
	"_name_or_path": "PLAN-Lab/CALICO",
	"architectures": [
	"CALICOForCausalLM"
	],
	"attention_bias": false,
	"attention_dropout": 0.0,
	"auto_initialize_adaptors": true,
	"auto_initialize_qformer": true,
	"bbox_token_idx": 32002,
	"bos_token_id": 1,
	"dino_hidden_size": 768,
	"dino_model_name": "dinov2_vitb14_reg",
	"eos_token_id": 2,
	"global_image_size": 224,
	"grounding_encoder": "sam_vit_h",
	"grounding_image_size": 1024,
	"hidden_act": "silu",
	"hidden_size": 4096,
	"image_aspect": "square",
	"image_aspect_ratio": "square",
	"initializer_range": 0.02,
	"intermediate_size": 11008,
	"layer_type": "linear",
	"max_position_embeddings": 4096,
	"mlp_bias": false,
	"mm_projector_type": "linear",
	"model_type": "calico",
	"num_attention_heads": 32,
	"num_attn_heads": 8,
	"num_hidden_layers": 32,
	"num_key_value_heads": 32,
	"num_level_reg_features": 4,
	"num_query_tokens": 32,
	"out_dim": 256,
	"pad_token_id": 0,
	"pretraining_tp": 1,
	"q_former_model": "",
	"qformer_hidden_size": 768,
	"qformer_vision_encoder": "eva_clip_g",
	"qformer_vision_width": 1408,
	"rms_norm_eps": 1e-05,
	"rope_scaling": null,
	"rope_theta": 10000.0,
	"seg_image_tokens": [
	[
	32004,
	313,
	2382,
	29896
	],
	[
	32004,
	313,
	2382,
	29906
	]
	],
	"seg_token_idx": 32004,
	"tie_word_embeddings": false,
	"torch_dtype": "bfloat16",
	"train_mask_decoder": true,
	"transformers_version": "4.42.3",
	"update_layers": [
	11,
	22
	],
	"use_cache": false,
	"use_mm_proj": true,
	"vocab_size": 32007,
	"with_region": true
	}