clipseg_drywall / config.json
smcs's picture
Upload 2 files
c7d943d verified
{
"architectures": [
"CLIPSegForImageSegmentation"
],
"conditional_layer": 0,
"decoder_attention_dropout": 0.0,
"decoder_hidden_act": "quick_gelu",
"decoder_intermediate_size": 2048,
"decoder_num_attention_heads": 4,
"dtype": "float32",
"extract_layers": [
3,
6,
9
],
"initializer_factor": 1.0,
"logit_scale_init_value": 2.6592,
"model_type": "clipseg",
"projection_dim": 512,
"reduce_dim": 64,
"text_config": {
"attention_dropout": 0.0,
"bos_token_id": 0,
"dropout": 0.0,
"dtype": "float32",
"eos_token_id": 2,
"hidden_act": "quick_gelu",
"hidden_size": 512,
"initializer_factor": 1.0,
"initializer_range": 0.02,
"intermediate_size": 2048,
"layer_norm_eps": 1e-05,
"max_position_embeddings": 77,
"model_type": "clipseg_text_model",
"num_attention_heads": 8,
"num_hidden_layers": 12,
"vocab_size": 49408
},
"transformers_version": "4.57.3",
"use_complex_transposed_convolution": true,
"vision_config": {
"attention_dropout": 0.0,
"dropout": 0.0,
"dtype": "float32",
"hidden_act": "quick_gelu",
"hidden_size": 768,
"image_size": 224,
"initializer_factor": 1.0,
"initializer_range": 0.02,
"intermediate_size": 3072,
"layer_norm_eps": 1e-05,
"model_type": "clipseg_vision_model",
"num_attention_heads": 12,
"num_channels": 3,
"num_hidden_layers": 12,
"patch_size": 16
}
}