{"model_type": "clipseg", "text_config": {"vocab_size": 100, "hidden_size": 1, "intermediate_size": 1, "num_hidden_layers": 1, "num_attention_heads": 1, "max_position_embeddings": 77, "hidden_act": "quick_gelu", "layer_norm_eps": 1e-05, "attention_dropout": 0.0, "initializer_range": 0.02, "initializer_factor": 1.0, "pad_token_id": 1, "bos_token_id": 49406, "eos_token_id": 49407}, "vision_config": {"hidden_size": 1, "intermediate_size": 1, "num_hidden_layers": 1, "num_attention_heads": 1, "num_channels": 3, "image_size": 224, "patch_size": 32, "hidden_act": "quick_gelu", "layer_norm_eps": 1e-05, "attention_dropout": 0.0, "initializer_range": 0.02, "initializer_factor": 1.0}, "projection_dim": 1, "logit_scale_init_value": 2.6592, "extract_layers": [0], "reduce_dim": 1, "decoder_num_attention_heads": 1, "decoder_attention_dropout": 0.0, "decoder_hidden_act": "quick_gelu", "decoder_intermediate_size": 1, "conditional_layer": 0, "use_complex_transposed_convolution": false}