{
  "_name_or_path": "RAS-13B-General",
  "architectures": [
    "RASLlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "exclusive_mask_decoding": true,
  "freeze_mask_classifier": false,
  "freeze_mask_projector": false,
  "freeze_mask_reprojector": false,
  "freeze_vision_projector": false,
  "freeze_vision_tower": true,
  "hidden_act": "silu",
  "hidden_size": 5120,
  "image_aspect_ratio": "pad",
  "initializer_range": 0.02,
  "intermediate_size": 13824,
  "mask_decoding_binary_threshold": 0.5,
  "mask_loss_binary": false,
  "mask_loss_binary_pos_weight": 5.0,
  "mask_loss_direct_binary": true,
  "mask_loss_set_pred": true,
  "mask_loss_type": "mse+infonce",
  "mask_projector_input_features": [
    "clip_-2",
    "siglip",
    "open_clip",
    "dinov2",
    "sincos_pe"
  ],
  "mask_projector_type": "mlp2x_gelu",
  "mask_reprojector_type": "mlp2x_gelu",
  "max_length": 4096,
  "max_position_embeddings": 4096,
  "mm_use_im_patch_token": false,
  "mm_use_im_start_end": false,
  "model_type": "ras_llama",
  "num_attention_heads": 40,
  "num_hidden_layers": 40,
  "num_key_value_heads": 40,
  "pad_token_id": 0,
  "pretrain_projectors": false,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.37.2",
  "use_cache": true,
  "version": "v1",
  "vision_backbone_resolutions": [
    336,
    378,
    1024,
    518,
    448
  ],
  "vision_backbones": [
    "openai/clip-vit-large-patch14-336",
    "google/siglip-so400m-patch14-384",
    "convnext_xxlarge",
    "facebook/dinov2-large",
    "sincos_pe"
  ],
  "vision_hidden_size": [
    1024,
    1152,
    3072,
    1024,
    1024
  ],
  "vision_output_features": [
    "clip_-2",
    "siglip",
    "open_clip",
    "dinov2",
    "sincos_pe"
  ],
  "vision_pe_hidden_size": 1024,
  "vision_pe_patches_per_side": 32,
  "vision_pe_scale": 1.0,
  "vision_processor": "openai/clip-vit-large-patch14-336",
  "vision_projector_input_features": [
    "clip_-2"
  ],
  "vision_projector_type": "mlp2x_gelu",
  "vocab_size": 32004
}
|
|