{
"architectures": [
"PS3Model"
],
"model_type": "ps3",
"vision_config": {
"architectures": [
"PS3VisionModel"
],
"model_type": "ps3_vision_model",
"model_name": "vit_large_patch16_224",
"hidden_size": 1024,
"pool": "map",
"ps3": true,
"ps3_scales": [
384,
768,
1536,
3840
],
"select_based_on_layer": [
0,
7,
14,
23
],
"min_select_num": 1,
"max_select_num": 2560,
"separate_pos_emb": true,
"highres_selection_feature": true,
"img_size": 4096,
"drop": 0.0,
"class_token": false,
"final_norm": false,
"radio": true,
"radio_adapter_mlp_version": "v2",
"radio_adapter_mlp_input_dim": 1024,
"radio_adapter_mlp_hidden_dim": 1520,
"radio_adapter_mlp_output_dim": 1152,
"radio_adapter_mlp_num_inner": 3
},
"text_config": {
"context_length": 64,
"vocab_size": 32000,
"hf_tokenizer_name": "timm/ViT-B-16-SigLIP",
"tokenizer_kwargs": {
"clean": "canonicalize"
},
"width": 1152,
"heads": 16,
"layers": 27,
"mlp_ratio": 3.7362,
"no_causal_mask": true,
"proj_bias": true,
"pool_type": "last",
"norm_kwargs": {
"eps": 1e-06
},
"architectures": [
"PS3TextModel"
],
"model_type": "ps3_text_model",
"output_dim": 1152,
"prompt_proj_dim": 1024
}
}