{
  "architectures": [
    "LlavaForConditionalGeneration"
  ],
  "dtype": "float32",
  "ignore_index": -100,
  "image_seq_length": 576,
  "image_token_index": 32000,
  "model_type": "llava",
  "multimodal_projector_bias": true,
  "pad_token_id": 32001,
  "projector_hidden_act": "gelu",
  "text_config": {
    "_name_or_path": "lmsys/vicuna-7b-v1.5",
    "architectures": [
      "LlamaForCausalLM"
    ],
    "attention_bias": false,
    "attention_dropout": 0.0,
    "dtype": "float16",
    "head_dim": 128,
    "hidden_act": "silu",
    "hidden_size": 16,
    "initializer_range": 0.02,
    "intermediate_size": 11008,
    "layer_types": null,
    "max_position_embeddings": 4096,
    "mlp_bias": false,
    "model_type": "llama",
    "num_attention_heads": 4,
    "num_hidden_layers": 2,
    "num_key_value_heads": 2,
    "pretraining_tp": 1,
    "rms_norm_eps": 1e-05,
    "rope_parameters": {
      "rope_theta": 10000.0,
      "rope_type": "default"
    },
    "use_cache": true,
    "vocab_size": 32064
  },
  "tie_word_embeddings": false,
  "transformers_version": "5.0.0.dev0",
  "vision_config": {
    "attention_dropout": 0.0,
    "embed_dim": 64,
    "hidden_act": "quick_gelu",
    "hidden_size": 16,
    "image_size": 336,
    "initializer_factor": 1.0,
    "initializer_range": 0.02,
    "intermediate_size": 4096,
    "layer_norm_eps": 1e-05,
    "model_type": "clip_vision_model",
    "num_attention_heads": 4,
    "num_channels": 3,
    "num_hidden_layers": 2,
    "num_key_value_heads": 2,
    "patch_size": 14,
    "projection_dim": 768,
    "vocab_size": 32000
  },
  "vision_feature_layer": -2,
  "vision_feature_select_strategy": "default",
  "vocab_size": 32064
}