{
  "architectures": [
    "LlavaNextForConditionalGeneration"
  ],
  "dtype": "bfloat16",
  "ignore_index": -100,
  "image_grid_pinpoints": [
    [
      336,
      672
    ],
    [
      672,
      336
    ],
    [
      672,
      672
    ],
    [
      1008,
      336
    ],
    [
      336,
      1008
    ]
  ],
  "image_seq_length": 32,
  "image_token_index": 32000,
  "model_type": "llava_next",
  "multimodal_projector_bias": true,
  "projector_hidden_act": "gelu",
  "text_config": {
    "_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
    "architectures": [
      "MistralForCausalLM"
    ],
    "attention_dropout": 0.0,
    "dtype": "bfloat16",
    "head_dim": null,
    "hidden_act": "silu",
    "hidden_size": 16,
    "initializer_range": 0.02,
    "intermediate_size": 14336,
    "layer_types": null,
    "max_position_embeddings": 32768,
    "model_type": "mistral",
    "num_attention_heads": 4,
    "num_hidden_layers": 2,
    "num_key_value_heads": 2,
    "rms_norm_eps": 1e-05,
    "rope_theta": 1000000.0,
    "sliding_window": null,
    "use_cache": true,
    "vocab_size": 32064
  },
  "tie_word_embeddings": false,
  "transformers_version": "4.57.0.dev0",
  "use_image_newline_parameter": true,
  "vision_config": {
    "attention_dropout": 0.0,
    "embed_dim": 64,
    "hidden_act": "quick_gelu",
    "hidden_size": 16,
    "image_size": 336,
    "initializer_factor": 1.0,
    "initializer_range": 0.02,
    "intermediate_size": 4096,
    "layer_norm_eps": 1e-05,
    "model_type": "clip_vision_model",
    "num_attention_heads": 4,
    "num_channels": 3,
    "num_hidden_layers": 2,
    "num_key_value_heads": 2,
    "patch_size": 14,
    "projection_dim": 768,
    "vocab_size": 32000
  },
  "vision_feature_layer": -2,
  "vision_feature_select_strategy": "default",
  "vocab_size": 32064
}