File size: 1,800 Bytes
76ea727
 
 
 
bb007d0
46a5905
76ea727
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bd7f219
76ea727
 
95623d5
76ea727
 
46a5905
 
 
 
95623d5
46a5905
95623d5
20e2c73
95623d5
46a5905
90c7543
46a5905
 
76ea727
 
 
46a5905
 
 
95623d5
46a5905
76ea727
 
14f1c82
46a5905
76ea727
95623d5
8451fad
95623d5
20e2c73
76ea727
95623d5
 
46a5905
95623d5
76ea727
 
95623d5
76ea727
46a5905
76ea727
46a5905
 
76ea727
 
46a5905
 
76ea727
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
{
  "architectures": [
    "LlavaNextForConditionalGeneration"
  ],
  "dtype": "bfloat16",
  "ignore_index": -100,
  "image_grid_pinpoints": [
    [
      336,
      672
    ],
    [
      672,
      336
    ],
    [
      672,
      672
    ],
    [
      1008,
      336
    ],
    [
      336,
      1008
    ]
  ],
  "image_seq_length": 576,
  "image_token_index": 32000,
  "model_type": "llava_next",
  "multimodal_projector_bias": true,
  "projector_hidden_act": "gelu",
  "text_config": {
    "_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
    "architectures": [
      "MistralForCausalLM"
    ],
    "attention_dropout": 0.0,
    "head_dim": null,
    "hidden_act": "silu",
    "hidden_size": 16,
    "initializer_range": 0.02,
    "intermediate_size": 14336,
    "layer_types": null,
    "max_position_embeddings": 32768,
    "model_type": "mistral",
    "num_attention_heads": 4,
    "num_hidden_layers": 2,
    "num_key_value_heads": 2,
    "rms_norm_eps": 1e-05,
    "rope_theta": 1000000.0,
    "sliding_window": null,
    "use_cache": true,
    "vocab_size": 32064
  },
  "tie_word_embeddings": false,
  "transformers_version": "4.57.3",
  "use_image_newline_parameter": true,
  "vision_config": {
    "attention_dropout": 0.0,
    "embed_dim": 64,
    "hidden_act": "quick_gelu",
    "hidden_size": 16,
    "image_size": 336,
    "initializer_factor": 1.0,
    "initializer_range": 0.02,
    "intermediate_size": 4096,
    "layer_norm_eps": 1e-05,
    "model_type": "clip_vision_model",
    "num_attention_heads": 4,
    "num_channels": 3,
    "num_hidden_layers": 2,
    "num_key_value_heads": 2,
    "patch_size": 14,
    "projection_dim": 768,
    "vocab_size": 32000
  },
  "vision_feature_layer": -2,
  "vision_feature_select_strategy": "default",
  "vocab_size": 32064
}