{
  "architectures": [
    "LlavaNextForConditionalGeneration"
  ],
  "dtype": "bfloat16",
  "ignore_index": -100,
  "image_grid_pinpoints": [
    [
      336,
      672
    ],
    [
      672,
      336
    ],
    [
      672,
      672
    ],
    [
      1008,
      336
    ],
    [
      336,
      1008
    ]
  ],
  "image_seq_length": 576,
  "image_token_index": 32000,
  "model_type": "llava_next",
  "multimodal_projector_bias": true,
  "projector_hidden_act": "gelu",
  "text_config": {
    "_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
    "architectures": [
      "MistralForCausalLM"
    ],
    "attention_dropout": 0.0,
    "dtype": "bfloat16",
    "head_dim": null,
    "hidden_act": "silu",
    "hidden_size": 16,
    "initializer_range": 0.02,
    "intermediate_size": 14336,
    "layer_types": null,
    "max_position_embeddings": 32768,
    "model_type": "mistral",
    "num_attention_heads": 4,
    "num_hidden_layers": 2,
    "num_key_value_heads": 2,
    "rms_norm_eps": 1e-05,
    "rope_theta": 1000000.0,
    "sliding_window": null,
    "use_cache": true,
    "vocab_size": 32064
  },
  "tie_word_embeddings": false,
  "transformers_version": "4.57.0.dev0",
  "use_image_newline_parameter": true,
  "vision_config": {
    "attention_dropout": 0.0,
    "embed_dim": 64,
    "hidden_act": "quick_gelu",
    "hidden_size": 16,
    "image_size": 336,
    "initializer_factor": 1.0,
    "initializer_range": 0.02,
    "intermediate_size": 4096,
    "layer_norm_eps": 1e-05,
    "model_type": "clip_vision_model",
    "num_attention_heads": 4,
    "num_channels": 3,
    "num_hidden_layers": 2,
    "num_key_value_heads": 2,
    "patch_size": 14,
    "projection_dim": 768,
    "vocab_size": 32000
  },
  "vision_feature_layer": -2,
  "vision_feature_select_strategy": "default",
  "vocab_size": 32064
}