Portx committed on
Commit
25bad97
·
verified ·
1 Parent(s): 1e0519b

(Trained with Unsloth)

Browse files
config.json CHANGED
@@ -1,111 +1,143 @@
1
  {
2
- "architectures": [
3
- "MllamaForConditionalGeneration"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  ],
5
- "image_token_index": 128256,
6
- "model_type": "mllama",
7
- "pad_token_id": 128004,
8
- "text_config": {
9
- "_attn_implementation_autoset": false,
10
- "cross_attention_layers": [
11
- 3,
12
- 8,
13
- 13,
14
- 18,
15
- 23,
16
- 28,
17
- 33,
18
- 38
19
- ],
20
- "dropout": 0,
21
- "eos_token_id": [
22
- 128001,
23
- 128008,
24
- 128009
25
- ],
26
- "hidden_act": "silu",
27
- "hidden_size": 4096,
28
- "initializer_range": 0.02,
29
- "intermediate_size": 14336,
30
- "max_position_embeddings": 131072,
31
- "model_type": "mllama_text_model",
32
- "num_attention_heads": 32,
33
- "num_hidden_layers": 40,
34
- "num_key_value_heads": 8,
35
- "rms_norm_eps": 1e-05,
36
- "rope_scaling": {
37
- "factor": 8.0,
38
- "high_freq_factor": 4.0,
39
- "low_freq_factor": 1.0,
40
- "original_max_position_embeddings": 8192,
41
- "rope_type": "llama3"
42
- },
43
- "rope_theta": 500000.0,
44
- "torch_dtype": "bfloat16",
45
- "use_cache": true,
46
- "vocab_size": 128256
47
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  "torch_dtype": "bfloat16",
49
- "transformers_version": "4.55.4",
50
- "unsloth_fixed": true,
51
- "unsloth_version": "2025.9.7",
52
- "vision_config": {
53
- "_attn_implementation_autoset": false,
54
- "attention_heads": 16,
55
- "hidden_act": "gelu",
56
- "hidden_size": 1280,
57
- "image_size": 560,
58
- "initializer_range": 0.02,
59
- "intermediate_layers_indices": [
60
- 3,
61
- 7,
62
- 15,
63
- 23,
64
- 30
65
- ],
66
- "intermediate_size": 5120,
67
- "max_num_tiles": 4,
68
- "model_type": "mllama_vision_model",
69
- "norm_eps": 1e-05,
70
- "num_channels": 3,
71
- "num_global_layers": 8,
72
- "num_hidden_layers": 32,
73
- "patch_size": 14,
74
- "supported_aspect_ratios": [
75
- [
76
- 1,
77
- 1
78
- ],
79
- [
80
- 1,
81
- 2
82
- ],
83
- [
84
- 1,
85
- 3
86
- ],
87
- [
88
- 1,
89
- 4
90
- ],
91
- [
92
- 2,
93
- 1
94
- ],
95
- [
96
- 2,
97
- 2
98
- ],
99
- [
100
- 3,
101
- 1
102
- ],
103
- [
104
- 4,
105
- 1
106
- ]
107
- ],
108
- "torch_dtype": "bfloat16",
109
- "vision_output_dim": 7680
110
- }
111
- }
 
1
  {
2
+ "architectures": [
3
+ "MllamaForConditionalGeneration"
4
+ ],
5
+ "image_token_index": 128256,
6
+ "model_type": "mllama",
7
+ "pad_token_id": 128004,
8
+ "quantization_config": {
9
+ "bnb_4bit_compute_dtype": "bfloat16",
10
+ "bnb_4bit_quant_type": "nf4",
11
+ "bnb_4bit_use_double_quant": true,
12
+ "llm_int8_enable_fp32_cpu_offload": false,
13
+ "llm_int8_has_fp16_weight": false,
14
+ "llm_int8_skip_modules": [
15
+ "model.vision_model.transformer.layers.13.mlp.fc2",
16
+ "model.language_model.layers.1.self_attn.q_proj",
17
+ "model.language_model.layers.1.self_attn.k_proj",
18
+ "model.language_model.layers.1.self_attn.v_proj",
19
+ "model.language_model.layers.1.self_attn.o_proj",
20
+ "model.language_model.layers.1.mlp.gate_proj",
21
+ "model.language_model.layers.1.mlp.up_proj",
22
+ "model.language_model.layers.1.mlp.down_proj",
23
+ "model.language_model.layers.8.cross_attn.o_proj",
24
+ "model.language_model.layers.13.cross_attn.o_proj",
25
+ "model.language_model.layers.13.mlp.down_proj",
26
+ "model.language_model.layers.18.cross_attn.o_proj",
27
+ "model.language_model.layers.18.mlp.down_proj",
28
+ "model.language_model.layers.23.cross_attn.o_proj",
29
+ "model.language_model.layers.28.cross_attn.o_proj",
30
+ "model.language_model.layers.33.cross_attn.o_proj",
31
+ "model.language_model.layers.38.cross_attn.o_proj",
32
+ "model.multi_modal_projector",
33
+ "lm_head"
34
  ],
35
+ "llm_int8_threshold": 6.0,
36
+ "load_in_4bit": true,
37
+ "load_in_8bit": false,
38
+ "quant_method": "bitsandbytes"
39
+ },
40
+ "text_config": {
41
+ "_attn_implementation_autoset": false,
42
+ "cross_attention_layers": [
43
+ 3,
44
+ 8,
45
+ 13,
46
+ 18,
47
+ 23,
48
+ 28,
49
+ 33,
50
+ 38
51
+ ],
52
+ "dropout": 0,
53
+ "eos_token_id": [
54
+ 128001,
55
+ 128008,
56
+ 128009
57
+ ],
58
+ "hidden_act": "silu",
59
+ "hidden_size": 4096,
60
+ "initializer_range": 0.02,
61
+ "intermediate_size": 14336,
62
+ "max_position_embeddings": 131072,
63
+ "model_type": "mllama_text_model",
64
+ "num_attention_heads": 32,
65
+ "num_hidden_layers": 40,
66
+ "num_key_value_heads": 8,
67
+ "rms_norm_eps": 1e-05,
68
+ "rope_scaling": {
69
+ "factor": 8.0,
70
+ "high_freq_factor": 4.0,
71
+ "low_freq_factor": 1.0,
72
+ "original_max_position_embeddings": 8192,
73
+ "rope_type": "llama3"
 
 
 
74
  },
75
+ "rope_theta": 500000.0,
76
+ "torch_dtype": "bfloat16",
77
+ "use_cache": true,
78
+ "vocab_size": 128256
79
+ },
80
+ "torch_dtype": "bfloat16",
81
+ "transformers_version": "4.55.4",
82
+ "unsloth_fixed": true,
83
+ "unsloth_version": "2025.9.7",
84
+ "vision_config": {
85
+ "_attn_implementation_autoset": false,
86
+ "attention_heads": 16,
87
+ "hidden_act": "gelu",
88
+ "hidden_size": 1280,
89
+ "image_size": 560,
90
+ "initializer_range": 0.02,
91
+ "intermediate_layers_indices": [
92
+ 3,
93
+ 7,
94
+ 15,
95
+ 23,
96
+ 30
97
+ ],
98
+ "intermediate_size": 5120,
99
+ "max_num_tiles": 4,
100
+ "model_type": "mllama_vision_model",
101
+ "norm_eps": 1e-05,
102
+ "num_channels": 3,
103
+ "num_global_layers": 8,
104
+ "num_hidden_layers": 32,
105
+ "patch_size": 14,
106
+ "supported_aspect_ratios": [
107
+ [
108
+ 1,
109
+ 1
110
+ ],
111
+ [
112
+ 1,
113
+ 2
114
+ ],
115
+ [
116
+ 1,
117
+ 3
118
+ ],
119
+ [
120
+ 1,
121
+ 4
122
+ ],
123
+ [
124
+ 2,
125
+ 1
126
+ ],
127
+ [
128
+ 2,
129
+ 2
130
+ ],
131
+ [
132
+ 3,
133
+ 1
134
+ ],
135
+ [
136
+ 4,
137
+ 1
138
+ ]
139
+ ],
140
  "torch_dtype": "bfloat16",
141
+ "vision_output_dim": 7680
142
+ }
143
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
generation_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 128000,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 128001,
6
+ 128008,
7
+ 128009
8
+ ],
9
+ "pad_token_id": 128004,
10
+ "temperature": 0.6,
11
+ "top_p": 0.9,
12
+ "transformers_version": "4.55.4"
13
+ }
model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0abf73f72539602349091f897259594f8b2007588c98aec6b45a0678d651be15
3
+ size 4971535109
model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e8ac54275e9ff35ac3048c67ffd55a2cb53e820edfcde6cec0f25fb0834167e
3
+ size 2937067209
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff