Riddler2024 committed on
Commit
b703057
·
verified ·
1 Parent(s): 06448bb
config.bak.json ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "ColQwen2_5Omni"
4
+ ],
5
+ "audio_config": {
6
+ "activation_dropout": 0.0,
7
+ "activation_function": "gelu",
8
+ "attention_dropout": 0.0,
9
+ "d_model": 1280,
10
+ "dropout": 0.0,
11
+ "encoder_attention_heads": 20,
12
+ "encoder_ffn_dim": 5120,
13
+ "encoder_layerdrop": 0.0,
14
+ "encoder_layers": 32,
15
+ "init_std": 0.02,
16
+ "initializer_range": 0.02,
17
+ "max_source_positions": 1500,
18
+ "model_type": "qwen2_5_omni_audio_encoder",
19
+ "n_window": 100,
20
+ "num_hidden_layers": 32,
21
+ "num_mel_bins": 128,
22
+ "output_dim": 2048,
23
+ "scale_embedding": false,
24
+ "torch_dtype": "bfloat16"
25
+ },
26
+ "audio_end_token_id": 151648,
27
+ "audio_start_token_id": 151647,
28
+ "audio_token_index": 151646,
29
+ "bos_token_id": 151644,
30
+ "eos_token_id": 151645,
31
+ "ignore_index": -100,
32
+ "image_token_index": 151655,
33
+ "init_std": 0.02,
34
+ "initializer_range": 0.02,
35
+ "model_type": "qwen2_5_omni_thinker",
36
+ "pad_token_id": 151643,
37
+ "position_id_per_seconds": 25,
38
+ "seconds_per_chunk": 2,
39
+ "text_config": {
40
+ "attention_dropout": 0.0,
41
+ "hidden_act": "silu",
42
+ "hidden_size": 2048,
43
+ "init_std": 0.02,
44
+ "initializer_range": 0.02,
45
+ "intermediate_size": 11008,
46
+ "layer_types": [
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention",
52
+ "full_attention",
53
+ "full_attention",
54
+ "full_attention",
55
+ "full_attention",
56
+ "full_attention",
57
+ "full_attention",
58
+ "full_attention",
59
+ "full_attention",
60
+ "full_attention",
61
+ "full_attention",
62
+ "full_attention",
63
+ "full_attention",
64
+ "full_attention",
65
+ "full_attention",
66
+ "full_attention",
67
+ "full_attention",
68
+ "full_attention",
69
+ "full_attention",
70
+ "full_attention",
71
+ "full_attention",
72
+ "full_attention",
73
+ "full_attention",
74
+ "full_attention",
75
+ "full_attention",
76
+ "full_attention",
77
+ "full_attention",
78
+ "full_attention",
79
+ "full_attention",
80
+ "full_attention",
81
+ "full_attention",
82
+ "full_attention"
83
+ ],
84
+ "max_position_embeddings": 32768,
85
+ "max_window_layers": 70,
86
+ "model_type": "qwen2_5_omni_text",
87
+ "num_attention_heads": 16,
88
+ "num_hidden_layers": 36,
89
+ "num_key_value_heads": 2,
90
+ "rms_norm_eps": 1e-06,
91
+ "rope_scaling": {
92
+ "mrope_section": [
93
+ 16,
94
+ 24,
95
+ 24
96
+ ],
97
+ "rope_type": "default",
98
+ "type": "default"
99
+ },
100
+ "rope_theta": 1000000.0,
101
+ "sliding_window": null,
102
+ "torch_dtype": "bfloat16",
103
+ "use_cache": true,
104
+ "use_sliding_window": false,
105
+ "vocab_size": 151936
106
+ },
107
+ "torch_dtype": "bfloat16",
108
+ "transformers_version": "4.53.3",
109
+ "user_token_id": 872,
110
+ "video_token_index": 151656,
111
+ "vision_config": {
112
+ "depth": 32,
113
+ "embed_dim": 1280,
114
+ "fullatt_block_indexes": [
115
+ 7,
116
+ 15,
117
+ 23,
118
+ 31
119
+ ],
120
+ "hidden_act": "silu",
121
+ "hidden_size": 1280,
122
+ "in_channels": 3,
123
+ "in_chans": 3,
124
+ "init_std": 0.02,
125
+ "initializer_range": 0.02,
126
+ "intermediate_size": 3420,
127
+ "model_type": "qwen2_5_omni_vision_encoder",
128
+ "num_heads": 16,
129
+ "out_hidden_size": 2048,
130
+ "patch_size": 14,
131
+ "spatial_merge_size": 2,
132
+ "spatial_patch_size": 14,
133
+ "temporal_patch_size": 2,
134
+ "tokens_per_second": 25,
135
+ "torch_dtype": "bfloat16",
136
+ "window_size": 112
137
+ },
138
+ "vision_end_token_id": 151653,
139
+ "vision_start_token_id": 151652,
140
+ "vision_token_id": 151654
141
+ }
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e61f7f60ab648bff8858e66d25d621a85b19223bae69ae7e20226ac0b5ffda57
3
  size 4994841696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fcc6f29f96bb6c3dd106a61b624ce7d91d37443a9a477630712731b55dce7d8
3
  size 4994841696
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a5601eed44959c485a645366602944a8127dc5ac7fc39f576237f0ad424c858
3
  size 3790443712
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31fccc98077d546f494751c617eea0ae907d5ca6fec8bed3a8eef727724c50dc
3
  size 3790443712