array committed on
Commit
2942cc5
·
verified ·
1 Parent(s): 8a6f122

Upload folder using huggingface_hub

Browse files
chat_template.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
3
+ }
config.json CHANGED
@@ -1,10 +1,11 @@
1
  {
 
2
  "architectures": [
3
  "Qwen2_5_VLForConditionalGeneration"
4
  ],
5
  "attention_dropout": 0.0,
6
  "auto_map": {
7
- "AutoModelForVision2Seq": "mmlatentdiscrete_qwen_vl.Qwen2_5_VLForConditionalGeneration"
8
  },
9
  "bos_token_id": 151643,
10
  "compress_strategy": "average",
@@ -40,111 +41,23 @@
40
  "rope_theta": 1000000.0,
41
  "sliding_window": 32768,
42
  "stage": "stage2",
43
- "text_config": {
44
- "_name_or_path": "/usr/local/google/home/arijitray/Documents/final_models/qwen_vl_mulldiscrete_grpo200",
45
- "architectures": [
46
- "Qwen2_5_VLForConditionalGeneration"
47
- ],
48
- "attention_dropout": 0.0,
49
- "bos_token_id": 151643,
50
- "compress_strategy": "average",
51
- "dtype": "bfloat16",
52
- "eos_token_id": 151645,
53
- "hidden_act": "silu",
54
- "hidden_size": 3584,
55
- "imagelatent_token_id": 151668,
56
- "initializer_range": 0.02,
57
- "intermediate_size": 18944,
58
- "latent_end_id": 151667,
59
- "latent_sample_temperature": 2,
60
- "latent_size": 4,
61
- "latent_start_id": 151666,
62
- "latent_token_id": 151665,
63
- "layer_types": [
64
- "full_attention",
65
- "full_attention",
66
- "full_attention",
67
- "full_attention",
68
- "full_attention",
69
- "full_attention",
70
- "full_attention",
71
- "full_attention",
72
- "full_attention",
73
- "full_attention",
74
- "full_attention",
75
- "full_attention",
76
- "full_attention",
77
- "full_attention",
78
- "full_attention",
79
- "full_attention",
80
- "full_attention",
81
- "full_attention",
82
- "full_attention",
83
- "full_attention",
84
- "full_attention",
85
- "full_attention",
86
- "full_attention",
87
- "full_attention",
88
- "full_attention",
89
- "full_attention",
90
- "full_attention",
91
- "full_attention"
92
- ],
93
- "max_position_embeddings": 128000,
94
- "max_window_layers": 28,
95
- "model_type": "qwen2_5_vl_text",
96
- "num_attention_heads": 28,
97
- "num_hidden_layers": 28,
98
- "num_key_value_heads": 4,
99
- "rms_norm_eps": 1e-06,
100
- "rope_scaling": {
101
- "mrope_section": [
102
- 16,
103
- 24,
104
- 24
105
- ],
106
- "rope_type": "default",
107
- "type": "default"
108
- },
109
- "rope_theta": 1000000.0,
110
- "sliding_window": null,
111
- "stage": "stage2",
112
- "use_cache": true,
113
- "use_latent_projection": false,
114
- "use_sliding_window": false,
115
- "vision_token_id": 151654,
116
- "vocab_size": 151669
117
- },
118
  "tie_word_embeddings": false,
119
- "transformers_version": "4.57.3",
 
120
  "use_cache": true,
121
  "use_latent_projection": false,
122
  "use_sliding_window": false,
123
  "video_token_id": 151656,
124
  "vision_config": {
125
- "depth": 32,
126
  "dtype": "bfloat16",
127
- "fullatt_block_indexes": [
128
- 7,
129
- 15,
130
- 23,
131
- 31
132
- ],
133
- "hidden_act": "silu",
134
  "hidden_size": 1280,
135
- "in_channels": 3,
136
  "in_chans": 3,
137
  "initializer_range": 0.02,
138
- "intermediate_size": 3420,
139
  "model_type": "qwen2_5_vl",
140
- "num_heads": 16,
141
- "out_hidden_size": 3584,
142
- "patch_size": 14,
143
- "spatial_merge_size": 2,
144
  "spatial_patch_size": 14,
145
- "temporal_patch_size": 2,
146
  "tokens_per_second": 2,
147
- "window_size": 112
148
  },
149
  "vision_end_token_id": 151653,
150
  "vision_start_token_id": 151652,
 
1
  {
2
+ "_name_or_path": "array/Qwen2.5-VL-MullGRPO",
3
  "architectures": [
4
  "Qwen2_5_VLForConditionalGeneration"
5
  ],
6
  "attention_dropout": 0.0,
7
  "auto_map": {
8
+ "AutoModelForVision2Seq": "array/Qwen2.5-VL-MullGRPO--mmlatentdiscrete_qwen_vl.Qwen2_5_VLForConditionalGeneration"
9
  },
10
  "bos_token_id": 151643,
11
  "compress_strategy": "average",
 
41
  "rope_theta": 1000000.0,
42
  "sliding_window": 32768,
43
  "stage": "stage2",
44
+ "text_config": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  "tie_word_embeddings": false,
46
+ "torch_dtype": "bfloat16",
47
+ "transformers_version": "4.49.0.dev0",
48
  "use_cache": true,
49
  "use_latent_projection": false,
50
  "use_sliding_window": false,
51
  "video_token_id": 151656,
52
  "vision_config": {
 
53
  "dtype": "bfloat16",
 
 
 
 
 
 
 
54
  "hidden_size": 1280,
 
55
  "in_chans": 3,
56
  "initializer_range": 0.02,
 
57
  "model_type": "qwen2_5_vl",
 
 
 
 
58
  "spatial_patch_size": 14,
 
59
  "tokens_per_second": 2,
60
+ "torch_dtype": "bfloat16"
61
  },
62
  "vision_end_token_id": 151653,
63
  "vision_start_token_id": 151652,
generation_config.json CHANGED
@@ -8,5 +8,5 @@
8
  "pad_token_id": 151643,
9
  "repetition_penalty": 1.05,
10
  "temperature": 1e-06,
11
- "transformers_version": "4.57.3"
12
  }
 
8
  "pad_token_id": 151643,
9
  "repetition_penalty": 1.05,
10
  "temperature": 1e-06,
11
+ "transformers_version": "4.49.0.dev0"
12
  }
model.safetensors.index.json CHANGED
@@ -1,6 +1,5 @@
1
  {
2
  "metadata": {
3
- "total_parameters": 8289335296,
4
  "total_size": 16578670592
5
  },
6
  "weight_map": {
 
1
  {
2
  "metadata": {
 
3
  "total_size": 16578670592
4
  },
5
  "weight_map": {
preprocessor_config.json CHANGED
@@ -8,7 +8,7 @@
8
  0.4578275,
9
  0.40821073
10
  ],
11
- "image_processor_type": "Qwen2VLImageProcessor",
12
  "image_std": [
13
  0.26862954,
14
  0.26130258,
 
8
  0.4578275,
9
  0.40821073
10
  ],
11
+ "image_processor_type": "Qwen2_5_VLImageProcessor",
12
  "image_std": [
13
  0.26862954,
14
  0.26130258,
tokenizer_config.json CHANGED
@@ -227,6 +227,7 @@
227
  "<|video_pad|>"
228
  ],
229
  "bos_token": null,
 
230
  "clean_up_tokenization_spaces": false,
231
  "eos_token": "<|im_end|>",
232
  "errors": "replace",
 
227
  "<|video_pad|>"
228
  ],
229
  "bos_token": null,
230
+ "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
231
  "clean_up_tokenization_spaces": false,
232
  "eos_token": "<|im_end|>",
233
  "errors": "replace",