DavidNguyen committed on
Commit
13d8fd5
·
verified ·
1 Parent(s): 219bf08

5e32629157d341d75b196dad4ca64ac39e28fc68d4a6ee5f5634b0c3dd16aa61

Browse files
.gitattributes CHANGED
@@ -354,3 +354,5 @@ sft/665K36/Full_smoe_plus_plus/checkpoint-20791/trainer_state.json filter=lfs di
354
  sft/665K36/Full_smoe_sharev3_not_norm/checkpoint-20791/trainer_state.json filter=lfs diff=lfs merge=lfs -text
355
  sft/665K36/Full_smoe_sharev3/checkpoint-20791/trainer_state.json filter=lfs diff=lfs merge=lfs -text
356
  sft/665K36/revise_Full_smoe_sharev3/checkpoint-20791/trainer_state.json filter=lfs diff=lfs merge=lfs -text
 
 
 
354
  sft/665K36/Full_smoe_sharev3_not_norm/checkpoint-20791/trainer_state.json filter=lfs diff=lfs merge=lfs -text
355
  sft/665K36/Full_smoe_sharev3/checkpoint-20791/trainer_state.json filter=lfs diff=lfs merge=lfs -text
356
  sft/665K36/revise_Full_smoe_sharev3/checkpoint-20791/trainer_state.json filter=lfs diff=lfs merge=lfs -text
357
+ sft/665K36/revise_Full_smoe_sharev3/checkpoint-12477/logs/0717_2000_llava...l_mme_llava_model_args_82420a/infovqa_val.json filter=lfs diff=lfs merge=lfs -text
358
+ sft/665K36/revise_Full_smoe_sharev3/checkpoint-12477/logs/0717_2000_llava...l_mme_llava_model_args_82420a/mmbench_en_dev.json filter=lfs diff=lfs merge=lfs -text
sft/665K36/revise_Full_smoe_sharev3/checkpoint-12477/added_tokens.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "<|assistant|>": 32001,
3
+ "<|endoftext|>": 32000,
4
+ "<|end|>": 32007,
5
+ "<|placeholder1|>": 32002,
6
+ "<|placeholder2|>": 32003,
7
+ "<|placeholder3|>": 32004,
8
+ "<|placeholder4|>": 32005,
9
+ "<|placeholder5|>": 32008,
10
+ "<|placeholder6|>": 32009,
11
+ "<|system|>": 32006,
12
+ "<|user|>": 32010
13
+ }
sft/665K36/revise_Full_smoe_sharev3/checkpoint-12477/config.json ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/cm/archive/namnv78_new/revise_checkpoints/Xphi35-siglip224/pft",
3
+ "architectures": [
4
+ "LlavaPhiForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "auto_map": {
9
+ "AutoConfig": "configuration_phi3.Phi3Config",
10
+ "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM"
11
+ },
12
+ "bal_comp_loss_coef": 0.01,
13
+ "balance_loss_coef": 0.01,
14
+ "bos_token_id": 1,
15
+ "clip_smoe": true,
16
+ "diversity_loss_coef": 0.01,
17
+ "dropout": false,
18
+ "e_loss_coef": 0.001,
19
+ "embd_pdrop": 0.0,
20
+ "entropy_advance_loss": false,
21
+ "eos_token_id": 32000,
22
+ "freeze_backbone": false,
23
+ "freeze_mm_mlp_adapter": false,
24
+ "hidden_act": "silu",
25
+ "hidden_size": 3072,
26
+ "hybrid": false,
27
+ "image_aspect_ratio": "pad",
28
+ "init_weight": true,
29
+ "initializer_range": 0.02,
30
+ "intermediate_size": 8192,
31
+ "is_cosine": false,
32
+ "is_norm_weight": false,
33
+ "local_rank": 0,
34
+ "loss1": "balanceloss",
35
+ "loss2": "zloss",
36
+ "luna": false,
37
+ "max_compete_in_iter": 3,
38
+ "max_position_embeddings": 131072,
39
+ "mlp_smoe": true,
40
+ "mm_hidden_size": 1152,
41
+ "mm_patch_merge_type": "flat",
42
+ "mm_projector_lr": null,
43
+ "mm_projector_type": "moe",
44
+ "mm_use_im_patch_token": false,
45
+ "mm_use_im_start_end": false,
46
+ "mm_vision_select_feature": "patch",
47
+ "mm_vision_select_layer": -2,
48
+ "mm_vision_tower": "google/siglip-so400m-patch14-224",
49
+ "model_name_or_path": "/cm/archive/namnv78_new/revise_checkpoints/Xphi35-siglip224/pft",
50
+ "model_type": "llava_phi",
51
+ "moe_name": "smoe_sharev3",
52
+ "moe_relu_l1_reg_coeff_multiplier": 1.2,
53
+ "mp_pixel_shuffle_factor": 1,
54
+ "norm_softmax": false,
55
+ "normalization": true,
56
+ "num_attention_heads": 32,
57
+ "num_experts": 6,
58
+ "num_hidden_layers": 32,
59
+ "num_key_value_heads": 32,
60
+ "num_layers": 3,
61
+ "num_selected": 3,
62
+ "number_of_previous_tokens": 2,
63
+ "original_max_position_embeddings": 4096,
64
+ "pad_token_id": 32000,
65
+ "pretrain_mm_mlp_adapter": "/cm/archive/namnv78_new/revise_checkpoints/Xphi35-siglip224/pft/mm_projector.bin",
66
+ "rate_compete": 0.2,
67
+ "rate_flip": 0.05,
68
+ "resid_pdrop": 0.0,
69
+ "rms_norm_eps": 1e-05,
70
+ "rope_scaling": {
71
+ "long_factor": [
72
+ 1.0800000429153442,
73
+ 1.1100000143051147,
74
+ 1.1399999856948853,
75
+ 1.340000033378601,
76
+ 1.5899999141693115,
77
+ 1.600000023841858,
78
+ 1.6200000047683716,
79
+ 2.620000123977661,
80
+ 3.2300000190734863,
81
+ 3.2300000190734863,
82
+ 4.789999961853027,
83
+ 7.400000095367432,
84
+ 7.700000286102295,
85
+ 9.09000015258789,
86
+ 12.199999809265137,
87
+ 17.670000076293945,
88
+ 24.46000099182129,
89
+ 28.57000160217285,
90
+ 30.420001983642578,
91
+ 30.840002059936523,
92
+ 32.590003967285156,
93
+ 32.93000411987305,
94
+ 42.320003509521484,
95
+ 44.96000289916992,
96
+ 50.340003967285156,
97
+ 50.45000457763672,
98
+ 57.55000305175781,
99
+ 57.93000411987305,
100
+ 58.21000289916992,
101
+ 60.1400032043457,
102
+ 62.61000442504883,
103
+ 62.62000274658203,
104
+ 62.71000289916992,
105
+ 63.1400032043457,
106
+ 63.1400032043457,
107
+ 63.77000427246094,
108
+ 63.93000411987305,
109
+ 63.96000289916992,
110
+ 63.970001220703125,
111
+ 64.02999877929688,
112
+ 64.06999969482422,
113
+ 64.08000183105469,
114
+ 64.12000274658203,
115
+ 64.41000366210938,
116
+ 64.4800033569336,
117
+ 64.51000213623047,
118
+ 64.52999877929688,
119
+ 64.83999633789062
120
+ ],
121
+ "short_factor": [
122
+ 1.0,
123
+ 1.0199999809265137,
124
+ 1.0299999713897705,
125
+ 1.0299999713897705,
126
+ 1.0499999523162842,
127
+ 1.0499999523162842,
128
+ 1.0499999523162842,
129
+ 1.0499999523162842,
130
+ 1.0499999523162842,
131
+ 1.0699999332427979,
132
+ 1.0999999046325684,
133
+ 1.1099998950958252,
134
+ 1.1599998474121094,
135
+ 1.1599998474121094,
136
+ 1.1699998378753662,
137
+ 1.2899998426437378,
138
+ 1.339999794960022,
139
+ 1.679999828338623,
140
+ 1.7899998426437378,
141
+ 1.8199998140335083,
142
+ 1.8499997854232788,
143
+ 1.8799997568130493,
144
+ 1.9099997282028198,
145
+ 1.9399996995925903,
146
+ 1.9899996519088745,
147
+ 2.0199997425079346,
148
+ 2.0199997425079346,
149
+ 2.0199997425079346,
150
+ 2.0199997425079346,
151
+ 2.0199997425079346,
152
+ 2.0199997425079346,
153
+ 2.0299997329711914,
154
+ 2.0299997329711914,
155
+ 2.0299997329711914,
156
+ 2.0299997329711914,
157
+ 2.0299997329711914,
158
+ 2.0299997329711914,
159
+ 2.0299997329711914,
160
+ 2.0299997329711914,
161
+ 2.0299997329711914,
162
+ 2.0799996852874756,
163
+ 2.0899996757507324,
164
+ 2.189999580383301,
165
+ 2.2199995517730713,
166
+ 2.5899994373321533,
167
+ 2.729999542236328,
168
+ 2.749999523162842,
169
+ 2.8399994373321533
170
+ ],
171
+ "type": "longrope"
172
+ },
173
+ "rope_theta": 10000.0,
174
+ "router_loss_coef": 0.01,
175
+ "router_theta": 0.1,
176
+ "router_z_loss_coef": 0.001,
177
+ "scales": [
178
+ 1,
179
+ 3
180
+ ],
181
+ "sliding_window": 262144,
182
+ "sparse_upcycling": true,
183
+ "std_gate": 0.02,
184
+ "strategy_train": "base",
185
+ "tie_word_embeddings": false,
186
+ "tokenizer_model_max_length": 2048,
187
+ "tokenizer_padding_side": "right",
188
+ "topk_max": 2,
189
+ "topk_min": 1,
190
+ "torch_dtype": "bfloat16",
191
+ "training": true,
192
+ "transformers_version": "4.43.0",
193
+ "tune_mm_mlp_adapter": false,
194
+ "unit_test": true,
195
+ "use_cache": false,
196
+ "use_mm_proj": true,
197
+ "use_old": false,
198
+ "version": "phi35",
199
+ "vision_tower": "google/siglip-so400m-patch14-224",
200
+ "vision_tower_dir": "/cm/archive/namnv78_new/revise_checkpoints/Xphi35-siglip224/pft/clip.bin",
201
+ "vocab_size": 32064,
202
+ "warm_up": 0.05
203
+ }
sft/665K36/revise_Full_smoe_sharev3/checkpoint-12477/generation_config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "do_sample": true,
5
+ "eos_token_id": [
6
+ 32007,
7
+ 32001,
8
+ 32000
9
+ ],
10
+ "pad_token_id": 32000,
11
+ "transformers_version": "4.43.0"
12
+ }
sft/665K36/revise_Full_smoe_sharev3/checkpoint-12477/latest ADDED
@@ -0,0 +1 @@
 
 
1
+ global_step12477
sft/665K36/revise_Full_smoe_sharev3/checkpoint-12477/logs/0717_2000_llava...l_mme_llava_model_args_82420a/ai2d.json ADDED
The diff for this file is too large to render. See raw diff
 
sft/665K36/revise_Full_smoe_sharev3/checkpoint-12477/logs/0717_2000_llava...l_mme_llava_model_args_82420a/infovqa_val.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e20481b3f09f9a284e75f4666e2dbcd3ba5edd37af17da5ffe3ce7daa956a795
3
+ size 576426958
sft/665K36/revise_Full_smoe_sharev3/checkpoint-12477/logs/0717_2000_llava...l_mme_llava_model_args_82420a/mmbench_en_dev.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e2e0c174443a675dcdefe282bd72cdcc5fd7e614aa8bfc77aca9f81cf5f0f19
3
+ size 14568322
sft/665K36/revise_Full_smoe_sharev3/checkpoint-12477/logs/0717_2000_llava...l_mme_llava_model_args_82420a/mme.json ADDED
The diff for this file is too large to render. See raw diff