DavidNguyen committed on
Commit
b990fbd
·
verified ·
1 Parent(s): b6aa927

Delete sft/moce/checkpoint-13312/config.json

Browse files
Files changed (1) hide show
  1. sft/moce/checkpoint-13312/config.json +0 -193
sft/moce/checkpoint-13312/config.json DELETED
@@ -1,193 +0,0 @@
1
- {
2
- "_name_or_path": "/cm/archive/namnv78/checkpoints/phi35-siglip224/pft",
3
- "architectures": [
4
- "LlavaPhiForCausalLM"
5
- ],
6
- "attention_bias": false,
7
- "attention_dropout": 0.0,
8
- "auto_map": {
9
- "AutoConfig": "configuration_phi3.Phi3Config",
10
- "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM"
11
- },
12
- "balance_loss_coef": 0.01,
13
- "bos_token_id": 1,
14
- "clip_smoe": true,
15
- "diversity_loss_coef": 0.01,
16
- "dropout": false,
17
- "e_loss_coef": 0.001,
18
- "embd_pdrop": 0.0,
19
- "entropy_advance_loss": false,
20
- "eos_token_id": 32000,
21
- "freeze_backbone": false,
22
- "freeze_mm_mlp_adapter": false,
23
- "hidden_act": "silu",
24
- "hidden_size": 3072,
25
- "image_aspect_ratio": "pad",
26
- "initializer_range": 0.02,
27
- "intermediate_size": 8192,
28
- "local_rank": 0,
29
- "loss1": "balanceloss",
30
- "loss2": "zloss",
31
- "luna": false,
32
- "max_position_embeddings": 131072,
33
- "mlp_smoe": true,
34
- "mm_hidden_size": 1152,
35
- "mm_patch_merge_type": "flat",
36
- "mm_projector_lr": null,
37
- "mm_projector_type": "moe",
38
- "mm_use_im_patch_token": false,
39
- "mm_use_im_start_end": false,
40
- "mm_vision_select_feature": "patch",
41
- "mm_vision_select_layer": -2,
42
- "mm_vision_tower": "google/siglip-so400m-patch14-224",
43
- "model_name_or_path": "/cm/archive/namnv78/checkpoints/phi35-siglip224/pft",
44
- "model_type": "llava_phi",
45
- "moe_name": "competesmoev30",
46
- "normalization": true,
47
- "num_attention_heads": 32,
48
- "num_experts": 4,
49
- "num_hidden_layers": 32,
50
- "num_key_value_heads": 32,
51
- "num_layers": 3,
52
- "num_selected": 2,
53
- "number_of_previous_tokens": 2,
54
- "original_max_position_embeddings": 4096,
55
- "pad_token_id": 32000,
56
- "pretrain_mm_mlp_adapter": "/cm/archive/namnv78/checkpoints/phi35-siglip224/pft/mm_projector.bin",
57
- "rate_compete": 0.2,
58
- "rate_flip": 0.07,
59
- "resid_pdrop": 0.0,
60
- "rms_norm_eps": 1e-05,
61
- "rope_scaling": {
62
- "long_factor": [
63
- 1.0800000429153442,
64
- 1.1100000143051147,
65
- 1.1399999856948853,
66
- 1.340000033378601,
67
- 1.5899999141693115,
68
- 1.600000023841858,
69
- 1.6200000047683716,
70
- 2.620000123977661,
71
- 3.2300000190734863,
72
- 3.2300000190734863,
73
- 4.789999961853027,
74
- 7.400000095367432,
75
- 7.700000286102295,
76
- 9.09000015258789,
77
- 12.199999809265137,
78
- 17.670000076293945,
79
- 24.46000099182129,
80
- 28.57000160217285,
81
- 30.420001983642578,
82
- 30.840002059936523,
83
- 32.590003967285156,
84
- 32.93000411987305,
85
- 42.320003509521484,
86
- 44.96000289916992,
87
- 50.340003967285156,
88
- 50.45000457763672,
89
- 57.55000305175781,
90
- 57.93000411987305,
91
- 58.21000289916992,
92
- 60.1400032043457,
93
- 62.61000442504883,
94
- 62.62000274658203,
95
- 62.71000289916992,
96
- 63.1400032043457,
97
- 63.1400032043457,
98
- 63.77000427246094,
99
- 63.93000411987305,
100
- 63.96000289916992,
101
- 63.970001220703125,
102
- 64.02999877929688,
103
- 64.06999969482422,
104
- 64.08000183105469,
105
- 64.12000274658203,
106
- 64.41000366210938,
107
- 64.4800033569336,
108
- 64.51000213623047,
109
- 64.52999877929688,
110
- 64.83999633789062
111
- ],
112
- "short_factor": [
113
- 1.0,
114
- 1.0199999809265137,
115
- 1.0299999713897705,
116
- 1.0299999713897705,
117
- 1.0499999523162842,
118
- 1.0499999523162842,
119
- 1.0499999523162842,
120
- 1.0499999523162842,
121
- 1.0499999523162842,
122
- 1.0699999332427979,
123
- 1.0999999046325684,
124
- 1.1099998950958252,
125
- 1.1599998474121094,
126
- 1.1599998474121094,
127
- 1.1699998378753662,
128
- 1.2899998426437378,
129
- 1.339999794960022,
130
- 1.679999828338623,
131
- 1.7899998426437378,
132
- 1.8199998140335083,
133
- 1.8499997854232788,
134
- 1.8799997568130493,
135
- 1.9099997282028198,
136
- 1.9399996995925903,
137
- 1.9899996519088745,
138
- 2.0199997425079346,
139
- 2.0199997425079346,
140
- 2.0199997425079346,
141
- 2.0199997425079346,
142
- 2.0199997425079346,
143
- 2.0199997425079346,
144
- 2.0299997329711914,
145
- 2.0299997329711914,
146
- 2.0299997329711914,
147
- 2.0299997329711914,
148
- 2.0299997329711914,
149
- 2.0299997329711914,
150
- 2.0299997329711914,
151
- 2.0299997329711914,
152
- 2.0299997329711914,
153
- 2.0799996852874756,
154
- 2.0899996757507324,
155
- 2.189999580383301,
156
- 2.2199995517730713,
157
- 2.5899994373321533,
158
- 2.729999542236328,
159
- 2.749999523162842,
160
- 2.8399994373321533
161
- ],
162
- "type": "longrope"
163
- },
164
- "rope_theta": 10000.0,
165
- "router_loss_coef": 0.01,
166
- "router_theta": 0.1,
167
- "router_z_loss_coef": 0.001,
168
- "scales": [
169
- 1,
170
- 3
171
- ],
172
- "sliding_window": 262144,
173
- "sparse_upcycling": true,
174
- "strategy_train": "base",
175
- "tie_word_embeddings": false,
176
- "tokenizer_model_max_length": 2048,
177
- "tokenizer_padding_side": "right",
178
- "topk_max": 2,
179
- "topk_min": 1,
180
- "torch_dtype": "bfloat16",
181
- "training": true,
182
- "transformers_version": "4.43.0",
183
- "tune_mm_mlp_adapter": false,
184
- "unit_test": true,
185
- "use_cache": false,
186
- "use_mm_proj": true,
187
- "use_old": false,
188
- "version": "phi35",
189
- "vision_tower": "google/siglip-so400m-patch14-224",
190
- "vision_tower_dir": "/cm/archive/namnv78/checkpoints/phi35-siglip224/pft/clip.bin",
191
- "vocab_size": 32064,
192
- "warm_up": 0.05
193
- }