spicyneuron committed on
Commit
816d837
·
verified ·
1 Parent(s): 7b1a697

Add files using upload-large-folder tool

Browse files
config.json CHANGED
@@ -58,19 +58,19 @@
58
  "mode": "affine"
59
  },
60
  "model.layers.0.mlp.switch_mlp.gate_proj": {
61
- "group_size": 32,
62
- "bits": 4,
63
- "mode": "mxfp4"
64
  },
65
  "model.layers.0.mlp.switch_mlp.up_proj": {
66
- "group_size": 32,
67
- "bits": 4,
68
- "mode": "mxfp4"
69
  },
70
  "model.layers.0.mlp.switch_mlp.down_proj": {
71
- "group_size": 32,
72
- "bits": 4,
73
- "mode": "mxfp4"
74
  },
75
  "model.layers.0.mlp.shared_expert.gate_proj": {
76
  "group_size": 64,
@@ -103,19 +103,19 @@
103
  "mode": "affine"
104
  },
105
  "model.layers.1.mlp.switch_mlp.gate_proj": {
106
- "group_size": 32,
107
- "bits": 4,
108
- "mode": "mxfp4"
109
  },
110
  "model.layers.1.mlp.switch_mlp.up_proj": {
111
- "group_size": 32,
112
- "bits": 4,
113
- "mode": "mxfp4"
114
  },
115
  "model.layers.1.mlp.switch_mlp.down_proj": {
116
- "group_size": 32,
117
- "bits": 4,
118
- "mode": "mxfp4"
119
  },
120
  "model.layers.1.mlp.shared_expert.gate_proj": {
121
  "group_size": 64,
@@ -148,19 +148,19 @@
148
  "mode": "affine"
149
  },
150
  "model.layers.2.mlp.switch_mlp.gate_proj": {
151
- "group_size": 32,
152
- "bits": 4,
153
- "mode": "mxfp4"
154
  },
155
  "model.layers.2.mlp.switch_mlp.up_proj": {
156
- "group_size": 32,
157
- "bits": 4,
158
- "mode": "mxfp4"
159
  },
160
  "model.layers.2.mlp.switch_mlp.down_proj": {
161
- "group_size": 32,
162
- "bits": 4,
163
- "mode": "mxfp4"
164
  },
165
  "model.layers.2.mlp.shared_expert.gate_proj": {
166
  "group_size": 64,
@@ -2138,19 +2138,19 @@
2138
  "mode": "affine"
2139
  },
2140
  "model.layers.45.mlp.switch_mlp.gate_proj": {
2141
- "group_size": 32,
2142
- "bits": 4,
2143
- "mode": "mxfp4"
2144
  },
2145
  "model.layers.45.mlp.switch_mlp.up_proj": {
2146
- "group_size": 32,
2147
- "bits": 4,
2148
- "mode": "mxfp4"
2149
  },
2150
  "model.layers.45.mlp.switch_mlp.down_proj": {
2151
- "group_size": 32,
2152
- "bits": 4,
2153
- "mode": "mxfp4"
2154
  },
2155
  "model.layers.45.mlp.shared_expert.gate_proj": {
2156
  "group_size": 64,
@@ -2183,19 +2183,19 @@
2183
  "mode": "affine"
2184
  },
2185
  "model.layers.46.mlp.switch_mlp.gate_proj": {
2186
- "group_size": 32,
2187
- "bits": 4,
2188
- "mode": "mxfp4"
2189
  },
2190
  "model.layers.46.mlp.switch_mlp.up_proj": {
2191
- "group_size": 32,
2192
- "bits": 4,
2193
- "mode": "mxfp4"
2194
  },
2195
  "model.layers.46.mlp.switch_mlp.down_proj": {
2196
- "group_size": 32,
2197
- "bits": 4,
2198
- "mode": "mxfp4"
2199
  },
2200
  "model.layers.46.mlp.shared_expert.gate_proj": {
2201
  "group_size": 64,
@@ -2233,19 +2233,19 @@
2233
  "mode": "affine"
2234
  },
2235
  "model.layers.47.mlp.switch_mlp.gate_proj": {
2236
- "group_size": 32,
2237
- "bits": 4,
2238
- "mode": "mxfp4"
2239
  },
2240
  "model.layers.47.mlp.switch_mlp.up_proj": {
2241
- "group_size": 32,
2242
- "bits": 4,
2243
- "mode": "mxfp4"
2244
  },
2245
  "model.layers.47.mlp.switch_mlp.down_proj": {
2246
- "group_size": 32,
2247
- "bits": 4,
2248
- "mode": "mxfp4"
2249
  },
2250
  "model.layers.47.mlp.shared_expert.gate_proj": {
2251
  "group_size": 64,
@@ -2293,19 +2293,19 @@
2293
  "mode": "affine"
2294
  },
2295
  "model.layers.0.mlp.switch_mlp.gate_proj": {
2296
- "group_size": 32,
2297
- "bits": 4,
2298
- "mode": "mxfp4"
2299
  },
2300
  "model.layers.0.mlp.switch_mlp.up_proj": {
2301
- "group_size": 32,
2302
- "bits": 4,
2303
- "mode": "mxfp4"
2304
  },
2305
  "model.layers.0.mlp.switch_mlp.down_proj": {
2306
- "group_size": 32,
2307
- "bits": 4,
2308
- "mode": "mxfp4"
2309
  },
2310
  "model.layers.0.mlp.shared_expert.gate_proj": {
2311
  "group_size": 64,
@@ -2338,19 +2338,19 @@
2338
  "mode": "affine"
2339
  },
2340
  "model.layers.1.mlp.switch_mlp.gate_proj": {
2341
- "group_size": 32,
2342
- "bits": 4,
2343
- "mode": "mxfp4"
2344
  },
2345
  "model.layers.1.mlp.switch_mlp.up_proj": {
2346
- "group_size": 32,
2347
- "bits": 4,
2348
- "mode": "mxfp4"
2349
  },
2350
  "model.layers.1.mlp.switch_mlp.down_proj": {
2351
- "group_size": 32,
2352
- "bits": 4,
2353
- "mode": "mxfp4"
2354
  },
2355
  "model.layers.1.mlp.shared_expert.gate_proj": {
2356
  "group_size": 64,
@@ -2383,19 +2383,19 @@
2383
  "mode": "affine"
2384
  },
2385
  "model.layers.2.mlp.switch_mlp.gate_proj": {
2386
- "group_size": 32,
2387
- "bits": 4,
2388
- "mode": "mxfp4"
2389
  },
2390
  "model.layers.2.mlp.switch_mlp.up_proj": {
2391
- "group_size": 32,
2392
- "bits": 4,
2393
- "mode": "mxfp4"
2394
  },
2395
  "model.layers.2.mlp.switch_mlp.down_proj": {
2396
- "group_size": 32,
2397
- "bits": 4,
2398
- "mode": "mxfp4"
2399
  },
2400
  "model.layers.2.mlp.shared_expert.gate_proj": {
2401
  "group_size": 64,
@@ -4373,19 +4373,19 @@
4373
  "mode": "affine"
4374
  },
4375
  "model.layers.45.mlp.switch_mlp.gate_proj": {
4376
- "group_size": 32,
4377
- "bits": 4,
4378
- "mode": "mxfp4"
4379
  },
4380
  "model.layers.45.mlp.switch_mlp.up_proj": {
4381
- "group_size": 32,
4382
- "bits": 4,
4383
- "mode": "mxfp4"
4384
  },
4385
  "model.layers.45.mlp.switch_mlp.down_proj": {
4386
- "group_size": 32,
4387
- "bits": 4,
4388
- "mode": "mxfp4"
4389
  },
4390
  "model.layers.45.mlp.shared_expert.gate_proj": {
4391
  "group_size": 64,
@@ -4418,19 +4418,19 @@
4418
  "mode": "affine"
4419
  },
4420
  "model.layers.46.mlp.switch_mlp.gate_proj": {
4421
- "group_size": 32,
4422
- "bits": 4,
4423
- "mode": "mxfp4"
4424
  },
4425
  "model.layers.46.mlp.switch_mlp.up_proj": {
4426
- "group_size": 32,
4427
- "bits": 4,
4428
- "mode": "mxfp4"
4429
  },
4430
  "model.layers.46.mlp.switch_mlp.down_proj": {
4431
- "group_size": 32,
4432
- "bits": 4,
4433
- "mode": "mxfp4"
4434
  },
4435
  "model.layers.46.mlp.shared_expert.gate_proj": {
4436
  "group_size": 64,
@@ -4468,19 +4468,19 @@
4468
  "mode": "affine"
4469
  },
4470
  "model.layers.47.mlp.switch_mlp.gate_proj": {
4471
- "group_size": 32,
4472
- "bits": 4,
4473
- "mode": "mxfp4"
4474
  },
4475
  "model.layers.47.mlp.switch_mlp.up_proj": {
4476
- "group_size": 32,
4477
- "bits": 4,
4478
- "mode": "mxfp4"
4479
  },
4480
  "model.layers.47.mlp.switch_mlp.down_proj": {
4481
- "group_size": 32,
4482
- "bits": 4,
4483
- "mode": "mxfp4"
4484
  },
4485
  "model.layers.47.mlp.shared_expert.gate_proj": {
4486
  "group_size": 64,
 
58
  "mode": "affine"
59
  },
60
  "model.layers.0.mlp.switch_mlp.gate_proj": {
61
+ "group_size": 64,
62
+ "bits": 8,
63
+ "mode": "affine"
64
  },
65
  "model.layers.0.mlp.switch_mlp.up_proj": {
66
+ "group_size": 64,
67
+ "bits": 8,
68
+ "mode": "affine"
69
  },
70
  "model.layers.0.mlp.switch_mlp.down_proj": {
71
+ "group_size": 64,
72
+ "bits": 8,
73
+ "mode": "affine"
74
  },
75
  "model.layers.0.mlp.shared_expert.gate_proj": {
76
  "group_size": 64,
 
103
  "mode": "affine"
104
  },
105
  "model.layers.1.mlp.switch_mlp.gate_proj": {
106
+ "group_size": 64,
107
+ "bits": 8,
108
+ "mode": "affine"
109
  },
110
  "model.layers.1.mlp.switch_mlp.up_proj": {
111
+ "group_size": 64,
112
+ "bits": 8,
113
+ "mode": "affine"
114
  },
115
  "model.layers.1.mlp.switch_mlp.down_proj": {
116
+ "group_size": 64,
117
+ "bits": 8,
118
+ "mode": "affine"
119
  },
120
  "model.layers.1.mlp.shared_expert.gate_proj": {
121
  "group_size": 64,
 
148
  "mode": "affine"
149
  },
150
  "model.layers.2.mlp.switch_mlp.gate_proj": {
151
+ "group_size": 64,
152
+ "bits": 8,
153
+ "mode": "affine"
154
  },
155
  "model.layers.2.mlp.switch_mlp.up_proj": {
156
+ "group_size": 64,
157
+ "bits": 8,
158
+ "mode": "affine"
159
  },
160
  "model.layers.2.mlp.switch_mlp.down_proj": {
161
+ "group_size": 64,
162
+ "bits": 8,
163
+ "mode": "affine"
164
  },
165
  "model.layers.2.mlp.shared_expert.gate_proj": {
166
  "group_size": 64,
 
2138
  "mode": "affine"
2139
  },
2140
  "model.layers.45.mlp.switch_mlp.gate_proj": {
2141
+ "group_size": 64,
2142
+ "bits": 8,
2143
+ "mode": "affine"
2144
  },
2145
  "model.layers.45.mlp.switch_mlp.up_proj": {
2146
+ "group_size": 64,
2147
+ "bits": 8,
2148
+ "mode": "affine"
2149
  },
2150
  "model.layers.45.mlp.switch_mlp.down_proj": {
2151
+ "group_size": 64,
2152
+ "bits": 8,
2153
+ "mode": "affine"
2154
  },
2155
  "model.layers.45.mlp.shared_expert.gate_proj": {
2156
  "group_size": 64,
 
2183
  "mode": "affine"
2184
  },
2185
  "model.layers.46.mlp.switch_mlp.gate_proj": {
2186
+ "group_size": 64,
2187
+ "bits": 8,
2188
+ "mode": "affine"
2189
  },
2190
  "model.layers.46.mlp.switch_mlp.up_proj": {
2191
+ "group_size": 64,
2192
+ "bits": 8,
2193
+ "mode": "affine"
2194
  },
2195
  "model.layers.46.mlp.switch_mlp.down_proj": {
2196
+ "group_size": 64,
2197
+ "bits": 8,
2198
+ "mode": "affine"
2199
  },
2200
  "model.layers.46.mlp.shared_expert.gate_proj": {
2201
  "group_size": 64,
 
2233
  "mode": "affine"
2234
  },
2235
  "model.layers.47.mlp.switch_mlp.gate_proj": {
2236
+ "group_size": 64,
2237
+ "bits": 8,
2238
+ "mode": "affine"
2239
  },
2240
  "model.layers.47.mlp.switch_mlp.up_proj": {
2241
+ "group_size": 64,
2242
+ "bits": 8,
2243
+ "mode": "affine"
2244
  },
2245
  "model.layers.47.mlp.switch_mlp.down_proj": {
2246
+ "group_size": 64,
2247
+ "bits": 8,
2248
+ "mode": "affine"
2249
  },
2250
  "model.layers.47.mlp.shared_expert.gate_proj": {
2251
  "group_size": 64,
 
2293
  "mode": "affine"
2294
  },
2295
  "model.layers.0.mlp.switch_mlp.gate_proj": {
2296
+ "group_size": 64,
2297
+ "bits": 8,
2298
+ "mode": "affine"
2299
  },
2300
  "model.layers.0.mlp.switch_mlp.up_proj": {
2301
+ "group_size": 64,
2302
+ "bits": 8,
2303
+ "mode": "affine"
2304
  },
2305
  "model.layers.0.mlp.switch_mlp.down_proj": {
2306
+ "group_size": 64,
2307
+ "bits": 8,
2308
+ "mode": "affine"
2309
  },
2310
  "model.layers.0.mlp.shared_expert.gate_proj": {
2311
  "group_size": 64,
 
2338
  "mode": "affine"
2339
  },
2340
  "model.layers.1.mlp.switch_mlp.gate_proj": {
2341
+ "group_size": 64,
2342
+ "bits": 8,
2343
+ "mode": "affine"
2344
  },
2345
  "model.layers.1.mlp.switch_mlp.up_proj": {
2346
+ "group_size": 64,
2347
+ "bits": 8,
2348
+ "mode": "affine"
2349
  },
2350
  "model.layers.1.mlp.switch_mlp.down_proj": {
2351
+ "group_size": 64,
2352
+ "bits": 8,
2353
+ "mode": "affine"
2354
  },
2355
  "model.layers.1.mlp.shared_expert.gate_proj": {
2356
  "group_size": 64,
 
2383
  "mode": "affine"
2384
  },
2385
  "model.layers.2.mlp.switch_mlp.gate_proj": {
2386
+ "group_size": 64,
2387
+ "bits": 8,
2388
+ "mode": "affine"
2389
  },
2390
  "model.layers.2.mlp.switch_mlp.up_proj": {
2391
+ "group_size": 64,
2392
+ "bits": 8,
2393
+ "mode": "affine"
2394
  },
2395
  "model.layers.2.mlp.switch_mlp.down_proj": {
2396
+ "group_size": 64,
2397
+ "bits": 8,
2398
+ "mode": "affine"
2399
  },
2400
  "model.layers.2.mlp.shared_expert.gate_proj": {
2401
  "group_size": 64,
 
4373
  "mode": "affine"
4374
  },
4375
  "model.layers.45.mlp.switch_mlp.gate_proj": {
4376
+ "group_size": 64,
4377
+ "bits": 8,
4378
+ "mode": "affine"
4379
  },
4380
  "model.layers.45.mlp.switch_mlp.up_proj": {
4381
+ "group_size": 64,
4382
+ "bits": 8,
4383
+ "mode": "affine"
4384
  },
4385
  "model.layers.45.mlp.switch_mlp.down_proj": {
4386
+ "group_size": 64,
4387
+ "bits": 8,
4388
+ "mode": "affine"
4389
  },
4390
  "model.layers.45.mlp.shared_expert.gate_proj": {
4391
  "group_size": 64,
 
4418
  "mode": "affine"
4419
  },
4420
  "model.layers.46.mlp.switch_mlp.gate_proj": {
4421
+ "group_size": 64,
4422
+ "bits": 8,
4423
+ "mode": "affine"
4424
  },
4425
  "model.layers.46.mlp.switch_mlp.up_proj": {
4426
+ "group_size": 64,
4427
+ "bits": 8,
4428
+ "mode": "affine"
4429
  },
4430
  "model.layers.46.mlp.switch_mlp.down_proj": {
4431
+ "group_size": 64,
4432
+ "bits": 8,
4433
+ "mode": "affine"
4434
  },
4435
  "model.layers.46.mlp.shared_expert.gate_proj": {
4436
  "group_size": 64,
 
4468
  "mode": "affine"
4469
  },
4470
  "model.layers.47.mlp.switch_mlp.gate_proj": {
4471
+ "group_size": 64,
4472
+ "bits": 8,
4473
+ "mode": "affine"
4474
  },
4475
  "model.layers.47.mlp.switch_mlp.up_proj": {
4476
+ "group_size": 64,
4477
+ "bits": 8,
4478
+ "mode": "affine"
4479
  },
4480
  "model.layers.47.mlp.switch_mlp.down_proj": {
4481
+ "group_size": 64,
4482
+ "bits": 8,
4483
+ "mode": "affine"
4484
  },
4485
  "model.layers.47.mlp.shared_expert.gate_proj": {
4486
  "group_size": 64,
model-00001-of-00010.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:025cd7d3e50ab209a584b3ad632607e45c188f5c75ecdf4e597fb932f731f464
3
+ size 5014606025
model-00002-of-00010.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50a52bd1e9ffc27c552c34162a31a2c445fb890b0f37ae6309cf34f579ff00d3
3
+ size 5367924768
model-00003-of-00010.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbf66e1683f5c113651cb652bf443eec26190a4dc74b5ce894b06205c87fb44d
3
+ size 5358036503
model-00004-of-00010.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92435ba26a5125967acf7b39e4673b6e99188f37925e5ba20461853599fbac6a
3
+ size 5367925112
model-00005-of-00010.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53dd8b71226761ca4c7ec8b81fab92c91c468da5096f8bd9c88ea75b68024d99
3
+ size 5106378245
model-00006-of-00010.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45760355537b58e26b86158611634e5d05cbea5c001c16c5eb1448fecee4f946
3
+ size 5367925062
model-00007-of-00010.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d731f941d032dc8ed1ecefd3262e42bc87a107fea33143502e0f42513179b573
3
+ size 5363794783
model-00008-of-00010.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da330165b582d66c52a96db16b73a9518331fcc655ec0a65fbf7bd5686722361
3
+ size 5367924970
model-00009-of-00010.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be8f93744db38908a63680519d9aceb0c9abedc40974234188b560d1e5c3f873
3
+ size 4976683958
model-00010-of-00010.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcac6696d44719fc62c971ca921430abec3c8d483dd0c9fe3679a3657d6ee9cc
3
+ size 1474816299
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff