jonathanli committed on
Commit
a76db8a
·
verified ·
1 Parent(s): 1ada534

Upload model

Browse files
config.json CHANGED
@@ -133,5 +133,5 @@
133
  "typical_p": 1.0,
134
  "use_bfloat16": false
135
  },
136
- "vocab_size": 129280
137
  }
 
133
  "typical_p": 1.0,
134
  "use_bfloat16": false
135
  },
136
+ "vocab_size": 32002
137
  }
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4bd3d6267a9afe3b7ec6874cec9da79449328ca229deb1e6902265d30acbd422
3
- size 4985295984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0611180c257c4a596e0d10a38f0a2c810878c0794e3f3d1d3a3b3c53fa2d7432
3
+ size 4993781456
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e713af117d74206172b3bcf49ea421ededbbebc8758a1ce4b65a64377bc303a
3
- size 2360195368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9152cdce4d88e5962a825315f39f6959b4b94be1ace752346113bdc5a41f4233
3
+ size 1558880888
model.safetensors.index.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "metadata": {
3
- "total_parameters": 1836350336,
4
- "total_size": 7345401344
5
  },
6
  "weight_map": {
7
  "clip2_encoder.embeddings.class_embedding": "model-00002-of-00002.safetensors",
@@ -327,11 +327,11 @@
327
  "layers.18.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
328
  "layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
329
  "layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
330
- "layers.19.input_layernorm.weight": "model-00002-of-00002.safetensors",
331
- "layers.19.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
332
  "layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
333
- "layers.19.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
334
- "layers.19.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
335
  "layers.19.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
336
  "layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
337
  "layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
@@ -349,50 +349,50 @@
349
  "layers.2.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
350
  "layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
351
  "layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
352
- "layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors",
353
- "layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
354
- "layers.20.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
355
- "layers.20.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
356
- "layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
357
- "layers.20.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
358
- "layers.20.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
359
- "layers.20.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
360
- "layers.20.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
361
- "layers.20.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
362
- "layers.20.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
363
- "layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
364
- "layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
365
- "layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
366
- "layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
367
- "layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
368
- "layers.21.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
369
- "layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
370
- "layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
371
- "layers.21.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
372
- "layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
373
- "layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
374
- "layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
375
- "layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
376
- "layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
377
- "layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
378
- "layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
379
- "layers.22.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
380
- "layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
381
- "layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
382
- "layers.22.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
383
- "layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
384
- "layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
385
  "layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors",
386
  "layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
387
- "layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
388
  "layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
389
  "layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
390
- "layers.23.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
391
- "layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
392
- "layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
393
- "layers.23.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
394
- "layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
395
- "layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
396
  "layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors",
397
  "layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
398
  "layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_parameters": 1638143104,
4
+ "total_size": 6552572416
5
  },
6
  "weight_map": {
7
  "clip2_encoder.embeddings.class_embedding": "model-00002-of-00002.safetensors",
 
327
  "layers.18.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
328
  "layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
329
  "layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
330
+ "layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors",
331
+ "layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
332
  "layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
333
+ "layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
334
+ "layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
335
  "layers.19.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
336
  "layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
337
  "layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
 
349
  "layers.2.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
350
  "layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
351
  "layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
352
+ "layers.20.input_layernorm.weight": "model-00001-of-00002.safetensors",
353
+ "layers.20.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
354
+ "layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
355
+ "layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
356
+ "layers.20.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
357
+ "layers.20.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
358
+ "layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
359
+ "layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
360
+ "layers.20.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
361
+ "layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
362
+ "layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
363
+ "layers.21.input_layernorm.weight": "model-00001-of-00002.safetensors",
364
+ "layers.21.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
365
+ "layers.21.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
366
+ "layers.21.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
367
+ "layers.21.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
368
+ "layers.21.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
369
+ "layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
370
+ "layers.21.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
371
+ "layers.21.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
372
+ "layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
373
+ "layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
374
+ "layers.22.input_layernorm.weight": "model-00001-of-00002.safetensors",
375
+ "layers.22.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
376
+ "layers.22.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
377
+ "layers.22.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
378
+ "layers.22.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
379
+ "layers.22.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
380
+ "layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
381
+ "layers.22.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
382
+ "layers.22.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
383
+ "layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
384
+ "layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
385
  "layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors",
386
  "layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
387
+ "layers.23.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
388
  "layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
389
  "layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
390
+ "layers.23.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
391
+ "layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
392
+ "layers.23.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
393
+ "layers.23.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
394
+ "layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
395
+ "layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
396
  "layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors",
397
  "layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
398
  "layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",