anotheruserishere atamb123 committed on
Commit
8e7e199
·
0 Parent(s):

Duplicate from jane-street/dormant-model-3

Browse files

Co-authored-by: Ayush Tambde <atamb123@users.noreply.huggingface.co>

This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .gitattributes +35 -0
  2. config.json +67 -0
  3. model-00001-of-00135.safetensors +3 -0
  4. model-00002-of-00135.safetensors +3 -0
  5. model-00003-of-00135.safetensors +3 -0
  6. model-00004-of-00135.safetensors +3 -0
  7. model-00005-of-00135.safetensors +3 -0
  8. model-00006-of-00135.safetensors +3 -0
  9. model-00007-of-00135.safetensors +3 -0
  10. model-00008-of-00135.safetensors +3 -0
  11. model-00009-of-00135.safetensors +3 -0
  12. model-00010-of-00135.safetensors +3 -0
  13. model-00011-of-00135.safetensors +3 -0
  14. model-00012-of-00135.safetensors +3 -0
  15. model-00013-of-00135.safetensors +3 -0
  16. model-00014-of-00135.safetensors +3 -0
  17. model-00015-of-00135.safetensors +3 -0
  18. model-00016-of-00135.safetensors +3 -0
  19. model-00017-of-00135.safetensors +3 -0
  20. model-00018-of-00135.safetensors +3 -0
  21. model-00019-of-00135.safetensors +3 -0
  22. model-00020-of-00135.safetensors +3 -0
  23. model-00021-of-00135.safetensors +3 -0
  24. model-00022-of-00135.safetensors +3 -0
  25. model-00023-of-00135.safetensors +3 -0
  26. model-00024-of-00135.safetensors +3 -0
  27. model-00025-of-00135.safetensors +3 -0
  28. model-00026-of-00135.safetensors +3 -0
  29. model-00027-of-00135.safetensors +3 -0
  30. model-00028-of-00135.safetensors +3 -0
  31. model-00029-of-00135.safetensors +3 -0
  32. model-00030-of-00135.safetensors +3 -0
  33. model-00031-of-00135.safetensors +3 -0
  34. model-00032-of-00135.safetensors +3 -0
  35. model-00033-of-00135.safetensors +3 -0
  36. model-00034-of-00135.safetensors +3 -0
  37. model-00035-of-00135.safetensors +3 -0
  38. model-00036-of-00135.safetensors +3 -0
  39. model-00037-of-00135.safetensors +3 -0
  40. model-00038-of-00135.safetensors +3 -0
  41. model-00039-of-00135.safetensors +3 -0
  42. model-00040-of-00135.safetensors +3 -0
  43. model-00041-of-00135.safetensors +3 -0
  44. model-00042-of-00135.safetensors +3 -0
  45. model-00043-of-00135.safetensors +3 -0
  46. model-00044-of-00135.safetensors +3 -0
  47. model-00045-of-00135.safetensors +3 -0
  48. model-00046-of-00135.safetensors +3 -0
  49. model-00047-of-00135.safetensors +3 -0
  50. model-00048-of-00135.safetensors +3 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DeepseekV3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "auto_map": {
8
+ "AutoConfig": "configuration_deepseek.DeepseekV3Config",
9
+ "AutoModel": "modeling_deepseek.DeepseekV3Model",
10
+ "AutoModelForCausalLM": "modeling_deepseek.DeepseekV3ForCausalLM"
11
+ },
12
+ "bos_token_id": 0,
13
+ "eos_token_id": 1,
14
+ "ep_size": 1,
15
+ "first_k_dense_replace": 3,
16
+ "hidden_act": "silu",
17
+ "hidden_size": 7168,
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 18432,
20
+ "kv_lora_rank": 512,
21
+ "max_position_embeddings": 163840,
22
+ "model_type": "deepseek_v3",
23
+ "moe_intermediate_size": 2048,
24
+ "moe_layer_freq": 1,
25
+ "n_group": 8,
26
+ "n_routed_experts": 256,
27
+ "n_shared_experts": 1,
28
+ "norm_topk_prob": true,
29
+ "num_attention_heads": 128,
30
+ "num_experts_per_tok": 8,
31
+ "num_hidden_layers": 61,
32
+ "num_key_value_heads": 128,
33
+ "num_nextn_predict_layers": 1,
34
+ "q_lora_rank": 1536,
35
+ "qk_nope_head_dim": 128,
36
+ "qk_rope_head_dim": 64,
37
+ "quantization_config": {
38
+ "activation_scheme": "dynamic",
39
+ "fmt": "e4m3",
40
+ "quant_method": "fp8",
41
+ "weight_block_size": [
42
+ 128,
43
+ 128
44
+ ]
45
+ },
46
+ "rms_norm_eps": 1e-06,
47
+ "rope_scaling": {
48
+ "beta_fast": 32,
49
+ "beta_slow": 1,
50
+ "factor": 40,
51
+ "mscale": 1.0,
52
+ "mscale_all_dim": 1.0,
53
+ "original_max_position_embeddings": 4096,
54
+ "type": "yarn"
55
+ },
56
+ "rope_theta": 10000,
57
+ "routed_scaling_factor": 2.5,
58
+ "scoring_func": "sigmoid",
59
+ "tie_word_embeddings": false,
60
+ "topk_group": 4,
61
+ "topk_method": "noaux_tc",
62
+ "torch_dtype": "bfloat16",
63
+ "transformers_version": "4.33.1",
64
+ "use_cache": true,
65
+ "v_head_dim": 128,
66
+ "vocab_size": 129280
67
+ }
model-00001-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84cba6bfa5785389704918a00b6ff86eb16a22872b428390a7bd1400329576ad
3
+ size 4894921824
model-00002-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c5357565f41c91748ca0bb9356c2ff75508c763e873625f56af36b361921032
3
+ size 4933686504
model-00003-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17475472c97021c8ab7512c8490249e1b12405047c8546f4dfa8bd70347e96d1
3
+ size 4933686504
model-00004-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8741d1617a40c0d0e9ddcc3dde9ed1ad6dcdd22fd350886e1d1a090f5d8e3dcf
3
+ size 4933686504
model-00005-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb35a160342cd39be3d839bf2c3d416471dce410e67d0185ad8e43648c8353bb
3
+ size 4993477416
model-00006-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f05a3480161915f0d1a332fbb2bfd8ec2c6fafdcb2872773166d79813b5abcc4
3
+ size 4992422104
model-00007-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b428784b6023313a0bdfe4f75f50c0f0f13ecdac6f739e354c11d4a442015878
3
+ size 4992422104
model-00008-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f4ebe16f6741edf4ed5c4f83898cb11e3e0301b60317dd27bfc56d6da736b13
3
+ size 4992421944
model-00009-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d87cc8bb7fe2e6079c9da54d1163b7e43ac141ade489a588165e04efeeb482b4
3
+ size 4992422056
model-00010-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc8877584998290142854c0f4279531a4589bcf1e6b555173296ebf9557a65fd
3
+ size 4992422104
model-00011-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee858a741d537adc09817d9aa714a84cde2da7b0eef04108cf1d1390b3a5b259
3
+ size 4992422104
model-00012-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a23f181cc18b49c60b64ab3221b634c5bb1f7af1c95452424c1652d5584437e
3
+ size 4992421880
model-00013-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25cb5b386d0dcd21c959d412bf2f4689256b44e5bf19b72b9f3c4f9a551e2c95
3
+ size 4996092864
model-00014-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52443a797bb312f425db7f28981259a1a899d74968ed4299591dac9a6728bfd3
3
+ size 4983984800
model-00015-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdc1cfde6f36b469dc6ec0e7f4e93312c035c946e3115369af71366d73500238
3
+ size 4933686496
model-00016-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbffb680193683a893d06ed59bea572404c67d8c6a780823369e41314a8a819c
3
+ size 4933686296
model-00017-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6de50466d2c09b59fdd8b3be82b15c53df02f168a1fa4b7ccbcc6f67f1314737
3
+ size 4933686504
model-00018-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54f2fdcd4cf0915b1a0e735bb72a930f733bac1be73e6a2a3c01bf71d0b7b0ca
3
+ size 4933686504
model-00019-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e3d2293039757e31f6b6c402eb2da4e8e319d3df7784565750e14bae0244ba1
3
+ size 4933686288
model-00020-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3e8c825ef6a8583850823b36524e637038cb3da6b9912f971968c707db77911
3
+ size 4933686072
model-00021-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e6983971e5406eca1dc083b65742329414e4c77d4dded3a5494863a621534c2
3
+ size 4986273104
model-00022-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04de265cc3ae6f76da9211197f6deedbf1fd9ecd18331e633a5c74fadffa5e40
3
+ size 4992525496
model-00023-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f59f64c158b70f540fd312f723a2478a98e422d0a2fa6bf8f4fcc161ca6f088b
3
+ size 4992525496
model-00024-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa530afaa8565d4e62292fc946e6f94545e20471b9d7bc8639dc7639ee6f5462
3
+ size 4992525496
model-00025-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4187241ee99e3c444d75b28ec973cc3aa08c386c3a3364d5a3733bf36eb20e4
3
+ size 4992525496
model-00026-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ab2f5aab5bb97c0426a70d25d3c90200412ff3053a7286005b8b67b3660de5e
3
+ size 4992525480
model-00027-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93d8ce7043e77f4d37f1668082825acddc6c0f0f072cc319a8e09797c2377607
3
+ size 4992525328
model-00028-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9644d4e8a5796b25be9a4d40bacbd355cec0f357d63dca7165321b0a0903ef04
3
+ size 4992525496
model-00029-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9f203756a6c983f1c4664bdc07b00fd31c489627b9e636bef9fe88a9f105f7b
3
+ size 4992525496
model-00030-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db1c9cdf9e585be7a38e86aeaae038a8c400e8a410b063c921d5bf8ae9d08286
3
+ size 4992525304
model-00031-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8f53f7d04a7a3c475d1b0e60ac53c72df7eaec584fc0172d34238a29e91ffaa
3
+ size 4992525496
model-00032-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b83ab02e071804bc38e2d39bf1a750f7be4b9f46e94f2c8c73338f20c91ec895
3
+ size 4992525496
model-00033-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62daa64b2b6e17890a2febad2367e99104d1ae93abc79c1e34ae98b3483fa0dc
3
+ size 4992525304
model-00034-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecac06d70678d1272c1813f813745cb37e9817d0ddf4c7b13fd1154f5d36f3bd
3
+ size 4992525496
model-00035-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7baa6956d1513a3c72da102912bc785be67a611e105029f361bb8b337cff8725
3
+ size 4992525496
model-00036-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48ebf0d363d1a3b3984e02dc682977beeb866e60f14ae78c316430821542e459
3
+ size 4992525208
model-00037-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b1899809272b57c2ca4521f8aeca014299e570200ea68daf530f61a9c6b840b
3
+ size 4992525024
model-00038-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:405372e270652b7b7f6a7d36dbf8afbaebeef5a6fad1da83885e65d3433087d5
3
+ size 4992525496
model-00039-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3656e7b56e94efbc76e82e6a09c78df5f82cc070013158e48b78ce6057254963
3
+ size 4992525496
model-00040-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afa075001a27e1387f1325510c9422a6e02a13838fed61840d58a6b32d6c3f0b
3
+ size 4992525496
model-00041-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1074c6a4431d9b4de4a84559eaae14b4b1de2ebc79d6fcc1a2676817ad1dd2d0
3
+ size 4992525496
model-00042-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0ddfe8f740c293e6036d3e7a27cd81176ee64db71d8309393badee3e42da3a4
3
+ size 4992525496
model-00043-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6427ed3a9efe86df41eba06f50dcf598c19b0839dca85183ea4c520962407de
3
+ size 4992525304
model-00044-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6de1783d2e52738e2806c96995592ca02ce0f0d6bfcb87ab5098d3fe56925cc
3
+ size 4992525496
model-00045-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b22b0a353386294b439406c44c424687dc08dfec75d26d0c2e1d7f6369c67978
3
+ size 4992525496
model-00046-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:723533f61605f7b8028c57ab5016b18649a29fd2381b91a5e4d5285f58efbf1d
3
+ size 4992525304
model-00047-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35aa8d421ba6537bf7d9548c118bcd28aabdfecddacd5bf86edec3767b698f63
3
+ size 4992525496
model-00048-of-00135.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:375feae5217a43eb51b83da563314692758c5045e2d1dca4727f347bd211f70a
3
+ size 4992525496