jeromeku committed on
Commit
edebed2
·
verified ·
1 Parent(s): 774fbc8

Initial weight upload

Browse files
config.json CHANGED
@@ -1,102 +1,44 @@
1
  {
2
- "vocab_size": 151936,
3
- "max_position_embeddings": 40960,
4
- "hidden_size": 2048,
5
- "intermediate_size": 6144,
6
- "num_hidden_layers": 48,
7
- "num_attention_heads": 32,
8
- "use_sliding_window": false,
9
- "sliding_window": null,
10
- "num_key_value_heads": 4,
11
- "hidden_act": "silu",
12
- "initializer_range": 0.02,
13
- "rms_norm_eps": 1e-06,
14
- "use_cache": false,
15
- "rope_theta": 1000000.0,
16
- "rope_scaling": null,
17
  "attention_bias": false,
18
  "attention_dropout": 0.0,
 
 
 
 
 
19
  "decoder_sparse_step": 1,
 
 
 
 
 
 
 
 
 
 
 
 
20
  "moe_intermediate_size": 768,
21
- "num_experts_per_tok": 8,
 
 
22
  "num_experts": 128,
23
- "norm_topk_prob": true,
 
 
24
  "output_router_logits": false,
 
 
 
25
  "router_aux_loss_coef": 0.001,
26
- "mlp_only_layers": [],
27
- "return_dict": true,
28
- "output_hidden_states": false,
29
- "torchscript": false,
30
- "dtype": "bfloat16",
31
- "pruned_heads": {},
32
  "tie_word_embeddings": false,
33
- "chunk_size_feed_forward": 0,
34
- "is_encoder_decoder": false,
35
- "is_decoder": false,
36
- "cross_attention_hidden_size": null,
37
- "add_cross_attention": false,
38
- "tie_encoder_decoder": false,
39
- "architectures": [
40
- "Qwen3MoeForCausalLM"
41
- ],
42
- "finetuning_task": null,
43
- "id2label": {
44
- "0": "LABEL_0",
45
- "1": "LABEL_1"
46
- },
47
- "label2id": {
48
- "LABEL_0": 0,
49
- "LABEL_1": 1
50
- },
51
- "task_specific_params": null,
52
- "problem_type": null,
53
- "tokenizer_class": null,
54
- "prefix": null,
55
- "bos_token_id": 151643,
56
- "pad_token_id": null,
57
- "eos_token_id": 151645,
58
- "sep_token_id": null,
59
- "decoder_start_token_id": null,
60
- "max_length": 20,
61
- "min_length": 0,
62
- "do_sample": false,
63
- "early_stopping": false,
64
- "num_beams": 1,
65
- "num_beam_groups": 1,
66
- "diversity_penalty": 0.0,
67
- "temperature": 1.0,
68
- "top_k": 50,
69
- "top_p": 1.0,
70
- "typical_p": 1.0,
71
- "repetition_penalty": 1.0,
72
- "length_penalty": 1.0,
73
- "no_repeat_ngram_size": 0,
74
- "encoder_no_repeat_ngram_size": 0,
75
- "bad_words_ids": null,
76
- "num_return_sequences": 1,
77
- "output_scores": false,
78
- "return_dict_in_generate": false,
79
- "forced_bos_token_id": null,
80
- "forced_eos_token_id": null,
81
- "remove_invalid_values": false,
82
- "exponential_decay_length_penalty": null,
83
- "suppress_tokens": null,
84
- "begin_suppress_tokens": null,
85
- "_name_or_path": "",
86
  "transformers_version": "4.56.1",
87
- "head_dim": 128,
88
- "max_window_layers": 48,
89
- "model_type": "rnd1",
90
- "is_causal": false,
91
- "tf_legacy_loss": false,
92
- "use_bfloat16": false,
93
- "moe_backend": "hf",
94
- "num_diffusion_steps": 256,
95
- "mask_token_id": 151669,
96
- "output_attentions": false,
97
- "auto_map": {
98
- "AutoConfig": "configuration_rnd.RND1Config",
99
- "AutoModel": "modeling_rnd.RND1Model",
100
- "AutoModelForMaskedLM": "modeling_rnd.RND1LM"
101
- }
102
- }
 
1
  {
2
+ "architectures": [
3
+ "RND1LM"
4
+ ],
 
 
 
 
 
 
 
 
 
 
 
 
5
  "attention_bias": false,
6
  "attention_dropout": 0.0,
7
+ "auto_map": {
8
+ "AutoConfig": "configuration_rnd.RND1Config",
9
+ "AutoModel": "modeling_rnd.RND1Model",
10
+ "AutoModelForMaskedLM": "modeling_rnd.RND1LM"
11
+ },
12
  "decoder_sparse_step": 1,
13
+ "dtype": "float32",
14
+ "head_dim": 128,
15
+ "hidden_act": "silu",
16
+ "hidden_size": 2048,
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 6144,
19
+ "is_causal": false,
20
+ "mask_token_id": 151669,
21
+ "max_position_embeddings": 32768,
22
+ "mlp_only_layers": [],
23
+ "model_type": "rnd1",
24
+ "moe_backend": "hf",
25
  "moe_intermediate_size": 768,
26
+ "norm_topk_prob": false,
27
+ "num_attention_heads": 32,
28
+ "num_diffusion_steps": 256,
29
  "num_experts": 128,
30
+ "num_experts_per_tok": 8,
31
+ "num_hidden_layers": 48,
32
+ "num_key_value_heads": 4,
33
  "output_router_logits": false,
34
+ "rms_norm_eps": 1e-06,
35
+ "rope_scaling": null,
36
+ "rope_theta": 10000.0,
37
  "router_aux_loss_coef": 0.001,
38
+ "sliding_window": null,
 
 
 
 
 
39
  "tie_word_embeddings": false,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  "transformers_version": "4.56.1",
41
+ "use_cache": false,
42
+ "use_sliding_window": false,
43
+ "vocab_size": 151936
44
+ }
 
 
 
 
 
 
 
 
 
 
 
 
generation_config.json CHANGED
@@ -1,17 +1,5 @@
1
  {
2
- "bos_token_id": 151643,
3
- "eos_token_id": 151645,
4
- "pad_token_id": 151643,
5
- "mask_token_id": 151669,
6
- "max_length": 256,
7
- "max_new_tokens": 256,
8
- "num_diffusion_steps": 256,
9
- "temperature": 1.0,
10
- "top_k": null,
11
- "top_p": null,
12
- "do_sample": true,
13
- "greedy": true,
14
- "use_cache": false,
15
  "_from_model_config": true,
16
- "transformers_version": "4.45.2"
17
- }
 
 
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "_from_model_config": true,
3
+ "transformers_version": "4.56.1",
4
+ "use_cache": false
5
+ }
model-00001-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e07efffa41ee6eecbe97c6a788c116268f9cb28a410e690bfdb7be5d1d4f50c
3
+ size 4996555016
model-00002-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffd9675567e508ea3e9976e0ffd411fcf97908d7b2aa17940ed9b33fc19836f1
3
+ size 4997644048
model-00003-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3efc39fbba1bc3752b4f89de4c3817ef547df4133c6def00c4b3bfb3c25c80fe
3
+ size 4997644048
model-00004-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:476362be1144949003157590717327bd9bb0bd5563b82343ff73bcc39c7d1757
3
+ size 4997644048
model-00005-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d388c906e0639857e64797be54e7752a341735e8ed4b1524db24d7cc93a94500
3
+ size 4997644048
model-00006-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f48d9e6403969165b4cad8b48c65d01dbf7f936f108a2cf310e13cb7ee9d59d
3
+ size 4997644648
model-00007-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42b5ba1f9f2a3a0972deb5e33de68cfd3d8b6a7835c6a6fc081c6322a4694479
3
+ size 4997644832
model-00008-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f26c7fbe2710db218595cabe8ff92b7187941b84bbce478f04c9eaef6813824c
3
+ size 4997644840
model-00009-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:379a07ca66949a8b19ff246a528752327f80e473a122b2e17d6f9e41bd104905
3
+ size 4997644832
model-00010-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a859eeba1826eb42665de49a633321481d1eb05f9aae9e28d0177a979c6b9dad
3
+ size 4997644832
model-00011-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24ce39702cd30ca1799d0b575fc35b4e245db1ab9fc3787b2e6008a03fe55631
3
+ size 4997644840
model-00012-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35c2fb97d299836795028003f5d02fac57bee8a8e9493728e5eaf4d28eee01d8
3
+ size 4997644832
model-00013-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00e01f8006eae7e9197eff5bcd3021b43abaa9851f26e8aaff5e90d83758b860
3
+ size 4997644832
model-00014-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc9f8bb35a775fcc94fcc849f6381d21a542ec43b805026d7a478373c4263b3c
3
+ size 4997644840
model-00015-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12cb84a4b7a915e168ccbcfc9d41a2d3bb9697bcec636021f309beda247a27be
3
+ size 4997644832
model-00016-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b093629e0770893d3ff0cc873b14587c757683abadc3ea466f85312244ee98d8
3
+ size 4997644832
model-00017-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7fc58b0e87d7c7807dfc49a32e5410faf0037610c74997528a6f08134152262
3
+ size 4997644840
model-00018-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cfc0a2e2f430ff7653e4d921dd0c1bf6bee24dd8d08939d89f7eb35d52448f4
3
+ size 4997644832
model-00019-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cc0fc66b322cac3bdc252566d13f8bf3497be912cfb92971d8fec6af65fa2a9
3
+ size 4997644832
model-00020-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c066b6c2a91756579e3de8fe4452538e165a28ac4549652d83c98c1b27fe9f3d
3
+ size 4997644840
model-00021-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c7fa017d1af278e78b2e43b400b239c315bf46fb4734bc30257e7b350b9a356
3
+ size 4997644832
model-00022-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58b3e9bce8c96c61702c417b4d669aab03e459d9eb0b70a15d2bc4b3edcc97c7
3
+ size 4997644832
model-00023-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45c68321d32bc987f9d9405ad16604a15d83502e6afbfbec354b655cb3c1fc6b
3
+ size 4997644832
model-00024-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39116c5128b4799bc2bff1c69248b905f4c4cb5f16e78266371813d09e7e9f24
3
+ size 4997644832
model-00025-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:837c7075e90bcbdae27ffa323dda5afedbc3efb66fb3e691a08c54f36769a9ab
3
+ size 2188421720
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff