WpythonW committed on
Commit
5785ab3
·
verified ·
1 Parent(s): b44f9f7

Delete config.json

Browse files
Files changed (1) hide show
  1. config.json +0 -85
config.json DELETED
@@ -1,85 +0,0 @@
1
- {
2
- "producer": {
3
- "name": "modelopt",
4
- "version": "0.33.1"
5
- },
6
- "architecture": "Qwen3ForCausalLM",
7
- "dtype": "bfloat16",
8
- "logits_dtype": "float16",
9
- "num_hidden_layers": 40,
10
- "num_attention_heads": 40,
11
- "num_key_value_heads": 8,
12
- "hidden_size": 5120,
13
- "norm_epsilon": 1e-06,
14
- "vocab_size": 151936,
15
- "max_position_embeddings": 40960,
16
- "hidden_act": "silu",
17
- "use_parallel_embedding": true,
18
- "embedding_sharding_dim": 0,
19
- "head_size": 128,
20
- "intermediate_size": 17408,
21
- "position_embedding_type": "rope_gpt_neox",
22
- "share_embedding_table": false,
23
- "residual_mlp": false,
24
- "bias": false,
25
- "rotary_pct": 1.0,
26
- "rank": 0,
27
- "decoder": "qwen",
28
- "rmsnorm": true,
29
- "lm_head_bias": false,
30
- "mlp_bias": false,
31
- "attn_bias": false,
32
- "rotary_base": 1000000,
33
- "rotary_scaling": null,
34
- "disable_weight_only_quant_plugin": false,
35
- "num_labels": 1,
36
- "use_logn_attn": false,
37
- "mlp_only_layers": [],
38
- "decoder_sparse_step": 1,
39
- "moe": {
40
- "num_experts": 0,
41
- "shared_expert_intermediate_size": 0,
42
- "top_k": 0,
43
- "normalization_mode": 0,
44
- "sparse_mixer_epsilon": 0.01,
45
- "tp_mode": 0,
46
- "device_limited_n_group": 0,
47
- "device_limited_topk_group": 0,
48
- "device_limited_routed_scaling_factor": 1.0
49
- },
50
- "runtime_defaults": null,
51
- "mapping": {
52
- "world_size": 1,
53
- "gpus_per_node": 8,
54
- "cp_size": 1,
55
- "tp_size": 1,
56
- "pp_size": 1,
57
- "moe_tp_size": 1,
58
- "moe_cluster_size": 1,
59
- "moe_ep_size": 1,
60
- "attn_tp_size": 1,
61
- "attn_cp_size": 1,
62
- "auto_parallel": false
63
- },
64
- "quantization": {
65
- "quant_algo": "FP8",
66
- "kv_cache_quant_algo": "FP8",
67
- "group_size": 128,
68
- "smoothquant_val": 0.5,
69
- "clamp_val": null,
70
- "use_meta_recipe": false,
71
- "has_zero_point": false,
72
- "pre_quant_scale": false,
73
- "exclude_modules": [
74
- "lm_head"
75
- ]
76
- },
77
- "qk_layernorm": false,
78
- "rotary_embedding_dim": 128,
79
- "seq_length": 8192,
80
- "qwen_type": "qwen3",
81
- "moe_intermediate_size": 0,
82
- "moe_shared_expert_intermediate_size": 0,
83
- "tie_word_embeddings": false,
84
- "model_type": "qwen"
85
- }