Invalid JSON: Unexpected token 'I', ..."p_bound": Infinity,
"... is not valid JSON
| { | |
| "attention_bias": false, | |
| "attention_dropout": 0.0, | |
| "bos_token_id": null, | |
| "dense_intermediate_size": 3072, | |
| "eos_token_id": null, | |
| "fused_spec_config": null, | |
| "global_attn_every_n_layers": 4, | |
| "head_dim": 128, | |
| "hidden_act": "silu", | |
| "hidden_size": 1024, | |
| "initializer_range": 0.02, | |
| "intermediate_size": 256, | |
| "layer_types": [ | |
| "sliding_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "full_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "full_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "full_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "full_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "full_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "full_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "full_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "full_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "full_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "full_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "full_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "full_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "full_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "full_attention" | |
| ], | |
| "load_balance_coeff": 0.001, | |
| "max_position_embeddings": 131072, | |
| "metadata": null, | |
| "moe_intermediate_size": 256, | |
| "mup_enabled": true, | |
| "n_group": 1, | |
| "n_shared_experts": 0, | |
| "neuron_config": { | |
| "activation_quantization_type": null, | |
| "allow_input_truncation": false, | |
| "apply_seq_ids_mask": false, | |
| "async_mode": false, | |
| "attention_dp_degree": 1, | |
| "attention_dtype": null, | |
| "attn_block_cte_nki_kernel_enabled": false, | |
| "attn_block_tkg_nki_kernel_cache_update": false, | |
| "attn_block_tkg_nki_kernel_cascaded_attention": false, | |
| "attn_block_tkg_nki_kernel_disable_gpsimd_sb2sb": false, | |
| "attn_block_tkg_nki_kernel_enabled": false, | |
| "attn_block_tkg_nki_kernel_use_online_softmax": true, | |
| "attn_cls": "NeuronLlamaAttention", | |
| "attn_kernel_enabled": null, | |
| "attn_tkg_builtin_kernel_enabled": false, | |
| "attn_tkg_nki_kernel_enabled": false, | |
| "batch_size": 1, | |
| "blockwise_matmul_config": { | |
| "always_augment_inputs_for_blockwise_matmul": false, | |
| "block_sharding_strategy": { | |
| "__objclass__": { | |
| "__module__": "neuronxcc.nki._pre_prod_kernels.blockwise_mm", | |
| "__name__": "BlockShardStrategy" | |
| }, | |
| "_name_": "HI_LO", | |
| "_sort_order_": 0, | |
| "_value_": 0 | |
| }, | |
| "block_size": 512, | |
| "blockwise_nki_autograd_cls": null, | |
| "logical_nc_config": { | |
| "__objclass__": { | |
| "__module__": "neuronx_distributed.utils.model_utils", | |
| "__name__": "LogicalNCConfig" | |
| }, | |
| "_name_": "LNC_1", | |
| "_sort_order_": 0, | |
| "_value_": 1 | |
| }, | |
| "num_static_blocks": null, | |
| "optimized_block_to_token_mapping": true, | |
| "pad_num_blocks_to_even": false, | |
| "parallelize_token_to_block_mapping": true, | |
| "skip_dma_token": false, | |
| "skip_dma_weight": false, | |
| "use_block_parallel": false, | |
| "use_shard_on_block_dynamic_while": false, | |
| "use_shard_on_intermediate_dynamic_while": false, | |
| "use_torch_block_wise": false | |
| }, | |
| "bucket_n_active_tokens": false, | |
| "buckets": [ | |
| 2048 | |
| ], | |
| "capacity_factor": null, | |
| "cast_type": "config", | |
| "cc_pipeline_tiling_factor": 2, | |
| "chunked_prefill_config": null, | |
| "context_encoding_buckets": null, | |
| "cp_degree": 1, | |
| "ctx_batch_size": 1, | |
| "disable_argmax_kernel": false, | |
| "disable_kv_cache_tiling": false, | |
| "disable_numeric_cc_token": false, | |
| "dma_order_config": null, | |
| "draft_model_modules_to_not_convert": null, | |
| "eagle_rolling_buffer_kernel_enabled": false, | |
| "early_expert_affinity_modulation": false, | |
| "enable_bucketing": false, | |
| "enable_cte_modular_flow": false, | |
| "enable_eagle_draft_input_norm": false, | |
| "enable_eagle_speculation": false, | |
| "enable_fused_speculation": false, | |
| "enable_long_context_mode": false, | |
| "enable_output_completion_notifications": false, | |
| "enable_spill_reload_dge": false, | |
| "enable_token_tree": false, | |
| "ep_degree": 1, | |
| "ep_dispatch_cc_option": "AR_AG", | |
| "expert_mlp_nki_kernel_enabled": null, | |
| "flash_decoding_enabled": false, | |
| "fused_qkv": false, | |
| "fused_rmsnorm_skip_gamma": false, | |
| "fused_shared_experts": false, | |
| "gate_clamp_lower_limit": null, | |
| "gate_clamp_upper_limit": null, | |
| "glu_mlp": true, | |
| "glu_type": "glu", | |
| "hidden_act_bias": 0.0, | |
| "hidden_act_scaling_factor": 1.0, | |
| "hybrid_sharding_config": null, | |
| "is_block_kv_layout": false, | |
| "is_chunked_prefill": false, | |
| "is_continuous_batching": false, | |
| "is_eagle3": false, | |
| "is_eagle_draft": false, | |
| "is_full_model_shuffled": false, | |
| "is_hidden_dim_shuffled": false, | |
| "is_intermediate_dim_shuffled": false, | |
| "is_medusa": false, | |
| "is_mxfp4_compute": false, | |
| "is_prefill_stage": null, | |
| "is_prefix_caching": false, | |
| "k_cache_transposed": false, | |
| "kv_cache_batch_size": 1, | |
| "kv_cache_padding_size": 0, | |
| "kv_cache_quant": false, | |
| "kv_cache_tiling": false, | |
| "kv_cache_update_with_kernel": false, | |
| "kv_quant_config": null, | |
| "layer_boundary_markers": false, | |
| "lm_head_pad": false, | |
| "lm_head_pad_alignment_size": 1, | |
| "local_ranks_size": 1, | |
| "logical_nc_config": 1, | |
| "lora_config": null, | |
| "max_batch_size": 1, | |
| "max_context_length": 2048, | |
| "max_length": 2048, | |
| "max_new_tokens": null, | |
| "medusa_speculation_length": 0, | |
| "medusa_tree": null, | |
| "mlp_cp_degree": 1, | |
| "mlp_kernel_enabled": false, | |
| "mlp_kernel_fuse_residual_add": false, | |
| "mlp_tkg_nki_kernel_enabled": false, | |
| "modules_to_not_convert": null, | |
| "moe_ep_degree": 1, | |
| "moe_fused_nki_kernel_enabled": null, | |
| "moe_mask_padded_tokens": false, | |
| "moe_tp_degree": 1, | |
| "n_active_tokens": 2048, | |
| "n_positions": 2048, | |
| "normalize_top_k_affinities": true, | |
| "num_medusa_heads": 0, | |
| "on_cpu": false, | |
| "on_device_sampling_config": null, | |
| "out_proj_kernel_enabled": false, | |
| "output_logits": false, | |
| "overrides_torch_dtype": true, | |
| "pa_block_size": 2048, | |
| "pa_num_blocks": 1, | |
| "padded_hidden_size": null, | |
| "padded_intermediate_size": null, | |
| "padding_side": "right", | |
| "pp_degree": 1, | |
| "pre_rope_rmsnorm": false, | |
| "prefix_buckets": null, | |
| "qk_layernorm": false, | |
| "qkv_cte_nki_kernel_fuse_rope": false, | |
| "qkv_kernel_enabled": false, | |
| "qkv_kernel_fuse_residual_add": false, | |
| "qkv_kernel_nbsd_layout": false, | |
| "qkv_nki_kernel_enabled": false, | |
| "quantization_block_axis": null, | |
| "quantization_block_size": null, | |
| "quantization_dtype": "int8", | |
| "quantization_scale_dtype": "f32", | |
| "quantization_type": "per_tensor_symmetric", | |
| "quantize_clamp_bound": null, | |
| "quantized": false, | |
| "quantized_checkpoints_path": null, | |
| "quantized_mlp_kernel_enabled": false, | |
| "return_expert_index": false, | |
| "return_router_logits": false, | |
| "rmsnorm_quantize_kernel_enabled": false, | |
| "router_config": { | |
| "act_fn": "sigmoid", | |
| "dtype": "float32" | |
| }, | |
| "router_topk_nki_kernel_enabled": null, | |
| "rpl_reduce_dtype": null, | |
| "save_sharded_checkpoint": true, | |
| "scratchpad_page_size": null, | |
| "seq_len": 2048, | |
| "seq_len_threshold_for_cc_tiling": 16384, | |
| "sequence_parallel_enabled": false, | |
| "shared_experts_sequence_parallel_enabled": false, | |
| "shared_mlp_nki_kernel_enabled": null, | |
| "skip_sharding": false, | |
| "skip_warmup": false, | |
| "spec_batch_size": 1, | |
| "speculation_length": 0, | |
| "start_rank_id": 0, | |
| "strided_context_parallel_kernel_enabled": false, | |
| "switch_cc": false, | |
| "target": null, | |
| "tensor_capture_config": null, | |
| "tensor_replacement_config": null, | |
| "tile_cc": false, | |
| "tkg_batch_size": 1, | |
| "token_generation_batches": null, | |
| "token_generation_buckets": null, | |
| "token_tree_config": null, | |
| "torch_dtype": "bfloat16", | |
| "tp_degree": 1, | |
| "transpose_shared_experts_weights": false, | |
| "up_clamp_lower_limit": null, | |
| "up_clamp_upper_limit": null, | |
| "use_index_calc_kernel": false, | |
| "vocab_parallel": false, | |
| "weight_gather_seq_len_threshold": 32768, | |
| "weights_to_skip_layout_optimization": [], | |
| "windowed_context_encoding_size": null, | |
| "world_size": 1 | |
| }, | |
| "num_attention_heads": 8, | |
| "num_cores_per_group": 1, | |
| "num_dense_layers": 2, | |
| "num_expert_groups": 1, | |
| "num_experts": 128, | |
| "num_experts_per_tok": 8, | |
| "num_hidden_layers": 56, | |
| "num_key_value_heads": 2, | |
| "num_limited_groups": 1, | |
| "num_local_experts": 128, | |
| "num_shared_experts": 1, | |
| "output_attentions": false, | |
| "output_hidden_states": false, | |
| "pad_token_id": null, | |
| "rms_norm_eps": 1e-05, | |
| "rope_scaling": null, | |
| "rope_theta": 10000, | |
| "route_norm": true, | |
| "route_scale": 2.826, | |
| "score_func": "sigmoid", | |
| "sliding_window": 2048, | |
| "tie_word_embeddings": false, | |
| "topk_group": 1, | |
| "torch_dtype": "bfloat16", | |
| "use_cache": true, | |
| "vocab_size": 200192 | |
| } | |