{
"_name_or_path": "FalconTST",
"model_type": "FalconTST",
"transformers_version": "4.40.1",
"architectures": [
"FalconTSTForPrediction"
],
"auto_map": {
"AutoConfig": "configuration_FalconTST.FalconTSTConfig",
"AutoModel": "modeling_FalconTST.FalconTSTForPrediction"
},
"add_bias_linear": false,
"num_hidden_layers": 2,
"hidden_size": 1024,
"ffn_hidden_size": 4096,
"num_attention_heads": 16,
"seq_length": 2880,
"mask_pad_value": 255.0,
"is_revin": true,
"shared_patch_size": 32,
"patch_size_list": [
120,
96,
64,
36
],
"residual_backcast": true,
"do_base_forecast": false,
"do_expert_forecast": true,
"heterogeneous_moe_layer": false,
"expert_num_layers": 4,
"multi_forecast_head_list": [
24,
96,
336
],
"multi_forecast_head_type": "single",
"rotary_base": 1000000,
"rotary_interleaved": false,
"q_layernorm": false,
"k_layernorm": false,
"transformer_input_layernorm": true,
"num_experts": 4,
"moe_router_topk": 1,
"moe_router_pre_softmax": true,
"moe_router_score_function": "softmax",
"moe_ffn_hidden_size": 4096,
"moe_shared_expert_intermediate_size": 4096,
"moe_router_enable_expert_bias": false,
"moe_expert_final_layernorm": true,
"use_cpu_initialization": true,
"init_method_std": 0.06,
"use_cache": true
}