{
"_name_or_path": "FalconTST",
"model_type": "FalconTST",
"transformers_version": "4.40.1",
"architectures": [
"FalconTSTForPrediction"
],
"auto_map": {
"AutoConfig": "configuration_FalconTST.FalconTSTConfig",
"AutoModel": "modeling_FalconTST.FalconTSTForPrediction"
},
"add_bias_linear": false,
"num_hidden_layers": 2,
"hidden_size": 1024,
"ffn_hidden_size": 4096,
"num_attention_heads": 16,
"seq_length": 2880,
"mask_pad_value": 255.0,
"is_revin": true,
"shared_patch_size": 32,
"patch_size_list": [
120,
96,
64,
36
],
"residual_backcast": true,
"do_base_forecast": false,
"do_expert_forecast": true,
"heterogeneous_moe_layer": false,
"expert_num_layers": 4,
"multi_forecast_head_list": [
24,
96,
336
],
"multi_forecast_head_type": "single",
"rotary_base": 1000000,
"rotary_interleaved": false,
"q_layernorm": false,
"k_layernorm": false,
"transformer_input_layernorm": true,
"num_experts": 4,
"moe_router_topk": 1,
"moe_router_pre_softmax": true,
"moe_router_score_function": "softmax",
"moe_ffn_hidden_size": 4096,
"moe_shared_expert_intermediate_size": 4096,
"moe_router_enable_expert_bias": false,
"moe_expert_final_layernorm": true,
"use_cpu_initialization": true,
"init_method_std": 0.06,
"use_cache": true
}