{
  "architecture": "DecoderModel",
  "dtype": "float16",
  "logits_dtype": "float16",
  "num_hidden_layers": 16,
  "num_attention_heads": 20,
  "hidden_size": 1280,
  "norm_epsilon": 1e-05,
  "vocab_size": 7832,
  "hidden_act": "gelu",
  "use_parallel_embedding": false,
  "embedding_sharding_dim": 0,
  "max_position_embeddings": 5000,
  "use_prompt_tuning": false,
  "head_size": 64,
  "has_position_embedding": true,
  "layernorm_type": 0,
  "has_attention_qkvo_bias": true,
  "has_mlp_bias": true,
  "has_model_final_layernorm": true,
  "has_embedding_layernorm": false,
  "has_embedding_scale": true,
  "ffn_hidden_size": 5120,
  "q_scaling": 1.0,
  "layernorm_position": 0,
  "relative_attention": false,
  "max_distance": 0,
  "num_buckets": 0,
  "model_type": "whisper",
  "rescale_before_lm_head": false,
  "encoder_hidden_size": 1280,
  "encoder_num_heads": 20,
  "encoder_head_size": null,
  "skip_cross_kv": false,
  "quantization": {
    "quant_algo": null
  }
}