{
  "_name_or_path": "/root/.cache/huggingface/hub/models--microsoft--Phi-3-small-128k-instruct/snapshots/f80aaa30bfc64c2b8ab214b541d9050e97163bc4",
  "architectures": [
    "Phi3SmallForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout_prob": 0.0,
  "auto_map": {
    "AutoConfig": "configuration_phi3_small.Phi3SmallConfig",
    "AutoModelForCausalLM": "modeling_phi3_small.Phi3SmallForCausalLM",
    "AutoTokenizer": "tokenization_phi3_small.Phi3SmallTokenizer"
  },
  "blocksparse_block_size": 64,
  "blocksparse_homo_head_pattern": false,
  "blocksparse_num_local_blocks": 16,
  "blocksparse_triton_kernel_block_size": 64,
  "blocksparse_vert_stride": 8,
  "bos_token_id": 100257,
  "dense_attention_every_n_layers": 2,
  "dummy_token_indices": [
    100256,
    100258,
    100259,
    100260,
    100264,
    100265,
    100267,
    100268,
    100269,
    100270,
    100271,
    100272,
    100273,
    100274,
    100275,
    100276,
    100277,
    100278,
    100279,
    100280,
    100281,
    100282,
    100283,
    100284,
    100285,
    100286,
    100287,
    100288,
    100289,
    100290,
    100291,
    100292,
    100293,
    100294,
    100295,
    100296,
    100297,
    100298,
    100299,
    100300,
    100301,
    100302,
    100303,
    100304,
    100305,
    100306,
    100307,
    100308,
    100309,
    100310,
    100311,
    100312,
    100313,
    100314,
    100315,
    100316,
    100317,
    100318,
    100319,
    100320,
    100321,
    100322,
    100323,
    100324,
    100325,
    100326,
    100327,
    100328,
    100329,
    100330,
    100331,
    100332,
    100333,
    100334,
    100335,
    100336,
    100337,
    100338,
    100339,
    100340,
    100341,
    100342,
    100343,
    100344,
    100345,
    100346,
    100347,
    100348,
    100349,
    100350,
    100351
  ],
  "embedding_dropout_prob": 0.1,
  "eos_token_id": 100257,
  "ff_dim_multiplier": null,
  "ff_intermediate_size": 14336,
  "ffn_dropout_prob": 0.1,
  "gegelu_limit": 20.0,
  "gegelu_pad_to_256": true,
  "hidden_act": "gegelu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "max_position_embeddings": 131072,
  "model_type": "phi3small",
  "mup_attn_multiplier": 1.0,
  "mup_embedding_multiplier": 10.0,
  "mup_use_scaling": true,
  "mup_width_multiplier": 8.0,
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "original_max_position_embeddings": 8192,
  "pad_sequence_to_multiple_of_64": true,
  "reorder_and_upcast_attn": false,
  "rope_embedding_base": 1000000,
  "rope_position_scale": 1.0,
  "rope_scaling": {
    "long_factor": [
      1.0,
      1.01,
      1.01,
      1.02,
      1.04,
      1.04,
      1.04,
      1.05,
      1.05,
      1.06,
      1.07,
      1.08,
      1.08,
      1.08,
      1.08,
      1.08,
      1.08,
      1.08,
      1.09,
      1.09,
      1.2,
      2.31,
      3.76,
      9.38,
      10.1,
      10.8,
      18.1,
      25.2,
      25.3,
      26.1,
      26.6,
      30.2,
      33.0,
      41.5,
      44.4,
      44.8,
      50.2,
      51.9,
      59.3,
      62.7,
      66.1,
      66.3,
      85.8,
      89.3,
      90.0,
      99.9,
      107.0,
      110.0,
      111.0,
      117.0,
      118.0,
      121.0,
      122.0,
      127.0,
      127.0,
      128.0,
      128.0,
      128.0,
      128.0,
      128.0,
      128.0,
      129.0,
      129.0,
      129.0
    ],
    "long_mscale": 1.1902380714238083,
    "original_max_position_embeddings": 8192,
    "short_factor": [
      1.02,
      1.02,
      1.05,
      1.05,
      1.06,
      1.08,
      1.08,
      1.08,
      1.08,
      1.12,
      1.1800000000000002,
      1.1900000000000002,
      1.1900000000000002,
      1.2100000000000002,
      1.2300000000000002,
      1.2400000000000002,
      1.2400000000000002,
      1.2500000000000002,
      1.3000000000000003,
      1.3100000000000003,
      1.4600000000000004,
      1.5100000000000005,
      1.7000000000000006,
      1.9300000000000008,
      2.080000000000001,
      2.4399999999999933,
      3.2199999999999767,
      3.4499999999999718,
      3.579999999999969,
      4.669999999999946,
      4.779999999999943,
      5.999999999999917,
      6.009999999999917,
      6.4199999999999084,
      6.619999999999904,
      7.189999999999892,
      7.3099999999998895,
      7.339999999999889,
      7.479999999999886,
      9.749999999999837,
      10.919999999999812,
      11.219999999999805,
      11.749999999999794,
      11.979999999999789,
      13.239999999999762,
      13.579999999999755,
      13.669999999999753,
      13.82999999999975,
      14.009999999999746,
      14.679999999999731,
      14.889999999999727,
      15.769999999999708,
      15.769999999999708,
      15.819999999999707,
      15.839999999999707,
      15.919999999999705,
      16.029999999999703,
      16.12999999999972,
      16.44999999999977,
      16.44999999999977,
      16.77999999999982,
      16.83999999999983,
      16.83999999999983,
      16.889999999999837
    ],
    "short_mscale": 1.0,
    "type": "su"
  },
  "torch_dtype": "bfloat16",
  "transformers_version": "4.44.0",
  "use_cache": true,
  "vocab_size": 100352,
  "quantization_config": {
    "config_groups": {
      "group_0": {
        "input_activations": null,
        "output_activations": null,
        "targets": [
          "Linear"
        ],
        "weights": {
          "block_structure": null,
          "dynamic": false,
          "group_size": null,
          "num_bits": 8,
          "observer": "minmax",
          "observer_kwargs": {},
          "strategy": "channel",
          "symmetric": true,
          "type": "int"
        }
      }
    },
    "format": "pack-quantized",
    "global_compression_ratio": 1.3018502182274538,
    "ignore": [
      "lm_head"
    ],
    "kv_cache_scheme": null,
    "quant_method": "compressed-tensors",
    "quantization_status": "frozen"
  }
}