{
  "_name_or_path": "PersianStories-4k",
  "architectures": [
    "Phi3ForCausalLM"
  ],
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "configuration.PersianStoriesConfig",
    "AutoModelForCausalLM": "modeling.PersianStoriesForCausalLM"
  },
  "bos_token_id": 1,
  "embd_pdrop": 0.0,
  "eos_token_id": 32000,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 4096,
  "model_type": "phi3",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "num_key_value_heads": 4,
  "original_max_position_embeddings": 4096,
  "pad_token_id": 32000,
  "resid_pdrop": 0.0,
  "rms_norm_eps": 1e-05,
  "rope_position_scale": 1.0,
  "rope_scaling": {
    "long_factor": [
      1.0,
      1.01,
      1.01,
      1.02,
      1.04,
      1.04,
      1.04,
      1.05,
      1.05,
      1.06,
      1.07,
      1.08,
      1.08,
      1.08,
      1.08,
      1.08,
      1.08,
      1.08,
      1.09,
      1.09,
      1.2,
      2.31,
      3.76,
      9.38,
      10.1,
      10.8,
      18.1,
      25.2,
      25.3,
      26.1,
      26.6,
      30.2,
      33.0,
      41.5,
      44.4,
      44.8,
      50.2,
      51.9,
      59.3,
      62.7,
      66.1,
      66.3,
      85.8,
      89.3,
      90.0,
      99.9,
      107.0,
      110.0,
      111.0,
      117.0,
      118.0,
      121.0,
      122.0,
      127.0,
      127.0,
      128.0,
      128.0,
      128.0,
      128.0,
      128.0,
      128.0,
      129.0,
      129.0,
      129.0
    ],
    "long_mscale": 1.1902380714238083,
    "original_max_position_embeddings": 8192,
    "short_factor": [
      1.02,
      1.02,
      1.05,
      1.05,
      1.06,
      1.08,
      1.08,
      1.08,
      1.08,
      1.12,
      1.1800000000000002,
      1.1900000000000002,
      1.1900000000000002,
      1.2100000000000002,
      1.2300000000000002,
      1.2400000000000002,
      1.2400000000000002,
      1.2500000000000002,
      1.3000000000000003,
      1.3100000000000003,
      1.4600000000000004,
      1.5100000000000005,
      1.7000000000000006,
      1.9300000000000008,
      2.080000000000001,
      2.4399999999999933,
      3.2199999999999767,
      3.4499999999999718,
      3.579999999999969,
      4.669999999999946,
      4.779999999999943,
      5.999999999999917,
      6.009999999999917,
      6.4199999999999084,
      6.619999999999904,
      7.189999999999892,
      7.3099999999998895,
      7.339999999999889,
      7.479999999999886,
      9.749999999999837,
      10.919999999999812,
      11.219999999999805,
      11.749999999999794,
      11.979999999999789,
      13.239999999999762,
      13.579999999999755,
      13.669999999999753,
      13.82999999999975,
      14.009999999999746,
      14.679999999999731,
      14.889999999999727,
      15.769999999999708,
      15.769999999999708,
      15.819999999999707,
      15.839999999999707,
      15.919999999999705,
      16.029999999999703,
      16.12999999999972,
      16.44999999999977,
      16.44999999999977,
      16.77999999999982,
      16.83999999999983,
      16.83999999999983,
      16.889999999999837
    ],
    "short_mscale": 1.0,
    "type": "su"
  },
  "rope_theta": 10000.0,
  "sliding_window": 2047,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.40.2",
  "use_cache": true,
  "attention_bias": false,
  "vocab_size": 32064
}