{
  "_inserted_LatentThinkingModelSettings": {
    "add_latent_to_end": false,
    "binary_head_temp": null,
    "continue_token_id": 128258,
    "debug_mode": false,
    "detach_binary_head_inputs": true,
    "disable_checkpointing_cache_update": true,
    "disable_forward_input_embeds": true,
    "disable_input_past_key_values": false,
    "end_token_id": 128257,
    "lora_mode": false,
    "recurrent_filter_mode": "MLP",
    "start_token_id": 128256,
    "stop_token_id": 128259,
    "unused_token_ids": [
      128260,
      128261,
      128262,
      128263,
      128264,
      128265,
      128266,
      128267,
      128268,
      128269,
      128270,
      128271,
      128272,
      128273,
      128274,
      128275,
      128276,
      128277,
      128278,
      128279,
      128280,
      128281,
      128282,
      128283,
      128284,
      128285,
      128286,
      128287,
      128288,
      128289,
      128290,
      128291,
      128292,
      128293,
      128294,
      128295,
      128296,
      128297,
      128298,
      128299,
      128300,
      128301,
      128302,
      128303,
      128304,
      128305,
      128306,
      128307,
      128308,
      128309,
      128310,
      128311,
      128312,
      128313,
      128314,
      128315,
      128316,
      128317,
      128318,
      128319,
      128320,
      128321,
      128322,
      128323,
      128324,
      128325,
      128326,
      128327,
      128328,
      128329,
      128330,
      128331,
      128332,
      128333,
      128334,
      128335,
      128336,
      128337,
      128338,
      128339,
      128340,
      128341,
      128342,
      128343,
      128344,
      128345,
      128346,
      128347,
      128348,
      128349,
      128350,
      128351,
      128352,
      128353,
      128354,
      128355,
      128356,
      128357,
      128358,
      128359,
      128360,
      128361,
      128362,
      128363,
      128364,
      128365,
      128366,
      128367,
      128368,
      128369,
      128370,
      128371,
      128372,
      128373,
      128374,
      128375,
      128376,
      128377,
      128378,
      128379,
      128380,
      128381,
      128382,
      128383
    ]
  },
  "architectures": [
    "LatentThinkingModel"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": [
    128001,
    128008,
    128009
  ],
  "head_dim": 64,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 131072,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 16,
  "num_key_value_heads": 8,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 32.0,
    "high_freq_factor": 4.0,
    "low_freq_factor": 1.0,
    "original_max_position_embeddings": 8192,
    "rope_type": "llama3"
  },
  "rope_theta": 500000.0,
  "tie_word_embeddings": true,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.53.0",
  "use_cache": true,
  "vocab_size": 128384
}
|
|