{ "architectures": [ "TRMForCausalLM" ], "dtype": "bfloat16", "model_type": "trm", "pad_token_id": 50283, "transformers_version": "4.57.1", "trm_config_dict": { "H_layers": 0, "batch_size": 1, "causal": true, "dropout": 0.2, "expansion": 4, "forward_dtype": "bfloat16", "freeze_embeddings": false, "halt_exploration_prob": 0.1, "halt_max_steps": 8, "hidden_size": 768, "loss": { "loss_type": "stablemax_cross_entropy", "name": "losses@ACTLossHead" }, "name": "recursive_reasoning.trm@TinyRecursiveModel", "no_ACT_continue": true, "num_heads": 12, "num_layers": 8, "num_puzzle_identifiers": 1, "pos_encodings": "rope", "pretrained_embeddings_model": "Alibaba-NLP/gte-modernbert-base", "puzzle_emb_len": 0, "puzzle_emb_ndim": 0, "residual_recursion": false, "seq_len": 512, "vocab_size": 50368, "y_cycles": 2, "z_cycles": 3 } }