{
  "act_threshold": 0.99,
  "architectures": [
    "HelixForCausalLM"
  ],
  "attention_mode": "hybrid",
  "batch_size": 8,
  "bos_token_id": 0,
  "cca_min_scale": 0.05,
  "cca_ramp_mode": "cubic_ease",
  "cca_warmup_steps": 10000,
  "chat_template": null,
  "d_model": 256,
  "device": "auto",
  "do_sample": true,
  "dropout": 0.05,
  "dtype": "float32",
  "eos_token_id": 50256,
  "epochs": 3,
  "ffn_expansion": 2.0,
  "fusion_strategy": "perceiver",
  "gate_sinkhorn_iters": 5,
  "grad_clip": 1.0,
  "hybrid_full_attention_interval": 4,
  "initializer_range": 0.02,
  "is_vlm": false,
  "k_proj_dim": 32,
  "lateral_p": 0.5,
  "linear_feature_dim": 64,
  "loop_dim_ratio": 0.125,
  "lr": 0.001,
  "max_new_tokens": 20,
  "memory_efficient_forward": false,
  "model_type": "helix",
  "n_columns": 2,
  "n_heads": 4,
  "n_loops": 2,
  "nodes_per_column": [
    2,
    2
  ],
  "pad_token_id": 50256,
  "repetition_penalty": 1.0,
  "rope_theta": 10000.0,
  "seq_len": 512,
  "ssm_bias": false,
  "ssm_conv_bias": true,
  "ssm_d_conv": 4,
  "ssm_d_state": 64,
  "ssm_dt_rank": "auto",
  "ssm_expand": 2,
  "stop_strings": [
    "<|endoftext|>",
    "<|im_end|>"
  ],
  "temperature": 0.8,
  "tie_word_embeddings": true,
  "titans_always_select": true,
  "titans_dropout": 0.0,
  "titans_eta_init": 0.01,
  "titans_feature_dim": 64,
  "titans_n_heads": 4,
  "tokenizer_name": "gpt2",
  "top_k": 50,
  "top_p": 0.95,
  "transformers_version": "5.8.1",
  "use_cache": true,
  "use_cca": true,
  "use_rope": true,
  "use_ssm": false,
  "use_titans_memory": false,
  "vertical_depth": 2,
  "vertical_p": 0.7,
  "vision_encoder": null,
  "vision_hidden_size": 768,
  "vision_image_size": 448,
  "vision_intermediate_size": 3072,
  "vision_num_attention_heads": 16,
  "vision_num_hidden_layers": 24,
  "vision_patch_size": 16,
  "vocab_size": 50257,
  "warmup_steps": 200,
  "weight_decay": 0.01
}