{
  "act_threshold": 0.99,
  "architectures": [
    "HelixForCausalLM"
  ],
  "attention_mode": "hybrid",
  "batch_size": 8,
  "bos_token_id": 0,
  "cca_min_scale": 0.05,
  "cca_ramp_mode": "cubic_ease",
  "cca_warmup_steps": 10000,
  "chat_template": null,
  "d_model": 256,
  "device": "auto",
  "do_sample": true,
  "dropout": 0.05,
  "dtype": "float32",
  "eos_token_id": 50256,
  "epochs": 3,
  "ffn_expansion": 2.0,
  "fusion_strategy": "perceiver",
  "gate_sinkhorn_iters": 5,
  "grad_clip": 1.0,
  "hybrid_full_attention_interval": 4,
  "initializer_range": 0.02,
  "is_vlm": false,
  "k_proj_dim": 32,
  "lateral_p": 0.5,
  "linear_feature_dim": 64,
  "loop_dim_ratio": 0.125,
  "lr": 0.001,
  "max_new_tokens": 20,
  "memory_efficient_forward": false,
  "model_type": "helix",
  "n_columns": 2,
  "n_heads": 4,
  "n_loops": 2,
  "nodes_per_column": [
    2,
    2
  ],
  "pad_token_id": 50256,
  "repetition_penalty": 1.0,
  "rope_theta": 10000.0,
  "seq_len": 512,
  "ssm_bias": false,
  "ssm_conv_bias": true,
  "ssm_d_conv": 4,
  "ssm_d_state": 64,
  "ssm_dt_rank": "auto",
  "ssm_expand": 2,
  "stop_strings": [
    "<|endoftext|>",
    "<|im_end|>",
    "</s>"
  ],
  "temperature": 0.8,
  "tie_word_embeddings": true,
  "titans_always_select": true,
  "titans_dropout": 0.0,
  "titans_eta_init": 0.01,
  "titans_feature_dim": 64,
  "titans_n_heads": 4,
  "tokenizer_name": "gpt2",
  "top_k": 50,
  "top_p": 0.95,
  "transformers_version": "5.8.1",
  "use_cache": true,
  "use_cca": true,
  "use_rope": true,
  "use_ssm": false,
  "use_titans_memory": false,
  "vertical_depth": 2,
  "vertical_p": 0.7,
  "vision_encoder": null,
  "vision_hidden_size": 768,
  "vision_image_size": 448,
  "vision_intermediate_size": 3072,
  "vision_num_attention_heads": 16,
  "vision_num_hidden_layers": 24,
  "vision_patch_size": 16,
  "vocab_size": 50257,
  "warmup_steps": 200,
  "weight_decay": 0.01
}