{
  "model_family": "Cosmos-T2-Accelerate-Preview",
  "model_name": "Cosmos-T2-Accelerate-Preview",
  "model_class_name": "CosmosT2_Accelerate_LLM",
  "hf_repo_id": "wop/Cosmos-T2-Accelerate-Preview",
  "tokenizer_name": "Qwen/Qwen2.5-0.5B",
  "dataset_name": "wop/XXXXXL-chain-of-thought",
  "dataset_split": "train",
  "dataset_row_limit": 10000,
  "train_val_fraction": 0.1,
  "seed": 42,
  "block_size": 1028,
  "max_len": 1028,
  "d_model": 64,
  "n_layers": 4,
  "n_heads": 4,
  "n_kv_heads": 1,
  "d_ff": 256,
  "rope_base": 10000,
  "dropout": 0.05,
  "use_engram": true,
  "engram_every": 2,
  "engram_buckets": 128,
  "engram_dim": 16,
  "engram_order": 3,
  "epochs": 50,
  "batch_size": 6,
  "lr": 0.0003,
  "weight_decay": 0.1,
  "warmup_steps": 50,
  "grad_clip": 1.0,
  "log_every_steps": 10,
  "eval_every_steps": 500,
  "plot_every_epochs": 20,
  "val_max_batches": 50
}