{ "model_family": "Cosmos-T2", "model_name": "Cosmos-T2-80M-Test", "model_class_name": "CosmosT2_LLM", "hf_repo_id": "wop/Cosmos-T2-80M-Test", "tokenizer_name": "Qwen/Qwen2.5-0.5B", "dataset_name": "wop/XXXXXL-chain-of-thought", "dataset_split": "train", "dataset_row_limit": 1000, "train_val_fraction": 0.05, "seed": 42, "block_size": 1028, "max_len": 1028, "d_model": 384, "n_layers": 12, "n_heads": 8, "n_kv_heads": 2, "d_ff": 1536, "rope_base": 10000, "dropout": 0.05, "use_engram": true, "engram_every": 2, "engram_buckets": 4096, "engram_dim": 96, "engram_order": 3, "epochs": 50, "batch_size": 6, "lr": 0.0003, "weight_decay": 0.1, "warmup_steps": 50, "grad_clip": 1.0, "log_every_steps": 10, "eval_every_steps": 500, "plot_every_epochs": 20, "val_max_batches": 20 }