Cosmos-T2-80M-Test / model_config.json
wop's picture
Upload 6 files
1b269d3 verified
{
"model_family": "Cosmos-T2",
"model_name": "Cosmos-T2-80M-Test",
"model_class_name": "CosmosT2_LLM",
"hf_repo_id": "wop/Cosmos-T2-80M-Test",
"tokenizer_name": "Qwen/Qwen2.5-0.5B",
"dataset_name": "wop/XXXXXL-chain-of-thought",
"dataset_split": "train",
"dataset_row_limit": 1000,
"train_val_fraction": 0.05,
"seed": 42,
"block_size": 1028,
"max_len": 1028,
"d_model": 384,
"n_layers": 12,
"n_heads": 8,
"n_kv_heads": 2,
"d_ff": 1536,
"rope_base": 10000,
"dropout": 0.05,
"use_engram": true,
"engram_every": 2,
"engram_buckets": 4096,
"engram_dim": 96,
"engram_order": 3,
"epochs": 50,
"batch_size": 6,
"lr": 0.0003,
"weight_decay": 0.1,
"warmup_steps": 50,
"grad_clip": 1.0,
"log_every_steps": 10,
"eval_every_steps": 500,
"plot_every_epochs": 20,
"val_max_batches": 20
}