fullrun / config_snapshot.json
huiting tang
Add files using upload-large-folder tool
fad46a0 verified
{
"run": {
"name": "final_c6_18l448_factorized_aggressive",
"artifacts_root": "artifacts/final_c6",
"resume": false,
"deterministic": false
},
"distributed": {
"enabled": false,
"backend": "nccl"
},
"preprocessing": {
"data_dir": "data",
"processed_dir": "data/processed_OWT",
"log_dir": "logs/preprocessing",
"train_split": 0.9,
"dataset_name": "openwebtext",
"dataset_config_name": null,
"dataset_split": "train",
"dataset_text_column": "text",
"dataset_repo_id": "huiting123/processedOWT",
"num_proc": 4,
"tokenization_num_proc": 0,
"tokenization_batch_size": 1000,
"tokenization_chunk_size": 100000,
"shard_write_batch_size": 5000,
"seed": 42,
"subset_size": 0,
"raw_data_path": null,
"test_data_path": null,
"skip_language_filter": false,
"skip_repetition_filter": false,
"skip_quality_filter": false,
"min_words": 100,
"max_words": 10000,
"max_non_ascii": 0.3,
"min_line_uniqueness": 0.7,
"min_sentence_uniqueness": 0.8,
"max_train_tokens": 0
},
"model": {
"vocab_size": 50304,
"n_layers": 18,
"n_heads": 7,
"n_kv_heads": 1,
"n_embd": 448,
"embedding_dim": 128,
"tie_embeddings": true,
"context_len": 1024,
"dropout": 0.0,
"bias": false,
"norm_type": "rmsnorm",
"norm_eps": 1e-05,
"positional_embedding": "rope",
"rope_theta": 10000.0,
"rope_fraction": 1.0,
"mlp_type": "swiglu",
"mlp_hidden_mult": 4.0,
"mlp_hidden_dim": 1024,
"qk_norm": false,
"block_style": "sequential"
},
"training": {
"seed": 0,
"learning_rate": 0.00056,
"min_lr": 5.6e-05,
"weight_decay": 0.03,
"beta1": 0.9,
"beta2": 0.95,
"grad_clip": 1.0,
"max_iters": 92686,
"warmup_steps": 927,
"lr_schedule": "wsd",
"wsd_stable_frac": 0.85,
"batch_size": 4,
"gradient_accumulation_steps": 16,
"dtype": "float16",
"device": "cuda",
"eval_step_interval": 500,
"eval_batches": 20,
"log_interval": 10,
"max_checkpoints": 5
},
"inference": {
"checkpoint": null,
"prompt": "",
"max_tokens": 100,
"temperature": 1.0,
"seed": 0,
"device": "auto",
"leaderboard": false
},
"post_training": {
"base_checkpoint": null,
"learning_rate": 1e-05,
"max_iters": 1000,
"checkpoint_dir": "checkpoints/post",
"log_dir": "logs/post"
},
"evaluation": {
"checkpoint": null,
"batch_size": 4,
"device": "auto",
"log_dir": "logs/evaluation"
},
"notifications": {
"enabled": false,
"smtp_host": "smtp.gmail.com",
"smtp_port": 587,
"smtp_user": "",
"to_addresses": [],
"cooldown_minutes": 5,
"periodic_status_hours": 4.0,
"disk_min_gb": 5.0
}
}