{
  "run": {
    "name": "final_c2_18l320_standard",
    "artifacts_root": "artifacts/full_run",
    "resume": true,
    "deterministic": false
  },
  "distributed": {
    "enabled": true,
    "backend": "nccl"
  },
  "preprocessing": {
    "data_dir": "data",
    "processed_dir": "data/processed_OWT",
    "log_dir": "logs/preprocessing",
    "train_split": 0.9,
    "dataset_name": "openwebtext",
    "dataset_config_name": null,
    "dataset_split": "train",
    "dataset_text_column": "text",
    "dataset_repo_id": "huiting123/processedOWT",
    "num_proc": 4,
    "tokenization_num_proc": 0,
    "tokenization_batch_size": 1000,
    "tokenization_chunk_size": 100000,
    "shard_write_batch_size": 5000,
    "seed": 42,
    "subset_size": 0,
    "raw_data_path": null,
    "test_data_path": null,
    "skip_language_filter": false,
    "skip_repetition_filter": false,
    "skip_quality_filter": false,
    "min_words": 100,
    "max_words": 10000,
    "max_non_ascii": 0.3,
    "min_line_uniqueness": 0.7,
    "min_sentence_uniqueness": 0.8,
    "max_train_tokens": 0
  },
  "model": {
    "vocab_size": 50304,
    "n_layers": 18,
    "n_heads": 5,
    "n_kv_heads": 1,
    "n_embd": 320,
    "embedding_dim": null,
    "tie_embeddings": true,
    "context_len": 1024,
    "dropout": 0.0,
    "bias": false,
    "norm_type": "rmsnorm",
    "norm_eps": 1e-05,
    "positional_embedding": "rope",
    "rope_theta": 10000.0,
    "rope_fraction": 1.0,
    "mlp_type": "swiglu",
    "mlp_hidden_mult": 4.0,
    "mlp_hidden_dim": 1024,
    "qk_norm": false,
    "block_style": "sequential"
  },
  "training": {
    "seed": 0,
    "learning_rate": 0.00066,
    "min_lr": 6.6e-05,
    "weight_decay": 0.03,
    "beta1": 0.9,
    "beta2": 0.95,
    "grad_clip": 1.0,
    "max_iters": 11586,
    "warmup_steps": 116,
    "lr_schedule": "wsd",
    "wsd_stable_frac": 0.85,
    "batch_size": 4,
    "gradient_accumulation_steps": 16,
    "dtype": "float16",
    "device": "cuda",
    "eval_step_interval": 1000,
    "eval_batches": 20,
    "log_interval": 10,
    "max_checkpoints": 5
  },
  "inference": {
    "checkpoint": null,
    "prompt": "",
    "max_tokens": 100,
    "temperature": 1.0,
    "seed": 0,
    "device": "auto",
    "leaderboard": false
  },
  "post_training": {
    "base_checkpoint": null,
    "learning_rate": 1e-05,
    "max_iters": 11586,
    "checkpoint_dir": "checkpoints/post",
    "log_dir": "logs/post"
  },
  "evaluation": {
    "checkpoint": null,
    "batch_size": 4,
    "device": "auto",
    "log_dir": "logs/evaluation"
  },
  "notifications": {
    "enabled": false,
    "smtp_host": "smtp.gmail.com",
    "smtp_port": 587,
    "smtp_user": "",
    "to_addresses": [],
    "cooldown_minutes": 5,
    "periodic_status_hours": 4.0,
    "disk_min_gb": 5.0
  }
}