{ "seed": 42, "outdir": "/workspace/checkpoints_v2_30L_1280_700M/", "use_wandb": false, "resume_from_checkpoint": false, "force_next_epoch": false, "manual_batch_override": true, "manual_batch_size": 8, "manual_accum_steps": 16, "manual_gradient_checkpointing": false, "moving_avg_beta": 0.98, "save_every_epochs": 1, "eval_every_epochs": 1, "vocab_name": "mistralai/Mistral-7B-v0.1", "doc_eos_token": "<|endoftext|>", "vocab_pad_multiple": 64, "tokenizer_download_timeout": 30, "d_model": 1280, "n_layers": 30, "n_heads": 10, "q_lora_rank": 640, "kv_lora_rank": 320, "qk_nope_head_dim": 64, "qk_rope_head_dim": 64, "v_head_dim": 128, "ff_mult": 3.5, "qk_norm": true, "use_compile": true, "attn_dropout": 0.05, "resid_dropout": 0.05, "emb_dropout": 0.05, "label_smoothing": 0.05, "dataset_name": "AlgoDriveAI/Cosmopedia_Math_v2_Alpha", "context_len": 2048, "batch_size": 8, "accum_steps": 16, "num_epochs": 2, "max_lr": 0.00015, "min_lr": 1.5e-05, "warmup_ratio": 0.05, "decay_ratio": 0.15, "weight_decay": 0.1, "beta1": 0.9, "beta2": 0.95, "clip_norm": 1.0, "decay_embeddings": false, "num_workers": 16, "prefetch_factor": 4, "pin_memory": true, "use_dataset_cache": false, "dataset_cache_dir": "/workspace/tokenized_cache_v2b", "use_double_eos": false, "use_memmap_token_cache": true, "memmap_cache_dir": "/workspace/memmap_cache_v2b", "memmap_write_workers": 8, "use_gradient_checkpointing": false, "run_dataloader_diagnostic": true, "diagnostic_batches": 50, "min_batch_size": 1, "max_batch_size": 64, "min_effective_batch": 128, "max_accum_steps": 64, "enable_dynamic_oom_recovery": true, "max_oom_retries": 5, "oom_batch_size_reduction_factor": 0.7, "fallback_context_lens": [ 2048, 1024, 512 ], "memory_warning_threshold": 0.95, "reshuffle_each_epoch": true, "shuffle_before_packing": true, "num_packing_offsets": 1 }