{
"dump_dir": "/checkpoint/comem/barlaso/amaia_dumps/ar_scale_1M_64_1.5e-4",
"seed": 42,
"batch_size": 4,
"grad_acc_steps": 1,
"loss_reduction": "mean",
"seq_len": 4096,
"checkpoint_freq": 8000,
"eval_freq": 8000,
"gc_collect_freq": 1000,
"logging_freq": 10,
"logging_acc_freq": -1,
"probe_freq": null,
"probe_dump_tensors": [],
"warn_thresh_curr_iter_seconds": 10,
"warn_thresh_data_load_seconds": 10,
"steps": 1000000,
"data": {
"sources": [
{
"path": "/checkpoint/comem/barlaso/data/simpleqa/wiki_scale/amaia",
"type": "pretrain",
"weight": 94.4
},
{
"path": "/datasets/llm/pretraining/llama2/github_oss_with_stack",
"type": "pretrain",
"weight": 27.0
},
{
"path": "/datasets/llm/pretraining/llama2/b3g",
"type": "pretrain",
"weight": 0.3
},
{
"path": "/datasets/llm/pretraining/llama2/arxiv",
"type": "pretrain",
"weight": 0.4
},
{
"path": "/datasets/llm/pretraining/llama2/stackexchange",
"type": "pretrain",
"weight": 0.7
},
{
"path": "/datasets/llm/pretraining/llama2/wikipedia",
"type": "pretrain",
"weight": 1.0
},
{
"path": "/datasets/llm/pretraining/llama2/edouard_cc_20220927_new",
"type": "pretrain",
"weight": 10.0
},
{
"path": "/datasets/llm/pretraining/dclm",
"type": "pretrain",
"weight": 55.0
}
],
"loader": "new",
"seed": 42,
"add_bos": true,
"add_eos": true,
"load_async": true,
"shuffle_buffer_size": 64,
"tokenizer": {
"name": "tiktoken",
"path": "/checkpoint/comem/barlaso/amaia/Meta-Llama-3.1-8B/cl_toplang_128k.tiktoken"
}
},
"optim": {
"lr": 0.00015,
"weight_decay": 0.1,
"epsilon": 1e-08,
"beta1": 0.9,
"beta2": 0.95,
"fused_optimizer": true,
"clip": 1.0,
"scheduler": "cosine",
"warmup": 2000,
"cooldown": 2000,
"lr_min_ratio": 0.01,
"cycle_length": 1.0,
"cosine_theta": 1.0,
"exp_factor": 0.5,
"n_steps": null
},
"model": {
"dim": 4096,
"n_layers": 32,
"n_heads": 32,
"n_kv_heads": 8,
"max_seq_len": 4096,
"vocab_size": 128256,
"ffn_dim_multiplier": 1.3,
"ffn_exp": 4,
"multiple_of": 1024,
"norm_eps": 1e-05,
"pos_embed_impl": "rope",
"rope_theta": 500000.0,
"scaled_rope": {
"scale_factor": 8,
"old_context_len": 8192,
"low_freq_factor": 1,
"high_freq_factor": 4,
"use_attn_scale": false
},
"attn_impl": "flex_attention",
"attn_bias_type": "doc_causal",
"weight_tying": false,
"init_method": "current_depth",
"init_base_std": null,
"seed": 42
},
"distributed": {
"dp_size": 512,
"tp_size": 1,
"dp_type": "fsdp",
"model_dtype": "bf16",
"vocab_parallel": false,
"loss_parallel": false,
"compile": true,
"ac_mode": "none",
"selective_ac_option": 2,
"partitioner_ac_budget": 0.99,
"fp8_recipe": "rowwise",
"fp8_filter": "layers\\.",
"fp8_healing": null,
"async_tp": false
},
"setup": {
"spawn_method": "forkserver",
"torch_init_timeout": 600,
"cuda_matmul_allow_tf32": true,
"cuda_allow_bf16_reduced_precision_reduction": true,
"autograd_detect_anomaly": false
},
"logging": {
"enable_tensorboard": true,
"enable_wandb": false,
"enable_otel": false,
"wandb": {
"project": "activereading",
"group": null,
"job_type": null,
"entity": null,
"name": null,
"resume": null,
"fork_from_step": null,
"disable_on_init_failure": false
}
},
"profiling": {
"run": false,
"trace_folder": "profiling",
"mem_warmup": 100,
"mem_steps": 2,
"profile_warmup": 102,
"profile_steps": 2
},
"checkpoint": {
"path": "/checkpoint/comem/barlaso/amaia_dumps/ar_scale_1M_64_1.5e-4/checkpoints",
"keep_latest": 1,
"keep_eval_checkpoints": true,
"compress": false
},
"continue_from": {
"checkpoint_dir": "/checkpoint/comem/barlaso/amaia/Meta-Llama-3.1-8B",
"reload_optim": false,
"reload_dataloader": false,
"reload_train_state": false,
"validation_mode": "raise"
},
"eval_on_gpus": 8,
"eval": {
"dataset_dir": "/datasets/llm/eval",
"tasks": "hellaswag,nq,simpleqa_wiki,simpleqa_wiki_nll",
"task_args": null,
"ppl": null,
"predictor": "llama_predictor",
"predictor_config": {
"checkpoint_dir": "",
"tp_size": 1,
"dp_type": null,
"compile": false,
"model_dtype": "bf16",
"device": "cuda",
"batch_size": 128,
"generation_batch_size": null,
"auto_batch_size": true
},
"temperature": 0.0,
"top_k": 0,
"top_p": 0.0,
"seed": 42,
"dump_dir": "",
"metric_log_dir": "",
"tb_log_dir": null,
"no_resume": false,
"max_samples": null,
"show_progress": false,
"log_to_tb": false,
"global_step": null,
"disable_metric_logging": false,
"logging": null,
"checkpoint_dir": "",
"tp_size": 1,
"dp_type": null,
"compile": false,
"model_dtype": "bf16",
"device": "cuda",
"batch_size": 128,
"generation_batch_size": null
}
}