{
  "dump_dir": "/checkpoint/comem/barlaso/amaia_dumps/ar_scale_1M_64_1.5e-4",
  "seed": 42,
  "batch_size": 4,
  "grad_acc_steps": 1,
  "loss_reduction": "mean",
  "seq_len": 4096,
  "checkpoint_freq": 8000,
  "eval_freq": 8000,
  "gc_collect_freq": 1000,
  "logging_freq": 10,
  "logging_acc_freq": -1,
  "probe_freq": null,
  "probe_dump_tensors": [],
  "warn_thresh_curr_iter_seconds": 10,
  "warn_thresh_data_load_seconds": 10,
  "steps": 1000000,
  "data": {
    "sources": [
      {
        "path": "/checkpoint/comem/barlaso/data/simpleqa/wiki_scale/amaia",
        "type": "pretrain",
        "weight": 94.4
      },
      {
        "path": "/datasets/llm/pretraining/llama2/github_oss_with_stack",
        "type": "pretrain",
        "weight": 27.0
      },
      {
        "path": "/datasets/llm/pretraining/llama2/b3g",
        "type": "pretrain",
        "weight": 0.3
      },
      {
        "path": "/datasets/llm/pretraining/llama2/arxiv",
        "type": "pretrain",
        "weight": 0.4
      },
      {
        "path": "/datasets/llm/pretraining/llama2/stackexchange",
        "type": "pretrain",
        "weight": 0.7
      },
      {
        "path": "/datasets/llm/pretraining/llama2/wikipedia",
        "type": "pretrain",
        "weight": 1.0
      },
      {
        "path": "/datasets/llm/pretraining/llama2/edouard_cc_20220927_new",
        "type": "pretrain",
        "weight": 10.0
      },
      {
        "path": "/datasets/llm/pretraining/dclm",
        "type": "pretrain",
        "weight": 55.0
      }
    ],
    "loader": "new",
    "seed": 42,
    "add_bos": true,
    "add_eos": true,
    "load_async": true,
    "shuffle_buffer_size": 64,
    "tokenizer": {
      "name": "tiktoken",
      "path": "/checkpoint/comem/barlaso/amaia/Meta-Llama-3.1-8B/cl_toplang_128k.tiktoken"
    }
  },
  "optim": {
    "lr": 0.00015,
    "weight_decay": 0.1,
    "epsilon": 1e-08,
    "beta1": 0.9,
    "beta2": 0.95,
    "fused_optimizer": true,
    "clip": 1.0,
    "scheduler": "cosine",
    "warmup": 2000,
    "cooldown": 2000,
    "lr_min_ratio": 0.01,
    "cycle_length": 1.0,
    "cosine_theta": 1.0,
    "exp_factor": 0.5,
    "n_steps": null
  },
  "model": {
    "dim": 4096,
    "n_layers": 32,
    "n_heads": 32,
    "n_kv_heads": 8,
    "max_seq_len": 4096,
    "vocab_size": 128256,
    "ffn_dim_multiplier": 1.3,
    "ffn_exp": 4,
    "multiple_of": 1024,
    "norm_eps": 1e-05,
    "pos_embed_impl": "rope",
    "rope_theta": 500000.0,
    "scaled_rope": {
      "scale_factor": 8,
      "old_context_len": 8192,
      "low_freq_factor": 1,
      "high_freq_factor": 4,
      "use_attn_scale": false
    },
    "attn_impl": "flex_attention",
    "attn_bias_type": "doc_causal",
    "weight_tying": false,
    "init_method": "current_depth",
    "init_base_std": null,
    "seed": 42
  },
  "distributed": {
    "dp_size": 512,
    "tp_size": 1,
    "dp_type": "fsdp",
    "model_dtype": "bf16",
    "vocab_parallel": false,
    "loss_parallel": false,
    "compile": true,
    "ac_mode": "none",
    "selective_ac_option": 2,
    "partitioner_ac_budget": 0.99,
    "fp8_recipe": "rowwise",
    "fp8_filter": "layers\\.",
    "fp8_healing": null,
    "async_tp": false
  },
  "setup": {
    "spawn_method": "forkserver",
    "torch_init_timeout": 600,
    "cuda_matmul_allow_tf32": true,
    "cuda_allow_bf16_reduced_precision_reduction": true,
    "autograd_detect_anomaly": false
  },
  "logging": {
    "enable_tensorboard": true,
    "enable_wandb": false,
    "enable_otel": false,
    "wandb": {
      "project": "activereading",
      "group": null,
      "job_type": null,
      "entity": null,
      "name": null,
      "resume": null,
      "fork_from_step": null,
      "disable_on_init_failure": false
    }
  },
  "profiling": {
    "run": false,
    "trace_folder": "profiling",
    "mem_warmup": 100,
    "mem_steps": 2,
    "profile_warmup": 102,
    "profile_steps": 2
  },
  "checkpoint": {
    "path": "/checkpoint/comem/barlaso/amaia_dumps/ar_scale_1M_64_1.5e-4/checkpoints",
    "keep_latest": 1,
    "keep_eval_checkpoints": true,
    "compress": false
  },
  "continue_from": {
    "checkpoint_dir": "/checkpoint/comem/barlaso/amaia/Meta-Llama-3.1-8B",
    "reload_optim": false,
    "reload_dataloader": false,
    "reload_train_state": false,
    "validation_mode": "raise"
  },
  "eval_on_gpus": 8,
  "eval": {
    "dataset_dir": "/datasets/llm/eval",
    "tasks": "hellaswag,nq,simpleqa_wiki,simpleqa_wiki_nll",
    "task_args": null,
    "ppl": null,
    "predictor": "llama_predictor",
    "predictor_config": {
      "checkpoint_dir": "",
      "tp_size": 1,
      "dp_type": null,
      "compile": false,
      "model_dtype": "bf16",
      "device": "cuda",
      "batch_size": 128,
      "generation_batch_size": null,
      "auto_batch_size": true
    },
    "temperature": 0.0,
    "top_k": 0,
    "top_p": 0.0,
    "seed": 42,
    "dump_dir": "",
    "metric_log_dir": "",
    "tb_log_dir": null,
    "no_resume": false,
    "max_samples": null,
    "show_progress": false,
    "log_to_tb": false,
    "global_step": null,
    "disable_metric_logging": false,
    "logging": null,
    "checkpoint_dir": "",
    "tp_size": 1,
    "dp_type": null,
    "compile": false,
    "model_dtype": "bf16",
    "device": "cuda",
    "batch_size": 128,
    "generation_batch_size": null
  }
}