{
"name": "looped_lm_text2emoji",
"dump_dir": "/home/cd110/BFlowNet/apps/loopedLM/results/text2emoji",
"seed": 42,
"grad_acc_steps": 1,
"gc_collect_freq": 1000,
"probe_freq": null,
"steps": 5100,
"data": {
"root_dir": "/home/cd110/BFlowNet/apps/loopedLM/text2emoji_prepared",
"sources": {
"text2emoji": 1.0
},
"batch_size": 12,
"seq_len": 512,
"n_views": 2,
"seed": 42,
"add_bos": true,
"add_eos": true,
"load_async": true,
"prefetch_size": 128,
"tokenizer": {
"name": "bytes",
"path": null
}
},
"optim": {
"lr": 0.0003,
"weight_decay": 0.1,
"epsilon": 1e-08,
"beta1": 0.9,
"beta2": 0.95,
"clip": 1.0,
"scheduler": "cosine",
"warmup": 1000,
"lr_min_ratio": 0.1,
"cycle_length": 1.0,
"cosine_theta": 1.0,
"annealing_step": 1000,
"decay_fraction": 0.1,
"exp_factor": 0.5
},
"model": {
"dim": 1024,
"n_layers": 24,
"head_dim": null,
"n_heads": 16,
"n_kv_heads": 16,
"ffn_dim_multiplier": null,
"multiple_of": 256,
"norm_eps": 1e-05,
"rope_theta": 10000.0,
"init_base_std": null,
"init_std_factor": "disabled",
"max_seqlen": 512,
"seed": 42,
"vocab_size": 258,
"weight_tying": false,
"sliding_window": null,
"n_loops": 8,
"shared_layers": true,
"loop_residual": true
},
"distributed": {
"dp_shard": 1,
"dp_replicate": 4,
"tp_size": 1,
"selective_activation_checkpointing": false,
"compile": true,
"fsdp_type": "full_shard",
"model_dtype": "bf16",
"float8_recipe": null,
"float8_filter": "layers\\.[0-9]+\\.",
"matmul_allow_tf32": true,
"detect_anomaly": false,
"compile_cache_size_limit": 8,
"spawn_method": "forkserver"
},
"env": {
"MKL_SERVICE_FORCE_INTEL": "GNU",
"OMP_NUM_THREADS": "1",
"MKL_NUM_THREADS": "1",
"ENABLE_INTRA_NODE_COMM": "1",
"TORCH_NCCL_AVOID_RECORD_STREAMS": "1",
"NCCL_IB_TIMEOUT": "22",
"NCCL_DEBUG": "INFO",
"TORCH_NCCL_ASYNC_ERROR_HANDLING": "1"
},
"checkpoint": {
"dump": {
"every": 500,
"keep": -1
},
"eval": {
"every": 1500000,
"keep": 3
},
"path": "/home/cd110/BFlowNet/apps/loopedLM/results/text2emoji/checkpoints",
"init_ckpt_path": null,
"continue_training_from_init": false
},
"profiling": {
"run": false,
"trace_folder": "profiling",
"mem_warmup": 100,
"mem_steps": 2,
"profile_warmup": 102,
"profile_steps": 2
},
"logging": {
"freq": 50,
"acc_freq": null,
"wandb": {
"job_type": null,
"dir": null,
"project": "looped_lm_text2emoji",
"entity": null,
"tags": null,
"group": null,
"name": "looped_lm_text2emoji",
"notes": null,
"config_exclude_keys": null,
"config_include_keys": null,
"anonymous": null,
"mode": null,
"allow_val_change": null,
"resume": null,
"force": null,
"tensorboard": null,
"sync_tensorboard": null,
"monitor_gym": null,
"save_code": null,
"id": null,
"fork_from": null,
"resume_from": null
}
},
"async_eval_gpus": null,
"eval": {
"generator": {
"max_tokens": 128,
"dtype": "bf16",
"temperature": 0.7,
"top_p": 0.9
},
"harness": {
"tasks": [
"hellaswag",
"piqa"
]
},
"validation": {
"max_steps": 200
}
}
}