{
"name": "looped_lm_text2emoji",
"dump_dir": "/home/cd110/BFlowNet/apps/loopedLM/results/text2emoji",
"seed": 42,
"grad_acc_steps": 1,
"gc_collect_freq": 1000,
"probe_freq": null,
"steps": 5100,
"data": {
"root_dir": "/home/cd110/BFlowNet/apps/loopedLM/text2emoji_prepared",
"sources": {
"text2emoji": 1.0
},
"batch_size": 12,
"seq_len": 512,
"n_views": 2,
"seed": 42,
"add_bos": true,
"add_eos": true,
"load_async": true,
"prefetch_size": 128,
"tokenizer": {
"name": "bytes",
"path": null
}
},
"optim": {
"lr": 0.0003,
"weight_decay": 0.1,
"epsilon": 1e-08,
"beta1": 0.9,
"beta2": 0.95,
"clip": 1.0,
"scheduler": "cosine",
"warmup": 1000,
"lr_min_ratio": 0.1,
"cycle_length": 1.0,
"cosine_theta": 1.0,
"annealing_step": 1000,
"decay_fraction": 0.1,
"exp_factor": 0.5
},
"model": {
"dim": 1024,
"n_layers": 24,
"head_dim": null,
"n_heads": 16,
"n_kv_heads": 16,
"ffn_dim_multiplier": null,
"multiple_of": 256,
"norm_eps": 1e-05,
"rope_theta": 10000.0,
"init_base_std": null,
"init_std_factor": "disabled",
"max_seqlen": 512,
"seed": 42,
"vocab_size": 258,
"weight_tying": false,
"sliding_window": null,
"n_loops": 8,
"shared_layers": true,
"loop_residual": true
},
"distributed": {
"dp_shard": 1,
"dp_replicate": 4,
"tp_size": 1,
"selective_activation_checkpointing": false,
"compile": true,
"fsdp_type": "full_shard",
"model_dtype": "bf16",
"float8_recipe": null,
"float8_filter": "layers\\.[0-9]+\\.",
"matmul_allow_tf32": true,
"detect_anomaly": false,
"compile_cache_size_limit": 8,
"spawn_method": "forkserver"
},
"env": {
"MKL_SERVICE_FORCE_INTEL": "GNU",
"OMP_NUM_THREADS": "1",
"MKL_NUM_THREADS": "1",
"ENABLE_INTRA_NODE_COMM": "1",
"TORCH_NCCL_AVOID_RECORD_STREAMS": "1",
"NCCL_IB_TIMEOUT": "22",
"NCCL_DEBUG": "INFO",
"TORCH_NCCL_ASYNC_ERROR_HANDLING": "1"
},
"checkpoint": {
"dump": {
"every": 500,
"keep": -1
},
"eval": {
"every": 1500000,
"keep": 3
},
"path": "/home/cd110/BFlowNet/apps/loopedLM/results/text2emoji/checkpoints",
"init_ckpt_path": null,
"continue_training_from_init": false
},
"profiling": {
"run": false,
"trace_folder": "profiling",
"mem_warmup": 100,
"mem_steps": 2,
"profile_warmup": 102,
"profile_steps": 2
},
"logging": {
"freq": 50,
"acc_freq": null,
"wandb": {
"job_type": null,
"dir": null,
"project": "looped_lm_text2emoji",
"entity": null,
"tags": null,
"group": null,
"name": "looped_lm_text2emoji",
"notes": null,
"config_exclude_keys": null,
"config_include_keys": null,
"anonymous": null,
"mode": null,
"allow_val_change": null,
"resume": null,
"force": null,
"tensorboard": null,
"sync_tensorboard": null,
"monitor_gym": null,
"save_code": null,
"id": null,
"fork_from": null,
"resume_from": null
}
},
"async_eval_gpus": null,
"eval": {
"generator": {
"max_tokens": 128,
"dtype": "bf16",
"temperature": 0.7,
"top_p": 0.9
},
"harness": {
"tasks": [
"hellaswag",
"piqa"
]
},
"validation": {
"max_steps": 200
}
}
}