| { | |
| "name": "looped_lm_text2emoji", | |
| "dump_dir": "/home/cd110/BFlowNet/apps/loopedLM/results/text2emoji", | |
| "seed": 42, | |
| "grad_acc_steps": 1, | |
| "gc_collect_freq": 1000, | |
| "probe_freq": null, | |
| "steps": 5100, | |
| "data": { | |
| "root_dir": "/home/cd110/BFlowNet/apps/loopedLM/text2emoji_prepared", | |
| "sources": { | |
| "text2emoji": 1.0 | |
| }, | |
| "batch_size": 12, | |
| "seq_len": 512, | |
| "n_views": 2, | |
| "seed": 42, | |
| "add_bos": true, | |
| "add_eos": true, | |
| "load_async": true, | |
| "prefetch_size": 128, | |
| "tokenizer": { | |
| "name": "bytes", | |
| "path": null | |
| } | |
| }, | |
| "optim": { | |
| "lr": 0.0003, | |
| "weight_decay": 0.1, | |
| "epsilon": 1e-08, | |
| "beta1": 0.9, | |
| "beta2": 0.95, | |
| "clip": 1.0, | |
| "scheduler": "cosine", | |
| "warmup": 1000, | |
| "lr_min_ratio": 0.1, | |
| "cycle_length": 1.0, | |
| "cosine_theta": 1.0, | |
| "annealing_step": 1000, | |
| "decay_fraction": 0.1, | |
| "exp_factor": 0.5 | |
| }, | |
| "model": { | |
| "dim": 1024, | |
| "n_layers": 24, | |
| "head_dim": null, | |
| "n_heads": 16, | |
| "n_kv_heads": 16, | |
| "ffn_dim_multiplier": null, | |
| "multiple_of": 256, | |
| "norm_eps": 1e-05, | |
| "rope_theta": 10000.0, | |
| "init_base_std": null, | |
| "init_std_factor": "disabled", | |
| "max_seqlen": 512, | |
| "seed": 42, | |
| "vocab_size": 258, | |
| "weight_tying": false, | |
| "sliding_window": null, | |
| "n_loops": 8, | |
| "shared_layers": true, | |
| "loop_residual": true | |
| }, | |
| "distributed": { | |
| "dp_shard": 1, | |
| "dp_replicate": 4, | |
| "tp_size": 1, | |
| "selective_activation_checkpointing": false, | |
| "compile": true, | |
| "fsdp_type": "full_shard", | |
| "model_dtype": "bf16", | |
| "float8_recipe": null, | |
| "float8_filter": "layers\\.[0-9]+\\.", | |
| "matmul_allow_tf32": true, | |
| "detect_anomaly": false, | |
| "compile_cache_size_limit": 8, | |
| "spawn_method": "forkserver" | |
| }, | |
| "env": { | |
| "MKL_SERVICE_FORCE_INTEL": "GNU", | |
| "OMP_NUM_THREADS": "1", | |
| "MKL_NUM_THREADS": "1", | |
| "ENABLE_INTRA_NODE_COMM": "1", | |
| "TORCH_NCCL_AVOID_RECORD_STREAMS": "1", | |
| "NCCL_IB_TIMEOUT": "22", | |
| "NCCL_DEBUG": "INFO", | |
| "TORCH_NCCL_ASYNC_ERROR_HANDLING": "1" | |
| }, | |
| "checkpoint": { | |
| "dump": { | |
| "every": 500, | |
| "keep": -1 | |
| }, | |
| "eval": { | |
| "every": 1500000, | |
| "keep": 3 | |
| }, | |
| "path": "/home/cd110/BFlowNet/apps/loopedLM/results/text2emoji/checkpoints", | |
| "init_ckpt_path": null, | |
| "continue_training_from_init": false | |
| }, | |
| "profiling": { | |
| "run": false, | |
| "trace_folder": "profiling", | |
| "mem_warmup": 100, | |
| "mem_steps": 2, | |
| "profile_warmup": 102, | |
| "profile_steps": 2 | |
| }, | |
| "logging": { | |
| "freq": 50, | |
| "acc_freq": null, | |
| "wandb": { | |
| "job_type": null, | |
| "dir": null, | |
| "project": "looped_lm_text2emoji", | |
| "entity": null, | |
| "tags": null, | |
| "group": null, | |
| "name": "looped_lm_text2emoji", | |
| "notes": null, | |
| "config_exclude_keys": null, | |
| "config_include_keys": null, | |
| "anonymous": null, | |
| "mode": null, | |
| "allow_val_change": null, | |
| "resume": null, | |
| "force": null, | |
| "tensorboard": null, | |
| "sync_tensorboard": null, | |
| "monitor_gym": null, | |
| "save_code": null, | |
| "id": null, | |
| "fork_from": null, | |
| "resume_from": null | |
| } | |
| }, | |
| "async_eval_gpus": null, | |
| "eval": { | |
| "generator": { | |
| "max_tokens": 128, | |
| "dtype": "bf16", | |
| "temperature": 0.7, | |
| "top_p": 0.9 | |
| }, | |
| "harness": { | |
| "tasks": [ | |
| "hellaswag", | |
| "piqa" | |
| ] | |
| }, | |
| "validation": { | |
| "max_steps": 200 | |
| } | |
| } | |
| } |