{ "name": "looped_lm_text2emoji", "dump_dir": "/home/cd110/BFlowNet/apps/loopedLM/results/text2emoji", "seed": 42, "grad_acc_steps": 1, "gc_collect_freq": 1000, "probe_freq": null, "steps": 5100, "data": { "root_dir": "/home/cd110/BFlowNet/apps/loopedLM/text2emoji_prepared", "sources": { "text2emoji": 1.0 }, "batch_size": 12, "seq_len": 512, "n_views": 2, "seed": 42, "add_bos": true, "add_eos": true, "load_async": true, "prefetch_size": 128, "tokenizer": { "name": "bytes", "path": null } }, "optim": { "lr": 0.0003, "weight_decay": 0.1, "epsilon": 1e-08, "beta1": 0.9, "beta2": 0.95, "clip": 1.0, "scheduler": "cosine", "warmup": 1000, "lr_min_ratio": 0.1, "cycle_length": 1.0, "cosine_theta": 1.0, "annealing_step": 1000, "decay_fraction": 0.1, "exp_factor": 0.5 }, "model": { "dim": 1024, "n_layers": 24, "head_dim": null, "n_heads": 16, "n_kv_heads": 16, "ffn_dim_multiplier": null, "multiple_of": 256, "norm_eps": 1e-05, "rope_theta": 10000.0, "init_base_std": null, "init_std_factor": "disabled", "max_seqlen": 512, "seed": 42, "vocab_size": 258, "weight_tying": false, "sliding_window": null, "n_loops": 8, "shared_layers": true, "loop_residual": true }, "distributed": { "dp_shard": 1, "dp_replicate": 4, "tp_size": 1, "selective_activation_checkpointing": false, "compile": true, "fsdp_type": "full_shard", "model_dtype": "bf16", "float8_recipe": null, "float8_filter": "layers\\.[0-9]+\\.", "matmul_allow_tf32": true, "detect_anomaly": false, "compile_cache_size_limit": 8, "spawn_method": "forkserver" }, "env": { "MKL_SERVICE_FORCE_INTEL": "GNU", "OMP_NUM_THREADS": "1", "MKL_NUM_THREADS": "1", "ENABLE_INTRA_NODE_COMM": "1", "TORCH_NCCL_AVOID_RECORD_STREAMS": "1", "NCCL_IB_TIMEOUT": "22", "NCCL_DEBUG": "INFO", "TORCH_NCCL_ASYNC_ERROR_HANDLING": "1" }, "checkpoint": { "dump": { "every": 500, "keep": -1 }, "eval": { "every": 1500000, "keep": 3 }, "path": "/home/cd110/BFlowNet/apps/loopedLM/results/text2emoji/checkpoints", "init_ckpt_path": null, "continue_training_from_init": false }, "profiling": { "run": false, "trace_folder": "profiling", "mem_warmup": 100, "mem_steps": 2, "profile_warmup": 102, "profile_steps": 2 }, "logging": { "freq": 50, "acc_freq": null, "wandb": { "job_type": null, "dir": null, "project": "looped_lm_text2emoji", "entity": null, "tags": null, "group": null, "name": "looped_lm_text2emoji", "notes": null, "config_exclude_keys": null, "config_include_keys": null, "anonymous": null, "mode": null, "allow_val_change": null, "resume": null, "force": null, "tensorboard": null, "sync_tensorboard": null, "monitor_gym": null, "save_code": null, "id": null, "fork_from": null, "resume_from": null } }, "async_eval_gpus": null, "eval": { "generator": { "max_tokens": 128, "dtype": "bf16", "temperature": 0.7, "top_p": 0.9 }, "harness": { "tasks": [ "hellaswag", "piqa" ] }, "validation": { "max_steps": 200 } } }