Dream-diffllm / train_state_00003.json
wyhwhy's picture
Upload folder using huggingface_hub
369e662 verified
{"step": 2500, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 197, "it_state": {"it_state": {"root_dir": "/scratch/dyvm6xra/dyvm6xrauseryuhao/dataset/Pretraining_Dataset", "sources": {"data1": 1.0}, "source_to_state": {"data1": {"file_path": "/scratch/dyvm6xra/dyvm6xrauseryuhao/dataset/Pretraining_Dataset/data1/data1.chunk.00.jsonl", "position": 47492926, "block_size": 8, "offset": 3, "current_iter": 1}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 12986492799214244244791808130043976593, "inc": 148211758571781046255077612135386035203}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "hf", "path": "/scratch/dyvm6xra/dyvm6xrauseryuhao/dream-training/Qwen2.5-7B-dcp"}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 452, "rng_state": {"bit_generator": "PCG64", "state": {"state": 130957549404782263888846267584236103538, "inc": 115810872492597857501795428972873905393}, "has_uint32": 0, "uinteger": 1628108682}, "batch_size": 1, "prefetch_size": 1024}, "scheduler": {"base_lrs": [1e-05], "last_epoch": 2500, "verbose": false, "_step_count": 2501, "_get_lr_called_within_step": false, "_last_lr": [5e-06], "lr_lambdas": [{}]}}