{ "step": 2000, "val_loss": 1.0681076049804688, "model_config": { "sequence_len": 7180, "vocab_size": 1542, "n_layer": 20, "n_head": 10, "n_kv_head": 10, "n_embd": 1280, "window_pattern": "L" }, "user_config": { "data_dir": "/data/sat2map/sat2map_g16_t12_target64_k1024", "run": "sat2map_h100_8gpu_v2", "device_type": "", "depth": 20, "aspect_ratio": 64, "head_dim": 128, "window_pattern": "L", "num_iterations": 2000, "device_batch_size": 1, "grad_accum_steps": 8, "total_batch_size": 0, "embedding_lr": 0.3, "unembedding_lr": 0.004, "weight_decay": 0.2, "matrix_lr": 0.02, "scalar_lr": 0.5, "adam_beta1": 0.8, "adam_beta2": 0.95, "warmup_ratio": 0.01, "warmdown_ratio": 0.4, "final_lr_frac": 0.0, "resume_from_step": -1, "eval_every": 200, "eval_steps": 50, "save_every": 500, "model_tag": null, "init_from_group": "", "init_from_tag": "", "init_from_step": -1 }, "dataset_meta": { "sequence_len": 7180, "sequence_tokens_len": 7181, "num_timesteps": 12, "timestep_strategy": "uniform", "context_grid_size": 16, "context_patch_size": 8, "target_grid_size": 64, "pad_id": 0, "mask_id": 1, "ignore_index": -1, "token_spec": { "s2_base": 2, "s2_k": 1024, "time_sep_id": 1026, "task_wc_id": 1027, "task_cdl_id": 1028, "bos_id": 1029, "wc_base": 1030, "cdl_base": 1286, "vocab_size": 1542 }, "task_id_to_name": { "1": "worldcover", "2": "cdl" } }, "effective_total_batch_size": 459520, "grad_accum_steps": 8, "dataloader_state_dict": { "epoch": 14, "rank": 0 }, "loop_state": { "min_val_loss": 0.27583298087120056, "smooth_train_loss": 0.008957206082207212, "total_training_time": 1468.8923184871674 } }