{ "step": 100, "mean_loss": 12.659629821777344, "best_loss": 12.208006858825684, "topk_fkl": 4.3213653564453125, "rkl_tail": 9.042220115661621, "skew_fkl": 1.0191670060157776, "fkl_tail": -0.008757250383496284, "validator_tail_bucket_kl": 4.312608480453491, "teacher_topk_mass": 0.9973499476909637, "student_topk_mass": 0.5712139010429382, "selective_weight_mean": 1.0, "quasar_aux": 0.7987381517887115, "kl_loss": 11.860891819000244, "args": { "student": "/home/claudeuser/ember", "teacher": "/home/claudeuser/Qwen3.5-4B", "out_dir": "/home/claudeuser/ember-greedy-v2", "teacher_cache": "/home/claudeuser/_teacher_cache_climbmix.pt", "skip_cache_gen": true, "cache_only": false, "n_prompts": 256, "max_new": 1580, "max_prompt_tokens": 256, "topk": 128, "shards_per_epoch": 6, "prompts_per_shard": 48, "block_hashes_file": null, "w_topk_fkl": 1.0, "w_rkl_tail": 0.8, "w_skew": 0.3, "w_fkl_tail": 0.0, "skew_alpha": 0.2, "w_quasar_internal": 1.0, "selective_weighting": false, "thread_memory_state": false, "layers_from": 17, "layers_to": 23, "no_lm_head": false, "no_final_norm": false, "extra_unfreeze_layers": "", "max_steps": 500, "lr": 2e-05, "weight_decay": 0.0, "warmup_steps": 30, "micro_batch": 12, "grad_accum": 2, "grad_clip": 1.0, "save_every": 50, "log_every": 5, "no_grad_ckpt": false, "seed": 42 } }