{ "model_name": "microsoft/Phi-3.5-mini-instruct", "display_name": "Phi-3.5 (BF16, Liger, Batch96, CkptON)", "timestamp": "2026-02-03T19:35:19.489805", "training_config": { "num_train_epochs": 1, "per_device_train_batch_size": 96, "gradient_accumulation_steps": 1, "learning_rate": 0.0002, "warmup_ratio": 0.03, "lr_scheduler_type": "cosine", "weight_decay": 0.01, "max_seq_length": 4096, "logging_steps": 1, "eval_steps": 50, "save_steps": 200, "seed": 42, "bf16": true, "optim": "adamw_torch_fused", "dataloader_num_workers": 8, "torch_compile": true }, "lora_config": { "r": 16, "lora_alpha": 32, "lora_dropout": 0.05, "target_modules": [ "q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj" ], "bias": "none", "task_type": "CAUSAL_LM" }, "train_loss": 0.5981301681586066, "train_samples": 172145, "val_samples": 9066, "train_time_minutes": 147.3416652202606, "max_memory_gb": 54.91591787338257, "fix_applied": "YAML normalization via PyYAML (2 spaces), packing=False, Native BF16 Training, Batch 96" }