{ "base_model": "unsloth/Qwen3-30B-A3B-Instruct-2507", "phase": "phase2a-test-1k", "training_config": { "model": "Qwen3-30B-A3B-Instruct-2507", "phase": "phase2a-test-1k", "dataset": "phase2-rp-base-1k", "num_samples": 1000, "lora_rank": 16, "lora_alpha": 16, "lora_dropout": 0, "learning_rate": 0.0002, "batch_size": 2, "gradient_accumulation_steps": 4, "effective_batch_size": 32, "max_steps": 100, "warmup_steps": 10, "max_seq_length": 2048, "optimizer": "adamw_8bit", "weight_decay": 0.01, "lr_scheduler_type": "linear", "precision": "bfloat16", "device_map": "auto", "gpus": "4x RTX 5090", "training_time": "40 minutes", "framework": "Unsloth 2025.11.3", "target_modules": [ "q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj" ] }, "metrics": { "training_loss": { "initial": 2.3745, "final": 1.5027, "reduction_percent": 36.7 }, "training_metrics": { "total_steps": 100, "total_samples": 1000, "training_time_seconds": 2380.49, "training_time_minutes": 39.67, "samples_per_second": 0.336, "final_grad_norm": 0.1539, "final_learning_rate": 0.0 }, "loss_progression": { "step_5": 2.3745, "step_10": 1.531, "step_50": 1.632, "step_100": 1.5027 }, "wandb_run": "https://wandb.ai/developer_lunark-lunark-ai/kaidol-llm-finetuning/runs/brryct5m", "notes": "Baseline test with 1K samples. Stable convergence observed. Ready for hyperparameter optimization (LR 2e-4→1e-4, alpha 16→32, grad_accum 4→8)." }, "upload_date": "2025-11-18T09:24:35.457118" }