{ "model": "Qwen/Qwen3-4B", "dataset": "microsoft/rStar-Coder (synthetic_sft)", "samples": 10000, "epochs": 1, "lora_rank": 32, "lora_alpha": 64, "batch_size": 2, "gradient_accumulation": 8, "learning_rate": 0.0002, "max_seq_length": 4096, "results": { "humaneval_base": 68.9, "humaneval_plus": 64.0, "mbpp_base": 58.2, "mbpp_plus": 50.8 } }