{ "hyperparameters": { "epochs": 4, "learning_rate": 0.0001, "lora_r": 32, "lora_alpha": 64, "lora_dropout": 0.05, "grad_accum": 32, "warmup_ratio": 0.1, "max_grad_norm": 0.3, "lr_scheduler": "cosine", "early_stopping_patience": 3 }, "dataset": { "train_studies": 521, "val_studies": 66, "max_images": null, "image_scale": 1.0 }, "model": { "base_model": "google/medgemma-4b-it", "trainable_params": 1419500544, "total_params": 3004795248, "trainable_percent": 47.241173752015996 }, "training": { "total_steps": 68, "final_epoch": 4.0 }, "log_history": [ { "loss": 2.9492, "grad_norm": 9.147201538085938, "learning_rate": 9.973499378072945e-05, "epoch": 0.6142034548944337, "step": 10 }, { "loss": 0.6156, "grad_norm": 0.9977354407310486, "learning_rate": 9.075141687584057e-05, "epoch": 1.1842610364683301, "step": 20 }, { "eval_loss": 0.1739240437746048, "eval_runtime": 327.2284, "eval_samples_per_second": 0.202, "eval_steps_per_second": 0.202, "epoch": 1.491362763915547, "step": 25 }, { "loss": 0.188, "grad_norm": 0.6408072710037231, "learning_rate": 7.119571953549305e-05, "epoch": 1.7984644913627639, "step": 30 }, { "loss": 0.1108, "grad_norm": 0.7327869534492493, "learning_rate": 4.6141226893667684e-05, "epoch": 2.3685220729366603, "step": 40 }, { "loss": 0.1009, "grad_norm": 0.45105066895484924, "learning_rate": 2.2087813898656774e-05, "epoch": 2.982725527831094, "step": 50 }, { "eval_loss": 0.0999031811952591, "eval_runtime": 325.9289, "eval_samples_per_second": 0.202, "eval_steps_per_second": 0.202, "epoch": 2.982725527831094, "step": 50 }, { "loss": 0.0873, "grad_norm": 0.44253888726234436, "learning_rate": 5.275645888560232e-06, "epoch": 3.5527831094049906, "step": 60 }, { "train_runtime": 20602.1753, "train_samples_per_second": 0.101, "train_steps_per_second": 0.003, "total_flos": 8.333491403406732e+17, "train_loss": 0.6056773127878413, "epoch": 4.0, "step": 68 } ] }