| { |
| "hyperparameters": { |
| "epochs": 4, |
| "learning_rate": 0.0001, |
| "lora_r": 32, |
| "lora_alpha": 64, |
| "lora_dropout": 0.05, |
| "grad_accum": 32, |
| "warmup_ratio": 0.1, |
| "max_grad_norm": 0.3, |
| "lr_scheduler": "cosine", |
| "early_stopping_patience": 3 |
| }, |
| "dataset": { |
| "train_studies": 521, |
| "val_studies": 66, |
| "max_images": null, |
| "image_scale": 1.0 |
| }, |
| "model": { |
| "base_model": "google/medgemma-4b-it", |
| "trainable_params": 1419500544, |
| "total_params": 3004795248, |
| "trainable_percent": 47.241173752015996 |
| }, |
| "training": { |
| "total_steps": 68, |
| "final_epoch": 4.0 |
| }, |
| "log_history": [ |
| { |
| "loss": 2.9492, |
| "grad_norm": 9.147201538085938, |
| "learning_rate": 9.973499378072945e-05, |
| "epoch": 0.6142034548944337, |
| "step": 10 |
| }, |
| { |
| "loss": 0.6156, |
| "grad_norm": 0.9977354407310486, |
| "learning_rate": 9.075141687584057e-05, |
| "epoch": 1.1842610364683301, |
| "step": 20 |
| }, |
| { |
| "eval_loss": 0.1739240437746048, |
| "eval_runtime": 327.2284, |
| "eval_samples_per_second": 0.202, |
| "eval_steps_per_second": 0.202, |
| "epoch": 1.491362763915547, |
| "step": 25 |
| }, |
| { |
| "loss": 0.188, |
| "grad_norm": 0.6408072710037231, |
| "learning_rate": 7.119571953549305e-05, |
| "epoch": 1.7984644913627639, |
| "step": 30 |
| }, |
| { |
| "loss": 0.1108, |
| "grad_norm": 0.7327869534492493, |
| "learning_rate": 4.6141226893667684e-05, |
| "epoch": 2.3685220729366603, |
| "step": 40 |
| }, |
| { |
| "loss": 0.1009, |
| "grad_norm": 0.45105066895484924, |
| "learning_rate": 2.2087813898656774e-05, |
| "epoch": 2.982725527831094, |
| "step": 50 |
| }, |
| { |
| "eval_loss": 0.0999031811952591, |
| "eval_runtime": 325.9289, |
| "eval_samples_per_second": 0.202, |
| "eval_steps_per_second": 0.202, |
| "epoch": 2.982725527831094, |
| "step": 50 |
| }, |
| { |
| "loss": 0.0873, |
| "grad_norm": 0.44253888726234436, |
| "learning_rate": 5.275645888560232e-06, |
| "epoch": 3.5527831094049906, |
| "step": 60 |
| }, |
| { |
| "train_runtime": 20602.1753, |
| "train_samples_per_second": 0.101, |
| "train_steps_per_second": 0.003, |
| "total_flos": 8.333491403406732e+17, |
| "train_loss": 0.6056773127878413, |
| "epoch": 4.0, |
| "step": 68 |
| } |
| ] |
| } |