{ "metadata": { "run_name": "Experiment_BatchSize_8", "timestamp": "2026-01-11 22:51:39", "python_version": "3.11.14", "pytorch_version": "2.9.0+cu128", "gpu_info": { "name": "NVIDIA H200", "count": 1, "capability": [ 9, 0 ] }, "configuration": { "batch_size_per_device": 8, "learning_rate": 0.0005, "max_steps": -1, "num_train_epochs": 2.0, "fp16": false, "bf16": false, "optim": "adamw_torch" } }, "metrics": [ { "step": 20, "epoch": 0.16, "timestamp": "2026-01-11T22:52:18.211257", "performance": { "avg_time_per_step_s": 1.9308, "steps_per_second": 0.52 }, "memory": { "allocated_gb": 13.686748027801514, "reserved_gb": 37.185546875, "peak_allocated_gb": 35.05815267562866 } }, { "step": 40, "epoch": 0.32, "timestamp": "2026-01-11T22:52:32.616231", "performance": { "avg_time_per_step_s": 0.7202, "steps_per_second": 1.39 }, "memory": { "allocated_gb": 13.686748027801514, "reserved_gb": 37.185546875, "peak_allocated_gb": 35.05815267562866 } }, { "step": 60, "epoch": 0.48, "timestamp": "2026-01-11T22:53:37.785356", "performance": { "avg_time_per_step_s": 3.2585, "steps_per_second": 0.31 }, "memory": { "allocated_gb": 13.686748027801514, "reserved_gb": 45.818359375, "peak_allocated_gb": 35.05815267562866 } }, { "step": 80, "epoch": 0.64, "timestamp": "2026-01-11T22:53:50.731073", "performance": { "avg_time_per_step_s": 0.6473, "steps_per_second": 1.54 }, "memory": { "allocated_gb": 13.686748027801514, "reserved_gb": 45.818359375, "peak_allocated_gb": 35.05815267562866 } }, { "step": 100, "epoch": 0.8, "timestamp": "2026-01-11T22:54:03.641820", "performance": { "avg_time_per_step_s": 0.6455, "steps_per_second": 1.55 }, "memory": { "allocated_gb": 13.686748027801514, "reserved_gb": 45.818359375, "peak_allocated_gb": 35.05815267562866 } }, { "step": 120, "epoch": 0.96, "timestamp": "2026-01-11T22:54:45.232718", "performance": { "avg_time_per_step_s": 2.0795, "steps_per_second": 0.48 }, "memory": { "allocated_gb": 13.686748027801514, "reserved_gb": 45.818359375, "peak_allocated_gb": 35.05815267562866 } }, { "step": 140, "epoch": 1.12, "timestamp": "2026-01-11T22:54:58.184280", "performance": { "avg_time_per_step_s": 0.6476, "steps_per_second": 1.54 }, "memory": { "allocated_gb": 13.686748027801514, "reserved_gb": 45.818359375, "peak_allocated_gb": 35.05815267562866 } }, { "step": 160, "epoch": 1.28, "timestamp": "2026-01-11T22:55:39.483535", "performance": { "avg_time_per_step_s": 2.065, "steps_per_second": 0.48 }, "memory": { "allocated_gb": 13.686748027801514, "reserved_gb": 45.818359375, "peak_allocated_gb": 35.05815267562866 } }, { "step": 180, "epoch": 1.44, "timestamp": "2026-01-11T22:55:52.477701", "performance": { "avg_time_per_step_s": 0.6497, "steps_per_second": 1.54 }, "memory": { "allocated_gb": 13.686748027801514, "reserved_gb": 45.818359375, "peak_allocated_gb": 35.05815267562866 } }, { "step": 200, "epoch": 1.6, "timestamp": "2026-01-11T22:56:05.405432", "performance": { "avg_time_per_step_s": 0.6464, "steps_per_second": 1.55 }, "memory": { "allocated_gb": 13.686748027801514, "reserved_gb": 45.818359375, "peak_allocated_gb": 35.05815267562866 } }, { "step": 220, "epoch": 1.76, "timestamp": "2026-01-11T22:56:45.924172", "performance": { "avg_time_per_step_s": 2.0259, "steps_per_second": 0.49 }, "memory": { "allocated_gb": 13.686748027801514, "reserved_gb": 45.818359375, "peak_allocated_gb": 35.05815267562866 } }, { "step": 240, "epoch": 1.92, "timestamp": "2026-01-11T22:56:58.867380", "performance": { "avg_time_per_step_s": 0.6472, "steps_per_second": 1.55 }, "memory": { "allocated_gb": 13.686748027801514, "reserved_gb": 45.818359375, "peak_allocated_gb": 35.05815267562866 } } ] }