Scalable_monarch_adapter / training_metrics_bs8.json
nvan13's picture
Upload folder using huggingface_hub
ecadbd9 verified
{
"metadata": {
"run_name": "Experiment_BatchSize_8",
"timestamp": "2026-01-11 22:51:39",
"python_version": "3.11.14",
"pytorch_version": "2.9.0+cu128",
"gpu_info": {
"name": "NVIDIA H200",
"count": 1,
"capability": [
9,
0
]
},
"configuration": {
"batch_size_per_device": 8,
"learning_rate": 0.0005,
"max_steps": -1,
"num_train_epochs": 2.0,
"fp16": false,
"bf16": false,
"optim": "adamw_torch"
}
},
"metrics": [
{
"step": 20,
"epoch": 0.16,
"timestamp": "2026-01-11T22:52:18.211257",
"performance": {
"avg_time_per_step_s": 1.9308,
"steps_per_second": 0.52
},
"memory": {
"allocated_gb": 13.686748027801514,
"reserved_gb": 37.185546875,
"peak_allocated_gb": 35.05815267562866
}
},
{
"step": 40,
"epoch": 0.32,
"timestamp": "2026-01-11T22:52:32.616231",
"performance": {
"avg_time_per_step_s": 0.7202,
"steps_per_second": 1.39
},
"memory": {
"allocated_gb": 13.686748027801514,
"reserved_gb": 37.185546875,
"peak_allocated_gb": 35.05815267562866
}
},
{
"step": 60,
"epoch": 0.48,
"timestamp": "2026-01-11T22:53:37.785356",
"performance": {
"avg_time_per_step_s": 3.2585,
"steps_per_second": 0.31
},
"memory": {
"allocated_gb": 13.686748027801514,
"reserved_gb": 45.818359375,
"peak_allocated_gb": 35.05815267562866
}
},
{
"step": 80,
"epoch": 0.64,
"timestamp": "2026-01-11T22:53:50.731073",
"performance": {
"avg_time_per_step_s": 0.6473,
"steps_per_second": 1.54
},
"memory": {
"allocated_gb": 13.686748027801514,
"reserved_gb": 45.818359375,
"peak_allocated_gb": 35.05815267562866
}
},
{
"step": 100,
"epoch": 0.8,
"timestamp": "2026-01-11T22:54:03.641820",
"performance": {
"avg_time_per_step_s": 0.6455,
"steps_per_second": 1.55
},
"memory": {
"allocated_gb": 13.686748027801514,
"reserved_gb": 45.818359375,
"peak_allocated_gb": 35.05815267562866
}
},
{
"step": 120,
"epoch": 0.96,
"timestamp": "2026-01-11T22:54:45.232718",
"performance": {
"avg_time_per_step_s": 2.0795,
"steps_per_second": 0.48
},
"memory": {
"allocated_gb": 13.686748027801514,
"reserved_gb": 45.818359375,
"peak_allocated_gb": 35.05815267562866
}
},
{
"step": 140,
"epoch": 1.12,
"timestamp": "2026-01-11T22:54:58.184280",
"performance": {
"avg_time_per_step_s": 0.6476,
"steps_per_second": 1.54
},
"memory": {
"allocated_gb": 13.686748027801514,
"reserved_gb": 45.818359375,
"peak_allocated_gb": 35.05815267562866
}
},
{
"step": 160,
"epoch": 1.28,
"timestamp": "2026-01-11T22:55:39.483535",
"performance": {
"avg_time_per_step_s": 2.065,
"steps_per_second": 0.48
},
"memory": {
"allocated_gb": 13.686748027801514,
"reserved_gb": 45.818359375,
"peak_allocated_gb": 35.05815267562866
}
},
{
"step": 180,
"epoch": 1.44,
"timestamp": "2026-01-11T22:55:52.477701",
"performance": {
"avg_time_per_step_s": 0.6497,
"steps_per_second": 1.54
},
"memory": {
"allocated_gb": 13.686748027801514,
"reserved_gb": 45.818359375,
"peak_allocated_gb": 35.05815267562866
}
},
{
"step": 200,
"epoch": 1.6,
"timestamp": "2026-01-11T22:56:05.405432",
"performance": {
"avg_time_per_step_s": 0.6464,
"steps_per_second": 1.55
},
"memory": {
"allocated_gb": 13.686748027801514,
"reserved_gb": 45.818359375,
"peak_allocated_gb": 35.05815267562866
}
},
{
"step": 220,
"epoch": 1.76,
"timestamp": "2026-01-11T22:56:45.924172",
"performance": {
"avg_time_per_step_s": 2.0259,
"steps_per_second": 0.49
},
"memory": {
"allocated_gb": 13.686748027801514,
"reserved_gb": 45.818359375,
"peak_allocated_gb": 35.05815267562866
}
},
{
"step": 240,
"epoch": 1.92,
"timestamp": "2026-01-11T22:56:58.867380",
"performance": {
"avg_time_per_step_s": 0.6472,
"steps_per_second": 1.55
},
"memory": {
"allocated_gb": 13.686748027801514,
"reserved_gb": 45.818359375,
"peak_allocated_gb": 35.05815267562866
}
}
]
}