File size: 6,052 Bytes
ecadbd9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 | {
"metadata": {
"run_name": "Experiment_BatchSize_8",
"timestamp": "2026-01-11 22:51:39",
"python_version": "3.11.14",
"pytorch_version": "2.9.0+cu128",
"gpu_info": {
"name": "NVIDIA H200",
"count": 1,
"capability": [
9,
0
]
},
"configuration": {
"batch_size_per_device": 8,
"learning_rate": 0.0005,
"max_steps": -1,
"num_train_epochs": 2.0,
"fp16": false,
"bf16": false,
"optim": "adamw_torch"
}
},
"metrics": [
{
"step": 20,
"epoch": 0.16,
"timestamp": "2026-01-11T22:52:18.211257",
"performance": {
"avg_time_per_step_s": 1.9308,
"steps_per_second": 0.52
},
"memory": {
"allocated_gb": 13.686748027801514,
"reserved_gb": 37.185546875,
"peak_allocated_gb": 35.05815267562866
}
},
{
"step": 40,
"epoch": 0.32,
"timestamp": "2026-01-11T22:52:32.616231",
"performance": {
"avg_time_per_step_s": 0.7202,
"steps_per_second": 1.39
},
"memory": {
"allocated_gb": 13.686748027801514,
"reserved_gb": 37.185546875,
"peak_allocated_gb": 35.05815267562866
}
},
{
"step": 60,
"epoch": 0.48,
"timestamp": "2026-01-11T22:53:37.785356",
"performance": {
"avg_time_per_step_s": 3.2585,
"steps_per_second": 0.31
},
"memory": {
"allocated_gb": 13.686748027801514,
"reserved_gb": 45.818359375,
"peak_allocated_gb": 35.05815267562866
}
},
{
"step": 80,
"epoch": 0.64,
"timestamp": "2026-01-11T22:53:50.731073",
"performance": {
"avg_time_per_step_s": 0.6473,
"steps_per_second": 1.54
},
"memory": {
"allocated_gb": 13.686748027801514,
"reserved_gb": 45.818359375,
"peak_allocated_gb": 35.05815267562866
}
},
{
"step": 100,
"epoch": 0.8,
"timestamp": "2026-01-11T22:54:03.641820",
"performance": {
"avg_time_per_step_s": 0.6455,
"steps_per_second": 1.55
},
"memory": {
"allocated_gb": 13.686748027801514,
"reserved_gb": 45.818359375,
"peak_allocated_gb": 35.05815267562866
}
},
{
"step": 120,
"epoch": 0.96,
"timestamp": "2026-01-11T22:54:45.232718",
"performance": {
"avg_time_per_step_s": 2.0795,
"steps_per_second": 0.48
},
"memory": {
"allocated_gb": 13.686748027801514,
"reserved_gb": 45.818359375,
"peak_allocated_gb": 35.05815267562866
}
},
{
"step": 140,
"epoch": 1.12,
"timestamp": "2026-01-11T22:54:58.184280",
"performance": {
"avg_time_per_step_s": 0.6476,
"steps_per_second": 1.54
},
"memory": {
"allocated_gb": 13.686748027801514,
"reserved_gb": 45.818359375,
"peak_allocated_gb": 35.05815267562866
}
},
{
"step": 160,
"epoch": 1.28,
"timestamp": "2026-01-11T22:55:39.483535",
"performance": {
"avg_time_per_step_s": 2.065,
"steps_per_second": 0.48
},
"memory": {
"allocated_gb": 13.686748027801514,
"reserved_gb": 45.818359375,
"peak_allocated_gb": 35.05815267562866
}
},
{
"step": 180,
"epoch": 1.44,
"timestamp": "2026-01-11T22:55:52.477701",
"performance": {
"avg_time_per_step_s": 0.6497,
"steps_per_second": 1.54
},
"memory": {
"allocated_gb": 13.686748027801514,
"reserved_gb": 45.818359375,
"peak_allocated_gb": 35.05815267562866
}
},
{
"step": 200,
"epoch": 1.6,
"timestamp": "2026-01-11T22:56:05.405432",
"performance": {
"avg_time_per_step_s": 0.6464,
"steps_per_second": 1.55
},
"memory": {
"allocated_gb": 13.686748027801514,
"reserved_gb": 45.818359375,
"peak_allocated_gb": 35.05815267562866
}
},
{
"step": 220,
"epoch": 1.76,
"timestamp": "2026-01-11T22:56:45.924172",
"performance": {
"avg_time_per_step_s": 2.0259,
"steps_per_second": 0.49
},
"memory": {
"allocated_gb": 13.686748027801514,
"reserved_gb": 45.818359375,
"peak_allocated_gb": 35.05815267562866
}
},
{
"step": 240,
"epoch": 1.92,
"timestamp": "2026-01-11T22:56:58.867380",
"performance": {
"avg_time_per_step_s": 0.6472,
"steps_per_second": 1.55
},
"memory": {
"allocated_gb": 13.686748027801514,
"reserved_gb": 45.818359375,
"peak_allocated_gb": 35.05815267562866
}
}
]
} |