ERNIE-Image-Turbo-SDNQ-uint4-static / metrics /runtime_allocator_debug_metrics.json
WaveCut's picture
Document corrected ERNIE qmm runtime profile
b292728 verified
{
"device": "NVIDIA RTX 6000 Ada Generation",
"torch": "2.8.0+cu128",
"cases": [
{
"name": "qmm_all_empty_true",
"enabled_qmm_components": [
"pe",
"text_encoder",
"transformer"
],
"empty_cache": true,
"load": {
"seconds": 59.13287413865328,
"gpu_start_mib": 434,
"gpu_end_mib": 10006,
"gpu_peak_mib": 10006,
"torch_peak_allocated_mib": 9555,
"torch_peak_reserved_mib": 9572,
"qmm_states": {
"pe": {
"requested": true,
"actual": true
},
"text_encoder": {
"requested": true,
"actual": true
},
"transformer": {
"requested": true,
"actual": true
}
}
},
"speed_rows": [
{
"prompt_id": "00-cyrillic-poster",
"width": 1024,
"height": 1024,
"seconds": 48.675362937152386,
"gpu_start_mib": 10006,
"gpu_end_mib": 10658,
"torch_peak_allocated_mib": 18980,
"torch_peak_reserved_mib": 48118,
"stages": {},
"stage_seconds_sum": 0,
"unattributed_seconds": null,
"empty_cache": true,
"instrument": false
},
{
"prompt_id": "01-long-text-bakery-ad",
"width": 896,
"height": 1200,
"seconds": 25.80502188205719,
"gpu_start_mib": 10094,
"gpu_end_mib": 10676,
"torch_peak_allocated_mib": 19219,
"torch_peak_reserved_mib": 48118,
"stages": {},
"stage_seconds_sum": 0,
"unattributed_seconds": null,
"empty_cache": true,
"instrument": false
},
{
"prompt_id": "02-technical-diagram",
"width": 1200,
"height": 896,
"seconds": 25.88253455609083,
"gpu_start_mib": 10092,
"gpu_end_mib": 10696,
"torch_peak_allocated_mib": 19219,
"torch_peak_reserved_mib": 48116,
"stages": {},
"stage_seconds_sum": 0,
"unattributed_seconds": null,
"empty_cache": true,
"instrument": false
},
{
"prompt_id": "03-four-panel-comic",
"width": 1024,
"height": 1024,
"seconds": 26.319617360830307,
"gpu_start_mib": 10092,
"gpu_end_mib": 32080,
"torch_peak_allocated_mib": 12002,
"torch_peak_reserved_mib": 48108,
"stages": {},
"stage_seconds_sum": 0,
"unattributed_seconds": null,
"empty_cache": true,
"instrument": false
}
],
"stage_rows": [
{
"prompt_id": "00-cyrillic-poster",
"width": 1024,
"height": 1024,
"seconds": 16.900417506694794,
"gpu_start_mib": 10092,
"gpu_end_mib": 29120,
"torch_peak_allocated_mib": 12002,
"torch_peak_reserved_mib": 48118,
"stages": {
"text_encoder.forward": {
"seconds": 0.080795519053936,
"calls": 1
},
"transformer.forward": {
"seconds": 16.636043712496758,
"calls": 8
},
"vae.decode": {
"seconds": 0.14374303817749023,
"calls": 1
}
},
"stage_seconds_sum": 16.860582269728184,
"unattributed_seconds": 0.039835236966609955,
"empty_cache": true,
"instrument": true
},
{
"prompt_id": "02-technical-diagram",
"width": 1200,
"height": 896,
"seconds": 17.653532460331917,
"gpu_start_mib": 10094,
"gpu_end_mib": 20096,
"torch_peak_allocated_mib": 12063,
"torch_peak_reserved_mib": 48116,
"stages": {
"text_encoder.forward": {
"seconds": 0.06983215361833572,
"calls": 1
},
"transformer.forward": {
"seconds": 17.393484614789486,
"calls": 8
},
"vae.decode": {
"seconds": 0.14451629668474197,
"calls": 1
}
},
"stage_seconds_sum": 17.607833065092564,
"unattributed_seconds": 0.04569939523935318,
"empty_cache": true,
"instrument": true
}
],
"summary": {
"cold_seconds": 48.675362937152386,
"hot_mean_seconds": 26.00239126632611,
"hot_median_seconds": 25.88253455609083,
"hot_min_seconds": 25.80502188205719,
"hot_max_seconds": 26.319617360830307,
"hot_max_reserved_mib": 48118,
"hot_max_allocated_mib": 19219
}
},
{
"name": "qmm_all_empty_false",
"enabled_qmm_components": [
"pe",
"text_encoder",
"transformer"
],
"empty_cache": false,
"load": {
"seconds": 54.08376982063055,
"gpu_start_mib": 540,
"gpu_end_mib": 10092,
"gpu_peak_mib": 10092,
"torch_peak_allocated_mib": 9563,
"torch_peak_reserved_mib": 9572,
"qmm_states": {
"pe": {
"requested": true,
"actual": true
},
"text_encoder": {
"requested": true,
"actual": true
},
"transformer": {
"requested": true,
"actual": true
}
}
},
"speed_rows": [
{
"prompt_id": "00-cyrillic-poster",
"width": 1024,
"height": 1024,
"seconds": 17.728631243109703,
"gpu_start_mib": 10092,
"gpu_end_mib": 20200,
"torch_peak_allocated_mib": 12002,
"torch_peak_reserved_mib": 48118,
"stages": {},
"stage_seconds_sum": 0,
"unattributed_seconds": null,
"empty_cache": false,
"instrument": false
},
{
"prompt_id": "01-long-text-bakery-ad",
"width": 896,
"height": 1200,
"seconds": 18.051423519849777,
"gpu_start_mib": 20200,
"gpu_end_mib": 13716,
"torch_peak_allocated_mib": 12064,
"torch_peak_reserved_mib": 48118,
"stages": {},
"stage_seconds_sum": 0,
"unattributed_seconds": null,
"empty_cache": false,
"instrument": false
},
{
"prompt_id": "02-technical-diagram",
"width": 1200,
"height": 896,
"seconds": 15.653381533920765,
"gpu_start_mib": 13716,
"gpu_end_mib": 36456,
"torch_peak_allocated_mib": 12063,
"torch_peak_reserved_mib": 48116,
"stages": {},
"stage_seconds_sum": 0,
"unattributed_seconds": null,
"empty_cache": false,
"instrument": false
},
{
"prompt_id": "03-four-panel-comic",
"width": 1024,
"height": 1024,
"seconds": 15.864265829324722,
"gpu_start_mib": 36456,
"gpu_end_mib": 39720,
"torch_peak_allocated_mib": 12002,
"torch_peak_reserved_mib": 48118,
"stages": {},
"stage_seconds_sum": 0,
"unattributed_seconds": null,
"empty_cache": false,
"instrument": false
}
],
"stage_rows": [
{
"prompt_id": "00-cyrillic-poster",
"width": 1024,
"height": 1024,
"seconds": 14.801267191767693,
"gpu_start_mib": 39720,
"gpu_end_mib": 37980,
"torch_peak_allocated_mib": 12002,
"torch_peak_reserved_mib": 48118,
"stages": {
"text_encoder.forward": {
"seconds": 0.07214060425758362,
"calls": 1
},
"transformer.forward": {
"seconds": 14.541316010057926,
"calls": 8
},
"vae.decode": {
"seconds": 0.14533011615276337,
"calls": 1
}
},
"stage_seconds_sum": 14.758786730468273,
"unattributed_seconds": 0.0424804612994194,
"empty_cache": false,
"instrument": true
},
{
"prompt_id": "02-technical-diagram",
"width": 1200,
"height": 896,
"seconds": 16.745930925011635,
"gpu_start_mib": 37980,
"gpu_end_mib": 45636,
"torch_peak_allocated_mib": 12063,
"torch_peak_reserved_mib": 48116,
"stages": {
"text_encoder.forward": {
"seconds": 0.07216303050518036,
"calls": 1
},
"transformer.forward": {
"seconds": 16.474046893417835,
"calls": 8
},
"vae.decode": {
"seconds": 0.1505463719367981,
"calls": 1
}
},
"stage_seconds_sum": 16.696756295859814,
"unattributed_seconds": 0.049174629151821136,
"empty_cache": false,
"instrument": true
}
],
"summary": {
"cold_seconds": 17.728631243109703,
"hot_mean_seconds": 16.52302362769842,
"hot_median_seconds": 15.864265829324722,
"hot_min_seconds": 15.653381533920765,
"hot_max_seconds": 18.051423519849777,
"hot_max_reserved_mib": 48118,
"hot_max_allocated_mib": 12064
}
},
{
"name": "qmm_transformer_only_empty_true",
"enabled_qmm_components": [
"transformer"
],
"empty_cache": true,
"load": {
"seconds": 54.33865138143301,
"gpu_start_mib": 540,
"gpu_end_mib": 10092,
"gpu_peak_mib": 10092,
"torch_peak_allocated_mib": 9563,
"torch_peak_reserved_mib": 9572,
"qmm_states": {
"pe": {
"requested": false,
"actual": false
},
"text_encoder": {
"requested": false,
"actual": false
},
"transformer": {
"requested": true,
"actual": true
}
}
},
"speed_rows": [
{
"prompt_id": "00-cyrillic-poster",
"width": 1024,
"height": 1024,
"seconds": 22.075800754129887,
"gpu_start_mib": 10092,
"gpu_end_mib": 21082,
"torch_peak_allocated_mib": 12002,
"torch_peak_reserved_mib": 48098,
"stages": {},
"stage_seconds_sum": 0,
"unattributed_seconds": null,
"empty_cache": true,
"instrument": false
},
{
"prompt_id": "01-long-text-bakery-ad",
"width": 896,
"height": 1200,
"seconds": 20.010109677910805,
"gpu_start_mib": 10096,
"gpu_end_mib": 34960,
"torch_peak_allocated_mib": 12064,
"torch_peak_reserved_mib": 48110,
"stages": {},
"stage_seconds_sum": 0,
"unattributed_seconds": null,
"empty_cache": true,
"instrument": false
},
{
"prompt_id": "02-technical-diagram",
"width": 1200,
"height": 896,
"seconds": 16.71645325422287,
"gpu_start_mib": 10094,
"gpu_end_mib": 36458,
"torch_peak_allocated_mib": 12063,
"torch_peak_reserved_mib": 48116,
"stages": {},
"stage_seconds_sum": 0,
"unattributed_seconds": null,
"empty_cache": true,
"instrument": false
},
{
"prompt_id": "03-four-panel-comic",
"width": 1024,
"height": 1024,
"seconds": 17.778217256069183,
"gpu_start_mib": 10094,
"gpu_end_mib": 20264,
"torch_peak_allocated_mib": 12002,
"torch_peak_reserved_mib": 48108,
"stages": {},
"stage_seconds_sum": 0,
"unattributed_seconds": null,
"empty_cache": true,
"instrument": false
}
],
"stage_rows": [
{
"prompt_id": "00-cyrillic-poster",
"width": 1024,
"height": 1024,
"seconds": 16.47866802662611,
"gpu_start_mib": 10094,
"gpu_end_mib": 21082,
"torch_peak_allocated_mib": 12002,
"torch_peak_reserved_mib": 48098,
"stages": {
"text_encoder.forward": {
"seconds": 0.05702096223831177,
"calls": 1
},
"transformer.forward": {
"seconds": 16.22751172631979,
"calls": 8
},
"vae.decode": {
"seconds": 0.146262064576149,
"calls": 1
}
},
"stage_seconds_sum": 16.43079475313425,
"unattributed_seconds": 0.047873273491859436,
"empty_cache": true,
"instrument": true
},
{
"prompt_id": "02-technical-diagram",
"width": 1200,
"height": 896,
"seconds": 18.242334879934788,
"gpu_start_mib": 10096,
"gpu_end_mib": 36458,
"torch_peak_allocated_mib": 12063,
"torch_peak_reserved_mib": 48116,
"stages": {
"text_encoder.forward": {
"seconds": 0.05669167637825012,
"calls": 1
},
"transformer.forward": {
"seconds": 17.962014980614185,
"calls": 8
},
"vae.decode": {
"seconds": 0.147530660033226,
"calls": 1
}
},
"stage_seconds_sum": 18.16623731702566,
"unattributed_seconds": 0.07609756290912628,
"empty_cache": true,
"instrument": true
}
],
"summary": {
"cold_seconds": 22.075800754129887,
"hot_mean_seconds": 18.168260062734287,
"hot_median_seconds": 17.778217256069183,
"hot_min_seconds": 16.71645325422287,
"hot_max_seconds": 20.010109677910805,
"hot_max_reserved_mib": 48116,
"hot_max_allocated_mib": 12064
}
},
{
"name": "qmm_none_empty_true",
"enabled_qmm_components": [],
"empty_cache": true,
"load": {
"seconds": 54.613617569208145,
"gpu_start_mib": 542,
"gpu_end_mib": 10094,
"gpu_peak_mib": 10094,
"torch_peak_allocated_mib": 9563,
"torch_peak_reserved_mib": 9572,
"qmm_states": {
"pe": {
"requested": false,
"actual": false
},
"text_encoder": {
"requested": false,
"actual": false
},
"transformer": {
"requested": false,
"actual": false
}
}
},
"speed_rows": [
{
"prompt_id": "00-cyrillic-poster",
"width": 1024,
"height": 1024,
"seconds": 21.959903195500374,
"gpu_start_mib": 10094,
"gpu_end_mib": 27822,
"torch_peak_allocated_mib": 12002,
"torch_peak_reserved_mib": 48100,
"stages": {},
"stage_seconds_sum": 0,
"unattributed_seconds": null,
"empty_cache": true,
"instrument": false
},
{
"prompt_id": "01-long-text-bakery-ad",
"width": 896,
"height": 1200,
"seconds": 19.555034309625626,
"gpu_start_mib": 10096,
"gpu_end_mib": 19358,
"torch_peak_allocated_mib": 12064,
"torch_peak_reserved_mib": 48098,
"stages": {},
"stage_seconds_sum": 0,
"unattributed_seconds": null,
"empty_cache": true,
"instrument": false
},
{
"prompt_id": "02-technical-diagram",
"width": 1200,
"height": 896,
"seconds": 17.357625499367714,
"gpu_start_mib": 10094,
"gpu_end_mib": 23640,
"torch_peak_allocated_mib": 12063,
"torch_peak_reserved_mib": 48106,
"stages": {},
"stage_seconds_sum": 0,
"unattributed_seconds": null,
"empty_cache": true,
"instrument": false
},
{
"prompt_id": "03-four-panel-comic",
"width": 1024,
"height": 1024,
"seconds": 17.224217273294926,
"gpu_start_mib": 10094,
"gpu_end_mib": 25464,
"torch_peak_allocated_mib": 12002,
"torch_peak_reserved_mib": 48108,
"stages": {},
"stage_seconds_sum": 0,
"unattributed_seconds": null,
"empty_cache": true,
"instrument": false
}
],
"stage_rows": [
{
"prompt_id": "00-cyrillic-poster",
"width": 1024,
"height": 1024,
"seconds": 16.347896233201027,
"gpu_start_mib": 10094,
"gpu_end_mib": 27822,
"torch_peak_allocated_mib": 12002,
"torch_peak_reserved_mib": 48100,
"stages": {
"text_encoder.forward": {
"seconds": 0.054269738495349884,
"calls": 1
},
"transformer.forward": {
"seconds": 16.09693694859743,
"calls": 8
},
"vae.decode": {
"seconds": 0.16162345558404922,
"calls": 1
}
},
"stage_seconds_sum": 16.31283014267683,
"unattributed_seconds": 0.035066090524196625,
"empty_cache": true,
"instrument": true
},
{
"prompt_id": "02-technical-diagram",
"width": 1200,
"height": 896,
"seconds": 17.620373338460922,
"gpu_start_mib": 10096,
"gpu_end_mib": 23640,
"torch_peak_allocated_mib": 12063,
"torch_peak_reserved_mib": 48106,
"stages": {
"text_encoder.forward": {
"seconds": 0.05568608641624451,
"calls": 1
},
"transformer.forward": {
"seconds": 17.351328901946545,
"calls": 8
},
"vae.decode": {
"seconds": 0.16200313717126846,
"calls": 1
}
},
"stage_seconds_sum": 17.569018125534058,
"unattributed_seconds": 0.051355212926864624,
"empty_cache": true,
"instrument": true
}
],
"summary": {
"cold_seconds": 21.959903195500374,
"hot_mean_seconds": 18.04562569409609,
"hot_median_seconds": 17.357625499367714,
"hot_min_seconds": 17.224217273294926,
"hot_max_seconds": 19.555034309625626,
"hot_max_reserved_mib": 48108,
"hot_max_allocated_mib": 12064
}
},
{
"name": "qmm_text_encoder_off_transformer_off_empty_false",
"enabled_qmm_components": [],
"empty_cache": false,
"load": {
"seconds": 52.21603240072727,
"gpu_start_mib": 542,
"gpu_end_mib": 10094,
"gpu_peak_mib": 10094,
"torch_peak_allocated_mib": 9563,
"torch_peak_reserved_mib": 9572,
"qmm_states": {
"pe": {
"requested": false,
"actual": false
},
"text_encoder": {
"requested": false,
"actual": false
},
"transformer": {
"requested": false,
"actual": false
}
}
},
"speed_rows": [
{
"prompt_id": "00-cyrillic-poster",
"width": 1024,
"height": 1024,
"seconds": 16.301372595131397,
"gpu_start_mib": 10094,
"gpu_end_mib": 27822,
"torch_peak_allocated_mib": 12002,
"torch_peak_reserved_mib": 48100,
"stages": {},
"stage_seconds_sum": 0,
"unattributed_seconds": null,
"empty_cache": false,
"instrument": false
},
{
"prompt_id": "01-long-text-bakery-ad",
"width": 896,
"height": 1200,
"seconds": 20.227899312973022,
"gpu_start_mib": 27822,
"gpu_end_mib": 19358,
"torch_peak_allocated_mib": 12064,
"torch_peak_reserved_mib": 48116,
"stages": {},
"stage_seconds_sum": 0,
"unattributed_seconds": null,
"empty_cache": false,
"instrument": false
},
{
"prompt_id": "02-technical-diagram",
"width": 1200,
"height": 896,
"seconds": 17.426542527973652,
"gpu_start_mib": 19358,
"gpu_end_mib": 32300,
"torch_peak_allocated_mib": 12063,
"torch_peak_reserved_mib": 48116,
"stages": {},
"stage_seconds_sum": 0,
"unattributed_seconds": null,
"empty_cache": false,
"instrument": false
},
{
"prompt_id": "03-four-panel-comic",
"width": 1024,
"height": 1024,
"seconds": 13.776540502905846,
"gpu_start_mib": 32300,
"gpu_end_mib": 32362,
"torch_peak_allocated_mib": 12002,
"torch_peak_reserved_mib": 48108,
"stages": {},
"stage_seconds_sum": 0,
"unattributed_seconds": null,
"empty_cache": false,
"instrument": false
}
],
"stage_rows": [],
"summary": {
"cold_seconds": 16.301372595131397,
"hot_mean_seconds": 17.143660781284172,
"hot_median_seconds": 17.426542527973652,
"hot_min_seconds": 13.776540502905846,
"hot_max_seconds": 20.227899312973022,
"hot_max_reserved_mib": 48116,
"hot_max_allocated_mib": 12064
}
}
],
"sdnq": "0.1.9"
}