WaveCut
/

Lens-Turbo-SDNQ-uint4-static

@@ -111,6 +111,29 @@ Hardware: RunPod NVIDIA H100 80GB HBM3, PyTorch 2.8.0 CUDA 12.8 container, local
 Transformer-only footprint is computed from safetensors tensor storage for the denoising transformer parameter tensors only; it excludes allocator overhead and non-transformer components. The original transformer tensors are F32; the corrected SDNQ transformer stores quantized tensors as U8 plus the excluded modulation layers as BF16.
 ## 10-Prompt Matrix
 | ID | Scenario | Seed | Original time, s | Quant time, s | Delta | Original peak allocated VRAM, GB | Quant peak allocated VRAM, GB |

 Transformer-only footprint is computed from safetensors tensor storage for the denoising transformer parameter tensors only; it excludes allocator overhead and non-transformer components. The original transformer tensors are F32; the corrected SDNQ transformer stores quantized tensors as U8 plus the excluded modulation layers as BF16.
+### Model CPU Offload Benchmark
+Same hardware and the same 10 prompts, run with `pipe.enable_model_cpu_offload()`. The reported load time is measured against a warm local Hugging Face cache on the container disk, so model download time is excluded. Each model was measured in its own fresh Python process. `Cold generation` is P01, the first generation immediately after load/offload setup; `Warm generation` aggregates P02-P10.
+| Metric | Original Lens-Turbo | SDNQ uint4 static fixed |
+| --- | ---: | ---: |
+| Offload setup/load time, seconds | 15.411 | 12.371 |
+| Offload setup peak allocated VRAM, GB | 12.582 | 12.582 |
+| Offload setup peak reserved VRAM, GB | 13.881 | 13.881 |
+| Cold generation time, seconds | 8.434 | 8.440 |
+| Cold generation peak allocated VRAM, GB | 18.945 | 15.085 |
+| Cold generation peak reserved VRAM, GB | 19.262 | 15.238 |
+| Warm generation average time, seconds | 5.731 | 4.976 |
+| Warm generation median time, seconds | 5.141 | 3.855 |
+| Warm generation average peak allocated VRAM, GB | 18.945 | 15.084 |
+| Warm generation average peak reserved VRAM, GB | 19.267 | 15.249 |
+| Warm generation max peak allocated VRAM, GB | 18.968 | 15.104 |
+| Warm generation max peak reserved VRAM, GB | 19.290 | 15.280 |
+Raw offload benchmark data: [`model_cpu_offload_benchmark.json`](model_cpu_offload_benchmark.json).
+In `model_cpu_offload` mode the setup/load VRAM peak is dominated by non-transformer components, so the load peak is effectively unchanged. During generation, where the denoising transformer is active, the SDNQ variant lowers the average peak allocated VRAM on the warm prompts (P02-P10) by about 3.861 GB (from 18.945 GB to 15.084 GB), a 20.4% reduction versus the original model.
 ## 10-Prompt Matrix
 | ID | Scenario | Seed | Original time, s | Quant time, s | Delta | Original peak allocated VRAM, GB | Quant peak allocated VRAM, GB |

model_cpu_offload_benchmark.json ADDED Viewed

	@@ -0,0 +1,283 @@

+{
+  "benchmark": "model_cpu_offload_cold_warm",
+  "hardware": "RunPod NVIDIA H100 80GB HBM3 (H100 SXM)",
+  "mode": "Diffusers enable_model_cpu_offload()",
+  "cache_state": "warm local HF cache; no download time included",
+  "process_isolation": "single model per fresh Python process",
+  "base_resolution": 1024,
+  "aspect_ratio": "1:1",
+  "num_inference_steps": 4,
+  "guidance_scale": 1.0,
+  "dtype": "torch.bfloat16",
+  "definitions": {
+    "load_time_s": "Pipeline load plus enable_model_cpu_offload setup from warm local HF cache; download time excluded.",
+    "cold_generation": "P01, first generation immediately after fresh process load/offload setup.",
+    "warm_generation": "P02-P10 after the cold P01 generation."
+  },
+  "models": {
+    "base": {
+      "hardware": "RunPod NVIDIA H100 80GB HBM3 (H100 SXM)",
+      "mode": "Diffusers enable_model_cpu_offload()",
+      "cache_state": "warm local HF cache; no download time included",
+      "process_isolation": "single model per fresh Python process",
+      "base_resolution": 1024,
+      "aspect_ratio": "1:1",
+      "num_inference_steps": 4,
+      "guidance_scale": 1.0,
+      "dtype": "torch.bfloat16",
+      "kind": "base",
+      "load": {
+        "load_time_s": 15.411,
+        "peak_allocated_gb": 12.582,
+        "peak_reserved_gb": 13.881,
+        "end_allocated_gb": 10.185,
+        "end_reserved_gb": 10.679
+      },
+      "summary": {
+        "cold_time_s": 8.434,
+        "cold_peak_allocated_gb": 18.945,
+        "cold_peak_reserved_gb": 19.262,
+        "warm_avg_time_s": 5.731,
+        "warm_median_time_s": 5.141,
+        "warm_avg_peak_allocated_gb": 18.945,
+        "warm_avg_peak_reserved_gb": 19.267,
+        "warm_max_peak_allocated_gb": 18.968,
+        "warm_max_peak_reserved_gb": 19.29
+      },
+      "prompts": [
+        {
+          "id": "P01",
+          "title": "Orbital Night Market",
+          "seed": 101,
+          "time_s": 8.434,
+          "peak_allocated_gb": 18.945,
+          "peak_reserved_gb": 19.262,
+          "end_allocated_gb": 10.221,
+          "end_reserved_gb": 10.895
+        },
+        {
+          "id": "P02",
+          "title": "Arctic Research Desk",
+          "seed": 102,
+          "time_s": 5.613,
+          "peak_allocated_gb": 18.949,
+          "peak_reserved_gb": 19.258,
+          "end_allocated_gb": 10.221,
+          "end_reserved_gb": 10.893
+        },
+        {
+          "id": "P03",
+          "title": "Victorian Automaton Repair",
+          "seed": 103,
+          "time_s": 4.006,
+          "peak_allocated_gb": 18.944,
+          "peak_reserved_gb": 19.271,
+          "end_allocated_gb": 10.221,
+          "end_reserved_gb": 10.792
+        },
+        {
+          "id": "P04",
+          "title": "Mars Greenhouse Control Room",
+          "seed": 104,
+          "time_s": 7.509,
+          "peak_allocated_gb": 18.938,
+          "peak_reserved_gb": 19.258,
+          "end_allocated_gb": 10.221,
+          "end_reserved_gb": 10.872
+        },
+        {
+          "id": "P05",
+          "title": "Lost Railway Poster Wall",
+          "seed": 105,
+          "time_s": 4.658,
+          "peak_allocated_gb": 18.939,
+          "peak_reserved_gb": 19.258,
+          "end_allocated_gb": 10.221,
+          "end_reserved_gb": 10.872
+        },
+        {
+          "id": "P06",
+          "title": "Miniature Courtroom Diorama",
+          "seed": 106,
+          "time_s": 5.141,
+          "peak_allocated_gb": 18.942,
+          "peak_reserved_gb": 19.271,
+          "end_allocated_gb": 10.221,
+          "end_reserved_gb": 10.792
+        },
+        {
+          "id": "P07",
+          "title": "Rainy Seoul Book Cafe",
+          "seed": 107,
+          "time_s": 5.134,
+          "peak_allocated_gb": 18.942,
+          "peak_reserved_gb": 19.271,
+          "end_allocated_gb": 10.221,
+          "end_reserved_gb": 10.792
+        },
+        {
+          "id": "P08",
+          "title": "Oceanographic Expedition Map",
+          "seed": 108,
+          "time_s": 4.657,
+          "peak_allocated_gb": 18.942,
+          "peak_reserved_gb": 19.271,
+          "end_allocated_gb": 10.221,
+          "end_reserved_gb": 10.792
+        },
+        {
+          "id": "P09",
+          "title": "Renaissance Lab Notebook",
+          "seed": 109,
+          "time_s": 5.944,
+          "peak_allocated_gb": 18.939,
+          "peak_reserved_gb": 19.258,
+          "end_allocated_gb": 10.221,
+          "end_reserved_gb": 10.872
+        },
+        {
+          "id": "P10",
+          "title": "Russian Provincial Print Shop",
+          "seed": 110,
+          "time_s": 8.916,
+          "peak_allocated_gb": 18.968,
+          "peak_reserved_gb": 19.29,
+          "end_allocated_gb": 10.221,
+          "end_reserved_gb": 10.798
+        }
+      ]
+    },
+    "quant": {
+      "hardware": "RunPod NVIDIA H100 80GB HBM3 (H100 SXM)",
+      "mode": "Diffusers enable_model_cpu_offload()",
+      "cache_state": "warm local HF cache; no download time included",
+      "process_isolation": "single model per fresh Python process",
+      "base_resolution": 1024,
+      "aspect_ratio": "1:1",
+      "num_inference_steps": 4,
+      "guidance_scale": 1.0,
+      "dtype": "torch.bfloat16",
+      "kind": "quant",
+      "load": {
+        "load_time_s": 12.371,
+        "peak_allocated_gb": 12.582,
+        "peak_reserved_gb": 13.881,
+        "end_allocated_gb": 10.185,
+        "end_reserved_gb": 10.679
+      },
+      "summary": {
+        "cold_time_s": 8.44,
+        "cold_peak_allocated_gb": 15.085,
+        "cold_peak_reserved_gb": 15.238,
+        "warm_avg_time_s": 4.976,
+        "warm_median_time_s": 3.855,
+        "warm_avg_peak_allocated_gb": 15.084,
+        "warm_avg_peak_reserved_gb": 15.249,
+        "warm_max_peak_allocated_gb": 15.104,
+        "warm_max_peak_reserved_gb": 15.28
+      },
+      "prompts": [
+        {
+          "id": "P01",
+          "title": "Orbital Night Market",
+          "seed": 101,
+          "time_s": 8.44,
+          "peak_allocated_gb": 15.085,
+          "peak_reserved_gb": 15.238,
+          "end_allocated_gb": 10.221,
+          "end_reserved_gb": 10.872
+        },
+        {
+          "id": "P02",
+          "title": "Arctic Research Desk",
+          "seed": 102,
+          "time_s": 6.726,
+          "peak_allocated_gb": 15.089,
+          "peak_reserved_gb": 15.261,
+          "end_allocated_gb": 10.221,
+          "end_reserved_gb": 10.878
+        },
+        {
+          "id": "P03",
+          "title": "Victorian Automaton Repair",
+          "seed": 103,
+          "time_s": 8.244,
+          "peak_allocated_gb": 15.081,
+          "peak_reserved_gb": 15.246,
+          "end_allocated_gb": 10.221,
+          "end_reserved_gb": 10.775
+        },
+        {
+          "id": "P04",
+          "title": "Mars Greenhouse Control Room",
+          "seed": 104,
+          "time_s": 4.033,
+          "peak_allocated_gb": 15.079,
+          "peak_reserved_gb": 15.238,
+          "end_allocated_gb": 10.221,
+          "end_reserved_gb": 10.874
+        },
+        {
+          "id": "P05",
+          "title": "Lost Railway Poster Wall",
+          "seed": 105,
+          "time_s": 3.836,
+          "peak_allocated_gb": 15.08,
+          "peak_reserved_gb": 15.238,
+          "end_allocated_gb": 10.221,
+          "end_reserved_gb": 10.876
+        },
+        {
+          "id": "P06",
+          "title": "Miniature Courtroom Diorama",
+          "seed": 106,
+          "time_s": 3.845,
+          "peak_allocated_gb": 15.08,
+          "peak_reserved_gb": 15.246,
+          "end_allocated_gb": 10.221,
+          "end_reserved_gb": 10.775
+        },
+        {
+          "id": "P07",
+          "title": "Rainy Seoul Book Cafe",
+          "seed": 107,
+          "time_s": 3.855,
+          "peak_allocated_gb": 15.08,
+          "peak_reserved_gb": 15.246,
+          "end_allocated_gb": 10.221,
+          "end_reserved_gb": 10.773
+        },
+        {
+          "id": "P08",
+          "title": "Oceanographic Expedition Map",
+          "seed": 108,
+          "time_s": 3.841,
+          "peak_allocated_gb": 15.08,
+          "peak_reserved_gb": 15.246,
+          "end_allocated_gb": 10.221,
+          "end_reserved_gb": 10.773
+        },
+        {
+          "id": "P09",
+          "title": "Renaissance Lab Notebook",
+          "seed": 109,
+          "time_s": 3.852,
+          "peak_allocated_gb": 15.08,
+          "peak_reserved_gb": 15.238,
+          "end_allocated_gb": 10.221,
+          "end_reserved_gb": 10.876
+        },
+        {
+          "id": "P10",
+          "title": "Russian Provincial Print Shop",
+          "seed": 110,
+          "time_s": 6.556,
+          "peak_allocated_gb": 15.104,
+          "peak_reserved_gb": 15.28,
+          "end_allocated_gb": 10.221,
+          "end_reserved_gb": 10.781
+        }
+      ]
+    }
+  }
+}