WaveCut
/

ERNIE-Image-Turbo-SDNQ-uint4-static

ErnieImagePipeline

quantized-matmul

Model card Files Files and versions

ERNIE-Image-Turbo-SDNQ-uint4-static / runtime_config.json

WaveCut's picture

Document corrected ERNIE qmm runtime profile

b292728 verified 3 days ago

history blame contribute delete

997 Bytes

	{
	"runtime": {
	"recommended_torch_cuda_alloc_conf": null,
	"avoid_torch_cuda_alloc_conf": [
	"expandable_segments:True,max_split_size_mb:32"
	],
	"keep_model_resident": true,
	"avoid_empty_cache_between_generations": true,
	"use_pe_for_image_benchmarks": false
	},
	"sdnq": {
	"requires_explicit_apply_quantized_matmul": true,
	"apply_quantized_matmul_components": [
	"pe",
	"text_encoder",
	"transformer"
	],
	"apply_quantized_matmul_function": "sdnq.loader.apply_sdnq_options_to_model(component, use_quantized_matmul=True)"
	},
	"validated": {
	"device": "NVIDIA RTX 6000 Ada Generation",
	"torch": "2.8.0+cu128",
	"sdnq": "0.1.9",
	"num_inference_steps": 8,
	"guidance_scale": 1.0,
	"use_pe": false
	},
	"metrics": {
	"explicit_quantized_matmul_default_allocator": "metrics/ernie_uint4_qmm_explicit_default_allocator_8step_metrics.json",
	"allocator_debug": "metrics/runtime_allocator_debug_metrics.json"
	}
	}