| { | |
| "batcher": null, | |
| "cacher": "fora", | |
| "compiler": "torch_compile", | |
| "factorizer": null, | |
| "pruner": null, | |
| "quantizer": "torchao", | |
| "fora_interval": 3, | |
| "fora_start_step": 2, | |
| "torch_compile_backend": "inductor", | |
| "torch_compile_dynamic": null, | |
| "torch_compile_fullgraph": true, | |
| "torch_compile_make_portable": false, | |
| "torch_compile_max_kv_cache_size": 400, | |
| "torch_compile_mode": "max-autotune-no-cudagraphs", | |
| "torch_compile_seqlen_manual_cuda_graph": 100, | |
| "torch_compile_target": "model", | |
| "torchao_excluded_modules": "norm+embedding", | |
| "torchao_quant_type": "int8dq", | |
| "batch_size": 1, | |
| "device": "cpu", | |
| "device_map": null, | |
| "save_fns": [ | |
| "save_before_apply", | |
| "save_before_apply" | |
| ], | |
| "load_fns": [ | |
| "diffusers" | |
| ], | |
| "reapply_after_load": { | |
| "factorizer": null, | |
| "pruner": null, | |
| "quantizer": "torchao", | |
| "cacher": "fora", | |
| "compiler": "torch_compile", | |
| "batcher": null | |
| } | |
| } |