{
  "batcher": null,
  "cacher": null,
  "compiler": "torch_compile",
  "factorizer": null,
  "kernel": null,
  "pruner": null,
  "quantizer": "hqq",
  "hqq_backend": "torchao_int4",
  "hqq_compute_dtype": "torch.bfloat16",
  "hqq_force_hf_implementation": false,
  "hqq_group_size": 128,
  "hqq_use_torchao_kernels": true,
  "hqq_weight_bits": 4,
  "torch_compile_backend": "cudagraphs",
  "torch_compile_dynamic": true,
  "torch_compile_fullgraph": false,
  "torch_compile_make_portable": true,
  "torch_compile_max_kv_cache_size": 1600,
  "torch_compile_mode": "reduce-overhead",
  "torch_compile_seqlen_manual_cuda_graph": 800,
  "torch_compile_target": "model",
  "batch_size": 1,
  "device": "cuda:0",
  "device_map": null,
  "save_fns": [
    "hqq",
    "save_before_apply"
  ],
  "load_fns": [
    "torch_artifacts",
    "hqq"
  ],
  "reapply_after_load": {
    "factorizer": null,
    "pruner": null,
    "quantizer": null,
    "kernel": null,
    "cacher": null,
    "compiler": "torch_compile",
    "batcher": null
  }
}