---
# Quantization recipe: apply GPTQ weight-only quantization (W4A16) to all
# Linear layers, skipping the output head and MoE gating projections.
default_stage:
  default_modifiers:
    GPTQModifier:
      # Quantize every Linear module...
      targets: [Linear]
      # ...except the LM head and expert-routing gates, which are kept in
      # full precision (quantizing gates typically hurts MoE routing).
      ignore: [lm_head, mlp.gate, shared_expert_gate]
      # 4-bit weights, 16-bit activations.
      scheme: W4A16
      # Column block size used during sequential GPTQ weight updates.
      block_size: 128
      # Fraction of the mean Hessian diagonal added as damping for
      # numerical stability when inverting the Hessian.
      dampening_frac: 0.1
      # Static activation-order: quantize columns by decreasing Hessian
      # diagonal, precomputed once.
      actorder: static
      # Keep Hessians on the accelerator rather than offloading to CPU.
      offload_hessians: false