import warnings

import torch
import torch.quantization as tq


def apply_int8(model, calibrate_fn=None):
    """Statically quantize *model* to int8 in place with the fbgemm qconfig.

    Eager-mode static quantization has three ordered steps: ``prepare``
    attaches observers, a calibration pass feeds representative data through
    the observed model, and ``convert`` turns the collected statistics into
    quantization parameters. Calibration therefore has to run *between*
    ``prepare`` and ``convert``; statistics gathered before this call are not
    used, because ``prepare`` attaches fresh observers.

    Args:
        model: Module to quantize. It is modified in place.
            NOTE(review): PyTorch's static-quantization guide expects the
            model to be in eval mode (and fused, if desired) beforehand;
            this function does not change the mode -- confirm callers do.
        calibrate_fn: Optional callable invoked as ``calibrate_fn(model)``
            after observers are attached. It should run representative
            inputs through the model. It is called with gradients disabled.

    Returns:
        The same ``model`` object, converted in place.
    """
    model.qconfig = tq.get_default_qconfig("fbgemm")
    tq.prepare(model, inplace=True)

    if calibrate_fn is not None:
        # Observers only record ranges during forward passes, so this is the
        # one point where calibration can influence the converted model.
        with torch.no_grad():
            calibrate_fn(model)
    else:
        warnings.warn(
            "apply_int8 called without calibrate_fn: observers have seen no "
            "data, so activations will be quantized with default parameters",
            stacklevel=2,
        )

    tq.convert(model, inplace=True)
    return model