class GraphModule(torch.nn.Module):
    def forward(self, L_w_: "f32[4096, 8192][8192, 1]cuda:0", L_scale_: "f32[4096][1]cuda:0", L_zero_point_: "i32[4096][1]cuda:0"):
        l_w_ = L_w_
        l_scale_ = L_scale_
        l_zero_point_ = L_zero_point_

        # File: /shared_volume/repos/quark/bench_qdq.py:8 in run_scaled_fake_quantize, code: return scaled_fake_quantize(quant_dtype, w, scale, zero_point, axis, group_size, quant_min, quant_max, round_mode, qscheme, mx_element_dtype)
        function_ctx = torch.autograd.function.FunctionCtx();  function_ctx = None

        # File: /shared_volume/repos/quark/quark/torch/kernel/__init__.py:168 in forward, code: return ops.quark.scaled_fake_quantize(quant_dtype, inputs, scale, zero_point, axis, group_size, quant_min,
        scaled_fake_quantize: "f32[4096, 8192][8192, 1]cuda:0" = torch.ops.quark.scaled_fake_quantize('int4', l_w_, l_scale_, l_zero_point_, 0, 0, 0, 15, 0, 'per_channel', 'haha');  l_w_ = l_scale_ = l_zero_point_ = None
        return (scaled_fake_quantize,)
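For reference, a minimal sketch of how a listing like this can be reproduced. This is an assumption, not the original bench_qdq.py: the import path quark.torch.kernel.scaled_fake_quantize is inferred from the File: comments in the trace, the printing_backend helper is a stand-in for however the original dump was produced, and the tensor shapes simply mirror the f32[4096, 8192] / f32[4096] / i32[4096] annotations in the graph.

import torch
# Import path assumed from the File: comments in the graph above.
from quark.torch.kernel import scaled_fake_quantize

def run_scaled_fake_quantize(w, scale, zero_point):
    # Argument values copied verbatim from the traced op call above:
    # quant_dtype='int4', axis=0, group_size=0, quant_min=0, quant_max=15,
    # round_mode=0, qscheme='per_channel', mx_element_dtype='haha'.
    return scaled_fake_quantize('int4', w, scale, zero_point,
                                0, 0, 0, 15, 0, 'per_channel', 'haha')

def printing_backend(gm: torch.fx.GraphModule, example_inputs):
    # Hypothetical debug backend: print the captured graph in the same
    # "class GraphModule" form as above, then run it unmodified.
    gm.print_readable()
    return gm.forward

# Shapes and dtypes match the annotations in the captured graph.
w = torch.randn(4096, 8192, device="cuda")
scale = torch.rand(4096, device="cuda")
zero_point = torch.zeros(4096, dtype=torch.int32, device="cuda")

compiled = torch.compile(run_scaled_fake_quantize, backend=printing_backend)
out = compiled(w, scale, zero_point)

The same listing can also be obtained without a custom backend by running with TORCH_LOGS=graph_code. Note the function_ctx = torch.autograd.function.FunctionCtx(); function_ctx = None pair in the graph: it is the residue of Dynamo tracing through the autograd.Function wrapper in quark/torch/kernel/__init__.py, whose forward it inlines down to the single torch.ops.quark.scaled_fake_quantize custom-op call.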