TREE_GUARD_MANAGER: +- RootGuardManager | +- DEFAULT_DEVICE: utils_device.CURRENT_DEVICE == None # _dynamo/output_graph.py:520 in init_ambient_guards | +- GLOBAL_STATE: ___check_global_state() | +- TORCH_FUNCTION_MODE_STACK: ___check_torch_function_mode_stack() | +- GuardManager: source=L['w'], accessed_by=FrameLocalsGuardAccessor(key='w', framelocals_idx=1) | | +- TENSOR_MATCH: check_tensor(L['w'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[4096, 8192], stride=[8192, 1]) | | +- NO_HASATTR: hasattr(L['w'], '_dynamo_dynamic_indices') == False | | +- NO_TENSOR_ALIASING: check_no_aliasing(L['w'], L['scale'], L['zero_point']) | +- GuardManager: source=L['axis'], accessed_by=FrameLocalsGuardAccessor(key='axis', framelocals_idx=4) | | +- EQUALS_MATCH: L['axis'] == 0 | +- GuardManager: source=L['scale'], accessed_by=FrameLocalsGuardAccessor(key='scale', framelocals_idx=2) | | +- TENSOR_MATCH: check_tensor(L['scale'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.float32, device=0, requires_grad=False, size=[4096], stride=[1]) | | +- NO_HASATTR: hasattr(L['scale'], '_dynamo_dynamic_indices') == False | | +- NO_TENSOR_ALIASING | +- GuardManager: source=L['qscheme'], accessed_by=FrameLocalsGuardAccessor(key='qscheme', framelocals_idx=9) | | +- EQUALS_MATCH: L['qscheme'] == 'per_channel' | +- GuardManager: source=L['quant_max'], accessed_by=FrameLocalsGuardAccessor(key='quant_max', framelocals_idx=7) | | +- EQUALS_MATCH: L['quant_max'] == 15 | +- GuardManager: source=L['quant_min'], accessed_by=FrameLocalsGuardAccessor(key='quant_min', framelocals_idx=6) | | +- EQUALS_MATCH: L['quant_min'] == 0 | +- GuardManager: source=L['group_size'], accessed_by=FrameLocalsGuardAccessor(key='group_size', framelocals_idx=5) | | +- EQUALS_MATCH: L['group_size'] == 0 | +- GuardManager: source=L['round_mode'], accessed_by=FrameLocalsGuardAccessor(key='round_mode', framelocals_idx=8) | | +- EQUALS_MATCH: L['round_mode'] == 0 | +- GuardManager: source=L['zero_point'], accessed_by=FrameLocalsGuardAccessor(key='zero_point', framelocals_idx=3) | | +- TENSOR_MATCH: check_tensor(L['zero_point'], Tensor, DispatchKeySet(CUDA, BackendSelect, ADInplaceOrView, AutogradCUDA), torch.int32, device=0, requires_grad=False, size=[4096], stride=[1]) | | +- NO_HASATTR: hasattr(L['zero_point'], '_dynamo_dynamic_indices') == False | | +- NO_TENSOR_ALIASING | +- GuardManager: source=L['quant_dtype'], accessed_by=FrameLocalsGuardAccessor(key='quant_dtype', framelocals_idx=0) | | +- EQUALS_MATCH: L['quant_dtype'] == 'int4' | +- GuardManager: source=L['mx_element_dtype'], accessed_by=FrameLocalsGuardAccessor(key='mx_element_dtype', framelocals_idx=10) | | +- EQUALS_MATCH: L['mx_element_dtype'] == 'haha' | +- GuardManager: source=G, accessed_by=GlobalsGuardAccessor | | +- GuardManager: source=G['scaled_fake_quantize'], accessed_by=DictGetItemGuardAccessor('scaled_fake_quantize') | | | +- ID_MATCH: ___check_obj_id(G['scaled_fake_quantize'], 140615015598720) | | +- GuardManager: source=G['__import_quark_dot_torch_dot_kernel'], accessed_by=DictGetItemGuardAccessor('__import_quark_dot_torch_dot_kernel') | | | +- ID_MATCH: ___check_obj_id(G['__import_quark_dot_torch_dot_kernel'], 140649485511360) | | | +- GuardManager: source=G['__import_quark_dot_torch_dot_kernel'].ops, accessed_by=GetAttrGuardAccessor(ops) | | | | +- ID_MATCH: ___check_obj_id(G['__import_quark_dot_torch_dot_kernel'].ops, 140649515082000) | | | | +- GuardManager: source=G['__import_quark_dot_torch_dot_kernel'].ops.quark, accessed_by=GetAttrGuardAccessor(quark) | | | | | +- ID_MATCH: ___check_obj_id(G['__import_quark_dot_torch_dot_kernel'].ops.quark, 140649455882608) | | | | | +- GuardManager: source=G['__import_quark_dot_torch_dot_kernel'].ops.quark.scaled_fake_quantize, accessed_by=GetAttrGuardAccessor(scaled_fake_quantize) | | | | | | +- ID_MATCH: ___check_obj_id(G['__import_quark_dot_torch_dot_kernel'].ops.quark.scaled_fake_quantize, 140614946665776) Guard latency = 0.84 us