components: ar128_cl4096_1_of_3: inputs: input_ids: shape: - 1 - 128 dtype: int32 outputs: /model/model/embed_tokens/Gather_output_0: shape: - 1 - 128 - 3072 dtype: uint16 quantization_parameters: scale: 8.687509536888685e-06 offset: -27540 ar128_cl4096_2_of_3: inputs: /model/model/embed_tokens/Gather_output_0: shape: - 1 - 128 - 3072 dtype: uint16 quantization_parameters: scale: 8.687509536888685e-06 offset: -27540 attention_mask: shape: - 1 - 1 - 128 - 4096 dtype: uint16 quantization_parameters: scale: 0.0007629510948348211 offset: -65535 position_ids_cos: shape: - 1 - 1 - 128 - 64 dtype: uint16 quantization_parameters: scale: 3.0518043793392844e-05 offset: -32768 position_ids_sin: shape: - 1 - 1 - 128 - 64 dtype: uint16 quantization_parameters: scale: 3.0518043793392844e-05 offset: -32768 past_key_0_in: shape: - 8 - 1 - 128 - 3968 dtype: uint8 quantization_parameters: scale: 0.15415094792842865 offset: -128 past_value_0_in: shape: - 8 - 1 - 3968 - 128 dtype: uint8 quantization_parameters: scale: 0.0076334718614816666 offset: -128 past_key_1_in: shape: - 8 - 1 - 128 - 3968 dtype: uint8 quantization_parameters: scale: 0.09501257538795471 offset: -128 past_value_1_in: shape: - 8 - 1 - 3968 - 128 dtype: uint8 quantization_parameters: scale: 0.018255468428604246 offset: -128 past_key_2_in: shape: - 8 - 1 - 128 - 3968 dtype: uint8 quantization_parameters: scale: 0.12992745637893677 offset: -128 past_value_2_in: shape: - 8 - 1 - 3968 - 128 dtype: uint8 quantization_parameters: scale: 0.025279987515426997 offset: -128 past_key_3_in: shape: - 8 - 1 - 128 - 3968 dtype: uint8 quantization_parameters: scale: 0.1447511507770208 offset: -128 past_value_3_in: shape: - 8 - 1 - 3968 - 128 dtype: uint8 quantization_parameters: scale: 0.028148770332336426 offset: -128 past_key_4_in: shape: - 8 - 1 - 128 - 3968 dtype: uint8 quantization_parameters: scale: 0.12199506233996293 offset: -128 past_value_4_in: shape: - 8 - 1 - 3968 - 128 dtype: uint8 quantization_parameters: scale: 0.02944980831596795 offset: -128 past_key_5_in: shape: - 8 - 1 - 128 - 3968 dtype: uint8 quantization_parameters: scale: 0.12595602869987488 offset: -128 past_value_5_in: shape: - 8 - 1 - 3968 - 128 dtype: uint8 quantization_parameters: scale: 0.028479063883423805 offset: -128 past_key_6_in: shape: - 8 - 1 - 128 - 3968 dtype: uint8 quantization_parameters: scale: 0.12635421752929688 offset: -128 past_value_6_in: shape: - 8 - 1 - 3968 - 128 dtype: uint8 quantization_parameters: scale: 0.025955546647310257 offset: -128 past_key_7_in: shape: - 8 - 1 - 128 - 3968 dtype: uint8 quantization_parameters: scale: 0.13450774550437927 offset: -128 past_value_7_in: shape: - 8 - 1 - 3968 - 128 dtype: uint8 quantization_parameters: scale: 0.024671796738632082 offset: -128 past_key_8_in: shape: - 8 - 1 - 128 - 3968 dtype: uint8 quantization_parameters: scale: 0.1284546439103254 offset: -128 past_value_8_in: shape: - 8 - 1 - 3968 - 128 dtype: uint8 quantization_parameters: scale: 0.030749136582016945 offset: -128 past_key_9_in: shape: - 8 - 1 - 128 - 3968 dtype: uint8 quantization_parameters: scale: 0.16326546669006348 offset: -128 past_value_9_in: shape: - 8 - 1 - 3968 - 128 dtype: uint8 quantization_parameters: scale: 0.03575011125699742 offset: -128 past_key_10_in: shape: - 8 - 1 - 128 - 3968 dtype: uint8 quantization_parameters: scale: 0.1229240819811821 offset: -128 past_value_10_in: shape: - 8 - 1 - 3968 - 128 dtype: uint8 quantization_parameters: scale: 0.032141510397195816 offset: -128 past_key_11_in: shape: - 8 - 1 - 128 - 3968 dtype: uint8 quantization_parameters: scale: 0.13410881161689758 offset: -128 past_value_11_in: shape: - 8 - 1 - 3968 - 128 dtype: uint8 quantization_parameters: scale: 0.02836606258482445 offset: -128 past_key_12_in: shape: - 8 - 1 - 128 - 3968 dtype: uint8 quantization_parameters: scale: 0.11923930794000626 offset: -128 past_value_12_in: shape: - 8 - 1 - 3968 - 128 dtype: uint8 quantization_parameters: scale: 0.03199498108991488 offset: -128 past_key_13_in: shape: - 8 - 1 - 128 - 3968 dtype: uint8 quantization_parameters: scale: 0.14966769481268455 offset: -128 past_value_13_in: shape: - 8 - 1 - 3968 - 128 dtype: uint8 quantization_parameters: scale: 0.028661564579160196 offset: -128 outputs: /model/model/layers.13/Add_1_output_0: shape: - 1 - 128 - 3072 dtype: uint16 quantization_parameters: scale: 0.011241219636185578 offset: -31773 past_key_0_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.15415094792842865 offset: -128 past_value_0_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.0076334718614816666 offset: -128 past_key_1_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.09501257538795471 offset: -128 past_value_1_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.018255468428604246 offset: -128 past_key_2_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.12992745637893677 offset: -128 past_value_2_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.025279987515426997 offset: -128 past_key_3_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.1447511507770208 offset: -128 past_value_3_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.028148770332336426 offset: -128 past_key_4_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.12199506233996293 offset: -128 past_value_4_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.02944980831596795 offset: -128 past_key_5_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.12595602869987488 offset: -128 past_value_5_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.028479063883423805 offset: -128 past_key_6_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.12635421752929688 offset: -128 past_value_6_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.025955546647310257 offset: -128 past_key_7_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.13450774550437927 offset: -128 past_value_7_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.024671796738632082 offset: -128 past_key_8_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.1284546439103254 offset: -128 past_value_8_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.030749136582016945 offset: -128 past_key_9_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.16326546669006348 offset: -128 past_value_9_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.03575011125699742 offset: -128 past_key_10_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.1229240819811821 offset: -128 past_value_10_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.032141510397195816 offset: -128 past_key_11_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.13410881161689758 offset: -128 past_value_11_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.02836606258482445 offset: -128 past_key_12_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.11923930794000626 offset: -128 past_value_12_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.03199498108991488 offset: -128 past_key_13_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.14966769481268455 offset: -128 past_value_13_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.028661564579160196 offset: -128 ar128_cl4096_3_of_3: inputs: /model/model/layers.13/Add_1_output_0: shape: - 1 - 128 - 3072 dtype: uint16 quantization_parameters: scale: 0.011241219636185578 offset: -31773 attention_mask: shape: - 1 - 1 - 128 - 4096 dtype: uint16 quantization_parameters: scale: 0.0007629510948348211 offset: -65535 position_ids_cos: shape: - 1 - 1 - 128 - 64 dtype: uint16 quantization_parameters: scale: 3.0518043793392844e-05 offset: -32768 position_ids_sin: shape: - 1 - 1 - 128 - 64 dtype: uint16 quantization_parameters: scale: 3.0518043793392844e-05 offset: -32768 past_key_14_in: shape: - 8 - 1 - 128 - 3968 dtype: uint8 quantization_parameters: scale: 0.13996938269908035 offset: -128 past_value_14_in: shape: - 8 - 1 - 3968 - 128 dtype: uint8 quantization_parameters: scale: 0.028563514469176765 offset: -128 past_key_15_in: shape: - 8 - 1 - 128 - 3968 dtype: uint8 quantization_parameters: scale: 0.125163213474544 offset: -128 past_value_15_in: shape: - 8 - 1 - 3968 - 128 dtype: uint8 quantization_parameters: scale: 0.027315943259892502 offset: -128 past_key_16_in: shape: - 8 - 1 - 128 - 3968 dtype: uint8 quantization_parameters: scale: 0.12602750522883857 offset: -128 past_value_16_in: shape: - 8 - 1 - 3968 - 128 dtype: uint8 quantization_parameters: scale: 0.033199068158864975 offset: -128 past_key_17_in: shape: - 8 - 1 - 128 - 3968 dtype: uint8 quantization_parameters: scale: 0.13656628131866455 offset: -128 past_value_17_in: shape: - 8 - 1 - 3968 - 128 dtype: uint8 quantization_parameters: scale: 0.030598552208247146 offset: -128 past_key_18_in: shape: - 8 - 1 - 128 - 3968 dtype: uint8 quantization_parameters: scale: 0.11889628913458877 offset: -128 past_value_18_in: shape: - 8 - 1 - 3968 - 128 dtype: uint8 quantization_parameters: scale: 0.032141249626874924 offset: -128 past_key_19_in: shape: - 8 - 1 - 128 - 3968 dtype: uint8 quantization_parameters: scale: 0.13976185023784637 offset: -128 past_value_19_in: shape: - 8 - 1 - 3968 - 128 dtype: uint8 quantization_parameters: scale: 0.032557712765190545 offset: -128 past_key_20_in: shape: - 8 - 1 - 128 - 3968 dtype: uint8 quantization_parameters: scale: 0.11934278142733837 offset: -128 past_value_20_in: shape: - 8 - 1 - 3968 - 128 dtype: uint8 quantization_parameters: scale: 0.027935825288295746 offset: -128 past_key_21_in: shape: - 8 - 1 - 128 - 3968 dtype: uint8 quantization_parameters: scale: 0.13167049377922 offset: -128 past_value_21_in: shape: - 8 - 1 - 3968 - 128 dtype: uint8 quantization_parameters: scale: 0.04469357803463936 offset: -128 past_key_22_in: shape: - 8 - 1 - 128 - 3968 dtype: uint8 quantization_parameters: scale: 0.13328912689929873 offset: -128 past_value_22_in: shape: - 8 - 1 - 3968 - 128 dtype: uint8 quantization_parameters: scale: 0.03745593875646591 offset: -128 past_key_23_in: shape: - 8 - 1 - 128 - 3968 dtype: uint8 quantization_parameters: scale: 0.12827403151144193 offset: -128 past_value_23_in: shape: - 8 - 1 - 3968 - 128 dtype: uint8 quantization_parameters: scale: 0.032266516238451004 offset: -128 past_key_24_in: shape: - 8 - 1 - 128 - 3968 dtype: uint8 quantization_parameters: scale: 0.14478352665901184 offset: -128 past_value_24_in: shape: - 8 - 1 - 3968 - 128 dtype: uint8 quantization_parameters: scale: 0.04626827540360098 offset: -128 past_key_25_in: shape: - 8 - 1 - 128 - 3968 dtype: uint8 quantization_parameters: scale: 0.17984355716254768 offset: -128 past_value_25_in: shape: - 8 - 1 - 3968 - 128 dtype: uint8 quantization_parameters: scale: 0.04539092882411686 offset: -128 past_key_26_in: shape: - 8 - 1 - 128 - 3968 dtype: uint8 quantization_parameters: scale: 0.1248558149563046 offset: -128 past_value_26_in: shape: - 8 - 1 - 3968 - 128 dtype: uint8 quantization_parameters: scale: 0.05314682051539421 offset: -128 past_key_27_in: shape: - 8 - 1 - 128 - 3968 dtype: uint8 quantization_parameters: scale: 0.13326138257980347 offset: -128 past_value_27_in: shape: - 8 - 1 - 3968 - 128 dtype: uint8 quantization_parameters: scale: 0.04699636995792389 offset: -128 outputs: logits: shape: - 1 - 128 - 128256 dtype: uint16 quantization_parameters: scale: 0.0006455201492198974 offset: -24293 past_key_14_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.13996938269908035 offset: -128 past_value_14_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.028563514469176765 offset: -128 past_key_15_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.125163213474544 offset: -128 past_value_15_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.027315943259892502 offset: -128 past_key_16_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.12602750522883857 offset: -128 past_value_16_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.033199068158864975 offset: -128 past_key_17_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.13656628131866455 offset: -128 past_value_17_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.030598552208247146 offset: -128 past_key_18_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.11889628913458877 offset: -128 past_value_18_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.032141249626874924 offset: -128 past_key_19_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.13976185023784637 offset: -128 past_value_19_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.032557712765190545 offset: -128 past_key_20_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.11934278142733837 offset: -128 past_value_20_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.027935825288295746 offset: -128 past_key_21_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.13167049377922 offset: -128 past_value_21_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.04469357803463936 offset: -128 past_key_22_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.13328912689929873 offset: -128 past_value_22_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.03745593875646591 offset: -128 past_key_23_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.12827403151144193 offset: -128 past_value_23_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.032266516238451004 offset: -128 past_key_24_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.14478352665901184 offset: -128 past_value_24_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.04626827540360098 offset: -128 past_key_25_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.17984355716254768 offset: -128 past_value_25_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.04539092882411686 offset: -128 past_key_26_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.1248558149563046 offset: -128 past_value_26_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.05314682051539421 offset: -128 past_key_27_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.13326138257980347 offset: -128 past_value_27_out: shape: - 8 - 1 - 128 - 128 dtype: uint8 quantization_parameters: scale: 0.04699636995792389 offset: -128 ar1_cl4096_1_of_3: inputs: input_ids: shape: - 1 - 1 dtype: int32 outputs: /model/model/embed_tokens/Gather_output_0: shape: - 1 - 1 - 3072 dtype: uint16 quantization_parameters: scale: 8.687509536888685e-06 offset: -27540 ar1_cl4096_2_of_3: inputs: /model/model/embed_tokens/Gather_output_0: shape: - 1 - 1 - 3072 dtype: uint16 quantization_parameters: scale: 8.687509536888685e-06 offset: -27540 attention_mask: shape: - 1 - 1 - 1 - 4096 dtype: uint16 quantization_parameters: scale: 0.0007629510948348211 offset: -65535 position_ids_cos: shape: - 1 - 1 - 1 - 64 dtype: uint16 quantization_parameters: scale: 3.0518043793392844e-05 offset: -32768 position_ids_sin: shape: - 1 - 1 - 1 - 64 dtype: uint16 quantization_parameters: scale: 3.0518043793392844e-05 offset: -32768 past_key_0_in: shape: - 8 - 1 - 128 - 4095 dtype: uint8 quantization_parameters: scale: 0.15415094792842865 offset: -128 past_value_0_in: shape: - 8 - 1 - 4095 - 128 dtype: uint8 quantization_parameters: scale: 0.0076334718614816666 offset: -128 past_key_1_in: shape: - 8 - 1 - 128 - 4095 dtype: uint8 quantization_parameters: scale: 0.09501257538795471 offset: -128 past_value_1_in: shape: - 8 - 1 - 4095 - 128 dtype: uint8 quantization_parameters: scale: 0.018255468428604246 offset: -128 past_key_2_in: shape: - 8 - 1 - 128 - 4095 dtype: uint8 quantization_parameters: scale: 0.12992745637893677 offset: -128 past_value_2_in: shape: - 8 - 1 - 4095 - 128 dtype: uint8 quantization_parameters: scale: 0.025279987515426997 offset: -128 past_key_3_in: shape: - 8 - 1 - 128 - 4095 dtype: uint8 quantization_parameters: scale: 0.1447511507770208 offset: -128 past_value_3_in: shape: - 8 - 1 - 4095 - 128 dtype: uint8 quantization_parameters: scale: 0.028148770332336426 offset: -128 past_key_4_in: shape: - 8 - 1 - 128 - 4095 dtype: uint8 quantization_parameters: scale: 0.12199506233996293 offset: -128 past_value_4_in: shape: - 8 - 1 - 4095 - 128 dtype: uint8 quantization_parameters: scale: 0.02944980831596795 offset: -128 past_key_5_in: shape: - 8 - 1 - 128 - 4095 dtype: uint8 quantization_parameters: scale: 0.12595602869987488 offset: -128 past_value_5_in: shape: - 8 - 1 - 4095 - 128 dtype: uint8 quantization_parameters: scale: 0.028479063883423805 offset: -128 past_key_6_in: shape: - 8 - 1 - 128 - 4095 dtype: uint8 quantization_parameters: scale: 0.12635421752929688 offset: -128 past_value_6_in: shape: - 8 - 1 - 4095 - 128 dtype: uint8 quantization_parameters: scale: 0.025955546647310257 offset: -128 past_key_7_in: shape: - 8 - 1 - 128 - 4095 dtype: uint8 quantization_parameters: scale: 0.13450774550437927 offset: -128 past_value_7_in: shape: - 8 - 1 - 4095 - 128 dtype: uint8 quantization_parameters: scale: 0.024671796738632082 offset: -128 past_key_8_in: shape: - 8 - 1 - 128 - 4095 dtype: uint8 quantization_parameters: scale: 0.1284546439103254 offset: -128 past_value_8_in: shape: - 8 - 1 - 4095 - 128 dtype: uint8 quantization_parameters: scale: 0.030749136582016945 offset: -128 past_key_9_in: shape: - 8 - 1 - 128 - 4095 dtype: uint8 quantization_parameters: scale: 0.16326546669006348 offset: -128 past_value_9_in: shape: - 8 - 1 - 4095 - 128 dtype: uint8 quantization_parameters: scale: 0.03575011125699742 offset: -128 past_key_10_in: shape: - 8 - 1 - 128 - 4095 dtype: uint8 quantization_parameters: scale: 0.1229240819811821 offset: -128 past_value_10_in: shape: - 8 - 1 - 4095 - 128 dtype: uint8 quantization_parameters: scale: 0.032141510397195816 offset: -128 past_key_11_in: shape: - 8 - 1 - 128 - 4095 dtype: uint8 quantization_parameters: scale: 0.13410881161689758 offset: -128 past_value_11_in: shape: - 8 - 1 - 4095 - 128 dtype: uint8 quantization_parameters: scale: 0.02836606258482445 offset: -128 past_key_12_in: shape: - 8 - 1 - 128 - 4095 dtype: uint8 quantization_parameters: scale: 0.11923930794000626 offset: -128 past_value_12_in: shape: - 8 - 1 - 4095 - 128 dtype: uint8 quantization_parameters: scale: 0.03199498108991488 offset: -128 past_key_13_in: shape: - 8 - 1 - 128 - 4095 dtype: uint8 quantization_parameters: scale: 0.14966769481268455 offset: -128 past_value_13_in: shape: - 8 - 1 - 4095 - 128 dtype: uint8 quantization_parameters: scale: 0.028661564579160196 offset: -128 outputs: /model/model/layers.13/Add_1_output_0: shape: - 1 - 1 - 3072 dtype: uint16 quantization_parameters: scale: 0.011241219636185578 offset: -31773 past_key_0_out: shape: - 8 - 1 - 128 - 1 dtype: uint8 quantization_parameters: scale: 0.15415094792842865 offset: -128 past_value_0_out: shape: - 8 - 1 - 1 - 128 dtype: uint8 quantization_parameters: scale: 0.0076334718614816666 offset: -128 past_key_1_out: shape: - 8 - 1 - 128 - 1 dtype: uint8 quantization_parameters: scale: 0.09501257538795471 offset: -128 past_value_1_out: shape: - 8 - 1 - 1 - 128 dtype: uint8 quantization_parameters: scale: 0.018255468428604246 offset: -128 past_key_2_out: shape: - 8 - 1 - 128 - 1 dtype: uint8 quantization_parameters: scale: 0.12992745637893677 offset: -128 past_value_2_out: shape: - 8 - 1 - 1 - 128 dtype: uint8 quantization_parameters: scale: 0.025279987515426997 offset: -128 past_key_3_out: shape: - 8 - 1 - 128 - 1 dtype: uint8 quantization_parameters: scale: 0.1447511507770208 offset: -128 past_value_3_out: shape: - 8 - 1 - 1 - 128 dtype: uint8 quantization_parameters: scale: 0.028148770332336426 offset: -128 past_key_4_out: shape: - 8 - 1 - 128 - 1 dtype: uint8 quantization_parameters: scale: 0.12199506233996293 offset: -128 past_value_4_out: shape: - 8 - 1 - 1 - 128 dtype: uint8 quantization_parameters: scale: 0.02944980831596795 offset: -128 past_key_5_out: shape: - 8 - 1 - 128 - 1 dtype: uint8 quantization_parameters: scale: 0.12595602869987488 offset: -128 past_value_5_out: shape: - 8 - 1 - 1 - 128 dtype: uint8 quantization_parameters: scale: 0.028479063883423805 offset: -128 past_key_6_out: shape: - 8 - 1 - 128 - 1 dtype: uint8 quantization_parameters: scale: 0.12635421752929688 offset: -128 past_value_6_out: shape: - 8 - 1 - 1 - 128 dtype: uint8 quantization_parameters: scale: 0.025955546647310257 offset: -128 past_key_7_out: shape: - 8 - 1 - 128 - 1 dtype: uint8 quantization_parameters: scale: 0.13450774550437927 offset: -128 past_value_7_out: shape: - 8 - 1 - 1 - 128 dtype: uint8 quantization_parameters: scale: 0.024671796738632082 offset: -128 past_key_8_out: shape: - 8 - 1 - 128 - 1 dtype: uint8 quantization_parameters: scale: 0.1284546439103254 offset: -128 past_value_8_out: shape: - 8 - 1 - 1 - 128 dtype: uint8 quantization_parameters: scale: 0.030749136582016945 offset: -128 past_key_9_out: shape: - 8 - 1 - 128 - 1 dtype: uint8 quantization_parameters: scale: 0.16326546669006348 offset: -128 past_value_9_out: shape: - 8 - 1 - 1 - 128 dtype: uint8 quantization_parameters: scale: 0.03575011125699742 offset: -128 past_key_10_out: shape: - 8 - 1 - 128 - 1 dtype: uint8 quantization_parameters: scale: 0.1229240819811821 offset: -128 past_value_10_out: shape: - 8 - 1 - 1 - 128 dtype: uint8 quantization_parameters: scale: 0.032141510397195816 offset: -128 past_key_11_out: shape: - 8 - 1 - 128 - 1 dtype: uint8 quantization_parameters: scale: 0.13410881161689758 offset: -128 past_value_11_out: shape: - 8 - 1 - 1 - 128 dtype: uint8 quantization_parameters: scale: 0.02836606258482445 offset: -128 past_key_12_out: shape: - 8 - 1 - 128 - 1 dtype: uint8 quantization_parameters: scale: 0.11923930794000626 offset: -128 past_value_12_out: shape: - 8 - 1 - 1 - 128 dtype: uint8 quantization_parameters: scale: 0.03199498108991488 offset: -128 past_key_13_out: shape: - 8 - 1 - 128 - 1 dtype: uint8 quantization_parameters: scale: 0.14966769481268455 offset: -128 past_value_13_out: shape: - 8 - 1 - 1 - 128 dtype: uint8 quantization_parameters: scale: 0.028661564579160196 offset: -128 ar1_cl4096_3_of_3: inputs: /model/model/layers.13/Add_1_output_0: shape: - 1 - 1 - 3072 dtype: uint16 quantization_parameters: scale: 0.011241219636185578 offset: -31773 attention_mask: shape: - 1 - 1 - 1 - 4096 dtype: uint16 quantization_parameters: scale: 0.0007629510948348211 offset: -65535 position_ids_cos: shape: - 1 - 1 - 1 - 64 dtype: uint16 quantization_parameters: scale: 3.0518043793392844e-05 offset: -32768 position_ids_sin: shape: - 1 - 1 - 1 - 64 dtype: uint16 quantization_parameters: scale: 3.0518043793392844e-05 offset: -32768 past_key_14_in: shape: - 8 - 1 - 128 - 4095 dtype: uint8 quantization_parameters: scale: 0.13996938269908035 offset: -128 past_value_14_in: shape: - 8 - 1 - 4095 - 128 dtype: uint8 quantization_parameters: scale: 0.028563514469176765 offset: -128 past_key_15_in: shape: - 8 - 1 - 128 - 4095 dtype: uint8 quantization_parameters: scale: 0.125163213474544 offset: -128 past_value_15_in: shape: - 8 - 1 - 4095 - 128 dtype: uint8 quantization_parameters: scale: 0.027315943259892502 offset: -128 past_key_16_in: shape: - 8 - 1 - 128 - 4095 dtype: uint8 quantization_parameters: scale: 0.12602750522883857 offset: -128 past_value_16_in: shape: - 8 - 1 - 4095 - 128 dtype: uint8 quantization_parameters: scale: 0.033199068158864975 offset: -128 past_key_17_in: shape: - 8 - 1 - 128 - 4095 dtype: uint8 quantization_parameters: scale: 0.13656628131866455 offset: -128 past_value_17_in: shape: - 8 - 1 - 4095 - 128 dtype: uint8 quantization_parameters: scale: 0.030598552208247146 offset: -128 past_key_18_in: shape: - 8 - 1 - 128 - 4095 dtype: uint8 quantization_parameters: scale: 0.11889628913458877 offset: -128 past_value_18_in: shape: - 8 - 1 - 4095 - 128 dtype: uint8 quantization_parameters: scale: 0.032141249626874924 offset: -128 past_key_19_in: shape: - 8 - 1 - 128 - 4095 dtype: uint8 quantization_parameters: scale: 0.13976185023784637 offset: -128 past_value_19_in: shape: - 8 - 1 - 4095 - 128 dtype: uint8 quantization_parameters: scale: 0.032557712765190545 offset: -128 past_key_20_in: shape: - 8 - 1 - 128 - 4095 dtype: uint8 quantization_parameters: scale: 0.11934278142733837 offset: -128 past_value_20_in: shape: - 8 - 1 - 4095 - 128 dtype: uint8 quantization_parameters: scale: 0.027935825288295746 offset: -128 past_key_21_in: shape: - 8 - 1 - 128 - 4095 dtype: uint8 quantization_parameters: scale: 0.13167049377922 offset: -128 past_value_21_in: shape: - 8 - 1 - 4095 - 128 dtype: uint8 quantization_parameters: scale: 0.04469357803463936 offset: -128 past_key_22_in: shape: - 8 - 1 - 128 - 4095 dtype: uint8 quantization_parameters: scale: 0.13328912689929873 offset: -128 past_value_22_in: shape: - 8 - 1 - 4095 - 128 dtype: uint8 quantization_parameters: scale: 0.03745593875646591 offset: -128 past_key_23_in: shape: - 8 - 1 - 128 - 4095 dtype: uint8 quantization_parameters: scale: 0.12827403151144193 offset: -128 past_value_23_in: shape: - 8 - 1 - 4095 - 128 dtype: uint8 quantization_parameters: scale: 0.032266516238451004 offset: -128 past_key_24_in: shape: - 8 - 1 - 128 - 4095 dtype: uint8 quantization_parameters: scale: 0.14478352665901184 offset: -128 past_value_24_in: shape: - 8 - 1 - 4095 - 128 dtype: uint8 quantization_parameters: scale: 0.04626827540360098 offset: -128 past_key_25_in: shape: - 8 - 1 - 128 - 4095 dtype: uint8 quantization_parameters: scale: 0.17984355716254768 offset: -128 past_value_25_in: shape: - 8 - 1 - 4095 - 128 dtype: uint8 quantization_parameters: scale: 0.04539092882411686 offset: -128 past_key_26_in: shape: - 8 - 1 - 128 - 4095 dtype: uint8 quantization_parameters: scale: 0.1248558149563046 offset: -128 past_value_26_in: shape: - 8 - 1 - 4095 - 128 dtype: uint8 quantization_parameters: scale: 0.05314682051539421 offset: -128 past_key_27_in: shape: - 8 - 1 - 128 - 4095 dtype: uint8 quantization_parameters: scale: 0.13326138257980347 offset: -128 past_value_27_in: shape: - 8 - 1 - 4095 - 128 dtype: uint8 quantization_parameters: scale: 0.04699636995792389 offset: -128 outputs: logits: shape: - 1 - 1 - 128256 dtype: uint16 quantization_parameters: scale: 0.0006455201492198974 offset: -24293 past_key_14_out: shape: - 8 - 1 - 128 - 1 dtype: uint8 quantization_parameters: scale: 0.13996938269908035 offset: -128 past_value_14_out: shape: - 8 - 1 - 1 - 128 dtype: uint8 quantization_parameters: scale: 0.028563514469176765 offset: -128 past_key_15_out: shape: - 8 - 1 - 128 - 1 dtype: uint8 quantization_parameters: scale: 0.125163213474544 offset: -128 past_value_15_out: shape: - 8 - 1 - 1 - 128 dtype: uint8 quantization_parameters: scale: 0.027315943259892502 offset: -128 past_key_16_out: shape: - 8 - 1 - 128 - 1 dtype: uint8 quantization_parameters: scale: 0.12602750522883857 offset: -128 past_value_16_out: shape: - 8 - 1 - 1 - 128 dtype: uint8 quantization_parameters: scale: 0.033199068158864975 offset: -128 past_key_17_out: shape: - 8 - 1 - 128 - 1 dtype: uint8 quantization_parameters: scale: 0.13656628131866455 offset: -128 past_value_17_out: shape: - 8 - 1 - 1 - 128 dtype: uint8 quantization_parameters: scale: 0.030598552208247146 offset: -128 past_key_18_out: shape: - 8 - 1 - 128 - 1 dtype: uint8 quantization_parameters: scale: 0.11889628913458877 offset: -128 past_value_18_out: shape: - 8 - 1 - 1 - 128 dtype: uint8 quantization_parameters: scale: 0.032141249626874924 offset: -128 past_key_19_out: shape: - 8 - 1 - 128 - 1 dtype: uint8 quantization_parameters: scale: 0.13976185023784637 offset: -128 past_value_19_out: shape: - 8 - 1 - 1 - 128 dtype: uint8 quantization_parameters: scale: 0.032557712765190545 offset: -128 past_key_20_out: shape: - 8 - 1 - 128 - 1 dtype: uint8 quantization_parameters: scale: 0.11934278142733837 offset: -128 past_value_20_out: shape: - 8 - 1 - 1 - 128 dtype: uint8 quantization_parameters: scale: 0.027935825288295746 offset: -128 past_key_21_out: shape: - 8 - 1 - 128 - 1 dtype: uint8 quantization_parameters: scale: 0.13167049377922 offset: -128 past_value_21_out: shape: - 8 - 1 - 1 - 128 dtype: uint8 quantization_parameters: scale: 0.04469357803463936 offset: -128 past_key_22_out: shape: - 8 - 1 - 128 - 1 dtype: uint8 quantization_parameters: scale: 0.13328912689929873 offset: -128 past_value_22_out: shape: - 8 - 1 - 1 - 128 dtype: uint8 quantization_parameters: scale: 0.03745593875646591 offset: -128 past_key_23_out: shape: - 8 - 1 - 128 - 1 dtype: uint8 quantization_parameters: scale: 0.12827403151144193 offset: -128 past_value_23_out: shape: - 8 - 1 - 1 - 128 dtype: uint8 quantization_parameters: scale: 0.032266516238451004 offset: -128 past_key_24_out: shape: - 8 - 1 - 128 - 1 dtype: uint8 quantization_parameters: scale: 0.14478352665901184 offset: -128 past_value_24_out: shape: - 8 - 1 - 1 - 128 dtype: uint8 quantization_parameters: scale: 0.04626827540360098 offset: -128 past_key_25_out: shape: - 8 - 1 - 128 - 1 dtype: uint8 quantization_parameters: scale: 0.17984355716254768 offset: -128 past_value_25_out: shape: - 8 - 1 - 1 - 128 dtype: uint8 quantization_parameters: scale: 0.04539092882411686 offset: -128 past_key_26_out: shape: - 8 - 1 - 128 - 1 dtype: uint8 quantization_parameters: scale: 0.1248558149563046 offset: -128 past_value_26_out: shape: - 8 - 1 - 1 - 128 dtype: uint8 quantization_parameters: scale: 0.05314682051539421 offset: -128 past_key_27_out: shape: - 8 - 1 - 128 - 1 dtype: uint8 quantization_parameters: scale: 0.13326138257980347 offset: -128 past_value_27_out: shape: - 8 - 1 - 1 - 128 dtype: uint8 quantization_parameters: scale: 0.04699636995792389 offset: -128 precision: w4a16 runtime: genie