Hariharasubramanian's picture
Upload Llama-3.2-3B-Instruct model for QCS9075 (HTP backend)
e148d8a verified
components:
ar128_cl4096_1_of_3:
inputs:
input_ids:
shape:
- 1
- 128
dtype: int32
outputs:
/model/model/embed_tokens/Gather_output_0:
shape:
- 1
- 128
- 3072
dtype: uint16
quantization_parameters:
scale: 8.687509536888685e-06
offset: -27540
ar128_cl4096_2_of_3:
inputs:
/model/model/embed_tokens/Gather_output_0:
shape:
- 1
- 128
- 3072
dtype: uint16
quantization_parameters:
scale: 8.687509536888685e-06
offset: -27540
attention_mask:
shape:
- 1
- 1
- 128
- 4096
dtype: uint16
quantization_parameters:
scale: 0.0007629510948348211
offset: -65535
position_ids_cos:
shape:
- 1
- 1
- 128
- 64
dtype: uint16
quantization_parameters:
scale: 3.0518043793392844e-05
offset: -32768
position_ids_sin:
shape:
- 1
- 1
- 128
- 64
dtype: uint16
quantization_parameters:
scale: 3.0518043793392844e-05
offset: -32768
past_key_0_in:
shape:
- 8
- 1
- 128
- 3968
dtype: uint8
quantization_parameters:
scale: 0.15415094792842865
offset: -128
past_value_0_in:
shape:
- 8
- 1
- 3968
- 128
dtype: uint8
quantization_parameters:
scale: 0.0076334718614816666
offset: -128
past_key_1_in:
shape:
- 8
- 1
- 128
- 3968
dtype: uint8
quantization_parameters:
scale: 0.09501257538795471
offset: -128
past_value_1_in:
shape:
- 8
- 1
- 3968
- 128
dtype: uint8
quantization_parameters:
scale: 0.018255468428604246
offset: -128
past_key_2_in:
shape:
- 8
- 1
- 128
- 3968
dtype: uint8
quantization_parameters:
scale: 0.12992745637893677
offset: -128
past_value_2_in:
shape:
- 8
- 1
- 3968
- 128
dtype: uint8
quantization_parameters:
scale: 0.025279987515426997
offset: -128
past_key_3_in:
shape:
- 8
- 1
- 128
- 3968
dtype: uint8
quantization_parameters:
scale: 0.1447511507770208
offset: -128
past_value_3_in:
shape:
- 8
- 1
- 3968
- 128
dtype: uint8
quantization_parameters:
scale: 0.028148770332336426
offset: -128
past_key_4_in:
shape:
- 8
- 1
- 128
- 3968
dtype: uint8
quantization_parameters:
scale: 0.12199506233996293
offset: -128
past_value_4_in:
shape:
- 8
- 1
- 3968
- 128
dtype: uint8
quantization_parameters:
scale: 0.02944980831596795
offset: -128
past_key_5_in:
shape:
- 8
- 1
- 128
- 3968
dtype: uint8
quantization_parameters:
scale: 0.12595602869987488
offset: -128
past_value_5_in:
shape:
- 8
- 1
- 3968
- 128
dtype: uint8
quantization_parameters:
scale: 0.028479063883423805
offset: -128
past_key_6_in:
shape:
- 8
- 1
- 128
- 3968
dtype: uint8
quantization_parameters:
scale: 0.12635421752929688
offset: -128
past_value_6_in:
shape:
- 8
- 1
- 3968
- 128
dtype: uint8
quantization_parameters:
scale: 0.025955546647310257
offset: -128
past_key_7_in:
shape:
- 8
- 1
- 128
- 3968
dtype: uint8
quantization_parameters:
scale: 0.13450774550437927
offset: -128
past_value_7_in:
shape:
- 8
- 1
- 3968
- 128
dtype: uint8
quantization_parameters:
scale: 0.024671796738632082
offset: -128
past_key_8_in:
shape:
- 8
- 1
- 128
- 3968
dtype: uint8
quantization_parameters:
scale: 0.1284546439103254
offset: -128
past_value_8_in:
shape:
- 8
- 1
- 3968
- 128
dtype: uint8
quantization_parameters:
scale: 0.030749136582016945
offset: -128
past_key_9_in:
shape:
- 8
- 1
- 128
- 3968
dtype: uint8
quantization_parameters:
scale: 0.16326546669006348
offset: -128
past_value_9_in:
shape:
- 8
- 1
- 3968
- 128
dtype: uint8
quantization_parameters:
scale: 0.03575011125699742
offset: -128
past_key_10_in:
shape:
- 8
- 1
- 128
- 3968
dtype: uint8
quantization_parameters:
scale: 0.1229240819811821
offset: -128
past_value_10_in:
shape:
- 8
- 1
- 3968
- 128
dtype: uint8
quantization_parameters:
scale: 0.032141510397195816
offset: -128
past_key_11_in:
shape:
- 8
- 1
- 128
- 3968
dtype: uint8
quantization_parameters:
scale: 0.13410881161689758
offset: -128
past_value_11_in:
shape:
- 8
- 1
- 3968
- 128
dtype: uint8
quantization_parameters:
scale: 0.02836606258482445
offset: -128
past_key_12_in:
shape:
- 8
- 1
- 128
- 3968
dtype: uint8
quantization_parameters:
scale: 0.11923930794000626
offset: -128
past_value_12_in:
shape:
- 8
- 1
- 3968
- 128
dtype: uint8
quantization_parameters:
scale: 0.03199498108991488
offset: -128
past_key_13_in:
shape:
- 8
- 1
- 128
- 3968
dtype: uint8
quantization_parameters:
scale: 0.14966769481268455
offset: -128
past_value_13_in:
shape:
- 8
- 1
- 3968
- 128
dtype: uint8
quantization_parameters:
scale: 0.028661564579160196
offset: -128
outputs:
/model/model/layers.13/Add_1_output_0:
shape:
- 1
- 128
- 3072
dtype: uint16
quantization_parameters:
scale: 0.011241219636185578
offset: -31773
past_key_0_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.15415094792842865
offset: -128
past_value_0_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.0076334718614816666
offset: -128
past_key_1_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.09501257538795471
offset: -128
past_value_1_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.018255468428604246
offset: -128
past_key_2_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.12992745637893677
offset: -128
past_value_2_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.025279987515426997
offset: -128
past_key_3_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.1447511507770208
offset: -128
past_value_3_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.028148770332336426
offset: -128
past_key_4_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.12199506233996293
offset: -128
past_value_4_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.02944980831596795
offset: -128
past_key_5_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.12595602869987488
offset: -128
past_value_5_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.028479063883423805
offset: -128
past_key_6_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.12635421752929688
offset: -128
past_value_6_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.025955546647310257
offset: -128
past_key_7_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.13450774550437927
offset: -128
past_value_7_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.024671796738632082
offset: -128
past_key_8_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.1284546439103254
offset: -128
past_value_8_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.030749136582016945
offset: -128
past_key_9_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.16326546669006348
offset: -128
past_value_9_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.03575011125699742
offset: -128
past_key_10_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.1229240819811821
offset: -128
past_value_10_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.032141510397195816
offset: -128
past_key_11_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.13410881161689758
offset: -128
past_value_11_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.02836606258482445
offset: -128
past_key_12_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.11923930794000626
offset: -128
past_value_12_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.03199498108991488
offset: -128
past_key_13_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.14966769481268455
offset: -128
past_value_13_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.028661564579160196
offset: -128
ar128_cl4096_3_of_3:
inputs:
/model/model/layers.13/Add_1_output_0:
shape:
- 1
- 128
- 3072
dtype: uint16
quantization_parameters:
scale: 0.011241219636185578
offset: -31773
attention_mask:
shape:
- 1
- 1
- 128
- 4096
dtype: uint16
quantization_parameters:
scale: 0.0007629510948348211
offset: -65535
position_ids_cos:
shape:
- 1
- 1
- 128
- 64
dtype: uint16
quantization_parameters:
scale: 3.0518043793392844e-05
offset: -32768
position_ids_sin:
shape:
- 1
- 1
- 128
- 64
dtype: uint16
quantization_parameters:
scale: 3.0518043793392844e-05
offset: -32768
past_key_14_in:
shape:
- 8
- 1
- 128
- 3968
dtype: uint8
quantization_parameters:
scale: 0.13996938269908035
offset: -128
past_value_14_in:
shape:
- 8
- 1
- 3968
- 128
dtype: uint8
quantization_parameters:
scale: 0.028563514469176765
offset: -128
past_key_15_in:
shape:
- 8
- 1
- 128
- 3968
dtype: uint8
quantization_parameters:
scale: 0.125163213474544
offset: -128
past_value_15_in:
shape:
- 8
- 1
- 3968
- 128
dtype: uint8
quantization_parameters:
scale: 0.027315943259892502
offset: -128
past_key_16_in:
shape:
- 8
- 1
- 128
- 3968
dtype: uint8
quantization_parameters:
scale: 0.12602750522883857
offset: -128
past_value_16_in:
shape:
- 8
- 1
- 3968
- 128
dtype: uint8
quantization_parameters:
scale: 0.033199068158864975
offset: -128
past_key_17_in:
shape:
- 8
- 1
- 128
- 3968
dtype: uint8
quantization_parameters:
scale: 0.13656628131866455
offset: -128
past_value_17_in:
shape:
- 8
- 1
- 3968
- 128
dtype: uint8
quantization_parameters:
scale: 0.030598552208247146
offset: -128
past_key_18_in:
shape:
- 8
- 1
- 128
- 3968
dtype: uint8
quantization_parameters:
scale: 0.11889628913458877
offset: -128
past_value_18_in:
shape:
- 8
- 1
- 3968
- 128
dtype: uint8
quantization_parameters:
scale: 0.032141249626874924
offset: -128
past_key_19_in:
shape:
- 8
- 1
- 128
- 3968
dtype: uint8
quantization_parameters:
scale: 0.13976185023784637
offset: -128
past_value_19_in:
shape:
- 8
- 1
- 3968
- 128
dtype: uint8
quantization_parameters:
scale: 0.032557712765190545
offset: -128
past_key_20_in:
shape:
- 8
- 1
- 128
- 3968
dtype: uint8
quantization_parameters:
scale: 0.11934278142733837
offset: -128
past_value_20_in:
shape:
- 8
- 1
- 3968
- 128
dtype: uint8
quantization_parameters:
scale: 0.027935825288295746
offset: -128
past_key_21_in:
shape:
- 8
- 1
- 128
- 3968
dtype: uint8
quantization_parameters:
scale: 0.13167049377922
offset: -128
past_value_21_in:
shape:
- 8
- 1
- 3968
- 128
dtype: uint8
quantization_parameters:
scale: 0.04469357803463936
offset: -128
past_key_22_in:
shape:
- 8
- 1
- 128
- 3968
dtype: uint8
quantization_parameters:
scale: 0.13328912689929873
offset: -128
past_value_22_in:
shape:
- 8
- 1
- 3968
- 128
dtype: uint8
quantization_parameters:
scale: 0.03745593875646591
offset: -128
past_key_23_in:
shape:
- 8
- 1
- 128
- 3968
dtype: uint8
quantization_parameters:
scale: 0.12827403151144193
offset: -128
past_value_23_in:
shape:
- 8
- 1
- 3968
- 128
dtype: uint8
quantization_parameters:
scale: 0.032266516238451004
offset: -128
past_key_24_in:
shape:
- 8
- 1
- 128
- 3968
dtype: uint8
quantization_parameters:
scale: 0.14478352665901184
offset: -128
past_value_24_in:
shape:
- 8
- 1
- 3968
- 128
dtype: uint8
quantization_parameters:
scale: 0.04626827540360098
offset: -128
past_key_25_in:
shape:
- 8
- 1
- 128
- 3968
dtype: uint8
quantization_parameters:
scale: 0.17984355716254768
offset: -128
past_value_25_in:
shape:
- 8
- 1
- 3968
- 128
dtype: uint8
quantization_parameters:
scale: 0.04539092882411686
offset: -128
past_key_26_in:
shape:
- 8
- 1
- 128
- 3968
dtype: uint8
quantization_parameters:
scale: 0.1248558149563046
offset: -128
past_value_26_in:
shape:
- 8
- 1
- 3968
- 128
dtype: uint8
quantization_parameters:
scale: 0.05314682051539421
offset: -128
past_key_27_in:
shape:
- 8
- 1
- 128
- 3968
dtype: uint8
quantization_parameters:
scale: 0.13326138257980347
offset: -128
past_value_27_in:
shape:
- 8
- 1
- 3968
- 128
dtype: uint8
quantization_parameters:
scale: 0.04699636995792389
offset: -128
outputs:
logits:
shape:
- 1
- 128
- 128256
dtype: uint16
quantization_parameters:
scale: 0.0006455201492198974
offset: -24293
past_key_14_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.13996938269908035
offset: -128
past_value_14_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.028563514469176765
offset: -128
past_key_15_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.125163213474544
offset: -128
past_value_15_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.027315943259892502
offset: -128
past_key_16_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.12602750522883857
offset: -128
past_value_16_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.033199068158864975
offset: -128
past_key_17_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.13656628131866455
offset: -128
past_value_17_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.030598552208247146
offset: -128
past_key_18_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.11889628913458877
offset: -128
past_value_18_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.032141249626874924
offset: -128
past_key_19_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.13976185023784637
offset: -128
past_value_19_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.032557712765190545
offset: -128
past_key_20_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.11934278142733837
offset: -128
past_value_20_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.027935825288295746
offset: -128
past_key_21_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.13167049377922
offset: -128
past_value_21_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.04469357803463936
offset: -128
past_key_22_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.13328912689929873
offset: -128
past_value_22_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.03745593875646591
offset: -128
past_key_23_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.12827403151144193
offset: -128
past_value_23_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.032266516238451004
offset: -128
past_key_24_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.14478352665901184
offset: -128
past_value_24_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.04626827540360098
offset: -128
past_key_25_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.17984355716254768
offset: -128
past_value_25_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.04539092882411686
offset: -128
past_key_26_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.1248558149563046
offset: -128
past_value_26_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.05314682051539421
offset: -128
past_key_27_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.13326138257980347
offset: -128
past_value_27_out:
shape:
- 8
- 1
- 128
- 128
dtype: uint8
quantization_parameters:
scale: 0.04699636995792389
offset: -128
ar1_cl4096_1_of_3:
inputs:
input_ids:
shape:
- 1
- 1
dtype: int32
outputs:
/model/model/embed_tokens/Gather_output_0:
shape:
- 1
- 1
- 3072
dtype: uint16
quantization_parameters:
scale: 8.687509536888685e-06
offset: -27540
ar1_cl4096_2_of_3:
inputs:
/model/model/embed_tokens/Gather_output_0:
shape:
- 1
- 1
- 3072
dtype: uint16
quantization_parameters:
scale: 8.687509536888685e-06
offset: -27540
attention_mask:
shape:
- 1
- 1
- 1
- 4096
dtype: uint16
quantization_parameters:
scale: 0.0007629510948348211
offset: -65535
position_ids_cos:
shape:
- 1
- 1
- 1
- 64
dtype: uint16
quantization_parameters:
scale: 3.0518043793392844e-05
offset: -32768
position_ids_sin:
shape:
- 1
- 1
- 1
- 64
dtype: uint16
quantization_parameters:
scale: 3.0518043793392844e-05
offset: -32768
past_key_0_in:
shape:
- 8
- 1
- 128
- 4095
dtype: uint8
quantization_parameters:
scale: 0.15415094792842865
offset: -128
past_value_0_in:
shape:
- 8
- 1
- 4095
- 128
dtype: uint8
quantization_parameters:
scale: 0.0076334718614816666
offset: -128
past_key_1_in:
shape:
- 8
- 1
- 128
- 4095
dtype: uint8
quantization_parameters:
scale: 0.09501257538795471
offset: -128
past_value_1_in:
shape:
- 8
- 1
- 4095
- 128
dtype: uint8
quantization_parameters:
scale: 0.018255468428604246
offset: -128
past_key_2_in:
shape:
- 8
- 1
- 128
- 4095
dtype: uint8
quantization_parameters:
scale: 0.12992745637893677
offset: -128
past_value_2_in:
shape:
- 8
- 1
- 4095
- 128
dtype: uint8
quantization_parameters:
scale: 0.025279987515426997
offset: -128
past_key_3_in:
shape:
- 8
- 1
- 128
- 4095
dtype: uint8
quantization_parameters:
scale: 0.1447511507770208
offset: -128
past_value_3_in:
shape:
- 8
- 1
- 4095
- 128
dtype: uint8
quantization_parameters:
scale: 0.028148770332336426
offset: -128
past_key_4_in:
shape:
- 8
- 1
- 128
- 4095
dtype: uint8
quantization_parameters:
scale: 0.12199506233996293
offset: -128
past_value_4_in:
shape:
- 8
- 1
- 4095
- 128
dtype: uint8
quantization_parameters:
scale: 0.02944980831596795
offset: -128
past_key_5_in:
shape:
- 8
- 1
- 128
- 4095
dtype: uint8
quantization_parameters:
scale: 0.12595602869987488
offset: -128
past_value_5_in:
shape:
- 8
- 1
- 4095
- 128
dtype: uint8
quantization_parameters:
scale: 0.028479063883423805
offset: -128
past_key_6_in:
shape:
- 8
- 1
- 128
- 4095
dtype: uint8
quantization_parameters:
scale: 0.12635421752929688
offset: -128
past_value_6_in:
shape:
- 8
- 1
- 4095
- 128
dtype: uint8
quantization_parameters:
scale: 0.025955546647310257
offset: -128
past_key_7_in:
shape:
- 8
- 1
- 128
- 4095
dtype: uint8
quantization_parameters:
scale: 0.13450774550437927
offset: -128
past_value_7_in:
shape:
- 8
- 1
- 4095
- 128
dtype: uint8
quantization_parameters:
scale: 0.024671796738632082
offset: -128
past_key_8_in:
shape:
- 8
- 1
- 128
- 4095
dtype: uint8
quantization_parameters:
scale: 0.1284546439103254
offset: -128
past_value_8_in:
shape:
- 8
- 1
- 4095
- 128
dtype: uint8
quantization_parameters:
scale: 0.030749136582016945
offset: -128
past_key_9_in:
shape:
- 8
- 1
- 128
- 4095
dtype: uint8
quantization_parameters:
scale: 0.16326546669006348
offset: -128
past_value_9_in:
shape:
- 8
- 1
- 4095
- 128
dtype: uint8
quantization_parameters:
scale: 0.03575011125699742
offset: -128
past_key_10_in:
shape:
- 8
- 1
- 128
- 4095
dtype: uint8
quantization_parameters:
scale: 0.1229240819811821
offset: -128
past_value_10_in:
shape:
- 8
- 1
- 4095
- 128
dtype: uint8
quantization_parameters:
scale: 0.032141510397195816
offset: -128
past_key_11_in:
shape:
- 8
- 1
- 128
- 4095
dtype: uint8
quantization_parameters:
scale: 0.13410881161689758
offset: -128
past_value_11_in:
shape:
- 8
- 1
- 4095
- 128
dtype: uint8
quantization_parameters:
scale: 0.02836606258482445
offset: -128
past_key_12_in:
shape:
- 8
- 1
- 128
- 4095
dtype: uint8
quantization_parameters:
scale: 0.11923930794000626
offset: -128
past_value_12_in:
shape:
- 8
- 1
- 4095
- 128
dtype: uint8
quantization_parameters:
scale: 0.03199498108991488
offset: -128
past_key_13_in:
shape:
- 8
- 1
- 128
- 4095
dtype: uint8
quantization_parameters:
scale: 0.14966769481268455
offset: -128
past_value_13_in:
shape:
- 8
- 1
- 4095
- 128
dtype: uint8
quantization_parameters:
scale: 0.028661564579160196
offset: -128
outputs:
/model/model/layers.13/Add_1_output_0:
shape:
- 1
- 1
- 3072
dtype: uint16
quantization_parameters:
scale: 0.011241219636185578
offset: -31773
past_key_0_out:
shape:
- 8
- 1
- 128
- 1
dtype: uint8
quantization_parameters:
scale: 0.15415094792842865
offset: -128
past_value_0_out:
shape:
- 8
- 1
- 1
- 128
dtype: uint8
quantization_parameters:
scale: 0.0076334718614816666
offset: -128
past_key_1_out:
shape:
- 8
- 1
- 128
- 1
dtype: uint8
quantization_parameters:
scale: 0.09501257538795471
offset: -128
past_value_1_out:
shape:
- 8
- 1
- 1
- 128
dtype: uint8
quantization_parameters:
scale: 0.018255468428604246
offset: -128
past_key_2_out:
shape:
- 8
- 1
- 128
- 1
dtype: uint8
quantization_parameters:
scale: 0.12992745637893677
offset: -128
past_value_2_out:
shape:
- 8
- 1
- 1
- 128
dtype: uint8
quantization_parameters:
scale: 0.025279987515426997
offset: -128
past_key_3_out:
shape:
- 8
- 1
- 128
- 1
dtype: uint8
quantization_parameters:
scale: 0.1447511507770208
offset: -128
past_value_3_out:
shape:
- 8
- 1
- 1
- 128
dtype: uint8
quantization_parameters:
scale: 0.028148770332336426
offset: -128
past_key_4_out:
shape:
- 8
- 1
- 128
- 1
dtype: uint8
quantization_parameters:
scale: 0.12199506233996293
offset: -128
past_value_4_out:
shape:
- 8
- 1
- 1
- 128
dtype: uint8
quantization_parameters:
scale: 0.02944980831596795
offset: -128
past_key_5_out:
shape:
- 8
- 1
- 128
- 1
dtype: uint8
quantization_parameters:
scale: 0.12595602869987488
offset: -128
past_value_5_out:
shape:
- 8
- 1
- 1
- 128
dtype: uint8
quantization_parameters:
scale: 0.028479063883423805
offset: -128
past_key_6_out:
shape:
- 8
- 1
- 128
- 1
dtype: uint8
quantization_parameters:
scale: 0.12635421752929688
offset: -128
past_value_6_out:
shape:
- 8
- 1
- 1
- 128
dtype: uint8
quantization_parameters:
scale: 0.025955546647310257
offset: -128
past_key_7_out:
shape:
- 8
- 1
- 128
- 1
dtype: uint8
quantization_parameters:
scale: 0.13450774550437927
offset: -128
past_value_7_out:
shape:
- 8
- 1
- 1
- 128
dtype: uint8
quantization_parameters:
scale: 0.024671796738632082
offset: -128
past_key_8_out:
shape:
- 8
- 1
- 128
- 1
dtype: uint8
quantization_parameters:
scale: 0.1284546439103254
offset: -128
past_value_8_out:
shape:
- 8
- 1
- 1
- 128
dtype: uint8
quantization_parameters:
scale: 0.030749136582016945
offset: -128
past_key_9_out:
shape:
- 8
- 1
- 128
- 1
dtype: uint8
quantization_parameters:
scale: 0.16326546669006348
offset: -128
past_value_9_out:
shape:
- 8
- 1
- 1
- 128
dtype: uint8
quantization_parameters:
scale: 0.03575011125699742
offset: -128
past_key_10_out:
shape:
- 8
- 1
- 128
- 1
dtype: uint8
quantization_parameters:
scale: 0.1229240819811821
offset: -128
past_value_10_out:
shape:
- 8
- 1
- 1
- 128
dtype: uint8
quantization_parameters:
scale: 0.032141510397195816
offset: -128
past_key_11_out:
shape:
- 8
- 1
- 128
- 1
dtype: uint8
quantization_parameters:
scale: 0.13410881161689758
offset: -128
past_value_11_out:
shape:
- 8
- 1
- 1
- 128
dtype: uint8
quantization_parameters:
scale: 0.02836606258482445
offset: -128
past_key_12_out:
shape:
- 8
- 1
- 128
- 1
dtype: uint8
quantization_parameters:
scale: 0.11923930794000626
offset: -128
past_value_12_out:
shape:
- 8
- 1
- 1
- 128
dtype: uint8
quantization_parameters:
scale: 0.03199498108991488
offset: -128
past_key_13_out:
shape:
- 8
- 1
- 128
- 1
dtype: uint8
quantization_parameters:
scale: 0.14966769481268455
offset: -128
past_value_13_out:
shape:
- 8
- 1
- 1
- 128
dtype: uint8
quantization_parameters:
scale: 0.028661564579160196
offset: -128
ar1_cl4096_3_of_3:
inputs:
/model/model/layers.13/Add_1_output_0:
shape:
- 1
- 1
- 3072
dtype: uint16
quantization_parameters:
scale: 0.011241219636185578
offset: -31773
attention_mask:
shape:
- 1
- 1
- 1
- 4096
dtype: uint16
quantization_parameters:
scale: 0.0007629510948348211
offset: -65535
position_ids_cos:
shape:
- 1
- 1
- 1
- 64
dtype: uint16
quantization_parameters:
scale: 3.0518043793392844e-05
offset: -32768
position_ids_sin:
shape:
- 1
- 1
- 1
- 64
dtype: uint16
quantization_parameters:
scale: 3.0518043793392844e-05
offset: -32768
past_key_14_in:
shape:
- 8
- 1
- 128
- 4095
dtype: uint8
quantization_parameters:
scale: 0.13996938269908035
offset: -128
past_value_14_in:
shape:
- 8
- 1
- 4095
- 128
dtype: uint8
quantization_parameters:
scale: 0.028563514469176765
offset: -128
past_key_15_in:
shape:
- 8
- 1
- 128
- 4095
dtype: uint8
quantization_parameters:
scale: 0.125163213474544
offset: -128
past_value_15_in:
shape:
- 8
- 1
- 4095
- 128
dtype: uint8
quantization_parameters:
scale: 0.027315943259892502
offset: -128
past_key_16_in:
shape:
- 8
- 1
- 128
- 4095
dtype: uint8
quantization_parameters:
scale: 0.12602750522883857
offset: -128
past_value_16_in:
shape:
- 8
- 1
- 4095
- 128
dtype: uint8
quantization_parameters:
scale: 0.033199068158864975
offset: -128
past_key_17_in:
shape:
- 8
- 1
- 128
- 4095
dtype: uint8
quantization_parameters:
scale: 0.13656628131866455
offset: -128
past_value_17_in:
shape:
- 8
- 1
- 4095
- 128
dtype: uint8
quantization_parameters:
scale: 0.030598552208247146
offset: -128
past_key_18_in:
shape:
- 8
- 1
- 128
- 4095
dtype: uint8
quantization_parameters:
scale: 0.11889628913458877
offset: -128
past_value_18_in:
shape:
- 8
- 1
- 4095
- 128
dtype: uint8
quantization_parameters:
scale: 0.032141249626874924
offset: -128
past_key_19_in:
shape:
- 8
- 1
- 128
- 4095
dtype: uint8
quantization_parameters:
scale: 0.13976185023784637
offset: -128
past_value_19_in:
shape:
- 8
- 1
- 4095
- 128
dtype: uint8
quantization_parameters:
scale: 0.032557712765190545
offset: -128
past_key_20_in:
shape:
- 8
- 1
- 128
- 4095
dtype: uint8
quantization_parameters:
scale: 0.11934278142733837
offset: -128
past_value_20_in:
shape:
- 8
- 1
- 4095
- 128
dtype: uint8
quantization_parameters:
scale: 0.027935825288295746
offset: -128
past_key_21_in:
shape:
- 8
- 1
- 128
- 4095
dtype: uint8
quantization_parameters:
scale: 0.13167049377922
offset: -128
past_value_21_in:
shape:
- 8
- 1
- 4095
- 128
dtype: uint8
quantization_parameters:
scale: 0.04469357803463936
offset: -128
past_key_22_in:
shape:
- 8
- 1
- 128
- 4095
dtype: uint8
quantization_parameters:
scale: 0.13328912689929873
offset: -128
past_value_22_in:
shape:
- 8
- 1
- 4095
- 128
dtype: uint8
quantization_parameters:
scale: 0.03745593875646591
offset: -128
past_key_23_in:
shape:
- 8
- 1
- 128
- 4095
dtype: uint8
quantization_parameters:
scale: 0.12827403151144193
offset: -128
past_value_23_in:
shape:
- 8
- 1
- 4095
- 128
dtype: uint8
quantization_parameters:
scale: 0.032266516238451004
offset: -128
past_key_24_in:
shape:
- 8
- 1
- 128
- 4095
dtype: uint8
quantization_parameters:
scale: 0.14478352665901184
offset: -128
past_value_24_in:
shape:
- 8
- 1
- 4095
- 128
dtype: uint8
quantization_parameters:
scale: 0.04626827540360098
offset: -128
past_key_25_in:
shape:
- 8
- 1
- 128
- 4095
dtype: uint8
quantization_parameters:
scale: 0.17984355716254768
offset: -128
past_value_25_in:
shape:
- 8
- 1
- 4095
- 128
dtype: uint8
quantization_parameters:
scale: 0.04539092882411686
offset: -128
past_key_26_in:
shape:
- 8
- 1
- 128
- 4095
dtype: uint8
quantization_parameters:
scale: 0.1248558149563046
offset: -128
past_value_26_in:
shape:
- 8
- 1
- 4095
- 128
dtype: uint8
quantization_parameters:
scale: 0.05314682051539421
offset: -128
past_key_27_in:
shape:
- 8
- 1
- 128
- 4095
dtype: uint8
quantization_parameters:
scale: 0.13326138257980347
offset: -128
past_value_27_in:
shape:
- 8
- 1
- 4095
- 128
dtype: uint8
quantization_parameters:
scale: 0.04699636995792389
offset: -128
outputs:
logits:
shape:
- 1
- 1
- 128256
dtype: uint16
quantization_parameters:
scale: 0.0006455201492198974
offset: -24293
past_key_14_out:
shape:
- 8
- 1
- 128
- 1
dtype: uint8
quantization_parameters:
scale: 0.13996938269908035
offset: -128
past_value_14_out:
shape:
- 8
- 1
- 1
- 128
dtype: uint8
quantization_parameters:
scale: 0.028563514469176765
offset: -128
past_key_15_out:
shape:
- 8
- 1
- 128
- 1
dtype: uint8
quantization_parameters:
scale: 0.125163213474544
offset: -128
past_value_15_out:
shape:
- 8
- 1
- 1
- 128
dtype: uint8
quantization_parameters:
scale: 0.027315943259892502
offset: -128
past_key_16_out:
shape:
- 8
- 1
- 128
- 1
dtype: uint8
quantization_parameters:
scale: 0.12602750522883857
offset: -128
past_value_16_out:
shape:
- 8
- 1
- 1
- 128
dtype: uint8
quantization_parameters:
scale: 0.033199068158864975
offset: -128
past_key_17_out:
shape:
- 8
- 1
- 128
- 1
dtype: uint8
quantization_parameters:
scale: 0.13656628131866455
offset: -128
past_value_17_out:
shape:
- 8
- 1
- 1
- 128
dtype: uint8
quantization_parameters:
scale: 0.030598552208247146
offset: -128
past_key_18_out:
shape:
- 8
- 1
- 128
- 1
dtype: uint8
quantization_parameters:
scale: 0.11889628913458877
offset: -128
past_value_18_out:
shape:
- 8
- 1
- 1
- 128
dtype: uint8
quantization_parameters:
scale: 0.032141249626874924
offset: -128
past_key_19_out:
shape:
- 8
- 1
- 128
- 1
dtype: uint8
quantization_parameters:
scale: 0.13976185023784637
offset: -128
past_value_19_out:
shape:
- 8
- 1
- 1
- 128
dtype: uint8
quantization_parameters:
scale: 0.032557712765190545
offset: -128
past_key_20_out:
shape:
- 8
- 1
- 128
- 1
dtype: uint8
quantization_parameters:
scale: 0.11934278142733837
offset: -128
past_value_20_out:
shape:
- 8
- 1
- 1
- 128
dtype: uint8
quantization_parameters:
scale: 0.027935825288295746
offset: -128
past_key_21_out:
shape:
- 8
- 1
- 128
- 1
dtype: uint8
quantization_parameters:
scale: 0.13167049377922
offset: -128
past_value_21_out:
shape:
- 8
- 1
- 1
- 128
dtype: uint8
quantization_parameters:
scale: 0.04469357803463936
offset: -128
past_key_22_out:
shape:
- 8
- 1
- 128
- 1
dtype: uint8
quantization_parameters:
scale: 0.13328912689929873
offset: -128
past_value_22_out:
shape:
- 8
- 1
- 1
- 128
dtype: uint8
quantization_parameters:
scale: 0.03745593875646591
offset: -128
past_key_23_out:
shape:
- 8
- 1
- 128
- 1
dtype: uint8
quantization_parameters:
scale: 0.12827403151144193
offset: -128
past_value_23_out:
shape:
- 8
- 1
- 1
- 128
dtype: uint8
quantization_parameters:
scale: 0.032266516238451004
offset: -128
past_key_24_out:
shape:
- 8
- 1
- 128
- 1
dtype: uint8
quantization_parameters:
scale: 0.14478352665901184
offset: -128
past_value_24_out:
shape:
- 8
- 1
- 1
- 128
dtype: uint8
quantization_parameters:
scale: 0.04626827540360098
offset: -128
past_key_25_out:
shape:
- 8
- 1
- 128
- 1
dtype: uint8
quantization_parameters:
scale: 0.17984355716254768
offset: -128
past_value_25_out:
shape:
- 8
- 1
- 1
- 128
dtype: uint8
quantization_parameters:
scale: 0.04539092882411686
offset: -128
past_key_26_out:
shape:
- 8
- 1
- 128
- 1
dtype: uint8
quantization_parameters:
scale: 0.1248558149563046
offset: -128
past_value_26_out:
shape:
- 8
- 1
- 1
- 128
dtype: uint8
quantization_parameters:
scale: 0.05314682051539421
offset: -128
past_key_27_out:
shape:
- 8
- 1
- 128
- 1
dtype: uint8
quantization_parameters:
scale: 0.13326138257980347
offset: -128
past_value_27_out:
shape:
- 8
- 1
- 1
- 128
dtype: uint8
quantization_parameters:
scale: 0.04699636995792389
offset: -128
precision: w4a16
runtime: genie