PicoDAC / ckpt_step6000 /model_scales.json
Mattimax's picture
Upload 30 files
a5a2325 verified
{
"tok_emb.weight": {
"scale": 3.576527897602447e-05,
"nbits": 18,
"dtype": "int32",
"emulated": true
},
"pos_emb.weight": {
"scale": 3.314935944296658e-05,
"nbits": 18,
"dtype": "int32",
"emulated": true
},
"blocks.0.norm1.weight": {
"scale": 1.378099091186781e-07,
"nbits": 24,
"dtype": "int32",
"emulated": true
},
"blocks.0.attn.mask": {
"scale": 0.007874015826771653,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.0.attn.W_qkv.weight": {
"scale": 0.0014341432127296268,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.0.attn.W_o.weight": {
"scale": 0.0012399422454891053,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.0.norm2.weight": {
"scale": 1.444702817222867e-07,
"nbits": 24,
"dtype": "int32",
"emulated": true
},
"blocks.0.ff.fc1.weight": {
"scale": 0.0015248707513040257,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.0.ff.fc2.weight": {
"scale": 0.0010049688852547473,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.1.norm1.weight": {
"scale": 1.4253803157631853e-07,
"nbits": 24,
"dtype": "int32",
"emulated": true
},
"blocks.1.attn.mask": {
"scale": 0.007874015826771653,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.1.attn.W_qkv.weight": {
"scale": 0.0015096906919986994,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.1.attn.W_o.weight": {
"scale": 0.001277954873030655,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.1.norm2.weight": {
"scale": 1.541712233740787e-07,
"nbits": 24,
"dtype": "int32",
"emulated": true
},
"blocks.1.ff.fc1.weight": {
"scale": 0.0015895651399489275,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.1.ff.fc2.weight": {
"scale": 0.0012072392419436597,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.2.norm1.weight": {
"scale": 1.4674046603366728e-07,
"nbits": 24,
"dtype": "int32",
"emulated": true
},
"blocks.2.attn.mask": {
"scale": 0.007874015826771653,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.2.attn.W_qkv.weight": {
"scale": 0.0015400529225849543,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.2.attn.W_o.weight": {
"scale": 0.001324677874615887,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.2.norm2.weight": {
"scale": 1.5549068718082285e-07,
"nbits": 24,
"dtype": "int32",
"emulated": true
},
"blocks.2.ff.fc1.weight": {
"scale": 0.0017806409699579675,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.2.ff.fc2.weight": {
"scale": 0.0012346178376637857,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.3.norm1.weight": {
"scale": 1.3762538114820376e-07,
"nbits": 24,
"dtype": "int32",
"emulated": true
},
"blocks.3.attn.mask": {
"scale": 0.007874015826771653,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.3.attn.W_qkv.weight": {
"scale": 0.0013776558439431979,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.3.attn.W_o.weight": {
"scale": 0.0011256408352420836,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.3.norm2.weight": {
"scale": 1.3971424914265849e-07,
"nbits": 24,
"dtype": "int32",
"emulated": true
},
"blocks.3.ff.fc1.weight": {
"scale": 0.0014574954442959883,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.3.ff.fc2.weight": {
"scale": 0.0010178668375898346,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.4.norm1.weight": {
"scale": 1.3411574015166323e-07,
"nbits": 24,
"dtype": "int32",
"emulated": true
},
"blocks.4.attn.mask": {
"scale": 0.007874015826771653,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.4.attn.W_qkv.weight": {
"scale": 0.0013714578995423804,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.4.attn.W_o.weight": {
"scale": 0.0011894033733184694,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.4.norm2.weight": {
"scale": 1.3673849581055776e-07,
"nbits": 24,
"dtype": "int32",
"emulated": true
},
"blocks.4.ff.fc1.weight": {
"scale": 0.0016128875691929013,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.4.ff.fc2.weight": {
"scale": 0.0008645662725310438,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.5.norm1.weight": {
"scale": 1.342961611846353e-07,
"nbits": 24,
"dtype": "int32",
"emulated": true
},
"blocks.5.attn.mask": {
"scale": 0.007874015826771653,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.5.attn.W_qkv.weight": {
"scale": 0.0014681386280116884,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.5.attn.W_o.weight": {
"scale": 0.0011316316887019,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.5.norm2.weight": {
"scale": 1.3784712735161398e-07,
"nbits": 24,
"dtype": "int32",
"emulated": true
},
"blocks.5.ff.fc1.weight": {
"scale": 0.0012796107792348186,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.5.ff.fc2.weight": {
"scale": 0.0006700524395714406,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"norm_f.weight": {
"scale": 1.1577189063489027e-07,
"nbits": 24,
"dtype": "int32",
"emulated": true
},
"lm_head.weight": {
"scale": 0.03691173921784649,
"nbits": 8,
"dtype": "int8",
"emulated": false
}
}