PicoDAC / ckpt_step15000 /model_scales.json
Mattimax's picture
Upload 30 files
a5a2325 verified
{
"tok_emb.weight": {
"scale": 3.582241750710105e-05,
"nbits": 18,
"dtype": "int32",
"emulated": true
},
"pos_emb.weight": {
"scale": 3.314935944296658e-05,
"nbits": 18,
"dtype": "int32",
"emulated": true
},
"blocks.0.norm1.weight": {
"scale": 1.3958851148514928e-07,
"nbits": 24,
"dtype": "int32",
"emulated": true
},
"blocks.0.attn.mask": {
"scale": 0.007874015826771653,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.0.attn.W_qkv.weight": {
"scale": 0.0015343368606173897,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.0.attn.W_o.weight": {
"scale": 0.0013366482003336628,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.0.norm2.weight": {
"scale": 1.4852239374053823e-07,
"nbits": 24,
"dtype": "int32",
"emulated": true
},
"blocks.0.ff.fc1.weight": {
"scale": 0.0017223362461657786,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.0.ff.fc2.weight": {
"scale": 0.0012296341619023,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.1.norm1.weight": {
"scale": 1.452415333213305e-07,
"nbits": 24,
"dtype": "int32",
"emulated": true
},
"blocks.1.attn.mask": {
"scale": 0.007874015826771653,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.1.attn.W_qkv.weight": {
"scale": 0.0018171136152895229,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.1.attn.W_o.weight": {
"scale": 0.0015010006163871945,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.1.norm2.weight": {
"scale": 1.5992701812381174e-07,
"nbits": 24,
"dtype": "int32",
"emulated": true
},
"blocks.1.ff.fc1.weight": {
"scale": 0.0018112181527277429,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.1.ff.fc2.weight": {
"scale": 0.001341285394764397,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.2.norm1.weight": {
"scale": 1.5114929896950427e-07,
"nbits": 24,
"dtype": "int32",
"emulated": true
},
"blocks.2.attn.mask": {
"scale": 0.007874015826771653,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.2.attn.W_qkv.weight": {
"scale": 0.0016495684787852368,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.2.attn.W_o.weight": {
"scale": 0.0014644906594866655,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.2.norm2.weight": {
"scale": 1.6378242346764618e-07,
"nbits": 24,
"dtype": "int32",
"emulated": true
},
"blocks.2.ff.fc1.weight": {
"scale": 0.0018133741278217345,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.2.ff.fc2.weight": {
"scale": 0.0014244531166704433,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.3.norm1.weight": {
"scale": 1.4009069472894002e-07,
"nbits": 24,
"dtype": "int32",
"emulated": true
},
"blocks.3.attn.mask": {
"scale": 0.007874015826771653,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.3.attn.W_qkv.weight": {
"scale": 0.0016187947649524718,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.3.attn.W_o.weight": {
"scale": 0.0013597113259064683,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.3.norm2.weight": {
"scale": 1.4961687126861084e-07,
"nbits": 24,
"dtype": "int32",
"emulated": true
},
"blocks.3.ff.fc1.weight": {
"scale": 0.0018500898850115077,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.3.ff.fc2.weight": {
"scale": 0.001232206548185874,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.4.norm1.weight": {
"scale": 1.3731041172674875e-07,
"nbits": 24,
"dtype": "int32",
"emulated": true
},
"blocks.4.attn.mask": {
"scale": 0.007874015826771653,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.4.attn.W_qkv.weight": {
"scale": 0.0015964233113894123,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.4.attn.W_o.weight": {
"scale": 0.0012714107992950199,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.4.norm2.weight": {
"scale": 1.4444463112646798e-07,
"nbits": 24,
"dtype": "int32",
"emulated": true
},
"blocks.4.ff.fc1.weight": {
"scale": 0.002053559843842514,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.4.ff.fc2.weight": {
"scale": 0.0010909055245073573,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.5.norm1.weight": {
"scale": 1.3806844722933192e-07,
"nbits": 24,
"dtype": "int32",
"emulated": true
},
"blocks.5.attn.mask": {
"scale": 0.007874015826771653,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.5.attn.W_qkv.weight": {
"scale": 0.0016286599203226885,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.5.attn.W_o.weight": {
"scale": 0.001324042756619491,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.5.norm2.weight": {
"scale": 1.445963035969241e-07,
"nbits": 24,
"dtype": "int32",
"emulated": true
},
"blocks.5.ff.fc1.weight": {
"scale": 0.0015793989107234082,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"blocks.5.ff.fc2.weight": {
"scale": 0.0008934699492999699,
"nbits": 8,
"dtype": "int8",
"emulated": false
},
"norm_f.weight": {
"scale": 1.1484186113718572e-07,
"nbits": 24,
"dtype": "int32",
"emulated": true
},
"lm_head.weight": {
"scale": 0.03697070933128537,
"nbits": 8,
"dtype": "int8",
"emulated": false
}
}