| { | |
| "blocks.0.attn.q": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.0.attn.k": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.0.attn.v": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.0.attn.o": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.0.ffn.gate.0": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.0.ffn.fc1": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.0.ffn.fc2": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.1.attn.q": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.1.attn.k": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.1.attn.v": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.1.attn.o": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.1.ffn.gate.0": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.1.ffn.fc1": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.1.ffn.fc2": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.2.attn.q": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.2.attn.k": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.2.attn.v": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.2.attn.o": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.2.ffn.gate.0": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.2.ffn.fc1": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.2.ffn.fc2": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.3.attn.q": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.3.attn.k": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.3.attn.v": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.3.attn.o": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.3.ffn.gate.0": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.3.ffn.fc1": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.3.ffn.fc2": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.4.attn.q": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.4.attn.k": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.4.attn.v": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.4.attn.o": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.4.ffn.gate.0": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.4.ffn.fc1": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.4.ffn.fc2": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.5.attn.q": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.5.attn.k": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.5.attn.v": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.5.attn.o": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.5.ffn.gate.0": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.5.ffn.fc1": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.5.ffn.fc2": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.6.attn.q": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.6.attn.k": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.6.attn.v": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.6.attn.o": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.6.ffn.gate.0": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.6.ffn.fc1": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.6.ffn.fc2": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.7.attn.q": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.7.attn.k": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.7.attn.v": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.7.attn.o": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.7.ffn.gate.0": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.7.ffn.fc1": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.7.ffn.fc2": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.8.attn.q": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.8.attn.k": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.8.attn.v": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.8.attn.o": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.8.ffn.gate.0": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.8.ffn.fc1": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.8.ffn.fc2": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.9.attn.q": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.9.attn.k": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.9.attn.v": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.9.attn.o": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.9.ffn.gate.0": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.9.ffn.fc1": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.9.ffn.fc2": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.10.attn.q": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.10.attn.k": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.10.attn.v": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.10.attn.o": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.10.ffn.gate.0": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.10.ffn.fc1": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.10.ffn.fc2": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.11.attn.q": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.11.attn.k": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.11.attn.v": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.11.attn.o": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.11.ffn.gate.0": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.11.ffn.fc1": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.11.ffn.fc2": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.12.attn.q": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.12.attn.k": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.12.attn.v": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.12.attn.o": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.12.ffn.gate.0": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.12.ffn.fc1": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.12.ffn.fc2": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.13.attn.q": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.13.attn.k": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.13.attn.v": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.13.attn.o": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.13.ffn.gate.0": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.13.ffn.fc1": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.13.ffn.fc2": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.14.attn.q": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.14.attn.k": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.14.attn.v": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.14.attn.o": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.14.ffn.gate.0": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.14.ffn.fc1": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.14.ffn.fc2": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.15.attn.q": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.15.attn.k": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.15.attn.v": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.15.attn.o": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.15.ffn.gate.0": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.15.ffn.fc1": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.15.ffn.fc2": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.16.attn.q": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.16.attn.k": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.16.attn.v": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.16.attn.o": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.16.ffn.gate.0": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.16.ffn.fc1": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.16.ffn.fc2": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.17.attn.q": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.17.attn.k": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.17.attn.v": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.17.attn.o": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.17.ffn.gate.0": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.17.ffn.fc1": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.17.ffn.fc2": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.18.attn.q": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.18.attn.k": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.18.attn.v": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.18.attn.o": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.18.ffn.gate.0": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.18.ffn.fc1": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.18.ffn.fc2": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.19.attn.q": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.19.attn.k": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.19.attn.v": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.19.attn.o": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.19.ffn.gate.0": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.19.ffn.fc1": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.19.ffn.fc2": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.20.attn.q": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.20.attn.k": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.20.attn.v": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.20.attn.o": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.20.ffn.gate.0": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.20.ffn.fc1": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.20.ffn.fc2": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.21.attn.q": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.21.attn.k": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.21.attn.v": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.21.attn.o": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.21.ffn.gate.0": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.21.ffn.fc1": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.21.ffn.fc2": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.22.attn.q": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.22.attn.k": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.22.attn.v": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.22.attn.o": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.22.ffn.gate.0": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.22.ffn.fc1": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.22.ffn.fc2": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.23.attn.q": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.23.attn.k": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.23.attn.v": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.23.attn.o": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.23.ffn.gate.0": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.23.ffn.fc1": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| }, | |
| "blocks.23.ffn.fc2": { | |
| "weights": "qfloat8_e4m3fn", | |
| "activations": "none" | |
| } | |
| } |