Buckets:

rparwani's picture
download
raw
10.8 kB
{
"num_grads": 1,
"dtype": {
"names": [
"layers.0.self_attn.q_proj",
"layers.0.self_attn.k_proj",
"layers.0.self_attn.v_proj",
"layers.0.self_attn.o_proj",
"layers.0.mlp.gate_proj",
"layers.0.mlp.up_proj",
"layers.0.mlp.down_proj",
"layers.1.self_attn.q_proj",
"layers.1.self_attn.k_proj",
"layers.1.self_attn.v_proj",
"layers.1.self_attn.o_proj",
"layers.1.mlp.gate_proj",
"layers.1.mlp.up_proj",
"layers.1.mlp.down_proj",
"layers.2.self_attn.q_proj",
"layers.2.self_attn.k_proj",
"layers.2.self_attn.v_proj",
"layers.2.self_attn.o_proj",
"layers.2.mlp.gate_proj",
"layers.2.mlp.up_proj",
"layers.2.mlp.down_proj",
"layers.3.self_attn.q_proj",
"layers.3.self_attn.k_proj",
"layers.3.self_attn.v_proj",
"layers.3.self_attn.o_proj",
"layers.3.mlp.gate_proj",
"layers.3.mlp.up_proj",
"layers.3.mlp.down_proj",
"layers.4.self_attn.q_proj",
"layers.4.self_attn.k_proj",
"layers.4.self_attn.v_proj",
"layers.4.self_attn.o_proj",
"layers.4.mlp.gate_proj",
"layers.4.mlp.up_proj",
"layers.4.mlp.down_proj",
"layers.5.self_attn.q_proj",
"layers.5.self_attn.k_proj",
"layers.5.self_attn.v_proj",
"layers.5.self_attn.o_proj",
"layers.5.mlp.gate_proj",
"layers.5.mlp.up_proj",
"layers.5.mlp.down_proj",
"layers.6.self_attn.q_proj",
"layers.6.self_attn.k_proj",
"layers.6.self_attn.v_proj",
"layers.6.self_attn.o_proj",
"layers.6.mlp.gate_proj",
"layers.6.mlp.up_proj",
"layers.6.mlp.down_proj",
"layers.7.self_attn.q_proj",
"layers.7.self_attn.k_proj",
"layers.7.self_attn.v_proj",
"layers.7.self_attn.o_proj",
"layers.7.mlp.gate_proj",
"layers.7.mlp.up_proj",
"layers.7.mlp.down_proj",
"layers.8.self_attn.q_proj",
"layers.8.self_attn.k_proj",
"layers.8.self_attn.v_proj",
"layers.8.self_attn.o_proj",
"layers.8.mlp.gate_proj",
"layers.8.mlp.up_proj",
"layers.8.mlp.down_proj",
"layers.9.self_attn.q_proj",
"layers.9.self_attn.k_proj",
"layers.9.self_attn.v_proj",
"layers.9.self_attn.o_proj",
"layers.9.mlp.gate_proj",
"layers.9.mlp.up_proj",
"layers.9.mlp.down_proj",
"layers.10.self_attn.q_proj",
"layers.10.self_attn.k_proj",
"layers.10.self_attn.v_proj",
"layers.10.self_attn.o_proj",
"layers.10.mlp.gate_proj",
"layers.10.mlp.up_proj",
"layers.10.mlp.down_proj",
"layers.11.self_attn.q_proj",
"layers.11.self_attn.k_proj",
"layers.11.self_attn.v_proj",
"layers.11.self_attn.o_proj",
"layers.11.mlp.gate_proj",
"layers.11.mlp.up_proj",
"layers.11.mlp.down_proj",
"layers.12.self_attn.q_proj",
"layers.12.self_attn.k_proj",
"layers.12.self_attn.v_proj",
"layers.12.self_attn.o_proj",
"layers.12.mlp.gate_proj",
"layers.12.mlp.up_proj",
"layers.12.mlp.down_proj",
"layers.13.self_attn.q_proj",
"layers.13.self_attn.k_proj",
"layers.13.self_attn.v_proj",
"layers.13.self_attn.o_proj",
"layers.13.mlp.gate_proj",
"layers.13.mlp.up_proj",
"layers.13.mlp.down_proj",
"layers.14.self_attn.q_proj",
"layers.14.self_attn.k_proj",
"layers.14.self_attn.v_proj",
"layers.14.self_attn.o_proj",
"layers.14.mlp.gate_proj",
"layers.14.mlp.up_proj",
"layers.14.mlp.down_proj",
"layers.15.self_attn.q_proj",
"layers.15.self_attn.k_proj",
"layers.15.self_attn.v_proj",
"layers.15.self_attn.o_proj",
"layers.15.mlp.gate_proj",
"layers.15.mlp.up_proj",
"layers.15.mlp.down_proj",
"layers.16.self_attn.q_proj",
"layers.16.self_attn.k_proj",
"layers.16.self_attn.v_proj",
"layers.16.self_attn.o_proj",
"layers.16.mlp.gate_proj",
"layers.16.mlp.up_proj",
"layers.16.mlp.down_proj"
],
"formats": [
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4",
"(256,)<f4"
],
"itemsize": 121856
},
"grad_sizes": {
"layers.0.self_attn.q_proj": 256,
"layers.0.self_attn.k_proj": 256,
"layers.0.self_attn.v_proj": 256,
"layers.0.self_attn.o_proj": 256,
"layers.0.mlp.gate_proj": 256,
"layers.0.mlp.up_proj": 256,
"layers.0.mlp.down_proj": 256,
"layers.1.self_attn.q_proj": 256,
"layers.1.self_attn.k_proj": 256,
"layers.1.self_attn.v_proj": 256,
"layers.1.self_attn.o_proj": 256,
"layers.1.mlp.gate_proj": 256,
"layers.1.mlp.up_proj": 256,
"layers.1.mlp.down_proj": 256,
"layers.2.self_attn.q_proj": 256,
"layers.2.self_attn.k_proj": 256,
"layers.2.self_attn.v_proj": 256,
"layers.2.self_attn.o_proj": 256,
"layers.2.mlp.gate_proj": 256,
"layers.2.mlp.up_proj": 256,
"layers.2.mlp.down_proj": 256,
"layers.3.self_attn.q_proj": 256,
"layers.3.self_attn.k_proj": 256,
"layers.3.self_attn.v_proj": 256,
"layers.3.self_attn.o_proj": 256,
"layers.3.mlp.gate_proj": 256,
"layers.3.mlp.up_proj": 256,
"layers.3.mlp.down_proj": 256,
"layers.4.self_attn.q_proj": 256,
"layers.4.self_attn.k_proj": 256,
"layers.4.self_attn.v_proj": 256,
"layers.4.self_attn.o_proj": 256,
"layers.4.mlp.gate_proj": 256,
"layers.4.mlp.up_proj": 256,
"layers.4.mlp.down_proj": 256,
"layers.5.self_attn.q_proj": 256,
"layers.5.self_attn.k_proj": 256,
"layers.5.self_attn.v_proj": 256,
"layers.5.self_attn.o_proj": 256,
"layers.5.mlp.gate_proj": 256,
"layers.5.mlp.up_proj": 256,
"layers.5.mlp.down_proj": 256,
"layers.6.self_attn.q_proj": 256,
"layers.6.self_attn.k_proj": 256,
"layers.6.self_attn.v_proj": 256,
"layers.6.self_attn.o_proj": 256,
"layers.6.mlp.gate_proj": 256,
"layers.6.mlp.up_proj": 256,
"layers.6.mlp.down_proj": 256,
"layers.7.self_attn.q_proj": 256,
"layers.7.self_attn.k_proj": 256,
"layers.7.self_attn.v_proj": 256,
"layers.7.self_attn.o_proj": 256,
"layers.7.mlp.gate_proj": 256,
"layers.7.mlp.up_proj": 256,
"layers.7.mlp.down_proj": 256,
"layers.8.self_attn.q_proj": 256,
"layers.8.self_attn.k_proj": 256,
"layers.8.self_attn.v_proj": 256,
"layers.8.self_attn.o_proj": 256,
"layers.8.mlp.gate_proj": 256,
"layers.8.mlp.up_proj": 256,
"layers.8.mlp.down_proj": 256,
"layers.9.self_attn.q_proj": 256,
"layers.9.self_attn.k_proj": 256,
"layers.9.self_attn.v_proj": 256,
"layers.9.self_attn.o_proj": 256,
"layers.9.mlp.gate_proj": 256,
"layers.9.mlp.up_proj": 256,
"layers.9.mlp.down_proj": 256,
"layers.10.self_attn.q_proj": 256,
"layers.10.self_attn.k_proj": 256,
"layers.10.self_attn.v_proj": 256,
"layers.10.self_attn.o_proj": 256,
"layers.10.mlp.gate_proj": 256,
"layers.10.mlp.up_proj": 256,
"layers.10.mlp.down_proj": 256,
"layers.11.self_attn.q_proj": 256,
"layers.11.self_attn.k_proj": 256,
"layers.11.self_attn.v_proj": 256,
"layers.11.self_attn.o_proj": 256,
"layers.11.mlp.gate_proj": 256,
"layers.11.mlp.up_proj": 256,
"layers.11.mlp.down_proj": 256,
"layers.12.self_attn.q_proj": 256,
"layers.12.self_attn.k_proj": 256,
"layers.12.self_attn.v_proj": 256,
"layers.12.self_attn.o_proj": 256,
"layers.12.mlp.gate_proj": 256,
"layers.12.mlp.up_proj": 256,
"layers.12.mlp.down_proj": 256,
"layers.13.self_attn.q_proj": 256,
"layers.13.self_attn.k_proj": 256,
"layers.13.self_attn.v_proj": 256,
"layers.13.self_attn.o_proj": 256,
"layers.13.mlp.gate_proj": 256,
"layers.13.mlp.up_proj": 256,
"layers.13.mlp.down_proj": 256,
"layers.14.self_attn.q_proj": 256,
"layers.14.self_attn.k_proj": 256,
"layers.14.self_attn.v_proj": 256,
"layers.14.self_attn.o_proj": 256,
"layers.14.mlp.gate_proj": 256,
"layers.14.mlp.up_proj": 256,
"layers.14.mlp.down_proj": 256,
"layers.15.self_attn.q_proj": 256,
"layers.15.self_attn.k_proj": 256,
"layers.15.self_attn.v_proj": 256,
"layers.15.self_attn.o_proj": 256,
"layers.15.mlp.gate_proj": 256,
"layers.15.mlp.up_proj": 256,
"layers.15.mlp.down_proj": 256,
"layers.16.self_attn.q_proj": 256,
"layers.16.self_attn.k_proj": 256,
"layers.16.self_attn.v_proj": 256,
"layers.16.self_attn.o_proj": 256,
"layers.16.mlp.gate_proj": 256,
"layers.16.mlp.up_proj": 256,
"layers.16.mlp.down_proj": 256
},
"base_dtype": "float32"
}

Xet Storage Details

Size:
10.8 kB
·
Xet hash:
cb35c2edc40b1cb9153311c02b5ad9ca38757944a542a0e312f1ac3ad6f46b42

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.