| { |
| "layer_types": { |
| "transformer": 391 |
| }, |
| "parameter_counts": { |
| "transformer.time_embed.time_mlp.0.weight": 262144, |
| "transformer.time_embed.time_mlp.0.bias": 1024, |
| "transformer.time_embed.time_mlp.2.weight": 1048576, |
| "transformer.time_embed.time_mlp.2.bias": 1024, |
| "transformer.text_embed.text_embed.weight": 254600, |
| "transformer.input_embed.proj.weight": 307200, |
| "transformer.input_embed.proj.bias": 1024, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, |
| "transformer.layers.0.1.g": 1024, |
| "transformer.layers.0.2.to_q.weight": 1048576, |
| "transformer.layers.0.2.to_q.bias": 1024, |
| "transformer.layers.0.2.to_k.weight": 1048576, |
| "transformer.layers.0.2.to_k.bias": 1024, |
| "transformer.layers.0.2.to_v.weight": 1048576, |
| "transformer.layers.0.2.to_v.bias": 1024, |
| "transformer.layers.0.2.to_out.0.weight": 1048576, |
| "transformer.layers.0.2.to_out.0.bias": 1024, |
| "transformer.layers.0.3.g": 1024, |
| "transformer.layers.0.4.ff.0.0.weight": 4194304, |
| "transformer.layers.0.4.ff.0.0.bias": 4096, |
| "transformer.layers.0.4.ff.2.weight": 4194304, |
| "transformer.layers.0.4.ff.2.bias": 1024, |
| "transformer.layers.1.1.g": 1024, |
| "transformer.layers.1.2.to_q.weight": 1048576, |
| "transformer.layers.1.2.to_q.bias": 1024, |
| "transformer.layers.1.2.to_k.weight": 1048576, |
| "transformer.layers.1.2.to_k.bias": 1024, |
| "transformer.layers.1.2.to_v.weight": 1048576, |
| "transformer.layers.1.2.to_v.bias": 1024, |
| "transformer.layers.1.2.to_out.0.weight": 1048576, |
| "transformer.layers.1.2.to_out.0.bias": 1024, |
| "transformer.layers.1.3.g": 1024, |
| "transformer.layers.1.4.ff.0.0.weight": 4194304, |
| "transformer.layers.1.4.ff.0.0.bias": 4096, |
| "transformer.layers.1.4.ff.2.weight": 4194304, |
| "transformer.layers.1.4.ff.2.bias": 1024, |
| "transformer.layers.2.1.g": 1024, |
| "transformer.layers.2.2.to_q.weight": 1048576, |
| "transformer.layers.2.2.to_q.bias": 1024, |
| "transformer.layers.2.2.to_k.weight": 1048576, |
| "transformer.layers.2.2.to_k.bias": 1024, |
| "transformer.layers.2.2.to_v.weight": 1048576, |
| "transformer.layers.2.2.to_v.bias": 1024, |
| "transformer.layers.2.2.to_out.0.weight": 1048576, |
| "transformer.layers.2.2.to_out.0.bias": 1024, |
| "transformer.layers.2.3.g": 1024, |
| "transformer.layers.2.4.ff.0.0.weight": 4194304, |
| "transformer.layers.2.4.ff.0.0.bias": 4096, |
| "transformer.layers.2.4.ff.2.weight": 4194304, |
| "transformer.layers.2.4.ff.2.bias": 1024, |
| "transformer.layers.3.1.g": 1024, |
| "transformer.layers.3.2.to_q.weight": 1048576, |
| "transformer.layers.3.2.to_q.bias": 1024, |
| "transformer.layers.3.2.to_k.weight": 1048576, |
| "transformer.layers.3.2.to_k.bias": 1024, |
| "transformer.layers.3.2.to_v.weight": 1048576, |
| "transformer.layers.3.2.to_v.bias": 1024, |
| "transformer.layers.3.2.to_out.0.weight": 1048576, |
| "transformer.layers.3.2.to_out.0.bias": 1024, |
| "transformer.layers.3.3.g": 1024, |
| "transformer.layers.3.4.ff.0.0.weight": 4194304, |
| "transformer.layers.3.4.ff.0.0.bias": 4096, |
| "transformer.layers.3.4.ff.2.weight": 4194304, |
| "transformer.layers.3.4.ff.2.bias": 1024, |
| "transformer.layers.4.1.g": 1024, |
| "transformer.layers.4.2.to_q.weight": 1048576, |
| "transformer.layers.4.2.to_q.bias": 1024, |
| "transformer.layers.4.2.to_k.weight": 1048576, |
| "transformer.layers.4.2.to_k.bias": 1024, |
| "transformer.layers.4.2.to_v.weight": 1048576, |
| "transformer.layers.4.2.to_v.bias": 1024, |
| "transformer.layers.4.2.to_out.0.weight": 1048576, |
| "transformer.layers.4.2.to_out.0.bias": 1024, |
| "transformer.layers.4.3.g": 1024, |
| "transformer.layers.4.4.ff.0.0.weight": 4194304, |
| "transformer.layers.4.4.ff.0.0.bias": 4096, |
| "transformer.layers.4.4.ff.2.weight": 4194304, |
| "transformer.layers.4.4.ff.2.bias": 1024, |
| "transformer.layers.5.1.g": 1024, |
| "transformer.layers.5.2.to_q.weight": 1048576, |
| "transformer.layers.5.2.to_q.bias": 1024, |
| "transformer.layers.5.2.to_k.weight": 1048576, |
| "transformer.layers.5.2.to_k.bias": 1024, |
| "transformer.layers.5.2.to_v.weight": 1048576, |
| "transformer.layers.5.2.to_v.bias": 1024, |
| "transformer.layers.5.2.to_out.0.weight": 1048576, |
| "transformer.layers.5.2.to_out.0.bias": 1024, |
| "transformer.layers.5.3.g": 1024, |
| "transformer.layers.5.4.ff.0.0.weight": 4194304, |
| "transformer.layers.5.4.ff.0.0.bias": 4096, |
| "transformer.layers.5.4.ff.2.weight": 4194304, |
| "transformer.layers.5.4.ff.2.bias": 1024, |
| "transformer.layers.6.1.g": 1024, |
| "transformer.layers.6.2.to_q.weight": 1048576, |
| "transformer.layers.6.2.to_q.bias": 1024, |
| "transformer.layers.6.2.to_k.weight": 1048576, |
| "transformer.layers.6.2.to_k.bias": 1024, |
| "transformer.layers.6.2.to_v.weight": 1048576, |
| "transformer.layers.6.2.to_v.bias": 1024, |
| "transformer.layers.6.2.to_out.0.weight": 1048576, |
| "transformer.layers.6.2.to_out.0.bias": 1024, |
| "transformer.layers.6.3.g": 1024, |
| "transformer.layers.6.4.ff.0.0.weight": 4194304, |
| "transformer.layers.6.4.ff.0.0.bias": 4096, |
| "transformer.layers.6.4.ff.2.weight": 4194304, |
| "transformer.layers.6.4.ff.2.bias": 1024, |
| "transformer.layers.7.1.g": 1024, |
| "transformer.layers.7.2.to_q.weight": 1048576, |
| "transformer.layers.7.2.to_q.bias": 1024, |
| "transformer.layers.7.2.to_k.weight": 1048576, |
| "transformer.layers.7.2.to_k.bias": 1024, |
| "transformer.layers.7.2.to_v.weight": 1048576, |
| "transformer.layers.7.2.to_v.bias": 1024, |
| "transformer.layers.7.2.to_out.0.weight": 1048576, |
| "transformer.layers.7.2.to_out.0.bias": 1024, |
| "transformer.layers.7.3.g": 1024, |
| "transformer.layers.7.4.ff.0.0.weight": 4194304, |
| "transformer.layers.7.4.ff.0.0.bias": 4096, |
| "transformer.layers.7.4.ff.2.weight": 4194304, |
| "transformer.layers.7.4.ff.2.bias": 1024, |
| "transformer.layers.8.1.g": 1024, |
| "transformer.layers.8.2.to_q.weight": 1048576, |
| "transformer.layers.8.2.to_q.bias": 1024, |
| "transformer.layers.8.2.to_k.weight": 1048576, |
| "transformer.layers.8.2.to_k.bias": 1024, |
| "transformer.layers.8.2.to_v.weight": 1048576, |
| "transformer.layers.8.2.to_v.bias": 1024, |
| "transformer.layers.8.2.to_out.0.weight": 1048576, |
| "transformer.layers.8.2.to_out.0.bias": 1024, |
| "transformer.layers.8.3.g": 1024, |
| "transformer.layers.8.4.ff.0.0.weight": 4194304, |
| "transformer.layers.8.4.ff.0.0.bias": 4096, |
| "transformer.layers.8.4.ff.2.weight": 4194304, |
| "transformer.layers.8.4.ff.2.bias": 1024, |
| "transformer.layers.9.1.g": 1024, |
| "transformer.layers.9.2.to_q.weight": 1048576, |
| "transformer.layers.9.2.to_q.bias": 1024, |
| "transformer.layers.9.2.to_k.weight": 1048576, |
| "transformer.layers.9.2.to_k.bias": 1024, |
| "transformer.layers.9.2.to_v.weight": 1048576, |
| "transformer.layers.9.2.to_v.bias": 1024, |
| "transformer.layers.9.2.to_out.0.weight": 1048576, |
| "transformer.layers.9.2.to_out.0.bias": 1024, |
| "transformer.layers.9.3.g": 1024, |
| "transformer.layers.9.4.ff.0.0.weight": 4194304, |
| "transformer.layers.9.4.ff.0.0.bias": 4096, |
| "transformer.layers.9.4.ff.2.weight": 4194304, |
| "transformer.layers.9.4.ff.2.bias": 1024, |
| "transformer.layers.10.1.g": 1024, |
| "transformer.layers.10.2.to_q.weight": 1048576, |
| "transformer.layers.10.2.to_q.bias": 1024, |
| "transformer.layers.10.2.to_k.weight": 1048576, |
| "transformer.layers.10.2.to_k.bias": 1024, |
| "transformer.layers.10.2.to_v.weight": 1048576, |
| "transformer.layers.10.2.to_v.bias": 1024, |
| "transformer.layers.10.2.to_out.0.weight": 1048576, |
| "transformer.layers.10.2.to_out.0.bias": 1024, |
| "transformer.layers.10.3.g": 1024, |
| "transformer.layers.10.4.ff.0.0.weight": 4194304, |
| "transformer.layers.10.4.ff.0.0.bias": 4096, |
| "transformer.layers.10.4.ff.2.weight": 4194304, |
| "transformer.layers.10.4.ff.2.bias": 1024, |
| "transformer.layers.11.1.g": 1024, |
| "transformer.layers.11.2.to_q.weight": 1048576, |
| "transformer.layers.11.2.to_q.bias": 1024, |
| "transformer.layers.11.2.to_k.weight": 1048576, |
| "transformer.layers.11.2.to_k.bias": 1024, |
| "transformer.layers.11.2.to_v.weight": 1048576, |
| "transformer.layers.11.2.to_v.bias": 1024, |
| "transformer.layers.11.2.to_out.0.weight": 1048576, |
| "transformer.layers.11.2.to_out.0.bias": 1024, |
| "transformer.layers.11.3.g": 1024, |
| "transformer.layers.11.4.ff.0.0.weight": 4194304, |
| "transformer.layers.11.4.ff.0.0.bias": 4096, |
| "transformer.layers.11.4.ff.2.weight": 4194304, |
| "transformer.layers.11.4.ff.2.bias": 1024, |
| "transformer.layers.12.1.g": 1024, |
| "transformer.layers.12.2.to_q.weight": 1048576, |
| "transformer.layers.12.2.to_q.bias": 1024, |
| "transformer.layers.12.2.to_k.weight": 1048576, |
| "transformer.layers.12.2.to_k.bias": 1024, |
| "transformer.layers.12.2.to_v.weight": 1048576, |
| "transformer.layers.12.2.to_v.bias": 1024, |
| "transformer.layers.12.2.to_out.0.weight": 1048576, |
| "transformer.layers.12.2.to_out.0.bias": 1024, |
| "transformer.layers.12.3.g": 1024, |
| "transformer.layers.12.4.ff.0.0.weight": 4194304, |
| "transformer.layers.12.4.ff.0.0.bias": 4096, |
| "transformer.layers.12.4.ff.2.weight": 4194304, |
| "transformer.layers.12.4.ff.2.bias": 1024, |
| "transformer.layers.13.0.weight": 2097152, |
| "transformer.layers.13.1.g": 1024, |
| "transformer.layers.13.2.to_q.weight": 1048576, |
| "transformer.layers.13.2.to_q.bias": 1024, |
| "transformer.layers.13.2.to_k.weight": 1048576, |
| "transformer.layers.13.2.to_k.bias": 1024, |
| "transformer.layers.13.2.to_v.weight": 1048576, |
| "transformer.layers.13.2.to_v.bias": 1024, |
| "transformer.layers.13.2.to_out.0.weight": 1048576, |
| "transformer.layers.13.2.to_out.0.bias": 1024, |
| "transformer.layers.13.3.g": 1024, |
| "transformer.layers.13.4.ff.0.0.weight": 4194304, |
| "transformer.layers.13.4.ff.0.0.bias": 4096, |
| "transformer.layers.13.4.ff.2.weight": 4194304, |
| "transformer.layers.13.4.ff.2.bias": 1024, |
| "transformer.layers.14.0.weight": 2097152, |
| "transformer.layers.14.1.g": 1024, |
| "transformer.layers.14.2.to_q.weight": 1048576, |
| "transformer.layers.14.2.to_q.bias": 1024, |
| "transformer.layers.14.2.to_k.weight": 1048576, |
| "transformer.layers.14.2.to_k.bias": 1024, |
| "transformer.layers.14.2.to_v.weight": 1048576, |
| "transformer.layers.14.2.to_v.bias": 1024, |
| "transformer.layers.14.2.to_out.0.weight": 1048576, |
| "transformer.layers.14.2.to_out.0.bias": 1024, |
| "transformer.layers.14.3.g": 1024, |
| "transformer.layers.14.4.ff.0.0.weight": 4194304, |
| "transformer.layers.14.4.ff.0.0.bias": 4096, |
| "transformer.layers.14.4.ff.2.weight": 4194304, |
| "transformer.layers.14.4.ff.2.bias": 1024, |
| "transformer.layers.15.0.weight": 2097152, |
| "transformer.layers.15.1.g": 1024, |
| "transformer.layers.15.2.to_q.weight": 1048576, |
| "transformer.layers.15.2.to_q.bias": 1024, |
| "transformer.layers.15.2.to_k.weight": 1048576, |
| "transformer.layers.15.2.to_k.bias": 1024, |
| "transformer.layers.15.2.to_v.weight": 1048576, |
| "transformer.layers.15.2.to_v.bias": 1024, |
| "transformer.layers.15.2.to_out.0.weight": 1048576, |
| "transformer.layers.15.2.to_out.0.bias": 1024, |
| "transformer.layers.15.3.g": 1024, |
| "transformer.layers.15.4.ff.0.0.weight": 4194304, |
| "transformer.layers.15.4.ff.0.0.bias": 4096, |
| "transformer.layers.15.4.ff.2.weight": 4194304, |
| "transformer.layers.15.4.ff.2.bias": 1024, |
| "transformer.layers.16.0.weight": 2097152, |
| "transformer.layers.16.1.g": 1024, |
| "transformer.layers.16.2.to_q.weight": 1048576, |
| "transformer.layers.16.2.to_q.bias": 1024, |
| "transformer.layers.16.2.to_k.weight": 1048576, |
| "transformer.layers.16.2.to_k.bias": 1024, |
| "transformer.layers.16.2.to_v.weight": 1048576, |
| "transformer.layers.16.2.to_v.bias": 1024, |
| "transformer.layers.16.2.to_out.0.weight": 1048576, |
| "transformer.layers.16.2.to_out.0.bias": 1024, |
| "transformer.layers.16.3.g": 1024, |
| "transformer.layers.16.4.ff.0.0.weight": 4194304, |
| "transformer.layers.16.4.ff.0.0.bias": 4096, |
| "transformer.layers.16.4.ff.2.weight": 4194304, |
| "transformer.layers.16.4.ff.2.bias": 1024, |
| "transformer.layers.17.0.weight": 2097152, |
| "transformer.layers.17.1.g": 1024, |
| "transformer.layers.17.2.to_q.weight": 1048576, |
| "transformer.layers.17.2.to_q.bias": 1024, |
| "transformer.layers.17.2.to_k.weight": 1048576, |
| "transformer.layers.17.2.to_k.bias": 1024, |
| "transformer.layers.17.2.to_v.weight": 1048576, |
| "transformer.layers.17.2.to_v.bias": 1024, |
| "transformer.layers.17.2.to_out.0.weight": 1048576, |
| "transformer.layers.17.2.to_out.0.bias": 1024, |
| "transformer.layers.17.3.g": 1024, |
| "transformer.layers.17.4.ff.0.0.weight": 4194304, |
| "transformer.layers.17.4.ff.0.0.bias": 4096, |
| "transformer.layers.17.4.ff.2.weight": 4194304, |
| "transformer.layers.17.4.ff.2.bias": 1024, |
| "transformer.layers.18.0.weight": 2097152, |
| "transformer.layers.18.1.g": 1024, |
| "transformer.layers.18.2.to_q.weight": 1048576, |
| "transformer.layers.18.2.to_q.bias": 1024, |
| "transformer.layers.18.2.to_k.weight": 1048576, |
| "transformer.layers.18.2.to_k.bias": 1024, |
| "transformer.layers.18.2.to_v.weight": 1048576, |
| "transformer.layers.18.2.to_v.bias": 1024, |
| "transformer.layers.18.2.to_out.0.weight": 1048576, |
| "transformer.layers.18.2.to_out.0.bias": 1024, |
| "transformer.layers.18.3.g": 1024, |
| "transformer.layers.18.4.ff.0.0.weight": 4194304, |
| "transformer.layers.18.4.ff.0.0.bias": 4096, |
| "transformer.layers.18.4.ff.2.weight": 4194304, |
| "transformer.layers.18.4.ff.2.bias": 1024, |
| "transformer.layers.19.0.weight": 2097152, |
| "transformer.layers.19.1.g": 1024, |
| "transformer.layers.19.2.to_q.weight": 1048576, |
| "transformer.layers.19.2.to_q.bias": 1024, |
| "transformer.layers.19.2.to_k.weight": 1048576, |
| "transformer.layers.19.2.to_k.bias": 1024, |
| "transformer.layers.19.2.to_v.weight": 1048576, |
| "transformer.layers.19.2.to_v.bias": 1024, |
| "transformer.layers.19.2.to_out.0.weight": 1048576, |
| "transformer.layers.19.2.to_out.0.bias": 1024, |
| "transformer.layers.19.3.g": 1024, |
| "transformer.layers.19.4.ff.0.0.weight": 4194304, |
| "transformer.layers.19.4.ff.0.0.bias": 4096, |
| "transformer.layers.19.4.ff.2.weight": 4194304, |
| "transformer.layers.19.4.ff.2.bias": 1024, |
| "transformer.layers.20.0.weight": 2097152, |
| "transformer.layers.20.1.g": 1024, |
| "transformer.layers.20.2.to_q.weight": 1048576, |
| "transformer.layers.20.2.to_q.bias": 1024, |
| "transformer.layers.20.2.to_k.weight": 1048576, |
| "transformer.layers.20.2.to_k.bias": 1024, |
| "transformer.layers.20.2.to_v.weight": 1048576, |
| "transformer.layers.20.2.to_v.bias": 1024, |
| "transformer.layers.20.2.to_out.0.weight": 1048576, |
| "transformer.layers.20.2.to_out.0.bias": 1024, |
| "transformer.layers.20.3.g": 1024, |
| "transformer.layers.20.4.ff.0.0.weight": 4194304, |
| "transformer.layers.20.4.ff.0.0.bias": 4096, |
| "transformer.layers.20.4.ff.2.weight": 4194304, |
| "transformer.layers.20.4.ff.2.bias": 1024, |
| "transformer.layers.21.0.weight": 2097152, |
| "transformer.layers.21.1.g": 1024, |
| "transformer.layers.21.2.to_q.weight": 1048576, |
| "transformer.layers.21.2.to_q.bias": 1024, |
| "transformer.layers.21.2.to_k.weight": 1048576, |
| "transformer.layers.21.2.to_k.bias": 1024, |
| "transformer.layers.21.2.to_v.weight": 1048576, |
| "transformer.layers.21.2.to_v.bias": 1024, |
| "transformer.layers.21.2.to_out.0.weight": 1048576, |
| "transformer.layers.21.2.to_out.0.bias": 1024, |
| "transformer.layers.21.3.g": 1024, |
| "transformer.layers.21.4.ff.0.0.weight": 4194304, |
| "transformer.layers.21.4.ff.0.0.bias": 4096, |
| "transformer.layers.21.4.ff.2.weight": 4194304, |
| "transformer.layers.21.4.ff.2.bias": 1024, |
| "transformer.layers.22.0.weight": 2097152, |
| "transformer.layers.22.1.g": 1024, |
| "transformer.layers.22.2.to_q.weight": 1048576, |
| "transformer.layers.22.2.to_q.bias": 1024, |
| "transformer.layers.22.2.to_k.weight": 1048576, |
| "transformer.layers.22.2.to_k.bias": 1024, |
| "transformer.layers.22.2.to_v.weight": 1048576, |
| "transformer.layers.22.2.to_v.bias": 1024, |
| "transformer.layers.22.2.to_out.0.weight": 1048576, |
| "transformer.layers.22.2.to_out.0.bias": 1024, |
| "transformer.layers.22.3.g": 1024, |
| "transformer.layers.22.4.ff.0.0.weight": 4194304, |
| "transformer.layers.22.4.ff.0.0.bias": 4096, |
| "transformer.layers.22.4.ff.2.weight": 4194304, |
| "transformer.layers.22.4.ff.2.bias": 1024, |
| "transformer.layers.23.0.weight": 2097152, |
| "transformer.layers.23.1.g": 1024, |
| "transformer.layers.23.2.to_q.weight": 1048576, |
| "transformer.layers.23.2.to_q.bias": 1024, |
| "transformer.layers.23.2.to_k.weight": 1048576, |
| "transformer.layers.23.2.to_k.bias": 1024, |
| "transformer.layers.23.2.to_v.weight": 1048576, |
| "transformer.layers.23.2.to_v.bias": 1024, |
| "transformer.layers.23.2.to_out.0.weight": 1048576, |
| "transformer.layers.23.2.to_out.0.bias": 1024, |
| "transformer.layers.23.3.g": 1024, |
| "transformer.layers.23.4.ff.0.0.weight": 4194304, |
| "transformer.layers.23.4.ff.0.0.bias": 4096, |
| "transformer.layers.23.4.ff.2.weight": 4194304, |
| "transformer.layers.23.4.ff.2.bias": 1024, |
| "transformer.layers.24.0.weight": 2097152, |
| "transformer.layers.24.1.g": 1024, |
| "transformer.layers.24.2.to_q.weight": 1048576, |
| "transformer.layers.24.2.to_q.bias": 1024, |
| "transformer.layers.24.2.to_k.weight": 1048576, |
| "transformer.layers.24.2.to_k.bias": 1024, |
| "transformer.layers.24.2.to_v.weight": 1048576, |
| "transformer.layers.24.2.to_v.bias": 1024, |
| "transformer.layers.24.2.to_out.0.weight": 1048576, |
| "transformer.layers.24.2.to_out.0.bias": 1024, |
| "transformer.layers.24.3.g": 1024, |
| "transformer.layers.24.4.ff.0.0.weight": 4194304, |
| "transformer.layers.24.4.ff.0.0.bias": 4096, |
| "transformer.layers.24.4.ff.2.weight": 4194304, |
| "transformer.layers.24.4.ff.2.bias": 1024, |
| "transformer.layers.25.0.weight": 2097152, |
| "transformer.layers.25.1.g": 1024, |
| "transformer.layers.25.2.to_q.weight": 1048576, |
| "transformer.layers.25.2.to_q.bias": 1024, |
| "transformer.layers.25.2.to_k.weight": 1048576, |
| "transformer.layers.25.2.to_k.bias": 1024, |
| "transformer.layers.25.2.to_v.weight": 1048576, |
| "transformer.layers.25.2.to_v.bias": 1024, |
| "transformer.layers.25.2.to_out.0.weight": 1048576, |
| "transformer.layers.25.2.to_out.0.bias": 1024, |
| "transformer.layers.25.3.g": 1024, |
| "transformer.layers.25.4.ff.0.0.weight": 4194304, |
| "transformer.layers.25.4.ff.0.0.bias": 4096, |
| "transformer.layers.25.4.ff.2.weight": 4194304, |
| "transformer.layers.25.4.ff.2.bias": 1024, |
| "transformer.norm_out.g": 1024, |
| "transformer.proj_out.weight": 102400, |
| "transformer.proj_out.bias": 100 |
| }, |
| "important_layers": [ |
| "transformer.time_embed.time_mlp.0.weight", |
| "transformer.time_embed.time_mlp.2.weight", |
| "transformer.text_embed.text_embed.weight", |
| "transformer.input_embed.proj.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight", |
| "transformer.layers.0.2.to_q.weight", |
| "transformer.layers.0.2.to_k.weight", |
| "transformer.layers.0.2.to_v.weight", |
| "transformer.layers.0.2.to_out.0.weight", |
| "transformer.layers.0.4.ff.0.0.weight", |
| "transformer.layers.0.4.ff.2.weight", |
| "transformer.layers.1.2.to_q.weight", |
| "transformer.layers.1.2.to_k.weight", |
| "transformer.layers.1.2.to_v.weight", |
| "transformer.layers.1.2.to_out.0.weight", |
| "transformer.layers.1.4.ff.0.0.weight", |
| "transformer.layers.1.4.ff.2.weight", |
| "transformer.layers.2.2.to_q.weight", |
| "transformer.layers.2.2.to_k.weight", |
| "transformer.layers.2.2.to_v.weight", |
| "transformer.layers.2.2.to_out.0.weight", |
| "transformer.layers.2.4.ff.0.0.weight", |
| "transformer.layers.2.4.ff.2.weight", |
| "transformer.layers.3.2.to_q.weight", |
| "transformer.layers.3.2.to_k.weight", |
| "transformer.layers.3.2.to_v.weight", |
| "transformer.layers.3.2.to_out.0.weight", |
| "transformer.layers.3.4.ff.0.0.weight", |
| "transformer.layers.3.4.ff.2.weight", |
| "transformer.layers.4.2.to_q.weight", |
| "transformer.layers.4.2.to_k.weight", |
| "transformer.layers.4.2.to_v.weight", |
| "transformer.layers.4.2.to_out.0.weight", |
| "transformer.layers.4.4.ff.0.0.weight", |
| "transformer.layers.4.4.ff.2.weight", |
| "transformer.layers.5.2.to_q.weight", |
| "transformer.layers.5.2.to_k.weight", |
| "transformer.layers.5.2.to_v.weight", |
| "transformer.layers.5.2.to_out.0.weight", |
| "transformer.layers.5.4.ff.0.0.weight", |
| "transformer.layers.5.4.ff.2.weight", |
| "transformer.layers.6.2.to_q.weight", |
| "transformer.layers.6.2.to_k.weight", |
| "transformer.layers.6.2.to_v.weight", |
| "transformer.layers.6.2.to_out.0.weight", |
| "transformer.layers.6.4.ff.0.0.weight", |
| "transformer.layers.6.4.ff.2.weight", |
| "transformer.layers.7.2.to_q.weight", |
| "transformer.layers.7.2.to_k.weight", |
| "transformer.layers.7.2.to_v.weight", |
| "transformer.layers.7.2.to_out.0.weight", |
| "transformer.layers.7.4.ff.0.0.weight", |
| "transformer.layers.7.4.ff.2.weight", |
| "transformer.layers.8.4.ff.0.0.weight", |
| "transformer.layers.8.4.ff.2.weight", |
| "transformer.layers.9.4.ff.0.0.weight", |
| "transformer.layers.9.4.ff.2.weight", |
| "transformer.layers.10.4.ff.0.0.weight", |
| "transformer.layers.10.4.ff.2.weight", |
| "transformer.layers.11.4.ff.0.0.weight", |
| "transformer.layers.11.4.ff.2.weight", |
| "transformer.layers.12.4.ff.0.0.weight", |
| "transformer.layers.12.4.ff.2.weight", |
| "transformer.layers.13.0.weight", |
| "transformer.layers.13.4.ff.0.0.weight", |
| "transformer.layers.13.4.ff.2.weight", |
| "transformer.layers.14.0.weight", |
| "transformer.layers.14.4.ff.0.0.weight", |
| "transformer.layers.14.4.ff.2.weight", |
| "transformer.layers.15.0.weight", |
| "transformer.layers.15.4.ff.0.0.weight", |
| "transformer.layers.15.4.ff.2.weight", |
| "transformer.layers.16.4.ff.0.0.weight", |
| "transformer.layers.16.4.ff.2.weight", |
| "transformer.layers.17.4.ff.0.0.weight", |
| "transformer.layers.17.4.ff.2.weight", |
| "transformer.layers.18.4.ff.0.0.weight", |
| "transformer.layers.18.4.ff.2.weight", |
| "transformer.layers.19.4.ff.0.0.weight", |
| "transformer.layers.19.4.ff.2.weight", |
| "transformer.layers.20.4.ff.0.0.weight", |
| "transformer.layers.20.4.ff.2.weight", |
| "transformer.layers.21.4.ff.0.0.weight", |
| "transformer.layers.21.4.ff.2.weight", |
| "transformer.layers.22.4.ff.0.0.weight", |
| "transformer.layers.22.4.ff.2.weight", |
| "transformer.layers.23.4.ff.0.0.weight", |
| "transformer.layers.23.4.ff.2.weight", |
| "transformer.layers.24.4.ff.0.0.weight", |
| "transformer.layers.24.4.ff.2.weight", |
| "transformer.layers.25.4.ff.0.0.weight", |
| "transformer.layers.25.4.ff.2.weight" |
| ], |
| "bottleneck_layers": [], |
| "recommendations": { |
| "focus_layers": [ |
| "transformer.time_embed.time_mlp.0.weight", |
| "transformer.time_embed.time_mlp.2.weight", |
| "transformer.text_embed.text_embed.weight", |
| "transformer.input_embed.proj.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight", |
| "transformer.layers.0.2.to_q.weight", |
| "transformer.layers.0.2.to_k.weight", |
| "transformer.layers.0.2.to_v.weight", |
| "transformer.layers.0.2.to_out.0.weight", |
| "transformer.layers.0.4.ff.0.0.weight", |
| "transformer.layers.0.4.ff.2.weight", |
| "transformer.layers.1.2.to_q.weight", |
| "transformer.layers.1.2.to_k.weight", |
| "transformer.layers.1.2.to_v.weight", |
| "transformer.layers.1.2.to_out.0.weight", |
| "transformer.layers.1.4.ff.0.0.weight", |
| "transformer.layers.1.4.ff.2.weight", |
| "transformer.layers.2.2.to_q.weight", |
| "transformer.layers.2.2.to_k.weight", |
| "transformer.layers.2.2.to_v.weight", |
| "transformer.layers.2.2.to_out.0.weight", |
| "transformer.layers.2.4.ff.0.0.weight", |
| "transformer.layers.2.4.ff.2.weight", |
| "transformer.layers.3.2.to_q.weight", |
| "transformer.layers.3.2.to_k.weight", |
| "transformer.layers.3.2.to_v.weight", |
| "transformer.layers.3.2.to_out.0.weight", |
| "transformer.layers.3.4.ff.0.0.weight", |
| "transformer.layers.3.4.ff.2.weight", |
| "transformer.layers.4.2.to_q.weight", |
| "transformer.layers.4.2.to_k.weight", |
| "transformer.layers.4.2.to_v.weight", |
| "transformer.layers.4.2.to_out.0.weight", |
| "transformer.layers.4.4.ff.0.0.weight", |
| "transformer.layers.4.4.ff.2.weight", |
| "transformer.layers.5.2.to_q.weight", |
| "transformer.layers.5.2.to_k.weight", |
| "transformer.layers.5.2.to_v.weight", |
| "transformer.layers.5.2.to_out.0.weight", |
| "transformer.layers.5.4.ff.0.0.weight", |
| "transformer.layers.5.4.ff.2.weight", |
| "transformer.layers.6.2.to_q.weight", |
| "transformer.layers.6.2.to_k.weight", |
| "transformer.layers.6.2.to_v.weight", |
| "transformer.layers.6.2.to_out.0.weight", |
| "transformer.layers.6.4.ff.0.0.weight", |
| "transformer.layers.6.4.ff.2.weight", |
| "transformer.layers.7.2.to_q.weight", |
| "transformer.layers.7.2.to_k.weight", |
| "transformer.layers.7.2.to_v.weight", |
| "transformer.layers.7.2.to_out.0.weight", |
| "transformer.layers.7.4.ff.0.0.weight", |
| "transformer.layers.7.4.ff.2.weight", |
| "transformer.layers.8.4.ff.0.0.weight", |
| "transformer.layers.8.4.ff.2.weight", |
| "transformer.layers.9.4.ff.0.0.weight", |
| "transformer.layers.9.4.ff.2.weight", |
| "transformer.layers.10.4.ff.0.0.weight", |
| "transformer.layers.10.4.ff.2.weight", |
| "transformer.layers.11.4.ff.0.0.weight", |
| "transformer.layers.11.4.ff.2.weight", |
| "transformer.layers.12.4.ff.0.0.weight", |
| "transformer.layers.12.4.ff.2.weight", |
| "transformer.layers.13.0.weight", |
| "transformer.layers.13.4.ff.0.0.weight", |
| "transformer.layers.13.4.ff.2.weight", |
| "transformer.layers.14.0.weight", |
| "transformer.layers.14.4.ff.0.0.weight", |
| "transformer.layers.14.4.ff.2.weight", |
| "transformer.layers.15.0.weight", |
| "transformer.layers.15.4.ff.0.0.weight", |
| "transformer.layers.15.4.ff.2.weight", |
| "transformer.layers.16.4.ff.0.0.weight", |
| "transformer.layers.16.4.ff.2.weight", |
| "transformer.layers.17.4.ff.0.0.weight", |
| "transformer.layers.17.4.ff.2.weight", |
| "transformer.layers.18.4.ff.0.0.weight", |
| "transformer.layers.18.4.ff.2.weight", |
| "transformer.layers.19.4.ff.0.0.weight", |
| "transformer.layers.19.4.ff.2.weight", |
| "transformer.layers.20.4.ff.0.0.weight", |
| "transformer.layers.20.4.ff.2.weight", |
| "transformer.layers.21.4.ff.0.0.weight", |
| "transformer.layers.21.4.ff.2.weight", |
| "transformer.layers.22.4.ff.0.0.weight", |
| "transformer.layers.22.4.ff.2.weight", |
| "transformer.layers.23.4.ff.0.0.weight", |
| "transformer.layers.23.4.ff.2.weight", |
| "transformer.layers.24.4.ff.0.0.weight", |
| "transformer.layers.24.4.ff.2.weight", |
| "transformer.layers.25.4.ff.0.0.weight", |
| "transformer.layers.25.4.ff.2.weight" |
| ] |
| }, |
| "total_parameters": 391, |
| "total_elements": 360755948, |
| "param_ranges": { |
| "transformer.time_embed.time_mlp.0.weight": { |
| "min": -0.43091416358947754, |
| "max": 0.2991102933883667, |
| "mean": -0.002557656727731228, |
| "std": 0.04255230724811554, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 256 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.0.bias": { |
| "min": -0.06317874044179916, |
| "max": 0.10845368355512619, |
| "mean": 0.0006046494818292558, |
| "std": 0.0341438427567482, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.2.weight": { |
| "min": -0.4125913977622986, |
| "max": 0.8363389372825623, |
| "mean": -0.0002094925002893433, |
| "std": 0.024107541888952255, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.2.bias": { |
| "min": -0.11634448170661926, |
| "max": 0.32392504811286926, |
| "mean": -0.0009387563331983984, |
| "std": 0.019654380157589912, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.text_embed.text_embed.weight": { |
| "min": -2.8076894283294678, |
| "max": 2.8856873512268066, |
| "mean": -0.0003593244473449886, |
| "std": 0.6153794527053833, |
| "sparsity": 0.0, |
| "shape": [ |
| 2546, |
| 100 |
| ] |
| }, |
| "transformer.input_embed.proj.weight": { |
| "min": -0.2804395258426666, |
| "max": 0.38235825300216675, |
| "mean": 0.00042111962102353573, |
| "std": 0.0427500456571579, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 300 |
| ] |
| }, |
| "transformer.input_embed.proj.bias": { |
| "min": -0.22397927939891815, |
| "max": 0.21124881505966187, |
| "mean": -0.004504885524511337, |
| "std": 0.04102449491620064, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { |
| "min": -0.42797791957855225, |
| "max": 0.4753724932670593, |
| "mean": 3.1681217933510197e-06, |
| "std": 0.024508841335773468, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 64, |
| 31 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { |
| "min": -0.3278864026069641, |
| "max": 0.15815186500549316, |
| "mean": -0.046754755079746246, |
| "std": 0.05172203853726387, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { |
| "min": -0.4108750522136688, |
| "max": 0.3548462688922882, |
| "mean": -0.0001276329276151955, |
| "std": 0.023600950837135315, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 64, |
| 31 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { |
| "min": -0.231490820646286, |
| "max": 0.26459917426109314, |
| "mean": -0.029202936217188835, |
| "std": 0.049504559487104416, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.1.g": { |
| "min": 0.2546604871749878, |
| "max": 0.8254969120025635, |
| "mean": 0.5257646441459656, |
| "std": 0.08148879557847977, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_q.weight": { |
| "min": -0.2975306808948517, |
| "max": 0.26634442806243896, |
| "mean": -0.0004239020636305213, |
| "std": 0.032103944569826126, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_q.bias": { |
| "min": -0.093165822327137, |
| "max": 0.12537634372711182, |
| "mean": 0.0006500760791823268, |
| "std": 0.0257789958268404, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_k.weight": { |
| "min": -0.2912229299545288, |
| "max": 0.2824551463127136, |
| "mean": -7.682169962208718e-05, |
| "std": 0.03093571960926056, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_k.bias": { |
| "min": -5.9252495765686035, |
| "max": 5.839654445648193, |
| "mean": -0.00940663367509842, |
| "std": 1.2986583709716797, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_v.weight": { |
| "min": -0.4255436658859253, |
| "max": 0.34462970495224, |
| "mean": 9.765196591615677e-05, |
| "std": 0.02995290234684944, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_v.bias": { |
| "min": -0.028961628675460815, |
| "max": 0.027653951197862625, |
| "mean": -0.000311878917273134, |
| "std": 0.012572262436151505, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_out.0.weight": { |
| "min": -0.4547809660434723, |
| "max": 0.44922640919685364, |
| "mean": 2.2741787688573822e-05, |
| "std": 0.023854725062847137, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_out.0.bias": { |
| "min": -0.08907536417245865, |
| "max": 0.09154797345399857, |
| "mean": 0.0022746319882571697, |
| "std": 0.019537169486284256, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.3.g": { |
| "min": 0.2665960192680359, |
| "max": 1.0631530284881592, |
| "mean": 0.5315366387367249, |
| "std": 0.10529287159442902, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.4.ff.0.0.weight": { |
| "min": -0.5752094984054565, |
| "max": 0.6091693043708801, |
| "mean": -0.0004337065329309553, |
| "std": 0.038595084100961685, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.4.ff.0.0.bias": { |
| "min": -0.18266847729682922, |
| "max": 0.04574590548872948, |
| "mean": -0.02949558012187481, |
| "std": 0.042705073952674866, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.0.4.ff.2.weight": { |
| "min": -1.168283462524414, |
| "max": 1.6358791589736938, |
| "mean": 0.0003184601664543152, |
| "std": 0.027693841606378555, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.0.4.ff.2.bias": { |
| "min": -0.1632407307624817, |
| "max": 0.20662632584571838, |
| "mean": -0.02112644352018833, |
| "std": 0.027983704581856728, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.1.g": { |
| "min": 0.2244432270526886, |
| "max": 0.8492330312728882, |
| "mean": 0.4877929091453552, |
| "std": 0.07575991004705429, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_q.weight": { |
| "min": -0.25644662976264954, |
| "max": 0.30648505687713623, |
| "mean": -9.105999197345227e-06, |
| "std": 0.03347046673297882, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_q.bias": { |
| "min": -0.09590143710374832, |
| "max": 0.11091545224189758, |
| "mean": 5.9943689848296344e-05, |
| "std": 0.02701094001531601, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_k.weight": { |
| "min": -0.29843541979789734, |
| "max": 0.29746681451797485, |
| "mean": 5.037898154114373e-05, |
| "std": 0.0325385183095932, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_k.bias": { |
| "min": -5.186855792999268, |
| "max": 5.106731414794922, |
| "mean": -0.014725911431014538, |
| "std": 1.1609561443328857, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_v.weight": { |
| "min": -0.34537965059280396, |
| "max": 0.3438728153705597, |
| "mean": 7.886411185609177e-05, |
| "std": 0.030058259144425392, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_v.bias": { |
| "min": -0.036315590143203735, |
| "max": 0.033395010977983475, |
| "mean": -0.00014420351362787187, |
| "std": 0.013025550171732903, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_out.0.weight": { |
| "min": -0.3161202371120453, |
| "max": 0.37616145610809326, |
| "mean": -2.1655154341715388e-05, |
| "std": 0.02405548468232155, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_out.0.bias": { |
| "min": -0.10574664920568466, |
| "max": 0.12242550402879715, |
| "mean": -0.0019548372365534306, |
| "std": 0.028876660391688347, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.3.g": { |
| "min": 0.31179988384246826, |
| "max": 1.1284958124160767, |
| "mean": 0.6666731238365173, |
| "std": 0.09859278053045273, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.4.ff.0.0.weight": { |
| "min": -0.8728909492492676, |
| "max": 0.6278397440910339, |
| "mean": 0.0016749973874539137, |
| "std": 0.047438185662031174, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.4.ff.0.0.bias": { |
| "min": -0.2722160518169403, |
| "max": 0.0340891033411026, |
| "mean": -0.046644046902656555, |
| "std": 0.04069075360894203, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.1.4.ff.2.weight": { |
| "min": -0.922055184841156, |
| "max": 0.9654105305671692, |
| "mean": 0.0010205680737271905, |
| "std": 0.04070195555686951, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.1.4.ff.2.bias": { |
| "min": -0.14518415927886963, |
| "max": 0.07515987008810043, |
| "mean": -0.009094657376408577, |
| "std": 0.025729060173034668, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.1.g": { |
| "min": 0.2397412657737732, |
| "max": 0.7171911001205444, |
| "mean": 0.447447270154953, |
| "std": 0.05987730622291565, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_q.weight": { |
| "min": -0.2741525173187256, |
| "max": 0.29877936840057373, |
| "mean": 8.61497210280504e-06, |
| "std": 0.03547372668981552, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_q.bias": { |
| "min": -0.11957156658172607, |
| "max": 0.11899449676275253, |
| "mean": 0.0007509939605370164, |
| "std": 0.0276488047093153, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_k.weight": { |
| "min": -0.2823837697505951, |
| "max": 0.28084659576416016, |
| "mean": -7.657262904103845e-05, |
| "std": 0.035102009773254395, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_k.bias": { |
| "min": -2.5205748081207275, |
| "max": 2.532623291015625, |
| "mean": 0.02687813714146614, |
| "std": 0.5879213809967041, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_v.weight": { |
| "min": -0.2220122367143631, |
| "max": 0.27260157465934753, |
| "mean": 2.5499884941382334e-06, |
| "std": 0.030731454491615295, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_v.bias": { |
| "min": -0.03331878036260605, |
| "max": 0.031287048012018204, |
| "mean": 0.00011721440387191251, |
| "std": 0.01239620428532362, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_out.0.weight": { |
| "min": -0.2359972894191742, |
| "max": 0.23261798918247223, |
| "mean": 5.7136268878821284e-05, |
| "std": 0.025697365403175354, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_out.0.bias": { |
| "min": -0.13661594688892365, |
| "max": 0.12854568660259247, |
| "mean": -0.005501019302755594, |
| "std": 0.03999658301472664, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.3.g": { |
| "min": 0.3546392619609833, |
| "max": 1.180222511291504, |
| "mean": 0.7107274532318115, |
| "std": 0.10418680310249329, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.4.ff.0.0.weight": { |
| "min": -0.6183957457542419, |
| "max": 0.5562719106674194, |
| "mean": 0.001160319778136909, |
| "std": 0.04611416533589363, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.4.ff.0.0.bias": { |
| "min": -0.19019058346748352, |
| "max": 0.024931631982326508, |
| "mean": -0.034878939390182495, |
| "std": 0.028703488409519196, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.2.4.ff.2.weight": { |
| "min": -1.1339737176895142, |
| "max": 0.9729978442192078, |
| "mean": 0.00035909697180613875, |
| "std": 0.04234269633889198, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.2.4.ff.2.bias": { |
| "min": -0.6004759073257446, |
| "max": 0.06302264332771301, |
| "mean": -0.004885237663984299, |
| "std": 0.028683220967650414, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.1.g": { |
| "min": 0.37538695335388184, |
| "max": 0.9469302892684937, |
| "mean": 0.5929263234138489, |
| "std": 0.0680219903588295, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_q.weight": { |
| "min": -0.3926527798175812, |
| "max": 0.37037163972854614, |
| "mean": 7.004380313446745e-05, |
| "std": 0.03718654066324234, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_q.bias": { |
| "min": -0.11952866613864899, |
| "max": 0.1371433585882187, |
| "mean": 0.0009209888521581888, |
| "std": 0.029237791895866394, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_k.weight": { |
| "min": -0.6214983463287354, |
| "max": 0.5109242796897888, |
| "mean": 1.5226184586936142e-05, |
| "std": 0.036439333111047745, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_k.bias": { |
| "min": -8.222587585449219, |
| "max": 8.827320098876953, |
| "mean": -0.10952811688184738, |
| "std": 1.7043956518173218, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_v.weight": { |
| "min": -0.2775035798549652, |
| "max": 0.24042560160160065, |
| "mean": 5.222904292168096e-05, |
| "std": 0.03261308744549751, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_v.bias": { |
| "min": -0.05175856128334999, |
| "max": 0.03964223712682724, |
| "mean": 9.375870286021382e-05, |
| "std": 0.012972756288945675, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_out.0.weight": { |
| "min": -0.23131398856639862, |
| "max": 0.2357378751039505, |
| "mean": -2.203516305598896e-05, |
| "std": 0.02938969060778618, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_out.0.bias": { |
| "min": -0.2051505148410797, |
| "max": 0.10573741793632507, |
| "mean": -0.0040251207537949085, |
| "std": 0.032664697617292404, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.3.g": { |
| "min": 0.3397069573402405, |
| "max": 1.01918625831604, |
| "mean": 0.7008247375488281, |
| "std": 0.0969780907034874, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.4.ff.0.0.weight": { |
| "min": -0.5670483708381653, |
| "max": 0.8365305662155151, |
| "mean": 0.00041504879482090473, |
| "std": 0.042294830083847046, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.4.ff.0.0.bias": { |
| "min": -0.2130415141582489, |
| "max": 0.029987983405590057, |
| "mean": -0.03220636397600174, |
| "std": 0.02657567895948887, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.3.4.ff.2.weight": { |
| "min": -0.7582250833511353, |
| "max": 0.7219672799110413, |
| "mean": -1.576655267854221e-05, |
| "std": 0.03683546185493469, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.3.4.ff.2.bias": { |
| "min": -0.26458415389060974, |
| "max": 0.10674209892749786, |
| "mean": -0.003017352893948555, |
| "std": 0.02890385128557682, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.1.g": { |
| "min": 0.28402721881866455, |
| "max": 0.6998150944709778, |
| "mean": 0.49963071942329407, |
| "std": 0.04700654000043869, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_q.weight": { |
| "min": -0.27952155470848083, |
| "max": 0.23467987775802612, |
| "mean": -0.00011085892765549943, |
| "std": 0.038757603615522385, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_q.bias": { |
| "min": -0.15429016947746277, |
| "max": 0.12700684368610382, |
| "mean": -0.002232399070635438, |
| "std": 0.033386100083589554, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_k.weight": { |
| "min": -0.41612547636032104, |
| "max": 0.6611561179161072, |
| "mean": -1.8461763829691336e-05, |
| "std": 0.03909667953848839, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_k.bias": { |
| "min": -4.2564592361450195, |
| "max": 4.743135929107666, |
| "mean": -0.020397484302520752, |
| "std": 1.0097577571868896, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_v.weight": { |
| "min": -0.2459408938884735, |
| "max": 0.2083207219839096, |
| "mean": 4.4360454921843484e-05, |
| "std": 0.03396270051598549, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_v.bias": { |
| "min": -0.03462521731853485, |
| "max": 0.045053571462631226, |
| "mean": -2.1719199139624834e-05, |
| "std": 0.012641450390219688, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_out.0.weight": { |
| "min": -0.20202401280403137, |
| "max": 0.20743757486343384, |
| "mean": -2.9260227165650576e-05, |
| "std": 0.031020890921354294, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_out.0.bias": { |
| "min": -0.20072369277477264, |
| "max": 0.11369979381561279, |
| "mean": -0.002900277031585574, |
| "std": 0.03456325829029083, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.3.g": { |
| "min": 0.3669256269931793, |
| "max": 1.064845323562622, |
| "mean": 0.6706051230430603, |
| "std": 0.06665434688329697, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.4.ff.0.0.weight": { |
| "min": -0.4000990390777588, |
| "max": 0.5037862062454224, |
| "mean": -3.870507498504594e-05, |
| "std": 0.04113040864467621, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.4.ff.0.0.bias": { |
| "min": -0.12917247414588928, |
| "max": 0.026963019743561745, |
| "mean": -0.030557911843061447, |
| "std": 0.021937619894742966, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.4.4.ff.2.weight": { |
| "min": -0.4511619806289673, |
| "max": 0.4353387653827667, |
| "mean": 7.546078268205747e-05, |
| "std": 0.03489077836275101, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.4.4.ff.2.bias": { |
| "min": -0.26869964599609375, |
| "max": 0.07339140772819519, |
| "mean": -0.0010946399997919798, |
| "std": 0.023160062730312347, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.1.g": { |
| "min": 0.2875079810619354, |
| "max": 0.6899884343147278, |
| "mean": 0.5247476696968079, |
| "std": 0.04796215519309044, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_q.weight": { |
| "min": -0.22366264462471008, |
| "max": 0.2245350182056427, |
| "mean": 1.589955536474008e-05, |
| "std": 0.038949232548475266, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_q.bias": { |
| "min": -0.13696447014808655, |
| "max": 0.10982562601566315, |
| "mean": 0.0002473338390700519, |
| "std": 0.029272515326738358, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_k.weight": { |
| "min": -0.37620943784713745, |
| "max": 0.4390593469142914, |
| "mean": -9.372964996146038e-06, |
| "std": 0.039287250488996506, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_k.bias": { |
| "min": -3.8626632690429688, |
| "max": 5.021180629730225, |
| "mean": 0.009756950661540031, |
| "std": 0.8471038937568665, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_v.weight": { |
| "min": -0.2235114425420761, |
| "max": 0.2212144434452057, |
| "mean": -3.48434696206823e-07, |
| "std": 0.03441031649708748, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_v.bias": { |
| "min": -0.04396739602088928, |
| "max": 0.03608814626932144, |
| "mean": -0.00025925497175194323, |
| "std": 0.012080671265721321, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_out.0.weight": { |
| "min": -0.2138509899377823, |
| "max": 0.18955761194229126, |
| "mean": -1.6947185940807685e-05, |
| "std": 0.03153672814369202, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_out.0.bias": { |
| "min": -0.18172238767147064, |
| "max": 0.12127514183521271, |
| "mean": -0.0023971181362867355, |
| "std": 0.04130159318447113, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.3.g": { |
| "min": 0.42289772629737854, |
| "max": 0.9483197927474976, |
| "mean": 0.6628358364105225, |
| "std": 0.05716627463698387, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.4.ff.0.0.weight": { |
| "min": -0.37180185317993164, |
| "max": 0.47763875126838684, |
| "mean": -8.19972192402929e-05, |
| "std": 0.040889617055654526, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.4.ff.0.0.bias": { |
| "min": -0.209408238530159, |
| "max": 0.027359697967767715, |
| "mean": -0.0302574522793293, |
| "std": 0.021417709067463875, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.5.4.ff.2.weight": { |
| "min": -0.3422113060951233, |
| "max": 0.7372819185256958, |
| "mean": 8.242652984336019e-05, |
| "std": 0.034766409546136856, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.5.4.ff.2.bias": { |
| "min": -0.2412174493074417, |
| "max": 0.05068235844373703, |
| "mean": -0.0011914315400645137, |
| "std": 0.020485328510403633, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.1.g": { |
| "min": 0.30587607622146606, |
| "max": 0.6579968333244324, |
| "mean": 0.5253006219863892, |
| "std": 0.0464390330016613, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_q.weight": { |
| "min": -0.30547264218330383, |
| "max": 0.21810249984264374, |
| "mean": 6.997188756940886e-05, |
| "std": 0.039497073739767075, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_q.bias": { |
| "min": -0.14979463815689087, |
| "max": 0.13157697021961212, |
| "mean": 0.00032728962833061814, |
| "std": 0.030529892072081566, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_k.weight": { |
| "min": -0.25832319259643555, |
| "max": 0.20298458635807037, |
| "mean": 3.122862472082488e-05, |
| "std": 0.039488088339567184, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_k.bias": { |
| "min": -2.3464906215667725, |
| "max": 2.3862874507904053, |
| "mean": -0.0262940414249897, |
| "std": 0.45072564482688904, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_v.weight": { |
| "min": -0.18955294787883759, |
| "max": 0.211393803358078, |
| "mean": 3.7051289837108925e-05, |
| "std": 0.03479388728737831, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_v.bias": { |
| "min": -0.03182046860456467, |
| "max": 0.03580700233578682, |
| "mean": -0.0001974685292225331, |
| "std": 0.012292041443288326, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_out.0.weight": { |
| "min": -0.18930117785930634, |
| "max": 0.17112135887145996, |
| "mean": -6.836307875346392e-05, |
| "std": 0.03217054903507233, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_out.0.bias": { |
| "min": -0.14002393186092377, |
| "max": 0.1378386914730072, |
| "mean": -0.0025169737637043, |
| "std": 0.05131695047020912, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.3.g": { |
| "min": 0.4669981598854065, |
| "max": 0.9623145461082458, |
| "mean": 0.669116199016571, |
| "std": 0.053326528519392014, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.4.ff.0.0.weight": { |
| "min": -0.324962854385376, |
| "max": 0.3098026514053345, |
| "mean": -9.876448530121706e-07, |
| "std": 0.0409456230700016, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.4.ff.0.0.bias": { |
| "min": -0.12541106343269348, |
| "max": 0.025640888139605522, |
| "mean": -0.030711790546774864, |
| "std": 0.019869431853294373, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.6.4.ff.2.weight": { |
| "min": -0.44164079427719116, |
| "max": 0.4474758803844452, |
| "mean": 9.588097600499168e-05, |
| "std": 0.03511932119727135, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.6.4.ff.2.bias": { |
| "min": -0.2256106585264206, |
| "max": 0.052044421434402466, |
| "mean": -0.0011865352280437946, |
| "std": 0.018494844436645508, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.1.g": { |
| "min": 0.33912554383277893, |
| "max": 0.7450283169746399, |
| "mean": 0.558834433555603, |
| "std": 0.041677191853523254, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_q.weight": { |
| "min": -0.27382639050483704, |
| "max": 0.27962929010391235, |
| "mean": 2.034128556260839e-05, |
| "std": 0.0410577729344368, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_q.bias": { |
| "min": -0.13741885125637054, |
| "max": 0.14038565754890442, |
| "mean": 0.0004929338465444744, |
| "std": 0.02668425627052784, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_k.weight": { |
| "min": -0.49240002036094666, |
| "max": 0.35733160376548767, |
| "mean": 8.901266846805811e-05, |
| "std": 0.04069547727704048, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_k.bias": { |
| "min": -2.3072962760925293, |
| "max": 1.7529240846633911, |
| "mean": -0.021147169172763824, |
| "std": 0.5008938312530518, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_v.weight": { |
| "min": -0.21894769370555878, |
| "max": 0.19816064834594727, |
| "mean": -4.0161168726626784e-05, |
| "std": 0.03423343971371651, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_v.bias": { |
| "min": -0.04133184999227524, |
| "max": 0.03901350870728493, |
| "mean": -0.00013613827468361706, |
| "std": 0.012887353077530861, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_out.0.weight": { |
| "min": -0.17847225069999695, |
| "max": 0.1837986409664154, |
| "mean": 4.7998124500736594e-05, |
| "std": 0.031556759029626846, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_out.0.bias": { |
| "min": -0.180707648396492, |
| "max": 0.18469232320785522, |
| "mean": -0.0022159582003951073, |
| "std": 0.05485893413424492, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.3.g": { |
| "min": 0.4741988480091095, |
| "max": 1.0330065488815308, |
| "mean": 0.6454803347587585, |
| "std": 0.05105094239115715, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.4.ff.0.0.weight": { |
| "min": -0.2723560929298401, |
| "max": 0.3096334636211395, |
| "mean": 0.00011242127220612019, |
| "std": 0.040681805461645126, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.4.ff.0.0.bias": { |
| "min": -0.10577475279569626, |
| "max": 0.026752889156341553, |
| "mean": -0.029537281021475792, |
| "std": 0.01797310821712017, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.7.4.ff.2.weight": { |
| "min": -0.3403210937976837, |
| "max": 0.33086487650871277, |
| "mean": 5.282106576487422e-05, |
| "std": 0.034412968903779984, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.7.4.ff.2.bias": { |
| "min": -0.18259213864803314, |
| "max": 0.04268056899309158, |
| "mean": -0.0010635886574164033, |
| "std": 0.017230909317731857, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.1.g": { |
| "min": 0.32514795660972595, |
| "max": 0.6914159655570984, |
| "mean": 0.5113943219184875, |
| "std": 0.03739636018872261, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_q.weight": { |
| "min": -0.2348308116197586, |
| "max": 0.22631730139255524, |
| "mean": -3.621048017521389e-05, |
| "std": 0.0391756109893322, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_q.bias": { |
| "min": -0.11563856154680252, |
| "max": 0.13239268958568573, |
| "mean": 0.00015192970749922097, |
| "std": 0.029222996905446053, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_k.weight": { |
| "min": -0.35409149527549744, |
| "max": 0.2863385081291199, |
| "mean": 6.707018656015862e-06, |
| "std": 0.03924466669559479, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_k.bias": { |
| "min": -4.1504130363464355, |
| "max": 3.5592541694641113, |
| "mean": -0.011647488921880722, |
| "std": 0.6845048069953918, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_v.weight": { |
| "min": -0.21134592592716217, |
| "max": 0.21000461280345917, |
| "mean": 3.47579552908428e-05, |
| "std": 0.03448459133505821, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_v.bias": { |
| "min": -0.036000702530145645, |
| "max": 0.04817511513829231, |
| "mean": 0.0007898924523033202, |
| "std": 0.012873834930360317, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_out.0.weight": { |
| "min": -0.2113579511642456, |
| "max": 0.19389942288398743, |
| "mean": -1.0706971806939691e-06, |
| "std": 0.0316954106092453, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_out.0.bias": { |
| "min": -0.1872350424528122, |
| "max": 0.1779821664094925, |
| "mean": -0.002844380447641015, |
| "std": 0.058656178414821625, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.3.g": { |
| "min": 0.4746103286743164, |
| "max": 1.0489076375961304, |
| "mean": 0.6516687870025635, |
| "std": 0.05057830363512039, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.4.ff.0.0.weight": { |
| "min": -0.24878337979316711, |
| "max": 0.3296516239643097, |
| "mean": 0.00018073963292408735, |
| "std": 0.04057016968727112, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.4.ff.0.0.bias": { |
| "min": -0.12595486640930176, |
| "max": 0.02493392489850521, |
| "mean": -0.030515050515532494, |
| "std": 0.01764742285013199, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.8.4.ff.2.weight": { |
| "min": -0.4225960969924927, |
| "max": 0.4839133322238922, |
| "mean": 1.030291969073005e-06, |
| "std": 0.035397231578826904, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.8.4.ff.2.bias": { |
| "min": -0.1520412415266037, |
| "max": 0.043631311506032944, |
| "mean": 4.209935286780819e-05, |
| "std": 0.014901721850037575, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.1.g": { |
| "min": 0.31559863686561584, |
| "max": 0.686523973941803, |
| "mean": 0.553006649017334, |
| "std": 0.040904585272073746, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_q.weight": { |
| "min": -0.20726847648620605, |
| "max": 0.22089692950248718, |
| "mean": 3.191033465554938e-05, |
| "std": 0.03829946741461754, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_q.bias": { |
| "min": -0.13833385705947876, |
| "max": 0.11308565735816956, |
| "mean": 2.6655456167645752e-05, |
| "std": 0.025857754051685333, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_k.weight": { |
| "min": -0.4046614170074463, |
| "max": 0.37271684408187866, |
| "mean": 2.56894181802636e-05, |
| "std": 0.0381796769797802, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_k.bias": { |
| "min": -3.7873597145080566, |
| "max": 2.881237506866455, |
| "mean": 0.0011979229748249054, |
| "std": 0.5181517601013184, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_v.weight": { |
| "min": -0.20434829592704773, |
| "max": 0.19823738932609558, |
| "mean": 2.9684193577850237e-05, |
| "std": 0.03429735451936722, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_v.bias": { |
| "min": -0.050780050456523895, |
| "max": 0.040064383298158646, |
| "mean": -0.00042128204950131476, |
| "std": 0.01341989729553461, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_out.0.weight": { |
| "min": -0.1970871537923813, |
| "max": 0.20266157388687134, |
| "mean": -1.2426969988155179e-05, |
| "std": 0.031805407255887985, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_out.0.bias": { |
| "min": -0.1938190907239914, |
| "max": 0.19595396518707275, |
| "mean": -0.0029727788642048836, |
| "std": 0.06256895512342453, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.3.g": { |
| "min": 0.34895268082618713, |
| "max": 1.0913121700286865, |
| "mean": 0.6674203276634216, |
| "std": 0.056132975965738297, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.4.ff.0.0.weight": { |
| "min": -0.22646191716194153, |
| "max": 0.25265538692474365, |
| "mean": 0.0003584488877095282, |
| "std": 0.040759678930044174, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.4.ff.0.0.bias": { |
| "min": -0.09146817028522491, |
| "max": 0.04364684969186783, |
| "mean": -0.030097611248493195, |
| "std": 0.017646051943302155, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.9.4.ff.2.weight": { |
| "min": -0.35469669103622437, |
| "max": 0.30548718571662903, |
| "mean": -4.469315172173083e-05, |
| "std": 0.03712276369333267, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.9.4.ff.2.bias": { |
| "min": -0.1623995155096054, |
| "max": 0.06374479830265045, |
| "mean": -8.042766421567649e-05, |
| "std": 0.01944616436958313, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.1.g": { |
| "min": 0.34871119260787964, |
| "max": 0.7271286249160767, |
| "mean": 0.5425379872322083, |
| "std": 0.03944627195596695, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_q.weight": { |
| "min": -0.2201070785522461, |
| "max": 0.2242431491613388, |
| "mean": -1.1387233826098964e-05, |
| "std": 0.03923100233078003, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_q.bias": { |
| "min": -0.11890711635351181, |
| "max": 0.1713198721408844, |
| "mean": 0.0002833662729244679, |
| "std": 0.025163158774375916, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_k.weight": { |
| "min": -0.24783332645893097, |
| "max": 0.30217495560646057, |
| "mean": -3.6862991692032665e-05, |
| "std": 0.038930460810661316, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_k.bias": { |
| "min": -3.520315170288086, |
| "max": 3.7306737899780273, |
| "mean": 0.015852145850658417, |
| "std": 0.7850235104560852, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_v.weight": { |
| "min": -0.21981129050254822, |
| "max": 0.23816066980361938, |
| "mean": -1.3107633094477933e-05, |
| "std": 0.036303482949733734, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_v.bias": { |
| "min": -0.04740596562623978, |
| "max": 0.05159047618508339, |
| "mean": 0.000481397844851017, |
| "std": 0.013528619892895222, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_out.0.weight": { |
| "min": -0.2151964157819748, |
| "max": 0.21832282841205597, |
| "mean": 5.642603355227038e-05, |
| "std": 0.03361587971448898, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_out.0.bias": { |
| "min": -0.2122570425271988, |
| "max": 0.23222938179969788, |
| "mean": -0.005098365712910891, |
| "std": 0.06190234050154686, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.3.g": { |
| "min": 0.36193394660949707, |
| "max": 1.1087924242019653, |
| "mean": 0.6995820999145508, |
| "std": 0.05450976639986038, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.4.ff.0.0.weight": { |
| "min": -0.23606520891189575, |
| "max": 0.24584993720054626, |
| "mean": 0.00046336432569660246, |
| "std": 0.041269052773714066, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.4.ff.0.0.bias": { |
| "min": -0.09852692484855652, |
| "max": 0.06841564178466797, |
| "mean": -0.0314490832388401, |
| "std": 0.01816665753722191, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.10.4.ff.2.weight": { |
| "min": -0.30322569608688354, |
| "max": 0.3532632291316986, |
| "mean": -8.268543751910329e-05, |
| "std": 0.04027474299073219, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.10.4.ff.2.bias": { |
| "min": -0.15293245017528534, |
| "max": 0.1503082662820816, |
| "mean": 0.0002610071678645909, |
| "std": 0.023066464811563492, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.1.g": { |
| "min": 0.9987825155258179, |
| "max": 1.011022686958313, |
| "mean": 1.0016167163848877, |
| "std": 0.004121079575270414, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_q.weight": { |
| "min": -0.03126484900712967, |
| "max": 0.03125990182161331, |
| "mean": -1.9292880097054876e-05, |
| "std": 0.0180410947650671, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_q.bias": { |
| "min": -0.031222796067595482, |
| "max": 0.030990226194262505, |
| "mean": -0.001084181945770979, |
| "std": 0.017950553447008133, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_k.weight": { |
| "min": -0.03126567602157593, |
| "max": 0.031269483268260956, |
| "mean": 3.546300376910949e-06, |
| "std": 0.018041500821709633, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_k.bias": { |
| "min": -0.03114791214466095, |
| "max": 0.03117155283689499, |
| "mean": 0.0003340535331517458, |
| "std": 0.018062960356473923, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_v.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_v.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_out.0.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_out.0.bias": { |
| "min": -0.0005971609498374164, |
| "max": 0.0006745979771949351, |
| "mean": 4.374485797598027e-06, |
| "std": 0.0001794710842659697, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.3.g": { |
| "min": 0.9978547096252441, |
| "max": 1.0122681856155396, |
| "mean": 1.0009429454803467, |
| "std": 0.0034361695870757103, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.4.ff.0.0.weight": { |
| "min": -0.03340178728103638, |
| "max": 0.033508703112602234, |
| "mean": -6.2318931668414734e-06, |
| "std": 0.01804722100496292, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.4.ff.0.0.bias": { |
| "min": -0.03293577954173088, |
| "max": 0.03327555954456329, |
| "mean": -0.00015042479208204895, |
| "std": 0.017954858019948006, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.11.4.ff.2.weight": { |
| "min": -0.00139134272467345, |
| "max": 0.0014818700728937984, |
| "mean": 1.7994759673456429e-06, |
| "std": 0.0002722168283071369, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.11.4.ff.2.bias": { |
| "min": -0.0005520335980691016, |
| "max": 0.0007331477245315909, |
| "mean": 7.149023986130487e-06, |
| "std": 0.0001629332109587267, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.1.g": { |
| "min": 0.3833008110523224, |
| "max": 0.7242851853370667, |
| "mean": 0.5809347033500671, |
| "std": 0.039344511926174164, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_q.weight": { |
| "min": -0.2398604303598404, |
| "max": 0.19741135835647583, |
| "mean": 2.61208933807211e-05, |
| "std": 0.037466324865818024, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_q.bias": { |
| "min": -0.1193285658955574, |
| "max": 0.16746975481510162, |
| "mean": 0.0009843853767961264, |
| "std": 0.027611562982201576, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_k.weight": { |
| "min": -0.24755319952964783, |
| "max": 0.5020493268966675, |
| "mean": -5.023340054322034e-05, |
| "std": 0.037623729556798935, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_k.bias": { |
| "min": -3.959080934524536, |
| "max": 3.785468339920044, |
| "mean": -0.003608043771237135, |
| "std": 0.6828969120979309, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_v.weight": { |
| "min": -0.2280745655298233, |
| "max": 0.25265711545944214, |
| "mean": -1.1726486263796687e-05, |
| "std": 0.037434931844472885, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_v.bias": { |
| "min": -0.07189386337995529, |
| "max": 0.08095899969339371, |
| "mean": -0.0005116118700243533, |
| "std": 0.015669817104935646, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_out.0.weight": { |
| "min": -0.22852574288845062, |
| "max": 0.2589001953601837, |
| "mean": -2.8789245334337465e-05, |
| "std": 0.035421740263700485, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_out.0.bias": { |
| "min": -0.20139215886592865, |
| "max": 0.21579185128211975, |
| "mean": -0.005532890558242798, |
| "std": 0.06838470697402954, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.3.g": { |
| "min": 0.40495166182518005, |
| "max": 1.1977423429489136, |
| "mean": 0.7382426857948303, |
| "std": 0.05618907883763313, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.4.ff.0.0.weight": { |
| "min": -0.22189897298812866, |
| "max": 0.24627524614334106, |
| "mean": 0.0005210949457250535, |
| "std": 0.0413360670208931, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.4.ff.0.0.bias": { |
| "min": -0.10370241105556488, |
| "max": 0.024191563948988914, |
| "mean": -0.03269057348370552, |
| "std": 0.018939778208732605, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.12.4.ff.2.weight": { |
| "min": -0.45156151056289673, |
| "max": 0.42444875836372375, |
| "mean": -0.00043494877172634006, |
| "std": 0.046896398067474365, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.12.4.ff.2.bias": { |
| "min": -0.25261297821998596, |
| "max": 0.47218039631843567, |
| "mean": 0.0032064011320471764, |
| "std": 0.0446014478802681, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.0.weight": { |
| "min": -0.3172667622566223, |
| "max": 0.33354270458221436, |
| "mean": -2.519888585084118e-05, |
| "std": 0.021287826821208, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.13.1.g": { |
| "min": 0.3245178461074829, |
| "max": 0.6904165148735046, |
| "mean": 0.5711733102798462, |
| "std": 0.04502657428383827, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_q.weight": { |
| "min": -0.16521431505680084, |
| "max": 0.1752052754163742, |
| "mean": -4.8754882300272584e-05, |
| "std": 0.033182479441165924, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_q.bias": { |
| "min": -0.18773159384727478, |
| "max": 0.14384877681732178, |
| "mean": 3.672283492051065e-05, |
| "std": 0.02975340373814106, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_k.weight": { |
| "min": -0.38243839144706726, |
| "max": 0.24725475907325745, |
| "mean": -9.841056453296915e-06, |
| "std": 0.03276367485523224, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_k.bias": { |
| "min": -3.6714818477630615, |
| "max": 3.3041720390319824, |
| "mean": -0.014343326911330223, |
| "std": 0.9862688779830933, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_v.weight": { |
| "min": -0.23551921546459198, |
| "max": 0.24833251535892487, |
| "mean": -1.8171514966525137e-05, |
| "std": 0.041698355227708817, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_v.bias": { |
| "min": -0.07285058498382568, |
| "max": 0.1551419198513031, |
| "mean": 0.0006671739974990487, |
| "std": 0.02518472634255886, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_out.0.weight": { |
| "min": -0.26684004068374634, |
| "max": 0.2486322820186615, |
| "mean": -1.5217347026919015e-05, |
| "std": 0.040139369666576385, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_out.0.bias": { |
| "min": -0.19041800498962402, |
| "max": 0.19548022747039795, |
| "mean": -0.001239710720255971, |
| "std": 0.06670945882797241, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.3.g": { |
| "min": 0.3291718661785126, |
| "max": 1.0067707300186157, |
| "mean": 0.7195272445678711, |
| "std": 0.053192976862192154, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.4.ff.0.0.weight": { |
| "min": -0.23261909186840057, |
| "max": 0.24629585444927216, |
| "mean": 0.0001829106913646683, |
| "std": 0.04090041667222977, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.4.ff.0.0.bias": { |
| "min": -0.11500220745801926, |
| "max": 0.01902289316058159, |
| "mean": -0.042502518743276596, |
| "std": 0.01891784742474556, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.13.4.ff.2.weight": { |
| "min": -0.3915143311023712, |
| "max": 0.4093465507030487, |
| "mean": -2.1941355953458697e-05, |
| "std": 0.04853365942835808, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.13.4.ff.2.bias": { |
| "min": -0.6959867477416992, |
| "max": 0.41447487473487854, |
| "mean": 0.0008487096056342125, |
| "std": 0.06040440872311592, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.0.weight": { |
| "min": -0.0013131406158208847, |
| "max": 1.000697135925293, |
| "mean": 0.00048820037045516074, |
| "std": 0.022089475765824318, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.14.1.g": { |
| "min": 0.9987786412239075, |
| "max": 1.0108789205551147, |
| "mean": 1.0015242099761963, |
| "std": 0.003978394437581301, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_q.weight": { |
| "min": -0.03125932812690735, |
| "max": 0.031260255724191666, |
| "mean": -2.101710924762301e-05, |
| "std": 0.018032435327768326, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_q.bias": { |
| "min": -0.031216789036989212, |
| "max": 0.0312344953417778, |
| "mean": -0.0006770212785340846, |
| "std": 0.017827019095420837, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_k.weight": { |
| "min": -0.03126361221075058, |
| "max": 0.03126442804932594, |
| "mean": -8.826009434415027e-06, |
| "std": 0.018031461164355278, |
| "sparsity": 9.5367431640625e-07, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_k.bias": { |
| "min": -0.031229715794324875, |
| "max": 0.031247057020664215, |
| "mean": -0.0007297845440916717, |
| "std": 0.01794196106493473, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_v.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_v.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_out.0.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_out.0.bias": { |
| "min": -0.0004946183180436492, |
| "max": 0.00040109679684974253, |
| "mean": -3.799516889557708e-06, |
| "std": 0.00014799994823988527, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.3.g": { |
| "min": 0.9972319006919861, |
| "max": 1.0116411447525024, |
| "mean": 1.0005743503570557, |
| "std": 0.0034592244774103165, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.4.ff.0.0.weight": { |
| "min": -0.03315997123718262, |
| "max": 0.032729245722293854, |
| "mean": -2.570214064689935e-06, |
| "std": 0.018028665333986282, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.4.ff.0.0.bias": { |
| "min": -0.03235220909118652, |
| "max": 0.03128715977072716, |
| "mean": -0.00045961630530655384, |
| "std": 0.018038177862763405, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.14.4.ff.2.weight": { |
| "min": -0.0016143623506650329, |
| "max": 0.001427292707376182, |
| "mean": -1.0927603852906032e-06, |
| "std": 0.00026996160158887506, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.14.4.ff.2.bias": { |
| "min": -0.00045358933857642114, |
| "max": 0.00036658692988567054, |
| "mean": -3.5024249882553704e-06, |
| "std": 0.0001358992449240759, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.0.weight": { |
| "min": -0.23466038703918457, |
| "max": 0.2728899419307709, |
| "mean": 6.680695605609799e-06, |
| "std": 0.018810251727700233, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.15.1.g": { |
| "min": 0.3215275704860687, |
| "max": 0.6988651752471924, |
| "mean": 0.5818086862564087, |
| "std": 0.04628920555114746, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_q.weight": { |
| "min": -0.18249788880348206, |
| "max": 0.1985490918159485, |
| "mean": -1.1619875294854864e-05, |
| "std": 0.0331842340528965, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_q.bias": { |
| "min": -0.16120854020118713, |
| "max": 0.12988702952861786, |
| "mean": -0.0010746754705905914, |
| "std": 0.034188635647296906, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_k.weight": { |
| "min": -0.3333602249622345, |
| "max": 0.31210559606552124, |
| "mean": -1.0246277270198334e-05, |
| "std": 0.03223477676510811, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_k.bias": { |
| "min": -7.836638927459717, |
| "max": 8.800041198730469, |
| "mean": 0.09370891749858856, |
| "std": 1.6243042945861816, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_v.weight": { |
| "min": -0.23471659421920776, |
| "max": 0.24255934357643127, |
| "mean": 4.1660623537609354e-05, |
| "std": 0.04085636883974075, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_v.bias": { |
| "min": -0.07628928869962692, |
| "max": 0.06604960560798645, |
| "mean": 0.0004821753827854991, |
| "std": 0.01943657174706459, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_out.0.weight": { |
| "min": -0.24707570672035217, |
| "max": 0.2350512593984604, |
| "mean": -3.330966137582436e-06, |
| "std": 0.03943110629916191, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_out.0.bias": { |
| "min": -0.16370660066604614, |
| "max": 0.16159522533416748, |
| "mean": 0.0016214787028729916, |
| "std": 0.06530040502548218, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.3.g": { |
| "min": 0.556998610496521, |
| "max": 0.9505069851875305, |
| "mean": 0.7131754159927368, |
| "std": 0.04095931351184845, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.4.ff.0.0.weight": { |
| "min": -0.22923102974891663, |
| "max": 0.25587573647499084, |
| "mean": -4.568279109662399e-05, |
| "std": 0.040574610233306885, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.4.ff.0.0.bias": { |
| "min": -0.13533200323581696, |
| "max": 0.022116411477327347, |
| "mean": -0.041375163942575455, |
| "std": 0.018435189500451088, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.15.4.ff.2.weight": { |
| "min": -0.42361417412757874, |
| "max": 0.39315521717071533, |
| "mean": -4.420744517119601e-06, |
| "std": 0.047783900052309036, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.15.4.ff.2.bias": { |
| "min": -0.6098850965499878, |
| "max": 0.6541793942451477, |
| "mean": 0.001589474268257618, |
| "std": 0.056938592344522476, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.0.weight": { |
| "min": -0.2520405650138855, |
| "max": 0.3211195170879364, |
| "mean": -6.1747768995701335e-06, |
| "std": 0.019613485783338547, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.16.1.g": { |
| "min": 0.35947033762931824, |
| "max": 0.6870434284210205, |
| "mean": 0.5708057880401611, |
| "std": 0.04320356622338295, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_q.weight": { |
| "min": -0.22096499800682068, |
| "max": 0.1776382476091385, |
| "mean": -3.44411309924908e-05, |
| "std": 0.034298770129680634, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_q.bias": { |
| "min": -0.16386361420154572, |
| "max": 0.23379802703857422, |
| "mean": 0.0003647217818070203, |
| "std": 0.032876912504434586, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_k.weight": { |
| "min": -0.2648993730545044, |
| "max": 0.2407570779323578, |
| "mean": -5.283746577333659e-05, |
| "std": 0.03389748930931091, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_k.bias": { |
| "min": -4.875531196594238, |
| "max": 5.112789630889893, |
| "mean": 0.04403312876820564, |
| "std": 1.231998324394226, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_v.weight": { |
| "min": -0.24717208743095398, |
| "max": 0.2512055039405823, |
| "mean": 7.22141849109903e-05, |
| "std": 0.043986547738313675, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_v.bias": { |
| "min": -0.06276638805866241, |
| "max": 0.054656121879816055, |
| "mean": 0.0006459522992372513, |
| "std": 0.017198164016008377, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_out.0.weight": { |
| "min": -0.2877632677555084, |
| "max": 0.2726806104183197, |
| "mean": -5.0024795200442895e-05, |
| "std": 0.042984671890735626, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_out.0.bias": { |
| "min": -0.16170376539230347, |
| "max": 0.1710934340953827, |
| "mean": -0.0028864555060863495, |
| "std": 0.05931045860052109, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.3.g": { |
| "min": 0.51991868019104, |
| "max": 0.9398472905158997, |
| "mean": 0.7137647867202759, |
| "std": 0.03922666609287262, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.4.ff.0.0.weight": { |
| "min": -0.23831400275230408, |
| "max": 0.2492961287498474, |
| "mean": 0.00046471404493786395, |
| "std": 0.040453460067510605, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.4.ff.0.0.bias": { |
| "min": -0.14562036097049713, |
| "max": 0.04111756384372711, |
| "mean": -0.039718322455883026, |
| "std": 0.02059181034564972, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.16.4.ff.2.weight": { |
| "min": -0.5351076126098633, |
| "max": 0.5854408740997314, |
| "mean": 5.962188879493624e-06, |
| "std": 0.0488593615591526, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.16.4.ff.2.bias": { |
| "min": -0.5212635397911072, |
| "max": 0.4954894483089447, |
| "mean": 0.0023677186109125614, |
| "std": 0.05354826897382736, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.0.weight": { |
| "min": -0.27395325899124146, |
| "max": 0.31585943698883057, |
| "mean": 1.8985367660206975e-06, |
| "std": 0.020050065591931343, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.17.1.g": { |
| "min": 0.3660656809806824, |
| "max": 0.7167491316795349, |
| "mean": 0.593307375907898, |
| "std": 0.04627520218491554, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_q.weight": { |
| "min": -0.21157211065292358, |
| "max": 0.19981449842453003, |
| "mean": 3.063139592995867e-05, |
| "std": 0.03486718237400055, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_q.bias": { |
| "min": -0.1879485547542572, |
| "max": 0.2043510377407074, |
| "mean": 0.0009530138340778649, |
| "std": 0.031568389385938644, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_k.weight": { |
| "min": -0.29089149832725525, |
| "max": 0.341105580329895, |
| "mean": -4.692538641393185e-05, |
| "std": 0.03458765521645546, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_k.bias": { |
| "min": -3.893813371658325, |
| "max": 3.4017703533172607, |
| "mean": 0.014513500966131687, |
| "std": 0.8598799705505371, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_v.weight": { |
| "min": -0.22526344656944275, |
| "max": 0.250789076089859, |
| "mean": -3.7296154005161952e-06, |
| "std": 0.042229536920785904, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_v.bias": { |
| "min": -0.05549817904829979, |
| "max": 0.046731892973184586, |
| "mean": -2.1666113752871752e-05, |
| "std": 0.0158494021743536, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_out.0.weight": { |
| "min": -0.29372450709342957, |
| "max": 0.2908160388469696, |
| "mean": -7.59748127165949e-06, |
| "std": 0.041944604367017746, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_out.0.bias": { |
| "min": -0.12536406517028809, |
| "max": 0.2601471245288849, |
| "mean": -0.0032426435500383377, |
| "std": 0.05318090319633484, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.3.g": { |
| "min": 0.45628464221954346, |
| "max": 0.8507043719291687, |
| "mean": 0.7057910561561584, |
| "std": 0.03590774908661842, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.4.ff.0.0.weight": { |
| "min": -0.5123029351234436, |
| "max": 0.34838762879371643, |
| "mean": 0.0003429077914915979, |
| "std": 0.04019884020090103, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.4.ff.0.0.bias": { |
| "min": -0.1866319328546524, |
| "max": 0.039536003023386, |
| "mean": -0.03940858319401741, |
| "std": 0.021406862884759903, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.17.4.ff.2.weight": { |
| "min": -0.5465707778930664, |
| "max": 0.5584931969642639, |
| "mean": -7.126475975383073e-05, |
| "std": 0.050734348595142365, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.17.4.ff.2.bias": { |
| "min": -0.5138925909996033, |
| "max": 0.6670938730239868, |
| "mean": 0.0024418262764811516, |
| "std": 0.04960782080888748, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.0.weight": { |
| "min": -0.33276569843292236, |
| "max": 0.26628994941711426, |
| "mean": 3.292404471721966e-06, |
| "std": 0.01938711293041706, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.18.1.g": { |
| "min": 0.3219706416130066, |
| "max": 0.7718862295150757, |
| "mean": 0.651161789894104, |
| "std": 0.04554183781147003, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_q.weight": { |
| "min": -0.2507328987121582, |
| "max": 0.22062398493289948, |
| "mean": -2.0154016056039836e-06, |
| "std": 0.03650148585438728, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_q.bias": { |
| "min": -0.3283964991569519, |
| "max": 0.2880261540412903, |
| "mean": -0.0006875221151858568, |
| "std": 0.038663797080516815, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_k.weight": { |
| "min": -0.3113596737384796, |
| "max": 0.37169572710990906, |
| "mean": 6.504646444227546e-05, |
| "std": 0.03624209389090538, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_k.bias": { |
| "min": -4.737742900848389, |
| "max": 5.83281946182251, |
| "mean": 0.03801126033067703, |
| "std": 1.4163931608200073, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_v.weight": { |
| "min": -0.2227693796157837, |
| "max": 0.2069622278213501, |
| "mean": -7.526973786298186e-05, |
| "std": 0.042485106736421585, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_v.bias": { |
| "min": -0.07798711210489273, |
| "max": 0.05173616483807564, |
| "mean": -0.0009264935506507754, |
| "std": 0.016420088708400726, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_out.0.weight": { |
| "min": -0.3309612274169922, |
| "max": 0.3296358287334442, |
| "mean": -4.774779426952591e-06, |
| "std": 0.04279141128063202, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_out.0.bias": { |
| "min": -0.28600984811782837, |
| "max": 0.11250722408294678, |
| "mean": -0.0012054404942318797, |
| "std": 0.04702861234545708, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.3.g": { |
| "min": 0.4860813617706299, |
| "max": 0.8933811783790588, |
| "mean": 0.7376744747161865, |
| "std": 0.038892824202775955, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.4.ff.0.0.weight": { |
| "min": -0.36275342106819153, |
| "max": 0.2756327986717224, |
| "mean": 5.113358929520473e-05, |
| "std": 0.04064434394240379, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.4.ff.0.0.bias": { |
| "min": -0.2486657202243805, |
| "max": 0.046376701444387436, |
| "mean": -0.03928756341338158, |
| "std": 0.023350302129983902, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.18.4.ff.2.weight": { |
| "min": -0.6290910840034485, |
| "max": 0.5994174480438232, |
| "mean": -6.010006836731918e-05, |
| "std": 0.0531165786087513, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.18.4.ff.2.bias": { |
| "min": -0.712557315826416, |
| "max": 0.26695698499679565, |
| "mean": 0.000916715245693922, |
| "std": 0.051312319934368134, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.0.weight": { |
| "min": -0.3435560464859009, |
| "max": 0.3038403391838074, |
| "mean": 2.054806600426673e-07, |
| "std": 0.01913570426404476, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.19.1.g": { |
| "min": 0.34980928897857666, |
| "max": 0.7884078621864319, |
| "mean": 0.6389412879943848, |
| "std": 0.04949204996228218, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_q.weight": { |
| "min": -0.2064303159713745, |
| "max": 0.2077268660068512, |
| "mean": -5.987969052512199e-05, |
| "std": 0.03769605979323387, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_q.bias": { |
| "min": -0.25974684953689575, |
| "max": 0.26921483874320984, |
| "mean": -0.000399288343032822, |
| "std": 0.04469470679759979, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_k.weight": { |
| "min": -0.35545018315315247, |
| "max": 0.32378923892974854, |
| "mean": -6.928052243893035e-06, |
| "std": 0.03720466047525406, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_k.bias": { |
| "min": -5.283975601196289, |
| "max": 4.222393035888672, |
| "mean": -0.0264443326741457, |
| "std": 1.0090056657791138, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_v.weight": { |
| "min": -0.23976586759090424, |
| "max": 0.24442994594573975, |
| "mean": -2.508235047571361e-05, |
| "std": 0.04320976510643959, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_v.bias": { |
| "min": -0.06259545683860779, |
| "max": 0.0569254532456398, |
| "mean": 0.00034189436701126397, |
| "std": 0.014161717146635056, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_out.0.weight": { |
| "min": -0.4372391402721405, |
| "max": 0.37368500232696533, |
| "mean": 1.4562616343027912e-05, |
| "std": 0.044121067970991135, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_out.0.bias": { |
| "min": -0.09685619175434113, |
| "max": 0.17668433487415314, |
| "mean": -0.0006592039717361331, |
| "std": 0.035167545080184937, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.3.g": { |
| "min": 0.42172640562057495, |
| "max": 1.0772342681884766, |
| "mean": 0.7485133409500122, |
| "std": 0.04247161000967026, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.4.ff.0.0.weight": { |
| "min": -0.26711001992225647, |
| "max": 0.2980104982852936, |
| "mean": -7.953734166221693e-05, |
| "std": 0.04080444946885109, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.4.ff.0.0.bias": { |
| "min": -0.18652470409870148, |
| "max": 0.04387153312563896, |
| "mean": -0.03684595599770546, |
| "std": 0.025674043223261833, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.19.4.ff.2.weight": { |
| "min": -0.4576263427734375, |
| "max": 0.488967627286911, |
| "mean": 4.3991476559313014e-05, |
| "std": 0.05420954152941704, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.19.4.ff.2.bias": { |
| "min": -0.287752240896225, |
| "max": 0.5537111759185791, |
| "mean": -0.0008832515450194478, |
| "std": 0.0479048416018486, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.0.weight": { |
| "min": -0.29307857155799866, |
| "max": 0.32305020093917847, |
| "mean": 6.496340574813075e-06, |
| "std": 0.01996980607509613, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.20.1.g": { |
| "min": 0.29093778133392334, |
| "max": 0.7654404640197754, |
| "mean": 0.6508903503417969, |
| "std": 0.05225415527820587, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_q.weight": { |
| "min": -0.2440621256828308, |
| "max": 0.26225581765174866, |
| "mean": -5.966384833300253e-06, |
| "std": 0.03961286321282387, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_q.bias": { |
| "min": -0.268706738948822, |
| "max": 0.20074717700481415, |
| "mean": -0.0008819116046652198, |
| "std": 0.05185216665267944, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_k.weight": { |
| "min": -0.2733410894870758, |
| "max": 0.2549380958080292, |
| "mean": 4.216280103719328e-06, |
| "std": 0.03870992735028267, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_k.bias": { |
| "min": -13.020317077636719, |
| "max": 16.015220642089844, |
| "mean": 0.033375781029462814, |
| "std": 1.9953062534332275, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_v.weight": { |
| "min": -0.2079249769449234, |
| "max": 0.22674520313739777, |
| "mean": -7.217413804028183e-05, |
| "std": 0.04055381566286087, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_v.bias": { |
| "min": -0.06965012848377228, |
| "max": 0.06350152939558029, |
| "mean": 0.00015418700058944523, |
| "std": 0.014755439944565296, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_out.0.weight": { |
| "min": -0.4655463695526123, |
| "max": 0.3209993243217468, |
| "mean": 1.953401260834653e-05, |
| "std": 0.04058877378702164, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_out.0.bias": { |
| "min": -0.06434516608715057, |
| "max": 0.1157260537147522, |
| "mean": 0.001194344600662589, |
| "std": 0.02471684291958809, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.3.g": { |
| "min": 0.37466296553611755, |
| "max": 0.9391067624092102, |
| "mean": 0.7509991526603699, |
| "std": 0.04050418362021446, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.4.ff.0.0.weight": { |
| "min": -0.28077101707458496, |
| "max": 0.274548202753067, |
| "mean": -0.00016862244228832424, |
| "std": 0.04099500924348831, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.4.ff.0.0.bias": { |
| "min": -0.19967925548553467, |
| "max": 0.0508696548640728, |
| "mean": -0.03204797953367233, |
| "std": 0.025167953222990036, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.20.4.ff.2.weight": { |
| "min": -0.6607509851455688, |
| "max": 0.5379750728607178, |
| "mean": -4.8667719966033474e-05, |
| "std": 0.052846137434244156, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.20.4.ff.2.bias": { |
| "min": -0.1939390003681183, |
| "max": 0.584657609462738, |
| "mean": -0.0005122774746268988, |
| "std": 0.041145551949739456, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.0.weight": { |
| "min": -0.41793951392173767, |
| "max": 0.37214717268943787, |
| "mean": 6.048314844520064e-06, |
| "std": 0.02162175066769123, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.21.1.g": { |
| "min": 0.21421198546886444, |
| "max": 0.7522769570350647, |
| "mean": 0.6496115922927856, |
| "std": 0.054447393864393234, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_q.weight": { |
| "min": -0.21056805551052094, |
| "max": 0.1966959536075592, |
| "mean": 4.008851828984916e-05, |
| "std": 0.039464544504880905, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_q.bias": { |
| "min": -0.33072784543037415, |
| "max": 0.26050281524658203, |
| "mean": -0.003235320094972849, |
| "std": 0.056362900882959366, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_k.weight": { |
| "min": -0.20648598670959473, |
| "max": 0.2557448148727417, |
| "mean": 5.435877392301336e-05, |
| "std": 0.038566704839468, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_k.bias": { |
| "min": -6.270581245422363, |
| "max": 6.962486743927002, |
| "mean": 0.048468317836523056, |
| "std": 1.3885526657104492, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_v.weight": { |
| "min": -0.21042834222316742, |
| "max": 0.23116129636764526, |
| "mean": -5.202562988415593e-06, |
| "std": 0.04131306707859039, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_v.bias": { |
| "min": -0.044061992317438126, |
| "max": 0.03610403463244438, |
| "mean": 4.031957359984517e-06, |
| "std": 0.012803297489881516, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_out.0.weight": { |
| "min": -0.39820992946624756, |
| "max": 0.3451625406742096, |
| "mean": -5.5655600590398535e-05, |
| "std": 0.04238949343562126, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_out.0.bias": { |
| "min": -0.05527956411242485, |
| "max": 0.06314276903867722, |
| "mean": 0.00036968549829907715, |
| "std": 0.01868215762078762, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.3.g": { |
| "min": 0.3502121865749359, |
| "max": 1.0526388883590698, |
| "mean": 0.789475679397583, |
| "std": 0.049056656658649445, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.4.ff.0.0.weight": { |
| "min": -0.333749383687973, |
| "max": 0.386434406042099, |
| "mean": -0.00016950398276094347, |
| "std": 0.04148067533969879, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.4.ff.0.0.bias": { |
| "min": -0.15795546770095825, |
| "max": 0.05914008617401123, |
| "mean": -0.031855080276727676, |
| "std": 0.025188777595758438, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.21.4.ff.2.weight": { |
| "min": -0.6976608633995056, |
| "max": 0.4709860682487488, |
| "mean": -9.084228804567829e-05, |
| "std": 0.051792342215776443, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.21.4.ff.2.bias": { |
| "min": -0.24932992458343506, |
| "max": 0.3299875855445862, |
| "mean": -0.00024624879006296396, |
| "std": 0.04149326682090759, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.0.weight": { |
| "min": -0.2875395119190216, |
| "max": 0.3506205677986145, |
| "mean": -2.1794317035528366e-06, |
| "std": 0.02423883229494095, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.22.1.g": { |
| "min": 0.19665004312992096, |
| "max": 0.7845895886421204, |
| "mean": 0.6703099608421326, |
| "std": 0.05872485041618347, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_q.weight": { |
| "min": -0.22986678779125214, |
| "max": 0.23209868371486664, |
| "mean": -1.9775907276198268e-05, |
| "std": 0.040440451353788376, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_q.bias": { |
| "min": -0.22065043449401855, |
| "max": 0.2417624443769455, |
| "mean": 0.0007816089782863855, |
| "std": 0.05589631199836731, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_k.weight": { |
| "min": -0.21658743917942047, |
| "max": 0.22758929431438446, |
| "mean": -7.156423816923052e-05, |
| "std": 0.03937661275267601, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_k.bias": { |
| "min": -8.943953514099121, |
| "max": 9.107547760009766, |
| "mean": -0.0012157298624515533, |
| "std": 1.8536982536315918, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_v.weight": { |
| "min": -0.2707418203353882, |
| "max": 0.2602587938308716, |
| "mean": 4.357028228696436e-05, |
| "std": 0.03840764984488487, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_v.bias": { |
| "min": -0.05789529159665108, |
| "max": 0.05795900523662567, |
| "mean": 0.0003505878266878426, |
| "std": 0.014736429788172245, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_out.0.weight": { |
| "min": -0.2662392258644104, |
| "max": 0.2892150580883026, |
| "mean": -6.152272544568405e-05, |
| "std": 0.03907401114702225, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_out.0.bias": { |
| "min": -0.04396943002939224, |
| "max": 0.037484679371118546, |
| "mean": -8.678687299834564e-05, |
| "std": 0.013375459238886833, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.3.g": { |
| "min": 0.3395363390445709, |
| "max": 1.100338101387024, |
| "mean": 0.863823413848877, |
| "std": 0.06409083306789398, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.4.ff.0.0.weight": { |
| "min": -0.423621267080307, |
| "max": 0.4195392429828644, |
| "mean": 0.0003127713571302593, |
| "std": 0.04350290074944496, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.4.ff.0.0.bias": { |
| "min": -0.21570223569869995, |
| "max": 0.17136934399604797, |
| "mean": -0.029504353180527687, |
| "std": 0.032010503113269806, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.22.4.ff.2.weight": { |
| "min": -0.602144181728363, |
| "max": 0.5620326995849609, |
| "mean": -0.00015219957276713103, |
| "std": 0.05344673991203308, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.22.4.ff.2.bias": { |
| "min": -0.17926719784736633, |
| "max": 0.37834614515304565, |
| "mean": 0.0013675567461177707, |
| "std": 0.037359848618507385, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.0.weight": { |
| "min": -0.39466091990470886, |
| "max": 0.36930760741233826, |
| "mean": 3.647102857939899e-05, |
| "std": 0.028620684519410133, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.23.1.g": { |
| "min": 0.2902662754058838, |
| "max": 0.832281231880188, |
| "mean": 0.7056034207344055, |
| "std": 0.06793806701898575, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_q.weight": { |
| "min": -0.9263004064559937, |
| "max": 1.0266234874725342, |
| "mean": -2.5708328394102864e-05, |
| "std": 0.04762601479887962, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_q.bias": { |
| "min": -0.8822629451751709, |
| "max": 0.8186339139938354, |
| "mean": -0.00031781112193129957, |
| "std": 0.09582255780696869, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_k.weight": { |
| "min": -0.27002349495887756, |
| "max": 0.24192620813846588, |
| "mean": -2.2872980480315164e-05, |
| "std": 0.03895563259720802, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_k.bias": { |
| "min": -23.84510040283203, |
| "max": 22.94961166381836, |
| "mean": -0.09204111993312836, |
| "std": 4.085866928100586, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_v.weight": { |
| "min": -0.22870811820030212, |
| "max": 0.24587669968605042, |
| "mean": -2.573069286881946e-05, |
| "std": 0.03863922879099846, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_v.bias": { |
| "min": -0.06067140772938728, |
| "max": 0.046225275844335556, |
| "mean": -0.0001460441417293623, |
| "std": 0.014704843983054161, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_out.0.weight": { |
| "min": -0.3391576111316681, |
| "max": 0.3760104775428772, |
| "mean": 7.383272532024421e-06, |
| "std": 0.040815357118844986, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_out.0.bias": { |
| "min": -0.04665788635611534, |
| "max": 0.19654953479766846, |
| "mean": 0.0002728282706812024, |
| "std": 0.013587887398898602, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.3.g": { |
| "min": 0.37436628341674805, |
| "max": 1.138013482093811, |
| "mean": 0.8901113271713257, |
| "std": 0.06415355205535889, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.4.ff.0.0.weight": { |
| "min": -0.44819676876068115, |
| "max": 0.5436740517616272, |
| "mean": 2.450778629281558e-05, |
| "std": 0.04556773602962494, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.4.ff.0.0.bias": { |
| "min": -0.2250596135854721, |
| "max": 0.08822774887084961, |
| "mean": -0.03204711154103279, |
| "std": 0.0378473699092865, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.23.4.ff.2.weight": { |
| "min": -0.7285163402557373, |
| "max": 0.6922004222869873, |
| "mean": 3.462535823928192e-05, |
| "std": 0.051778655499219894, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.23.4.ff.2.bias": { |
| "min": -0.1753203570842743, |
| "max": 0.21950407326221466, |
| "mean": 4.071232979185879e-05, |
| "std": 0.0318208709359169, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.0.weight": { |
| "min": -0.34123340249061584, |
| "max": 0.37526530027389526, |
| "mean": 4.290333163226023e-05, |
| "std": 0.0341440849006176, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.24.1.g": { |
| "min": 0.31759148836135864, |
| "max": 1.2954586744308472, |
| "mean": 0.6016563177108765, |
| "std": 0.08407581597566605, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_q.weight": { |
| "min": -0.2837989628314972, |
| "max": 0.2609255015850067, |
| "mean": -3.0735166092199506e-06, |
| "std": 0.035984087735414505, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_q.bias": { |
| "min": -0.23655052483081818, |
| "max": 0.2062867432832718, |
| "mean": 0.0002321804640814662, |
| "std": 0.05606939643621445, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_k.weight": { |
| "min": -0.436277836561203, |
| "max": 0.3261794447898865, |
| "mean": 2.4473378289258108e-05, |
| "std": 0.03413478285074234, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_k.bias": { |
| "min": -5.569121360778809, |
| "max": 7.344529628753662, |
| "mean": -0.007453735917806625, |
| "std": 0.7020133137702942, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_v.weight": { |
| "min": -0.3451450765132904, |
| "max": 0.36535224318504333, |
| "mean": 0.0001032469590427354, |
| "std": 0.047828368842601776, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_v.bias": { |
| "min": -0.07407404482364655, |
| "max": 0.06063373386859894, |
| "mean": 0.0009325749706476927, |
| "std": 0.014960682019591331, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_out.0.weight": { |
| "min": -0.25645625591278076, |
| "max": 0.28786128759384155, |
| "mean": 4.184576027910225e-06, |
| "std": 0.041555535048246384, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_out.0.bias": { |
| "min": -0.05557131767272949, |
| "max": 0.06310223042964935, |
| "mean": 0.00014075382205192, |
| "std": 0.0071859210729599, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.3.g": { |
| "min": 0.4938402473926544, |
| "max": 1.2290534973144531, |
| "mean": 1.0134642124176025, |
| "std": 0.1175011619925499, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.4.ff.0.0.weight": { |
| "min": -1.0939291715621948, |
| "max": 1.0472568273544312, |
| "mean": -4.937269113725051e-05, |
| "std": 0.052410781383514404, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.4.ff.0.0.bias": { |
| "min": -0.22465373575687408, |
| "max": 0.17359215021133423, |
| "mean": -0.027279244735836983, |
| "std": 0.0364469476044178, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.24.4.ff.2.weight": { |
| "min": -0.8881030678749084, |
| "max": 0.9261159300804138, |
| "mean": -0.00014599041605833918, |
| "std": 0.05328277125954628, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.24.4.ff.2.bias": { |
| "min": -0.17176949977874756, |
| "max": 0.3815639615058899, |
| "mean": 0.003376794047653675, |
| "std": 0.03997529670596123, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.0.weight": { |
| "min": -0.7789531350135803, |
| "max": 0.725176990032196, |
| "mean": 1.8912758605438285e-05, |
| "std": 0.04616439342498779, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.25.1.g": { |
| "min": 0.3386198878288269, |
| "max": 1.43718421459198, |
| "mean": 0.9484164714813232, |
| "std": 0.2068886160850525, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_q.weight": { |
| "min": -1.7457443475723267, |
| "max": 1.7046759128570557, |
| "mean": 0.00022706578602083027, |
| "std": 0.15868695080280304, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_q.bias": { |
| "min": -1.2048320770263672, |
| "max": 1.1044596433639526, |
| "mean": -0.009567854925990105, |
| "std": 0.20464132726192474, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_k.weight": { |
| "min": -0.4219454526901245, |
| "max": 0.42726483941078186, |
| "mean": 6.450812361435965e-05, |
| "std": 0.04801829159259796, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_k.bias": { |
| "min": -19.830074310302734, |
| "max": 19.624286651611328, |
| "mean": -0.24912264943122864, |
| "std": 4.795468807220459, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_v.weight": { |
| "min": -0.32499611377716064, |
| "max": 0.43987926840782166, |
| "mean": -1.1840356819448061e-05, |
| "std": 0.04616156592965126, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_v.bias": { |
| "min": -0.034201864153146744, |
| "max": 0.03727949783205986, |
| "mean": 0.0006420350982807577, |
| "std": 0.012923939153552055, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_out.0.weight": { |
| "min": -0.7049213647842407, |
| "max": 0.6658478379249573, |
| "mean": 4.366881330497563e-05, |
| "std": 0.057883720844984055, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_out.0.bias": { |
| "min": -0.07255180925130844, |
| "max": 0.06780894845724106, |
| "mean": -0.00013478109030984342, |
| "std": 0.012948636896908283, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.3.g": { |
| "min": 0.38018205761909485, |
| "max": 1.3912252187728882, |
| "mean": 1.0665678977966309, |
| "std": 0.21972529590129852, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.4.ff.0.0.weight": { |
| "min": -0.6171136498451233, |
| "max": 0.7182933688163757, |
| "mean": 0.00011123980220872909, |
| "std": 0.05802140384912491, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.4.ff.0.0.bias": { |
| "min": -0.22050145268440247, |
| "max": 0.2261514961719513, |
| "mean": 0.006267528980970383, |
| "std": 0.04982294142246246, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.25.4.ff.2.weight": { |
| "min": -0.6300009489059448, |
| "max": 0.8896978497505188, |
| "mean": 1.1602171070990153e-05, |
| "std": 0.023528659716248512, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.25.4.ff.2.bias": { |
| "min": -0.5090406537055969, |
| "max": 0.47603797912597656, |
| "mean": -0.003031304571777582, |
| "std": 0.0695611834526062, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.norm_out.g": { |
| "min": 0.5378094911575317, |
| "max": 1.184032917022705, |
| "mean": 0.7829163670539856, |
| "std": 0.09918713569641113, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.proj_out.weight": { |
| "min": -0.26840853691101074, |
| "max": 0.21375010907649994, |
| "mean": -0.00022396638814825565, |
| "std": 0.05399699881672859, |
| "sparsity": 0.0, |
| "shape": [ |
| 100, |
| 1024 |
| ] |
| }, |
| "transformer.proj_out.bias": { |
| "min": -0.23899979889392853, |
| "max": 0.014829290099442005, |
| "mean": -0.04399246349930763, |
| "std": 0.034442439675331116, |
| "sparsity": 0.0, |
| "shape": [ |
| 100 |
| ] |
| } |
| } |
| } |