| { |
| "layer_types": { |
| "transformer": 391 |
| }, |
| "parameter_counts": { |
| "transformer.time_embed.time_mlp.0.weight": 262144, |
| "transformer.time_embed.time_mlp.0.bias": 1024, |
| "transformer.time_embed.time_mlp.2.weight": 1048576, |
| "transformer.time_embed.time_mlp.2.bias": 1024, |
| "transformer.text_embed.text_embed.weight": 254600, |
| "transformer.input_embed.proj.weight": 307200, |
| "transformer.input_embed.proj.bias": 1024, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, |
| "transformer.layers.0.1.g": 1024, |
| "transformer.layers.0.2.to_q.weight": 1048576, |
| "transformer.layers.0.2.to_q.bias": 1024, |
| "transformer.layers.0.2.to_k.weight": 1048576, |
| "transformer.layers.0.2.to_k.bias": 1024, |
| "transformer.layers.0.2.to_v.weight": 1048576, |
| "transformer.layers.0.2.to_v.bias": 1024, |
| "transformer.layers.0.2.to_out.0.weight": 1048576, |
| "transformer.layers.0.2.to_out.0.bias": 1024, |
| "transformer.layers.0.3.g": 1024, |
| "transformer.layers.0.4.ff.0.0.weight": 4194304, |
| "transformer.layers.0.4.ff.0.0.bias": 4096, |
| "transformer.layers.0.4.ff.2.weight": 4194304, |
| "transformer.layers.0.4.ff.2.bias": 1024, |
| "transformer.layers.1.1.g": 1024, |
| "transformer.layers.1.2.to_q.weight": 1048576, |
| "transformer.layers.1.2.to_q.bias": 1024, |
| "transformer.layers.1.2.to_k.weight": 1048576, |
| "transformer.layers.1.2.to_k.bias": 1024, |
| "transformer.layers.1.2.to_v.weight": 1048576, |
| "transformer.layers.1.2.to_v.bias": 1024, |
| "transformer.layers.1.2.to_out.0.weight": 1048576, |
| "transformer.layers.1.2.to_out.0.bias": 1024, |
| "transformer.layers.1.3.g": 1024, |
| "transformer.layers.1.4.ff.0.0.weight": 4194304, |
| "transformer.layers.1.4.ff.0.0.bias": 4096, |
| "transformer.layers.1.4.ff.2.weight": 4194304, |
| "transformer.layers.1.4.ff.2.bias": 1024, |
| "transformer.layers.2.1.g": 1024, |
| "transformer.layers.2.2.to_q.weight": 1048576, |
| "transformer.layers.2.2.to_q.bias": 1024, |
| "transformer.layers.2.2.to_k.weight": 1048576, |
| "transformer.layers.2.2.to_k.bias": 1024, |
| "transformer.layers.2.2.to_v.weight": 1048576, |
| "transformer.layers.2.2.to_v.bias": 1024, |
| "transformer.layers.2.2.to_out.0.weight": 1048576, |
| "transformer.layers.2.2.to_out.0.bias": 1024, |
| "transformer.layers.2.3.g": 1024, |
| "transformer.layers.2.4.ff.0.0.weight": 4194304, |
| "transformer.layers.2.4.ff.0.0.bias": 4096, |
| "transformer.layers.2.4.ff.2.weight": 4194304, |
| "transformer.layers.2.4.ff.2.bias": 1024, |
| "transformer.layers.3.1.g": 1024, |
| "transformer.layers.3.2.to_q.weight": 1048576, |
| "transformer.layers.3.2.to_q.bias": 1024, |
| "transformer.layers.3.2.to_k.weight": 1048576, |
| "transformer.layers.3.2.to_k.bias": 1024, |
| "transformer.layers.3.2.to_v.weight": 1048576, |
| "transformer.layers.3.2.to_v.bias": 1024, |
| "transformer.layers.3.2.to_out.0.weight": 1048576, |
| "transformer.layers.3.2.to_out.0.bias": 1024, |
| "transformer.layers.3.3.g": 1024, |
| "transformer.layers.3.4.ff.0.0.weight": 4194304, |
| "transformer.layers.3.4.ff.0.0.bias": 4096, |
| "transformer.layers.3.4.ff.2.weight": 4194304, |
| "transformer.layers.3.4.ff.2.bias": 1024, |
| "transformer.layers.4.1.g": 1024, |
| "transformer.layers.4.2.to_q.weight": 1048576, |
| "transformer.layers.4.2.to_q.bias": 1024, |
| "transformer.layers.4.2.to_k.weight": 1048576, |
| "transformer.layers.4.2.to_k.bias": 1024, |
| "transformer.layers.4.2.to_v.weight": 1048576, |
| "transformer.layers.4.2.to_v.bias": 1024, |
| "transformer.layers.4.2.to_out.0.weight": 1048576, |
| "transformer.layers.4.2.to_out.0.bias": 1024, |
| "transformer.layers.4.3.g": 1024, |
| "transformer.layers.4.4.ff.0.0.weight": 4194304, |
| "transformer.layers.4.4.ff.0.0.bias": 4096, |
| "transformer.layers.4.4.ff.2.weight": 4194304, |
| "transformer.layers.4.4.ff.2.bias": 1024, |
| "transformer.layers.5.1.g": 1024, |
| "transformer.layers.5.2.to_q.weight": 1048576, |
| "transformer.layers.5.2.to_q.bias": 1024, |
| "transformer.layers.5.2.to_k.weight": 1048576, |
| "transformer.layers.5.2.to_k.bias": 1024, |
| "transformer.layers.5.2.to_v.weight": 1048576, |
| "transformer.layers.5.2.to_v.bias": 1024, |
| "transformer.layers.5.2.to_out.0.weight": 1048576, |
| "transformer.layers.5.2.to_out.0.bias": 1024, |
| "transformer.layers.5.3.g": 1024, |
| "transformer.layers.5.4.ff.0.0.weight": 4194304, |
| "transformer.layers.5.4.ff.0.0.bias": 4096, |
| "transformer.layers.5.4.ff.2.weight": 4194304, |
| "transformer.layers.5.4.ff.2.bias": 1024, |
| "transformer.layers.6.1.g": 1024, |
| "transformer.layers.6.2.to_q.weight": 1048576, |
| "transformer.layers.6.2.to_q.bias": 1024, |
| "transformer.layers.6.2.to_k.weight": 1048576, |
| "transformer.layers.6.2.to_k.bias": 1024, |
| "transformer.layers.6.2.to_v.weight": 1048576, |
| "transformer.layers.6.2.to_v.bias": 1024, |
| "transformer.layers.6.2.to_out.0.weight": 1048576, |
| "transformer.layers.6.2.to_out.0.bias": 1024, |
| "transformer.layers.6.3.g": 1024, |
| "transformer.layers.6.4.ff.0.0.weight": 4194304, |
| "transformer.layers.6.4.ff.0.0.bias": 4096, |
| "transformer.layers.6.4.ff.2.weight": 4194304, |
| "transformer.layers.6.4.ff.2.bias": 1024, |
| "transformer.layers.7.1.g": 1024, |
| "transformer.layers.7.2.to_q.weight": 1048576, |
| "transformer.layers.7.2.to_q.bias": 1024, |
| "transformer.layers.7.2.to_k.weight": 1048576, |
| "transformer.layers.7.2.to_k.bias": 1024, |
| "transformer.layers.7.2.to_v.weight": 1048576, |
| "transformer.layers.7.2.to_v.bias": 1024, |
| "transformer.layers.7.2.to_out.0.weight": 1048576, |
| "transformer.layers.7.2.to_out.0.bias": 1024, |
| "transformer.layers.7.3.g": 1024, |
| "transformer.layers.7.4.ff.0.0.weight": 4194304, |
| "transformer.layers.7.4.ff.0.0.bias": 4096, |
| "transformer.layers.7.4.ff.2.weight": 4194304, |
| "transformer.layers.7.4.ff.2.bias": 1024, |
| "transformer.layers.8.1.g": 1024, |
| "transformer.layers.8.2.to_q.weight": 1048576, |
| "transformer.layers.8.2.to_q.bias": 1024, |
| "transformer.layers.8.2.to_k.weight": 1048576, |
| "transformer.layers.8.2.to_k.bias": 1024, |
| "transformer.layers.8.2.to_v.weight": 1048576, |
| "transformer.layers.8.2.to_v.bias": 1024, |
| "transformer.layers.8.2.to_out.0.weight": 1048576, |
| "transformer.layers.8.2.to_out.0.bias": 1024, |
| "transformer.layers.8.3.g": 1024, |
| "transformer.layers.8.4.ff.0.0.weight": 4194304, |
| "transformer.layers.8.4.ff.0.0.bias": 4096, |
| "transformer.layers.8.4.ff.2.weight": 4194304, |
| "transformer.layers.8.4.ff.2.bias": 1024, |
| "transformer.layers.9.1.g": 1024, |
| "transformer.layers.9.2.to_q.weight": 1048576, |
| "transformer.layers.9.2.to_q.bias": 1024, |
| "transformer.layers.9.2.to_k.weight": 1048576, |
| "transformer.layers.9.2.to_k.bias": 1024, |
| "transformer.layers.9.2.to_v.weight": 1048576, |
| "transformer.layers.9.2.to_v.bias": 1024, |
| "transformer.layers.9.2.to_out.0.weight": 1048576, |
| "transformer.layers.9.2.to_out.0.bias": 1024, |
| "transformer.layers.9.3.g": 1024, |
| "transformer.layers.9.4.ff.0.0.weight": 4194304, |
| "transformer.layers.9.4.ff.0.0.bias": 4096, |
| "transformer.layers.9.4.ff.2.weight": 4194304, |
| "transformer.layers.9.4.ff.2.bias": 1024, |
| "transformer.layers.10.1.g": 1024, |
| "transformer.layers.10.2.to_q.weight": 1048576, |
| "transformer.layers.10.2.to_q.bias": 1024, |
| "transformer.layers.10.2.to_k.weight": 1048576, |
| "transformer.layers.10.2.to_k.bias": 1024, |
| "transformer.layers.10.2.to_v.weight": 1048576, |
| "transformer.layers.10.2.to_v.bias": 1024, |
| "transformer.layers.10.2.to_out.0.weight": 1048576, |
| "transformer.layers.10.2.to_out.0.bias": 1024, |
| "transformer.layers.10.3.g": 1024, |
| "transformer.layers.10.4.ff.0.0.weight": 4194304, |
| "transformer.layers.10.4.ff.0.0.bias": 4096, |
| "transformer.layers.10.4.ff.2.weight": 4194304, |
| "transformer.layers.10.4.ff.2.bias": 1024, |
| "transformer.layers.11.1.g": 1024, |
| "transformer.layers.11.2.to_q.weight": 1048576, |
| "transformer.layers.11.2.to_q.bias": 1024, |
| "transformer.layers.11.2.to_k.weight": 1048576, |
| "transformer.layers.11.2.to_k.bias": 1024, |
| "transformer.layers.11.2.to_v.weight": 1048576, |
| "transformer.layers.11.2.to_v.bias": 1024, |
| "transformer.layers.11.2.to_out.0.weight": 1048576, |
| "transformer.layers.11.2.to_out.0.bias": 1024, |
| "transformer.layers.11.3.g": 1024, |
| "transformer.layers.11.4.ff.0.0.weight": 4194304, |
| "transformer.layers.11.4.ff.0.0.bias": 4096, |
| "transformer.layers.11.4.ff.2.weight": 4194304, |
| "transformer.layers.11.4.ff.2.bias": 1024, |
| "transformer.layers.12.1.g": 1024, |
| "transformer.layers.12.2.to_q.weight": 1048576, |
| "transformer.layers.12.2.to_q.bias": 1024, |
| "transformer.layers.12.2.to_k.weight": 1048576, |
| "transformer.layers.12.2.to_k.bias": 1024, |
| "transformer.layers.12.2.to_v.weight": 1048576, |
| "transformer.layers.12.2.to_v.bias": 1024, |
| "transformer.layers.12.2.to_out.0.weight": 1048576, |
| "transformer.layers.12.2.to_out.0.bias": 1024, |
| "transformer.layers.12.3.g": 1024, |
| "transformer.layers.12.4.ff.0.0.weight": 4194304, |
| "transformer.layers.12.4.ff.0.0.bias": 4096, |
| "transformer.layers.12.4.ff.2.weight": 4194304, |
| "transformer.layers.12.4.ff.2.bias": 1024, |
| "transformer.layers.13.0.weight": 2097152, |
| "transformer.layers.13.1.g": 1024, |
| "transformer.layers.13.2.to_q.weight": 1048576, |
| "transformer.layers.13.2.to_q.bias": 1024, |
| "transformer.layers.13.2.to_k.weight": 1048576, |
| "transformer.layers.13.2.to_k.bias": 1024, |
| "transformer.layers.13.2.to_v.weight": 1048576, |
| "transformer.layers.13.2.to_v.bias": 1024, |
| "transformer.layers.13.2.to_out.0.weight": 1048576, |
| "transformer.layers.13.2.to_out.0.bias": 1024, |
| "transformer.layers.13.3.g": 1024, |
| "transformer.layers.13.4.ff.0.0.weight": 4194304, |
| "transformer.layers.13.4.ff.0.0.bias": 4096, |
| "transformer.layers.13.4.ff.2.weight": 4194304, |
| "transformer.layers.13.4.ff.2.bias": 1024, |
| "transformer.layers.14.0.weight": 2097152, |
| "transformer.layers.14.1.g": 1024, |
| "transformer.layers.14.2.to_q.weight": 1048576, |
| "transformer.layers.14.2.to_q.bias": 1024, |
| "transformer.layers.14.2.to_k.weight": 1048576, |
| "transformer.layers.14.2.to_k.bias": 1024, |
| "transformer.layers.14.2.to_v.weight": 1048576, |
| "transformer.layers.14.2.to_v.bias": 1024, |
| "transformer.layers.14.2.to_out.0.weight": 1048576, |
| "transformer.layers.14.2.to_out.0.bias": 1024, |
| "transformer.layers.14.3.g": 1024, |
| "transformer.layers.14.4.ff.0.0.weight": 4194304, |
| "transformer.layers.14.4.ff.0.0.bias": 4096, |
| "transformer.layers.14.4.ff.2.weight": 4194304, |
| "transformer.layers.14.4.ff.2.bias": 1024, |
| "transformer.layers.15.0.weight": 2097152, |
| "transformer.layers.15.1.g": 1024, |
| "transformer.layers.15.2.to_q.weight": 1048576, |
| "transformer.layers.15.2.to_q.bias": 1024, |
| "transformer.layers.15.2.to_k.weight": 1048576, |
| "transformer.layers.15.2.to_k.bias": 1024, |
| "transformer.layers.15.2.to_v.weight": 1048576, |
| "transformer.layers.15.2.to_v.bias": 1024, |
| "transformer.layers.15.2.to_out.0.weight": 1048576, |
| "transformer.layers.15.2.to_out.0.bias": 1024, |
| "transformer.layers.15.3.g": 1024, |
| "transformer.layers.15.4.ff.0.0.weight": 4194304, |
| "transformer.layers.15.4.ff.0.0.bias": 4096, |
| "transformer.layers.15.4.ff.2.weight": 4194304, |
| "transformer.layers.15.4.ff.2.bias": 1024, |
| "transformer.layers.16.0.weight": 2097152, |
| "transformer.layers.16.1.g": 1024, |
| "transformer.layers.16.2.to_q.weight": 1048576, |
| "transformer.layers.16.2.to_q.bias": 1024, |
| "transformer.layers.16.2.to_k.weight": 1048576, |
| "transformer.layers.16.2.to_k.bias": 1024, |
| "transformer.layers.16.2.to_v.weight": 1048576, |
| "transformer.layers.16.2.to_v.bias": 1024, |
| "transformer.layers.16.2.to_out.0.weight": 1048576, |
| "transformer.layers.16.2.to_out.0.bias": 1024, |
| "transformer.layers.16.3.g": 1024, |
| "transformer.layers.16.4.ff.0.0.weight": 4194304, |
| "transformer.layers.16.4.ff.0.0.bias": 4096, |
| "transformer.layers.16.4.ff.2.weight": 4194304, |
| "transformer.layers.16.4.ff.2.bias": 1024, |
| "transformer.layers.17.0.weight": 2097152, |
| "transformer.layers.17.1.g": 1024, |
| "transformer.layers.17.2.to_q.weight": 1048576, |
| "transformer.layers.17.2.to_q.bias": 1024, |
| "transformer.layers.17.2.to_k.weight": 1048576, |
| "transformer.layers.17.2.to_k.bias": 1024, |
| "transformer.layers.17.2.to_v.weight": 1048576, |
| "transformer.layers.17.2.to_v.bias": 1024, |
| "transformer.layers.17.2.to_out.0.weight": 1048576, |
| "transformer.layers.17.2.to_out.0.bias": 1024, |
| "transformer.layers.17.3.g": 1024, |
| "transformer.layers.17.4.ff.0.0.weight": 4194304, |
| "transformer.layers.17.4.ff.0.0.bias": 4096, |
| "transformer.layers.17.4.ff.2.weight": 4194304, |
| "transformer.layers.17.4.ff.2.bias": 1024, |
| "transformer.layers.18.0.weight": 2097152, |
| "transformer.layers.18.1.g": 1024, |
| "transformer.layers.18.2.to_q.weight": 1048576, |
| "transformer.layers.18.2.to_q.bias": 1024, |
| "transformer.layers.18.2.to_k.weight": 1048576, |
| "transformer.layers.18.2.to_k.bias": 1024, |
| "transformer.layers.18.2.to_v.weight": 1048576, |
| "transformer.layers.18.2.to_v.bias": 1024, |
| "transformer.layers.18.2.to_out.0.weight": 1048576, |
| "transformer.layers.18.2.to_out.0.bias": 1024, |
| "transformer.layers.18.3.g": 1024, |
| "transformer.layers.18.4.ff.0.0.weight": 4194304, |
| "transformer.layers.18.4.ff.0.0.bias": 4096, |
| "transformer.layers.18.4.ff.2.weight": 4194304, |
| "transformer.layers.18.4.ff.2.bias": 1024, |
| "transformer.layers.19.0.weight": 2097152, |
| "transformer.layers.19.1.g": 1024, |
| "transformer.layers.19.2.to_q.weight": 1048576, |
| "transformer.layers.19.2.to_q.bias": 1024, |
| "transformer.layers.19.2.to_k.weight": 1048576, |
| "transformer.layers.19.2.to_k.bias": 1024, |
| "transformer.layers.19.2.to_v.weight": 1048576, |
| "transformer.layers.19.2.to_v.bias": 1024, |
| "transformer.layers.19.2.to_out.0.weight": 1048576, |
| "transformer.layers.19.2.to_out.0.bias": 1024, |
| "transformer.layers.19.3.g": 1024, |
| "transformer.layers.19.4.ff.0.0.weight": 4194304, |
| "transformer.layers.19.4.ff.0.0.bias": 4096, |
| "transformer.layers.19.4.ff.2.weight": 4194304, |
| "transformer.layers.19.4.ff.2.bias": 1024, |
| "transformer.layers.20.0.weight": 2097152, |
| "transformer.layers.20.1.g": 1024, |
| "transformer.layers.20.2.to_q.weight": 1048576, |
| "transformer.layers.20.2.to_q.bias": 1024, |
| "transformer.layers.20.2.to_k.weight": 1048576, |
| "transformer.layers.20.2.to_k.bias": 1024, |
| "transformer.layers.20.2.to_v.weight": 1048576, |
| "transformer.layers.20.2.to_v.bias": 1024, |
| "transformer.layers.20.2.to_out.0.weight": 1048576, |
| "transformer.layers.20.2.to_out.0.bias": 1024, |
| "transformer.layers.20.3.g": 1024, |
| "transformer.layers.20.4.ff.0.0.weight": 4194304, |
| "transformer.layers.20.4.ff.0.0.bias": 4096, |
| "transformer.layers.20.4.ff.2.weight": 4194304, |
| "transformer.layers.20.4.ff.2.bias": 1024, |
| "transformer.layers.21.0.weight": 2097152, |
| "transformer.layers.21.1.g": 1024, |
| "transformer.layers.21.2.to_q.weight": 1048576, |
| "transformer.layers.21.2.to_q.bias": 1024, |
| "transformer.layers.21.2.to_k.weight": 1048576, |
| "transformer.layers.21.2.to_k.bias": 1024, |
| "transformer.layers.21.2.to_v.weight": 1048576, |
| "transformer.layers.21.2.to_v.bias": 1024, |
| "transformer.layers.21.2.to_out.0.weight": 1048576, |
| "transformer.layers.21.2.to_out.0.bias": 1024, |
| "transformer.layers.21.3.g": 1024, |
| "transformer.layers.21.4.ff.0.0.weight": 4194304, |
| "transformer.layers.21.4.ff.0.0.bias": 4096, |
| "transformer.layers.21.4.ff.2.weight": 4194304, |
| "transformer.layers.21.4.ff.2.bias": 1024, |
| "transformer.layers.22.0.weight": 2097152, |
| "transformer.layers.22.1.g": 1024, |
| "transformer.layers.22.2.to_q.weight": 1048576, |
| "transformer.layers.22.2.to_q.bias": 1024, |
| "transformer.layers.22.2.to_k.weight": 1048576, |
| "transformer.layers.22.2.to_k.bias": 1024, |
| "transformer.layers.22.2.to_v.weight": 1048576, |
| "transformer.layers.22.2.to_v.bias": 1024, |
| "transformer.layers.22.2.to_out.0.weight": 1048576, |
| "transformer.layers.22.2.to_out.0.bias": 1024, |
| "transformer.layers.22.3.g": 1024, |
| "transformer.layers.22.4.ff.0.0.weight": 4194304, |
| "transformer.layers.22.4.ff.0.0.bias": 4096, |
| "transformer.layers.22.4.ff.2.weight": 4194304, |
| "transformer.layers.22.4.ff.2.bias": 1024, |
| "transformer.layers.23.0.weight": 2097152, |
| "transformer.layers.23.1.g": 1024, |
| "transformer.layers.23.2.to_q.weight": 1048576, |
| "transformer.layers.23.2.to_q.bias": 1024, |
| "transformer.layers.23.2.to_k.weight": 1048576, |
| "transformer.layers.23.2.to_k.bias": 1024, |
| "transformer.layers.23.2.to_v.weight": 1048576, |
| "transformer.layers.23.2.to_v.bias": 1024, |
| "transformer.layers.23.2.to_out.0.weight": 1048576, |
| "transformer.layers.23.2.to_out.0.bias": 1024, |
| "transformer.layers.23.3.g": 1024, |
| "transformer.layers.23.4.ff.0.0.weight": 4194304, |
| "transformer.layers.23.4.ff.0.0.bias": 4096, |
| "transformer.layers.23.4.ff.2.weight": 4194304, |
| "transformer.layers.23.4.ff.2.bias": 1024, |
| "transformer.layers.24.0.weight": 2097152, |
| "transformer.layers.24.1.g": 1024, |
| "transformer.layers.24.2.to_q.weight": 1048576, |
| "transformer.layers.24.2.to_q.bias": 1024, |
| "transformer.layers.24.2.to_k.weight": 1048576, |
| "transformer.layers.24.2.to_k.bias": 1024, |
| "transformer.layers.24.2.to_v.weight": 1048576, |
| "transformer.layers.24.2.to_v.bias": 1024, |
| "transformer.layers.24.2.to_out.0.weight": 1048576, |
| "transformer.layers.24.2.to_out.0.bias": 1024, |
| "transformer.layers.24.3.g": 1024, |
| "transformer.layers.24.4.ff.0.0.weight": 4194304, |
| "transformer.layers.24.4.ff.0.0.bias": 4096, |
| "transformer.layers.24.4.ff.2.weight": 4194304, |
| "transformer.layers.24.4.ff.2.bias": 1024, |
| "transformer.layers.25.0.weight": 2097152, |
| "transformer.layers.25.1.g": 1024, |
| "transformer.layers.25.2.to_q.weight": 1048576, |
| "transformer.layers.25.2.to_q.bias": 1024, |
| "transformer.layers.25.2.to_k.weight": 1048576, |
| "transformer.layers.25.2.to_k.bias": 1024, |
| "transformer.layers.25.2.to_v.weight": 1048576, |
| "transformer.layers.25.2.to_v.bias": 1024, |
| "transformer.layers.25.2.to_out.0.weight": 1048576, |
| "transformer.layers.25.2.to_out.0.bias": 1024, |
| "transformer.layers.25.3.g": 1024, |
| "transformer.layers.25.4.ff.0.0.weight": 4194304, |
| "transformer.layers.25.4.ff.0.0.bias": 4096, |
| "transformer.layers.25.4.ff.2.weight": 4194304, |
| "transformer.layers.25.4.ff.2.bias": 1024, |
| "transformer.norm_out.g": 1024, |
| "transformer.proj_out.weight": 102400, |
| "transformer.proj_out.bias": 100 |
| }, |
| "important_layers": [ |
| "transformer.time_embed.time_mlp.0.weight", |
| "transformer.time_embed.time_mlp.2.weight", |
| "transformer.text_embed.text_embed.weight", |
| "transformer.input_embed.proj.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight", |
| "transformer.layers.0.2.to_q.weight", |
| "transformer.layers.0.2.to_k.weight", |
| "transformer.layers.0.2.to_v.weight", |
| "transformer.layers.0.2.to_out.0.weight", |
| "transformer.layers.0.4.ff.0.0.weight", |
| "transformer.layers.0.4.ff.2.weight", |
| "transformer.layers.1.2.to_q.weight", |
| "transformer.layers.1.2.to_k.weight", |
| "transformer.layers.1.2.to_v.weight", |
| "transformer.layers.1.2.to_out.0.weight", |
| "transformer.layers.1.4.ff.0.0.weight", |
| "transformer.layers.1.4.ff.2.weight", |
| "transformer.layers.2.2.to_q.weight", |
| "transformer.layers.2.2.to_k.weight", |
| "transformer.layers.2.2.to_v.weight", |
| "transformer.layers.2.2.to_out.0.weight", |
| "transformer.layers.2.4.ff.0.0.weight", |
| "transformer.layers.2.4.ff.2.weight", |
| "transformer.layers.3.2.to_q.weight", |
| "transformer.layers.3.2.to_k.weight", |
| "transformer.layers.3.2.to_v.weight", |
| "transformer.layers.3.2.to_out.0.weight", |
| "transformer.layers.3.4.ff.0.0.weight", |
| "transformer.layers.3.4.ff.2.weight", |
| "transformer.layers.4.2.to_q.weight", |
| "transformer.layers.4.2.to_k.weight", |
| "transformer.layers.4.2.to_v.weight", |
| "transformer.layers.4.2.to_out.0.weight", |
| "transformer.layers.4.4.ff.0.0.weight", |
| "transformer.layers.4.4.ff.2.weight", |
| "transformer.layers.5.2.to_q.weight", |
| "transformer.layers.5.2.to_k.weight", |
| "transformer.layers.5.2.to_v.weight", |
| "transformer.layers.5.2.to_out.0.weight", |
| "transformer.layers.5.4.ff.0.0.weight", |
| "transformer.layers.5.4.ff.2.weight", |
| "transformer.layers.6.2.to_q.weight", |
| "transformer.layers.6.2.to_k.weight", |
| "transformer.layers.6.2.to_v.weight", |
| "transformer.layers.6.2.to_out.0.weight", |
| "transformer.layers.6.4.ff.0.0.weight", |
| "transformer.layers.6.4.ff.2.weight", |
| "transformer.layers.7.2.to_q.weight", |
| "transformer.layers.7.2.to_k.weight", |
| "transformer.layers.7.2.to_v.weight", |
| "transformer.layers.7.2.to_out.0.weight", |
| "transformer.layers.7.4.ff.0.0.weight", |
| "transformer.layers.7.4.ff.2.weight", |
| "transformer.layers.8.4.ff.0.0.weight", |
| "transformer.layers.8.4.ff.2.weight", |
| "transformer.layers.9.4.ff.0.0.weight", |
| "transformer.layers.9.4.ff.2.weight", |
| "transformer.layers.10.4.ff.0.0.weight", |
| "transformer.layers.10.4.ff.2.weight", |
| "transformer.layers.11.4.ff.0.0.weight", |
| "transformer.layers.11.4.ff.2.weight", |
| "transformer.layers.12.4.ff.0.0.weight", |
| "transformer.layers.12.4.ff.2.weight", |
| "transformer.layers.13.0.weight", |
| "transformer.layers.13.4.ff.0.0.weight", |
| "transformer.layers.13.4.ff.2.weight", |
| "transformer.layers.14.0.weight", |
| "transformer.layers.14.4.ff.0.0.weight", |
| "transformer.layers.14.4.ff.2.weight", |
| "transformer.layers.15.0.weight", |
| "transformer.layers.15.4.ff.0.0.weight", |
| "transformer.layers.15.4.ff.2.weight", |
| "transformer.layers.16.4.ff.0.0.weight", |
| "transformer.layers.16.4.ff.2.weight", |
| "transformer.layers.17.4.ff.0.0.weight", |
| "transformer.layers.17.4.ff.2.weight", |
| "transformer.layers.18.4.ff.0.0.weight", |
| "transformer.layers.18.4.ff.2.weight", |
| "transformer.layers.19.4.ff.0.0.weight", |
| "transformer.layers.19.4.ff.2.weight", |
| "transformer.layers.20.4.ff.0.0.weight", |
| "transformer.layers.20.4.ff.2.weight", |
| "transformer.layers.21.4.ff.0.0.weight", |
| "transformer.layers.21.4.ff.2.weight", |
| "transformer.layers.22.4.ff.0.0.weight", |
| "transformer.layers.22.4.ff.2.weight", |
| "transformer.layers.23.4.ff.0.0.weight", |
| "transformer.layers.23.4.ff.2.weight", |
| "transformer.layers.24.4.ff.0.0.weight", |
| "transformer.layers.24.4.ff.2.weight", |
| "transformer.layers.25.4.ff.0.0.weight", |
| "transformer.layers.25.4.ff.2.weight" |
| ], |
| "bottleneck_layers": [], |
| "recommendations": { |
| "focus_layers": [ |
| "transformer.time_embed.time_mlp.0.weight", |
| "transformer.time_embed.time_mlp.2.weight", |
| "transformer.text_embed.text_embed.weight", |
| "transformer.input_embed.proj.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight", |
| "transformer.layers.0.2.to_q.weight", |
| "transformer.layers.0.2.to_k.weight", |
| "transformer.layers.0.2.to_v.weight", |
| "transformer.layers.0.2.to_out.0.weight", |
| "transformer.layers.0.4.ff.0.0.weight", |
| "transformer.layers.0.4.ff.2.weight", |
| "transformer.layers.1.2.to_q.weight", |
| "transformer.layers.1.2.to_k.weight", |
| "transformer.layers.1.2.to_v.weight", |
| "transformer.layers.1.2.to_out.0.weight", |
| "transformer.layers.1.4.ff.0.0.weight", |
| "transformer.layers.1.4.ff.2.weight", |
| "transformer.layers.2.2.to_q.weight", |
| "transformer.layers.2.2.to_k.weight", |
| "transformer.layers.2.2.to_v.weight", |
| "transformer.layers.2.2.to_out.0.weight", |
| "transformer.layers.2.4.ff.0.0.weight", |
| "transformer.layers.2.4.ff.2.weight", |
| "transformer.layers.3.2.to_q.weight", |
| "transformer.layers.3.2.to_k.weight", |
| "transformer.layers.3.2.to_v.weight", |
| "transformer.layers.3.2.to_out.0.weight", |
| "transformer.layers.3.4.ff.0.0.weight", |
| "transformer.layers.3.4.ff.2.weight", |
| "transformer.layers.4.2.to_q.weight", |
| "transformer.layers.4.2.to_k.weight", |
| "transformer.layers.4.2.to_v.weight", |
| "transformer.layers.4.2.to_out.0.weight", |
| "transformer.layers.4.4.ff.0.0.weight", |
| "transformer.layers.4.4.ff.2.weight", |
| "transformer.layers.5.2.to_q.weight", |
| "transformer.layers.5.2.to_k.weight", |
| "transformer.layers.5.2.to_v.weight", |
| "transformer.layers.5.2.to_out.0.weight", |
| "transformer.layers.5.4.ff.0.0.weight", |
| "transformer.layers.5.4.ff.2.weight", |
| "transformer.layers.6.2.to_q.weight", |
| "transformer.layers.6.2.to_k.weight", |
| "transformer.layers.6.2.to_v.weight", |
| "transformer.layers.6.2.to_out.0.weight", |
| "transformer.layers.6.4.ff.0.0.weight", |
| "transformer.layers.6.4.ff.2.weight", |
| "transformer.layers.7.2.to_q.weight", |
| "transformer.layers.7.2.to_k.weight", |
| "transformer.layers.7.2.to_v.weight", |
| "transformer.layers.7.2.to_out.0.weight", |
| "transformer.layers.7.4.ff.0.0.weight", |
| "transformer.layers.7.4.ff.2.weight", |
| "transformer.layers.8.4.ff.0.0.weight", |
| "transformer.layers.8.4.ff.2.weight", |
| "transformer.layers.9.4.ff.0.0.weight", |
| "transformer.layers.9.4.ff.2.weight", |
| "transformer.layers.10.4.ff.0.0.weight", |
| "transformer.layers.10.4.ff.2.weight", |
| "transformer.layers.11.4.ff.0.0.weight", |
| "transformer.layers.11.4.ff.2.weight", |
| "transformer.layers.12.4.ff.0.0.weight", |
| "transformer.layers.12.4.ff.2.weight", |
| "transformer.layers.13.0.weight", |
| "transformer.layers.13.4.ff.0.0.weight", |
| "transformer.layers.13.4.ff.2.weight", |
| "transformer.layers.14.0.weight", |
| "transformer.layers.14.4.ff.0.0.weight", |
| "transformer.layers.14.4.ff.2.weight", |
| "transformer.layers.15.0.weight", |
| "transformer.layers.15.4.ff.0.0.weight", |
| "transformer.layers.15.4.ff.2.weight", |
| "transformer.layers.16.4.ff.0.0.weight", |
| "transformer.layers.16.4.ff.2.weight", |
| "transformer.layers.17.4.ff.0.0.weight", |
| "transformer.layers.17.4.ff.2.weight", |
| "transformer.layers.18.4.ff.0.0.weight", |
| "transformer.layers.18.4.ff.2.weight", |
| "transformer.layers.19.4.ff.0.0.weight", |
| "transformer.layers.19.4.ff.2.weight", |
| "transformer.layers.20.4.ff.0.0.weight", |
| "transformer.layers.20.4.ff.2.weight", |
| "transformer.layers.21.4.ff.0.0.weight", |
| "transformer.layers.21.4.ff.2.weight", |
| "transformer.layers.22.4.ff.0.0.weight", |
| "transformer.layers.22.4.ff.2.weight", |
| "transformer.layers.23.4.ff.0.0.weight", |
| "transformer.layers.23.4.ff.2.weight", |
| "transformer.layers.24.4.ff.0.0.weight", |
| "transformer.layers.24.4.ff.2.weight", |
| "transformer.layers.25.4.ff.0.0.weight", |
| "transformer.layers.25.4.ff.2.weight" |
| ] |
| }, |
| "total_parameters": 391, |
| "total_elements": 360755948, |
| "param_ranges": { |
| "transformer.time_embed.time_mlp.0.weight": { |
| "min": -0.4302472174167633, |
| "max": 0.2981015741825104, |
| "mean": -0.0025541300419718027, |
| "std": 0.04255979508161545, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 256 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.0.bias": { |
| "min": -0.06310182064771652, |
| "max": 0.10759169608354568, |
| "mean": 0.0006188107072375715, |
| "std": 0.03408230096101761, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.2.weight": { |
| "min": -0.4127681851387024, |
| "max": 0.8368753790855408, |
| "mean": -0.00020183739252388477, |
| "std": 0.024111691862344742, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.2.bias": { |
| "min": -0.11528493463993073, |
| "max": 0.32169410586357117, |
| "mean": -0.0009411157225258648, |
| "std": 0.019568322226405144, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.text_embed.text_embed.weight": { |
| "min": -2.7921671867370605, |
| "max": 2.8708858489990234, |
| "mean": -0.00036475385422818363, |
| "std": 0.6154695153236389, |
| "sparsity": 0.0, |
| "shape": [ |
| 2546, |
| 100 |
| ] |
| }, |
| "transformer.input_embed.proj.weight": { |
| "min": -0.279247909784317, |
| "max": 0.3815617561340332, |
| "mean": 0.0004244846059009433, |
| "std": 0.04274849221110344, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 300 |
| ] |
| }, |
| "transformer.input_embed.proj.bias": { |
| "min": -0.22255778312683105, |
| "max": 0.2097877562046051, |
| "mean": -0.00448887562379241, |
| "std": 0.040919456630945206, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { |
| "min": -0.42842426896095276, |
| "max": 0.47603461146354675, |
| "mean": 3.9225278669619e-06, |
| "std": 0.024510197341442108, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 64, |
| 31 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { |
| "min": -0.3252594470977783, |
| "max": 0.1568366438150406, |
| "mean": -0.04670371487736702, |
| "std": 0.05158696323633194, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { |
| "min": -0.41043803095817566, |
| "max": 0.3547053635120392, |
| "mean": -0.00013071295688860118, |
| "std": 0.023602206259965897, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 64, |
| 31 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { |
| "min": -0.22980256378650665, |
| "max": 0.26275309920310974, |
| "mean": -0.02913004904985428, |
| "std": 0.04934975132346153, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.1.g": { |
| "min": 0.25458577275276184, |
| "max": 0.8201687335968018, |
| "mean": 0.5254767537117004, |
| "std": 0.08081887662410736, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_q.weight": { |
| "min": -0.2970397174358368, |
| "max": 0.2657235562801361, |
| "mean": -0.00042574311373755336, |
| "std": 0.03210281580686569, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_q.bias": { |
| "min": -0.09289710968732834, |
| "max": 0.1248435452580452, |
| "mean": 0.0006472540553659201, |
| "std": 0.025739869102835655, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_k.weight": { |
| "min": -0.2908317744731903, |
| "max": 0.2814251184463501, |
| "mean": -7.539847865700722e-05, |
| "std": 0.030931154265999794, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_k.bias": { |
| "min": -5.900259494781494, |
| "max": 5.815035820007324, |
| "mean": -0.009333062916994095, |
| "std": 1.2956619262695312, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_v.weight": { |
| "min": -0.42508748173713684, |
| "max": 0.3436461091041565, |
| "mean": 9.804315777728334e-05, |
| "std": 0.029953401535749435, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_v.bias": { |
| "min": -0.028917992487549782, |
| "max": 0.027773840352892876, |
| "mean": -0.00031790570938028395, |
| "std": 0.012571282684803009, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_out.0.weight": { |
| "min": -0.4539951980113983, |
| "max": 0.44834843277931213, |
| "mean": 2.359610516577959e-05, |
| "std": 0.02385314740240574, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_out.0.bias": { |
| "min": -0.08872788399457932, |
| "max": 0.0911579355597496, |
| "mean": 0.0022788788191974163, |
| "std": 0.01951882243156433, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.3.g": { |
| "min": 0.26684099435806274, |
| "max": 1.056283712387085, |
| "mean": 0.5311816930770874, |
| "std": 0.10443845391273499, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.4.ff.0.0.weight": { |
| "min": -0.5745589733123779, |
| "max": 0.608278751373291, |
| "mean": -0.0004312347446102649, |
| "std": 0.03859887644648552, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.4.ff.0.0.bias": { |
| "min": -0.18254584074020386, |
| "max": 0.04550725594162941, |
| "mean": -0.02946603111922741, |
| "std": 0.042608592659235, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.0.4.ff.2.weight": { |
| "min": -1.1672769784927368, |
| "max": 1.6339865922927856, |
| "mean": 0.0003258037322666496, |
| "std": 0.027695847675204277, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.0.4.ff.2.bias": { |
| "min": -0.16238044202327728, |
| "max": 0.205756276845932, |
| "mean": -0.021133966743946075, |
| "std": 0.02794249914586544, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.1.g": { |
| "min": 0.2238895148038864, |
| "max": 0.8438186645507812, |
| "mean": 0.48762065172195435, |
| "std": 0.07522151619195938, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_q.weight": { |
| "min": -0.2554994523525238, |
| "max": 0.30581825971603394, |
| "mean": -6.700396625092253e-06, |
| "std": 0.03347325325012207, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_q.bias": { |
| "min": -0.09536930173635483, |
| "max": 0.11054016649723053, |
| "mean": 6.769842002540827e-05, |
| "std": 0.026959657669067383, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_k.weight": { |
| "min": -0.2971096336841583, |
| "max": 0.2961491346359253, |
| "mean": 5.292622518027201e-05, |
| "std": 0.03254416957497597, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_k.bias": { |
| "min": -5.165089130401611, |
| "max": 5.085312843322754, |
| "mean": -0.01459675282239914, |
| "std": 1.1575658321380615, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_v.weight": { |
| "min": -0.34498170018196106, |
| "max": 0.3433385491371155, |
| "mean": 7.90221311035566e-05, |
| "std": 0.03006155788898468, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_v.bias": { |
| "min": -0.03615141659975052, |
| "max": 0.03325657546520233, |
| "mean": -0.00014247104991227388, |
| "std": 0.01303154043853283, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_out.0.weight": { |
| "min": -0.3154723644256592, |
| "max": 0.37497249245643616, |
| "mean": -2.0466719433898106e-05, |
| "std": 0.02405875362455845, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_out.0.bias": { |
| "min": -0.10546914488077164, |
| "max": 0.12202588468790054, |
| "mean": -0.0019681837875396013, |
| "std": 0.028853828087449074, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.3.g": { |
| "min": 0.3114376366138458, |
| "max": 1.12091863155365, |
| "mean": 0.6662803292274475, |
| "std": 0.09775093197822571, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.4.ff.0.0.weight": { |
| "min": -0.8727247714996338, |
| "max": 0.6275021433830261, |
| "mean": 0.001675525214523077, |
| "std": 0.047438088804483414, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.4.ff.0.0.bias": { |
| "min": -0.2714252769947052, |
| "max": 0.03427727520465851, |
| "mean": -0.04661863297224045, |
| "std": 0.04059664160013199, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.1.4.ff.2.weight": { |
| "min": -0.9225524067878723, |
| "max": 0.9647303223609924, |
| "mean": 0.0010189020540565252, |
| "std": 0.04070537909865379, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.1.4.ff.2.bias": { |
| "min": -0.1445721685886383, |
| "max": 0.07502365112304688, |
| "mean": -0.009085974656045437, |
| "std": 0.02569437585771084, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.1.g": { |
| "min": 0.24001570045948029, |
| "max": 0.7130113244056702, |
| "mean": 0.44724389910697937, |
| "std": 0.059336330741643906, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_q.weight": { |
| "min": -0.27252721786499023, |
| "max": 0.2977474629878998, |
| "mean": 9.076926289708354e-06, |
| "std": 0.03546866402029991, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_q.bias": { |
| "min": -0.11933133751153946, |
| "max": 0.11861857026815414, |
| "mean": 0.000759843154810369, |
| "std": 0.027626313269138336, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_k.weight": { |
| "min": -0.2810227572917938, |
| "max": 0.2797848582267761, |
| "mean": -7.693594670854509e-05, |
| "std": 0.03509817644953728, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_k.bias": { |
| "min": -2.5099942684173584, |
| "max": 2.5219902992248535, |
| "mean": 0.026751244440674782, |
| "std": 0.5868741273880005, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_v.weight": { |
| "min": -0.2210882604122162, |
| "max": 0.27153223752975464, |
| "mean": 2.4560677047702484e-06, |
| "std": 0.030732404440641403, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_v.bias": { |
| "min": -0.0335114523768425, |
| "max": 0.031222868710756302, |
| "mean": 0.00011844941036542878, |
| "std": 0.01240864023566246, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_out.0.weight": { |
| "min": -0.23524264991283417, |
| "max": 0.23183144629001617, |
| "mean": 5.6907440011855215e-05, |
| "std": 0.025696856901049614, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_out.0.bias": { |
| "min": -0.13587476313114166, |
| "max": 0.12763848900794983, |
| "mean": -0.005494903773069382, |
| "std": 0.039958395063877106, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.3.g": { |
| "min": 0.35451608896255493, |
| "max": 1.1720539331436157, |
| "mean": 0.7106262445449829, |
| "std": 0.10376716405153275, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.4.ff.0.0.weight": { |
| "min": -0.6174105405807495, |
| "max": 0.5545085072517395, |
| "mean": 0.0011598969576880336, |
| "std": 0.04611882567405701, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.4.ff.0.0.bias": { |
| "min": -0.1878771334886551, |
| "max": 0.024924062192440033, |
| "mean": -0.0348367840051651, |
| "std": 0.028611591085791588, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.2.4.ff.2.weight": { |
| "min": -1.1316187381744385, |
| "max": 0.971271812915802, |
| "mean": 0.0003585200756788254, |
| "std": 0.0423467643558979, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.2.4.ff.2.bias": { |
| "min": -0.5980486869812012, |
| "max": 0.06288419663906097, |
| "mean": -0.0048779072239995, |
| "std": 0.028619417920708656, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.1.g": { |
| "min": 0.3752831816673279, |
| "max": 0.9404632449150085, |
| "mean": 0.5925332307815552, |
| "std": 0.0669492781162262, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_q.weight": { |
| "min": -0.3914392590522766, |
| "max": 0.36907821893692017, |
| "mean": 7.118703797459602e-05, |
| "std": 0.03718792647123337, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_q.bias": { |
| "min": -0.11911813914775848, |
| "max": 0.1366533637046814, |
| "mean": 0.0009285138221457601, |
| "std": 0.029234997928142548, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_k.weight": { |
| "min": -0.6193273067474365, |
| "max": 0.5089406967163086, |
| "mean": 1.5145867109822575e-05, |
| "std": 0.036441244184970856, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_k.bias": { |
| "min": -8.18839168548584, |
| "max": 8.790501594543457, |
| "mean": -0.1092919334769249, |
| "std": 1.6991198062896729, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_v.weight": { |
| "min": -0.27663567662239075, |
| "max": 0.23973354697227478, |
| "mean": 5.2983978093834594e-05, |
| "std": 0.032615091651678085, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_v.bias": { |
| "min": -0.05204087495803833, |
| "max": 0.03958116099238396, |
| "mean": 9.567412780597806e-05, |
| "std": 0.012961393222212791, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_out.0.weight": { |
| "min": -0.23071666061878204, |
| "max": 0.234710693359375, |
| "mean": -2.1666935936082155e-05, |
| "std": 0.029391352087259293, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_out.0.bias": { |
| "min": -0.20436595380306244, |
| "max": 0.10555993020534515, |
| "mean": -0.004022484645247459, |
| "std": 0.032626353204250336, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.3.g": { |
| "min": 0.3398347795009613, |
| "max": 1.0127081871032715, |
| "mean": 0.7008411884307861, |
| "std": 0.09675740450620651, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.4.ff.0.0.weight": { |
| "min": -0.5648741126060486, |
| "max": 0.8332529664039612, |
| "mean": 0.00041526954737491906, |
| "std": 0.04230087623000145, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.4.ff.0.0.bias": { |
| "min": -0.2118305265903473, |
| "max": 0.030412573367357254, |
| "mean": -0.032187312841415405, |
| "std": 0.026507310569286346, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.3.4.ff.2.weight": { |
| "min": -0.7544606924057007, |
| "max": 0.718633234500885, |
| "mean": -1.3493583537638187e-05, |
| "std": 0.03684115782380104, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.3.4.ff.2.bias": { |
| "min": -0.26357486844062805, |
| "max": 0.10591558367013931, |
| "mean": -0.0030233184807002544, |
| "std": 0.028867946937680244, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.1.g": { |
| "min": 0.2842615246772766, |
| "max": 0.6951268911361694, |
| "mean": 0.4995192289352417, |
| "std": 0.04653889685869217, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_q.weight": { |
| "min": -0.2790677845478058, |
| "max": 0.2343253642320633, |
| "mean": -0.00011120391718577594, |
| "std": 0.03876161575317383, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_q.bias": { |
| "min": -0.15418817102909088, |
| "max": 0.12667444348335266, |
| "mean": -0.0022305608727037907, |
| "std": 0.033373840153217316, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_k.weight": { |
| "min": -0.4139367640018463, |
| "max": 0.660070538520813, |
| "mean": -1.9737122784135863e-05, |
| "std": 0.03909851238131523, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_k.bias": { |
| "min": -4.238705635070801, |
| "max": 4.723268985748291, |
| "mean": -0.020462416112422943, |
| "std": 1.0078494548797607, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_v.weight": { |
| "min": -0.24497364461421967, |
| "max": 0.20763254165649414, |
| "mean": 4.4202079152455553e-05, |
| "std": 0.033965613692998886, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_v.bias": { |
| "min": -0.03459359332919121, |
| "max": 0.04478804022073746, |
| "mean": -2.136104740202427e-05, |
| "std": 0.012631777673959732, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_out.0.weight": { |
| "min": -0.20072373747825623, |
| "max": 0.20615817606449127, |
| "mean": -2.975538700411562e-05, |
| "std": 0.031023193150758743, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_out.0.bias": { |
| "min": -0.19997990131378174, |
| "max": 0.11331257969141006, |
| "mean": -0.0029115378856658936, |
| "std": 0.03451942652463913, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.3.g": { |
| "min": 0.36702099442481995, |
| "max": 1.0571231842041016, |
| "mean": 0.6706027388572693, |
| "std": 0.06639590114355087, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.4.ff.0.0.weight": { |
| "min": -0.3983962833881378, |
| "max": 0.5022679567337036, |
| "mean": -3.846201434498653e-05, |
| "std": 0.04113590717315674, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.4.ff.0.0.bias": { |
| "min": -0.12814512848854065, |
| "max": 0.02683641016483307, |
| "mean": -0.03054228238761425, |
| "std": 0.02187994495034218, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.4.4.ff.2.weight": { |
| "min": -0.44913211464881897, |
| "max": 0.433132529258728, |
| "mean": 7.945985271362588e-05, |
| "std": 0.0348953977227211, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.4.4.ff.2.bias": { |
| "min": -0.2676845192909241, |
| "max": 0.0728912353515625, |
| "mean": -0.0011024216655641794, |
| "std": 0.023127950727939606, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.1.g": { |
| "min": 0.2873011827468872, |
| "max": 0.6852278709411621, |
| "mean": 0.5245736837387085, |
| "std": 0.047536205500364304, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_q.weight": { |
| "min": -0.22235900163650513, |
| "max": 0.2234368920326233, |
| "mean": 1.5712306776549667e-05, |
| "std": 0.0389518178999424, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_q.bias": { |
| "min": -0.13648226857185364, |
| "max": 0.10937032103538513, |
| "mean": 0.00023500403040088713, |
| "std": 0.02922363579273224, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_k.weight": { |
| "min": -0.3750153183937073, |
| "max": 0.4373463988304138, |
| "mean": -9.542611223878339e-06, |
| "std": 0.03928782045841217, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_k.bias": { |
| "min": -3.8463687896728516, |
| "max": 5.000114917755127, |
| "mean": 0.00974472425878048, |
| "std": 0.8453519344329834, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_v.weight": { |
| "min": -0.22320200502872467, |
| "max": 0.2200344353914261, |
| "mean": -1.8790160538628697e-07, |
| "std": 0.03441300988197327, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_v.bias": { |
| "min": -0.04361514747142792, |
| "max": 0.03597420081496239, |
| "mean": -0.0002564755268394947, |
| "std": 0.01208114717155695, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_out.0.weight": { |
| "min": -0.21329528093338013, |
| "max": 0.1889103502035141, |
| "mean": -1.6649610188324004e-05, |
| "std": 0.031539641320705414, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_out.0.bias": { |
| "min": -0.18086224794387817, |
| "max": 0.12070237100124359, |
| "mean": -0.002405309583991766, |
| "std": 0.041269298642873764, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.3.g": { |
| "min": 0.4225497245788574, |
| "max": 0.9420632123947144, |
| "mean": 0.6627737283706665, |
| "std": 0.056812334805727005, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.4.ff.0.0.weight": { |
| "min": -0.3715151250362396, |
| "max": 0.4758515954017639, |
| "mean": -8.248311496572569e-05, |
| "std": 0.040895167738199234, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.4.ff.0.0.bias": { |
| "min": -0.20838980376720428, |
| "max": 0.027207521721720695, |
| "mean": -0.030246354639530182, |
| "std": 0.02134900726377964, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.5.4.ff.2.weight": { |
| "min": -0.3401075303554535, |
| "max": 0.7336291074752808, |
| "mean": 8.389431604882702e-05, |
| "std": 0.034770816564559937, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.5.4.ff.2.bias": { |
| "min": -0.24028635025024414, |
| "max": 0.05047708749771118, |
| "mean": -0.001194795360788703, |
| "std": 0.020465141162276268, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.1.g": { |
| "min": 0.30595850944519043, |
| "max": 0.6537705063819885, |
| "mean": 0.5251566767692566, |
| "std": 0.04612725228071213, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_q.weight": { |
| "min": -0.30432915687561035, |
| "max": 0.21739104390144348, |
| "mean": 6.996125739533454e-05, |
| "std": 0.03949799761176109, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_q.bias": { |
| "min": -0.14943227171897888, |
| "max": 0.13134317100048065, |
| "mean": 0.00034546080860309303, |
| "std": 0.030460603535175323, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_k.weight": { |
| "min": -0.25738975405693054, |
| "max": 0.20207944512367249, |
| "mean": 3.1017469154903665e-05, |
| "std": 0.03948727250099182, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_k.bias": { |
| "min": -2.336665153503418, |
| "max": 2.376288890838623, |
| "mean": -0.026247629895806313, |
| "std": 0.44984105229377747, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_v.weight": { |
| "min": -0.18894457817077637, |
| "max": 0.21059554815292358, |
| "mean": 3.7193480238784105e-05, |
| "std": 0.034797847270965576, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_v.bias": { |
| "min": -0.03164611756801605, |
| "max": 0.03540992736816406, |
| "mean": -0.00020107367890886962, |
| "std": 0.012292974628508091, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_out.0.weight": { |
| "min": -0.18840090930461884, |
| "max": 0.17046599090099335, |
| "mean": -6.797777314204723e-05, |
| "std": 0.032174453139305115, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_out.0.bias": { |
| "min": -0.13930758833885193, |
| "max": 0.13733482360839844, |
| "mean": -0.002516954904422164, |
| "std": 0.05130286514759064, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.3.g": { |
| "min": 0.46718037128448486, |
| "max": 0.9563874006271362, |
| "mean": 0.6689748764038086, |
| "std": 0.05278700590133667, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.4.ff.0.0.weight": { |
| "min": -0.3242974579334259, |
| "max": 0.3098086714744568, |
| "mean": -1.3617936929222196e-06, |
| "std": 0.04095118120312691, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.4.ff.0.0.bias": { |
| "min": -0.12469282001256943, |
| "max": 0.02526070550084114, |
| "mean": -0.030708763748407364, |
| "std": 0.019816862419247627, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.6.4.ff.2.weight": { |
| "min": -0.4401431083679199, |
| "max": 0.44523754715919495, |
| "mean": 9.650168067310005e-05, |
| "std": 0.03512365743517876, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.6.4.ff.2.bias": { |
| "min": -0.22469638288021088, |
| "max": 0.05176383629441261, |
| "mean": -0.0011855906341224909, |
| "std": 0.018477564677596092, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.1.g": { |
| "min": 0.3391278088092804, |
| "max": 0.7394291162490845, |
| "mean": 0.5587280988693237, |
| "std": 0.04140337556600571, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_q.weight": { |
| "min": -0.27262935042381287, |
| "max": 0.2784675061702728, |
| "mean": 1.984157097467687e-05, |
| "std": 0.041061654686927795, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_q.bias": { |
| "min": -0.1370200663805008, |
| "max": 0.13985797762870789, |
| "mean": 0.0004876606981270015, |
| "std": 0.026632333174347878, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_k.weight": { |
| "min": -0.49073120951652527, |
| "max": 0.35599616169929504, |
| "mean": 8.872401667758822e-05, |
| "std": 0.040699537843465805, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_k.bias": { |
| "min": -2.2974724769592285, |
| "max": 1.7454196214675903, |
| "mean": -0.021081820130348206, |
| "std": 0.5002042055130005, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_v.weight": { |
| "min": -0.21770520508289337, |
| "max": 0.19793029129505157, |
| "mean": -4.0488688682671636e-05, |
| "std": 0.03423655033111572, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_v.bias": { |
| "min": -0.0412483848631382, |
| "max": 0.038579147309064865, |
| "mean": -0.00014048503362573683, |
| "std": 0.012878325767815113, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_out.0.weight": { |
| "min": -0.17742925882339478, |
| "max": 0.1836576759815216, |
| "mean": 4.762586468132213e-05, |
| "std": 0.031559526920318604, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_out.0.bias": { |
| "min": -0.17993344366550446, |
| "max": 0.18376585841178894, |
| "mean": -0.0022200806997716427, |
| "std": 0.05484066903591156, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.3.g": { |
| "min": 0.4742898643016815, |
| "max": 1.0256999731063843, |
| "mean": 0.6453396677970886, |
| "std": 0.05035531893372536, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.4.ff.0.0.weight": { |
| "min": -0.27185067534446716, |
| "max": 0.3093453645706177, |
| "mean": 0.00011244456982240081, |
| "std": 0.040687281638383865, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.4.ff.0.0.bias": { |
| "min": -0.10582341998815536, |
| "max": 0.02690320834517479, |
| "mean": -0.02951919659972191, |
| "std": 0.017931465059518814, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.7.4.ff.2.weight": { |
| "min": -0.3390185832977295, |
| "max": 0.32922977209091187, |
| "mean": 5.620906449621543e-05, |
| "std": 0.034417424350976944, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.7.4.ff.2.bias": { |
| "min": -0.18173733353614807, |
| "max": 0.04227666184306145, |
| "mean": -0.0010707223555073142, |
| "std": 0.017213771119713783, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.1.g": { |
| "min": 0.3254404067993164, |
| "max": 0.6867184638977051, |
| "mean": 0.5112515091896057, |
| "std": 0.036953605711460114, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_q.weight": { |
| "min": -0.23387184739112854, |
| "max": 0.22577211260795593, |
| "mean": -3.611366992117837e-05, |
| "std": 0.039180755615234375, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_q.bias": { |
| "min": -0.11533147841691971, |
| "max": 0.13174240291118622, |
| "mean": 0.00015339103993028402, |
| "std": 0.029181061312556267, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_k.weight": { |
| "min": -0.3528231382369995, |
| "max": 0.28539976477622986, |
| "mean": 7.355230991379358e-06, |
| "std": 0.03924909234046936, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_k.bias": { |
| "min": -4.133138179779053, |
| "max": 3.544285774230957, |
| "mean": -0.011592379771173, |
| "std": 0.682723343372345, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_v.weight": { |
| "min": -0.211366206407547, |
| "max": 0.20918519794940948, |
| "mean": 3.47092718584463e-05, |
| "std": 0.03448852524161339, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_v.bias": { |
| "min": -0.03565165773034096, |
| "max": 0.04795990511775017, |
| "mean": 0.0007935892790555954, |
| "std": 0.012854626402258873, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_out.0.weight": { |
| "min": -0.21064111590385437, |
| "max": 0.1932363212108612, |
| "mean": -1.2698478712991346e-06, |
| "std": 0.03169921413064003, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_out.0.bias": { |
| "min": -0.18659119307994843, |
| "max": 0.17711447179317474, |
| "mean": -0.0028428896330296993, |
| "std": 0.05864271521568298, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.3.g": { |
| "min": 0.4746437668800354, |
| "max": 1.0418283939361572, |
| "mean": 0.6514592170715332, |
| "std": 0.049664221704006195, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.4.ff.0.0.weight": { |
| "min": -0.24862074851989746, |
| "max": 0.3290244936943054, |
| "mean": 0.0001805826323106885, |
| "std": 0.04057569056749344, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.4.ff.0.0.bias": { |
| "min": -0.12504367530345917, |
| "max": 0.024559227749705315, |
| "mean": -0.030504360795021057, |
| "std": 0.017604367807507515, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.8.4.ff.2.weight": { |
| "min": -0.42111000418663025, |
| "max": 0.4816901385784149, |
| "mean": -1.4580382412532344e-07, |
| "std": 0.03540220111608505, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.8.4.ff.2.bias": { |
| "min": -0.15185561776161194, |
| "max": 0.04354217275977135, |
| "mean": 4.59605835203547e-05, |
| "std": 0.014884229749441147, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.1.g": { |
| "min": 0.3155389130115509, |
| "max": 0.6820871829986572, |
| "mean": 0.5529488921165466, |
| "std": 0.04071735590696335, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_q.weight": { |
| "min": -0.20643381774425507, |
| "max": 0.21991202235221863, |
| "mean": 3.090859536314383e-05, |
| "std": 0.03830238804221153, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_q.bias": { |
| "min": -0.13782010972499847, |
| "max": 0.11272551119327545, |
| "mean": 1.9601531676016748e-05, |
| "std": 0.025822695344686508, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_k.weight": { |
| "min": -0.40278956294059753, |
| "max": 0.37109923362731934, |
| "mean": 2.618670441734139e-05, |
| "std": 0.03818415477871895, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_k.bias": { |
| "min": -3.7713959217071533, |
| "max": 2.8690977096557617, |
| "mean": 0.0011573811061680317, |
| "std": 0.5169072151184082, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_v.weight": { |
| "min": -0.20292331278324127, |
| "max": 0.1974206268787384, |
| "mean": 2.9524358978960663e-05, |
| "std": 0.03429995849728584, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_v.bias": { |
| "min": -0.05099167302250862, |
| "max": 0.040043603628873825, |
| "mean": -0.00041941594099625945, |
| "std": 0.01342028472572565, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_out.0.weight": { |
| "min": -0.19649569690227509, |
| "max": 0.20179419219493866, |
| "mean": -1.231730857398361e-05, |
| "std": 0.031807754188776016, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_out.0.bias": { |
| "min": -0.19327867031097412, |
| "max": 0.195101797580719, |
| "mean": -0.002969510853290558, |
| "std": 0.06256763637065887, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.3.g": { |
| "min": 0.3495103716850281, |
| "max": 1.0841096639633179, |
| "mean": 0.6672286987304688, |
| "std": 0.055231790989637375, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.4.ff.0.0.weight": { |
| "min": -0.22513826191425323, |
| "max": 0.25143498182296753, |
| "mean": 0.00035896283225156367, |
| "std": 0.040764935314655304, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.4.ff.0.0.bias": { |
| "min": -0.0910005122423172, |
| "max": 0.043744608759880066, |
| "mean": -0.030088767409324646, |
| "std": 0.017610033974051476, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.9.4.ff.2.weight": { |
| "min": -0.3535524308681488, |
| "max": 0.30403411388397217, |
| "mean": -4.383287887321785e-05, |
| "std": 0.03712723031640053, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.9.4.ff.2.bias": { |
| "min": -0.16203957796096802, |
| "max": 0.063482366502285, |
| "mean": -8.168067142833024e-05, |
| "std": 0.019403086975216866, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.1.g": { |
| "min": 0.3487982153892517, |
| "max": 0.7220908999443054, |
| "mean": 0.542417049407959, |
| "std": 0.039066411554813385, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_q.weight": { |
| "min": -0.2193998396396637, |
| "max": 0.22306619584560394, |
| "mean": -1.1200094377272762e-05, |
| "std": 0.039234746247529984, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_q.bias": { |
| "min": -0.118410125374794, |
| "max": 0.17068907618522644, |
| "mean": 0.00027954723918810487, |
| "std": 0.02511775679886341, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_k.weight": { |
| "min": -0.2468089461326599, |
| "max": 0.3010835647583008, |
| "mean": -3.6559536965796724e-05, |
| "std": 0.03893429413437843, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_k.bias": { |
| "min": -3.5055222511291504, |
| "max": 3.714968204498291, |
| "mean": 0.015851959586143494, |
| "std": 0.7825093269348145, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_v.weight": { |
| "min": -0.21874836087226868, |
| "max": 0.2377166897058487, |
| "mean": -1.354666437691776e-05, |
| "std": 0.036306966096162796, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_v.bias": { |
| "min": -0.04711933806538582, |
| "max": 0.051407281309366226, |
| "mean": 0.0004819422902073711, |
| "std": 0.013517641462385654, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_out.0.weight": { |
| "min": -0.21396194398403168, |
| "max": 0.2176503837108612, |
| "mean": 5.661203613271937e-05, |
| "std": 0.033618949353694916, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_out.0.bias": { |
| "min": -0.21143636107444763, |
| "max": 0.23150545358657837, |
| "mean": -0.0051071615889668465, |
| "std": 0.061890047043561935, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.3.g": { |
| "min": 0.36224639415740967, |
| "max": 1.1013858318328857, |
| "mean": 0.6993460655212402, |
| "std": 0.053608398884534836, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.4.ff.0.0.weight": { |
| "min": -0.23459650576114655, |
| "max": 0.2449653446674347, |
| "mean": 0.00046337299863807857, |
| "std": 0.04127378761768341, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.4.ff.0.0.bias": { |
| "min": -0.09813369810581207, |
| "max": 0.06841138750314713, |
| "mean": -0.03143805265426636, |
| "std": 0.018124299123883247, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.10.4.ff.2.weight": { |
| "min": -0.3017565906047821, |
| "max": 0.35157960653305054, |
| "mean": -8.145418541971594e-05, |
| "std": 0.04027964174747467, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.10.4.ff.2.bias": { |
| "min": -0.15233245491981506, |
| "max": 0.1496550738811493, |
| "mean": 0.0002547369513195008, |
| "std": 0.023038377985358238, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.1.g": { |
| "min": 0.999387264251709, |
| "max": 1.0017390251159668, |
| "mean": 1.0002288818359375, |
| "std": 0.0006608659168705344, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_q.weight": { |
| "min": -0.03126532956957817, |
| "max": 0.03126157820224762, |
| "mean": -1.929386235133279e-05, |
| "std": 0.01804366707801819, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_q.bias": { |
| "min": -0.031232358887791634, |
| "max": 0.030991962179541588, |
| "mean": -0.0010843182681128383, |
| "std": 0.01795327477157116, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_k.weight": { |
| "min": -0.031262245029211044, |
| "max": 0.031266022473573685, |
| "mean": 3.54884014086565e-06, |
| "std": 0.01804407499730587, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_k.bias": { |
| "min": -0.03115827776491642, |
| "max": 0.031178638339042664, |
| "mean": 0.00033397332299500704, |
| "std": 0.01806548982858658, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_v.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_v.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_out.0.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_out.0.bias": { |
| "min": -0.0002825965639203787, |
| "max": 0.0002991823712363839, |
| "mean": 9.51684285155352e-07, |
| "std": 8.538085967302322e-05, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.3.g": { |
| "min": 0.9994979500770569, |
| "max": 1.0022096633911133, |
| "mean": 1.0004006624221802, |
| "std": 0.0006605891394428909, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.4.ff.0.0.weight": { |
| "min": -0.03165094926953316, |
| "max": 0.03164109215140343, |
| "mean": -8.348271876457147e-06, |
| "std": 0.018046928569674492, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.4.ff.0.0.bias": { |
| "min": -0.03134633228182793, |
| "max": 0.031511712819337845, |
| "mean": 0.00030681173666380346, |
| "std": 0.018000956624746323, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.11.4.ff.2.weight": { |
| "min": -0.00043215902405790985, |
| "max": 0.00046604761155322194, |
| "mean": 6.842553190722356e-09, |
| "std": 8.495857764501125e-05, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.11.4.ff.2.bias": { |
| "min": -0.00026889159926213324, |
| "max": 0.0002754697925411165, |
| "mean": -3.8592878581766854e-07, |
| "std": 8.52422381285578e-05, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.1.g": { |
| "min": 0.3829193115234375, |
| "max": 0.7194843292236328, |
| "mean": 0.5807508826255798, |
| "std": 0.03887004032731056, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_q.weight": { |
| "min": -0.23809631168842316, |
| "max": 0.1965617835521698, |
| "mean": 2.6561519916867837e-05, |
| "std": 0.03746955841779709, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_q.bias": { |
| "min": -0.11867669969797134, |
| "max": 0.1661195158958435, |
| "mean": 0.0009914024267345667, |
| "std": 0.02754930779337883, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_k.weight": { |
| "min": -0.246256485581398, |
| "max": 0.5006742477416992, |
| "mean": -5.049802712164819e-05, |
| "std": 0.03762722760438919, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_k.bias": { |
| "min": -3.9423649311065674, |
| "max": 3.7695066928863525, |
| "mean": -0.003572166431695223, |
| "std": 0.6814473271369934, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_v.weight": { |
| "min": -0.22738605737686157, |
| "max": 0.2515488564968109, |
| "mean": -1.1636337148956954e-05, |
| "std": 0.03743850067257881, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_v.bias": { |
| "min": -0.07163971662521362, |
| "max": 0.08085085451602936, |
| "mean": -0.0005172090604901314, |
| "std": 0.015671856701374054, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_out.0.weight": { |
| "min": -0.22821125388145447, |
| "max": 0.25809258222579956, |
| "mean": -2.8563266823766753e-05, |
| "std": 0.03542532026767731, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_out.0.bias": { |
| "min": -0.20053939521312714, |
| "max": 0.2151157110929489, |
| "mean": -0.005536144133657217, |
| "std": 0.06835491210222244, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.3.g": { |
| "min": 0.4050634801387787, |
| "max": 1.1895967721939087, |
| "mean": 0.7380250096321106, |
| "std": 0.055244140326976776, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.4.ff.0.0.weight": { |
| "min": -0.22111129760742188, |
| "max": 0.24610112607479095, |
| "mean": 0.0005211608950048685, |
| "std": 0.04134161397814751, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.4.ff.0.0.bias": { |
| "min": -0.1035081148147583, |
| "max": 0.02415246143937111, |
| "mean": -0.03267139568924904, |
| "std": 0.0188875924795866, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.12.4.ff.2.weight": { |
| "min": -0.4493615925312042, |
| "max": 0.4224270284175873, |
| "mean": -0.00043286356958560646, |
| "std": 0.046902477741241455, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.12.4.ff.2.bias": { |
| "min": -0.25133273005485535, |
| "max": 0.47000864148139954, |
| "mean": 0.003200301667675376, |
| "std": 0.04454173892736435, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.0.weight": { |
| "min": -0.31721800565719604, |
| "max": 0.3333887755870819, |
| "mean": -2.5312700017821044e-05, |
| "std": 0.021290434524416924, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.13.1.g": { |
| "min": 0.3246031403541565, |
| "max": 0.6853436231613159, |
| "mean": 0.5710366368293762, |
| "std": 0.04471459612250328, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_q.weight": { |
| "min": -0.16453500092029572, |
| "max": 0.1740685999393463, |
| "mean": -4.849593824474141e-05, |
| "std": 0.03318438306450844, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_q.bias": { |
| "min": -0.1867130845785141, |
| "max": 0.14271552860736847, |
| "mean": 4.246922617312521e-05, |
| "std": 0.02968418225646019, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_k.weight": { |
| "min": -0.3805757164955139, |
| "max": 0.24612776935100555, |
| "mean": -9.95914979284862e-06, |
| "std": 0.03276544809341431, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_k.bias": { |
| "min": -3.655998945236206, |
| "max": 3.29028582572937, |
| "mean": -0.014252795837819576, |
| "std": 0.9852345585823059, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_v.weight": { |
| "min": -0.23509258031845093, |
| "max": 0.24746716022491455, |
| "mean": -1.7896145436679944e-05, |
| "std": 0.041701558977365494, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_v.bias": { |
| "min": -0.0727391242980957, |
| "max": 0.15445110201835632, |
| "mean": 0.0006684996769763529, |
| "std": 0.0251635629683733, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_out.0.weight": { |
| "min": -0.2665387690067291, |
| "max": 0.24852725863456726, |
| "mean": -1.545724444440566e-05, |
| "std": 0.040141962468624115, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_out.0.bias": { |
| "min": -0.1895304173231125, |
| "max": 0.1947212517261505, |
| "mean": -0.0012303038965910673, |
| "std": 0.06668580323457718, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.3.g": { |
| "min": 0.32925331592559814, |
| "max": 0.9993983507156372, |
| "mean": 0.7192491888999939, |
| "std": 0.05233968794345856, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.4.ff.0.0.weight": { |
| "min": -0.23172250390052795, |
| "max": 0.24564699828624725, |
| "mean": 0.0001827301166485995, |
| "std": 0.040905360132455826, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.4.ff.0.0.bias": { |
| "min": -0.11416275054216385, |
| "max": 0.01871776208281517, |
| "mean": -0.04247911646962166, |
| "std": 0.018855031579732895, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.13.4.ff.2.weight": { |
| "min": -0.3899572193622589, |
| "max": 0.4073238670825958, |
| "mean": -2.1964835468679667e-05, |
| "std": 0.048539649695158005, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.13.4.ff.2.bias": { |
| "min": -0.6931350827217102, |
| "max": 0.4125315248966217, |
| "mean": 0.0008539482369087636, |
| "std": 0.060291603207588196, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.0.weight": { |
| "min": -0.00041267118649557233, |
| "max": 1.0002655982971191, |
| "mean": 0.00048818063805811107, |
| "std": 0.022091196849942207, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.14.1.g": { |
| "min": 0.9994122982025146, |
| "max": 1.0017499923706055, |
| "mean": 1.000227689743042, |
| "std": 0.0006477160495705903, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_q.weight": { |
| "min": -0.03126005083322525, |
| "max": 0.03126395121216774, |
| "mean": -2.102299185935408e-05, |
| "std": 0.018035007640719414, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_q.bias": { |
| "min": -0.031219350174069405, |
| "max": 0.031236182898283005, |
| "mean": -0.0006771045736968517, |
| "std": 0.017829518765211105, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_k.weight": { |
| "min": -0.03126310929656029, |
| "max": 0.03126853331923485, |
| "mean": -8.832646017253865e-06, |
| "std": 0.018034033477306366, |
| "sparsity": 9.5367431640625e-07, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_k.bias": { |
| "min": -0.03123609907925129, |
| "max": 0.0312487930059433, |
| "mean": -0.0007298641721718013, |
| "std": 0.0179444570094347, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_v.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_v.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_out.0.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_out.0.bias": { |
| "min": -0.00027907025651074946, |
| "max": 0.0002400849189143628, |
| "mean": 2.689231223484967e-06, |
| "std": 8.426107524428517e-05, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.3.g": { |
| "min": 0.9995393753051758, |
| "max": 1.00211501121521, |
| "mean": 1.0004167556762695, |
| "std": 0.0006692331517115235, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.4.ff.0.0.weight": { |
| "min": -0.031639304012060165, |
| "max": 0.03170545771718025, |
| "mean": 2.9571647246484645e-06, |
| "std": 0.018044477328658104, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.4.ff.0.0.bias": { |
| "min": -0.031226763501763344, |
| "max": 0.03141167387366295, |
| "mean": 0.0003237017663195729, |
| "std": 0.018078280612826347, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.14.4.ff.2.weight": { |
| "min": -0.00040698132943362, |
| "max": 0.0004357137659098953, |
| "mean": 1.1018712484656135e-06, |
| "std": 8.384210377698764e-05, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.14.4.ff.2.bias": { |
| "min": -0.0002671520342119038, |
| "max": 0.00023483953555114567, |
| "mean": 2.1393277620518347e-06, |
| "std": 8.360463834833354e-05, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.0.weight": { |
| "min": -0.23457324504852295, |
| "max": 0.2725456655025482, |
| "mean": 7.03098658050294e-06, |
| "std": 0.018811851739883423, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.15.1.g": { |
| "min": 0.32138916850090027, |
| "max": 0.6936908960342407, |
| "mean": 0.5816767811775208, |
| "std": 0.04592788219451904, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_q.weight": { |
| "min": -0.18192121386528015, |
| "max": 0.19770289957523346, |
| "mean": -1.1671071661112364e-05, |
| "std": 0.033187344670295715, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_q.bias": { |
| "min": -0.16075590252876282, |
| "max": 0.12948612868785858, |
| "mean": -0.0010705746244639158, |
| "std": 0.03414509445428848, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_k.weight": { |
| "min": -0.3322606384754181, |
| "max": 0.3115905225276947, |
| "mean": -1.047878777171718e-05, |
| "std": 0.032237909734249115, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_k.bias": { |
| "min": -7.803586006164551, |
| "max": 8.763325691223145, |
| "mean": 0.09346922487020493, |
| "std": 1.6197253465652466, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_v.weight": { |
| "min": -0.23383009433746338, |
| "max": 0.241935133934021, |
| "mean": 4.1345643694512546e-05, |
| "std": 0.04086088761687279, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_v.bias": { |
| "min": -0.0759628489613533, |
| "max": 0.06582564860582352, |
| "mean": 0.0004808574158232659, |
| "std": 0.01941247656941414, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_out.0.weight": { |
| "min": -0.24585530161857605, |
| "max": 0.23399215936660767, |
| "mean": -2.9465345505741425e-06, |
| "std": 0.03943563625216484, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_out.0.bias": { |
| "min": -0.16296693682670593, |
| "max": 0.16089047491550446, |
| "mean": 0.001630417536944151, |
| "std": 0.06527554988861084, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.3.g": { |
| "min": 0.5569126605987549, |
| "max": 0.9438663125038147, |
| "mean": 0.7129403352737427, |
| "std": 0.04013482853770256, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.4.ff.0.0.weight": { |
| "min": -0.2285519540309906, |
| "max": 0.2551051676273346, |
| "mean": -4.54609798907768e-05, |
| "std": 0.040580034255981445, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.4.ff.0.0.bias": { |
| "min": -0.1348046511411667, |
| "max": 0.022271839901804924, |
| "mean": -0.04135382920503616, |
| "std": 0.01838485151529312, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.15.4.ff.2.weight": { |
| "min": -0.4216119349002838, |
| "max": 0.3923768699169159, |
| "mean": -4.429011823958717e-06, |
| "std": 0.047790225595235825, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.15.4.ff.2.bias": { |
| "min": -0.6072338819503784, |
| "max": 0.651410698890686, |
| "mean": 0.0015874950913712382, |
| "std": 0.05684793367981911, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.0.weight": { |
| "min": -0.2518640160560608, |
| "max": 0.3208119571208954, |
| "mean": -6.068093171052169e-06, |
| "std": 0.019615380093455315, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.16.1.g": { |
| "min": 0.35968896746635437, |
| "max": 0.6824969053268433, |
| "mean": 0.5707405805587769, |
| "std": 0.04298046976327896, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_q.weight": { |
| "min": -0.2206181287765503, |
| "max": 0.177145317196846, |
| "mean": -3.474977711448446e-05, |
| "std": 0.034301795065402985, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_q.bias": { |
| "min": -0.16330677270889282, |
| "max": 0.2329079806804657, |
| "mean": 0.0003651169245131314, |
| "std": 0.032845281064510345, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_k.weight": { |
| "min": -0.26389849185943604, |
| "max": 0.23990698158740997, |
| "mean": -5.2482428145594895e-05, |
| "std": 0.033900897949934006, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_k.bias": { |
| "min": -4.855096817016602, |
| "max": 5.091324329376221, |
| "mean": 0.043882716447114944, |
| "std": 1.2292898893356323, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_v.weight": { |
| "min": -0.24652959406375885, |
| "max": 0.25042256712913513, |
| "mean": 7.212234049802646e-05, |
| "std": 0.043991539627313614, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_v.bias": { |
| "min": -0.06257897615432739, |
| "max": 0.05448286980390549, |
| "mean": 0.0006493264227174222, |
| "std": 0.017185840755701065, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_out.0.weight": { |
| "min": -0.2864921987056732, |
| "max": 0.2719077467918396, |
| "mean": -4.989763692719862e-05, |
| "std": 0.04298979416489601, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_out.0.bias": { |
| "min": -0.16073212027549744, |
| "max": 0.17026235163211823, |
| "mean": -0.0028884499333798885, |
| "std": 0.059281300753355026, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.3.g": { |
| "min": 0.5197049379348755, |
| "max": 0.9328829050064087, |
| "mean": 0.7135671973228455, |
| "std": 0.038414619863033295, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.4.ff.0.0.weight": { |
| "min": -0.23812194168567657, |
| "max": 0.24923060834407806, |
| "mean": 0.0004647884052246809, |
| "std": 0.040460310876369476, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.4.ff.0.0.bias": { |
| "min": -0.14453770220279694, |
| "max": 0.041513390839099884, |
| "mean": -0.039691261947155, |
| "std": 0.020545845851302147, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.16.4.ff.2.weight": { |
| "min": -0.5327961444854736, |
| "max": 0.5830419063568115, |
| "mean": 6.150515218905639e-06, |
| "std": 0.04886715114116669, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.16.4.ff.2.bias": { |
| "min": -0.5192174911499023, |
| "max": 0.493362694978714, |
| "mean": 0.002359903883188963, |
| "std": 0.05345294252038002, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.0.weight": { |
| "min": -0.2736135721206665, |
| "max": 0.31528207659721375, |
| "mean": 1.917778718052432e-06, |
| "std": 0.020052393898367882, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.17.1.g": { |
| "min": 0.3661349415779114, |
| "max": 0.7114736437797546, |
| "mean": 0.5932135581970215, |
| "std": 0.045942142605781555, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_q.weight": { |
| "min": -0.2110714167356491, |
| "max": 0.19956345856189728, |
| "mean": 3.0644099751953036e-05, |
| "std": 0.03486814722418785, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_q.bias": { |
| "min": -0.18721066415309906, |
| "max": 0.20390057563781738, |
| "mean": 0.0009557952871546149, |
| "std": 0.031514741480350494, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_k.weight": { |
| "min": -0.2895534336566925, |
| "max": 0.3397268056869507, |
| "mean": -4.745465412270278e-05, |
| "std": 0.034589096903800964, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_k.bias": { |
| "min": -3.877371311187744, |
| "max": 3.3874666690826416, |
| "mean": 0.014458310790359974, |
| "std": 0.8584496378898621, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_v.weight": { |
| "min": -0.2243141233921051, |
| "max": 0.24994920194149017, |
| "mean": -4.160197022429202e-06, |
| "std": 0.04223477095365524, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_v.bias": { |
| "min": -0.055164966732263565, |
| "max": 0.046595554798841476, |
| "mean": -1.914352469611913e-05, |
| "std": 0.01584389992058277, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_out.0.weight": { |
| "min": -0.29299500584602356, |
| "max": 0.29091835021972656, |
| "mean": -7.332260793191381e-06, |
| "std": 0.041949693113565445, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_out.0.bias": { |
| "min": -0.12478315085172653, |
| "max": 0.2593647241592407, |
| "mean": -0.0032380004413425922, |
| "std": 0.05315803363919258, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.3.g": { |
| "min": 0.4563468396663666, |
| "max": 0.8445391654968262, |
| "mean": 0.7056366205215454, |
| "std": 0.03522425889968872, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.4.ff.0.0.weight": { |
| "min": -0.5114319920539856, |
| "max": 0.34831947088241577, |
| "mean": 0.0003425391623750329, |
| "std": 0.04020523279905319, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.4.ff.0.0.bias": { |
| "min": -0.18698035180568695, |
| "max": 0.0395214818418026, |
| "mean": -0.039389487355947495, |
| "std": 0.021351324394345284, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.17.4.ff.2.weight": { |
| "min": -0.5443570613861084, |
| "max": 0.556300938129425, |
| "mean": -7.182909030234441e-05, |
| "std": 0.05074186250567436, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.17.4.ff.2.bias": { |
| "min": -0.5117379426956177, |
| "max": 0.6643521785736084, |
| "mean": 0.002444902202114463, |
| "std": 0.04953118786215782, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.0.weight": { |
| "min": -0.3325459361076355, |
| "max": 0.26552852988243103, |
| "mean": 3.543416823958978e-06, |
| "std": 0.019390085712075233, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.18.1.g": { |
| "min": 0.3221725821495056, |
| "max": 0.7663489580154419, |
| "mean": 0.6510671973228455, |
| "std": 0.045311350375413895, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_q.weight": { |
| "min": -0.24964848160743713, |
| "max": 0.21960312128067017, |
| "mean": -2.564733222243376e-06, |
| "std": 0.0365014374256134, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_q.bias": { |
| "min": -0.3271941542625427, |
| "max": 0.2872978150844574, |
| "mean": -0.0006782531854696572, |
| "std": 0.038559023290872574, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_k.weight": { |
| "min": -0.3100341856479645, |
| "max": 0.36996597051620483, |
| "mean": 6.477468559751287e-05, |
| "std": 0.036241985857486725, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_k.bias": { |
| "min": -4.717563152313232, |
| "max": 5.807804584503174, |
| "mean": 0.037958286702632904, |
| "std": 1.4132274389266968, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_v.weight": { |
| "min": -0.2217635214328766, |
| "max": 0.20596979558467865, |
| "mean": -7.51121697248891e-05, |
| "std": 0.04249033331871033, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_v.bias": { |
| "min": -0.07755438983440399, |
| "max": 0.051571402698755264, |
| "mean": -0.0009240633808076382, |
| "std": 0.016407648101449013, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_out.0.weight": { |
| "min": -0.3310355246067047, |
| "max": 0.32923752069473267, |
| "mean": -4.983477538189618e-06, |
| "std": 0.04279704764485359, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_out.0.bias": { |
| "min": -0.2849341332912445, |
| "max": 0.11188604682683945, |
| "mean": -0.0012093198020011187, |
| "std": 0.04701279476284981, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.3.g": { |
| "min": 0.4862992763519287, |
| "max": 0.8870015740394592, |
| "mean": 0.7375336289405823, |
| "std": 0.038240909576416016, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.4.ff.0.0.weight": { |
| "min": -0.3614071309566498, |
| "max": 0.2742360532283783, |
| "mean": 5.11927210027352e-05, |
| "std": 0.040651749819517136, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.4.ff.0.0.bias": { |
| "min": -0.24774602055549622, |
| "max": 0.04635339602828026, |
| "mean": -0.03926930949091911, |
| "std": 0.02325906977057457, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.18.4.ff.2.weight": { |
| "min": -0.6263424754142761, |
| "max": 0.5970045328140259, |
| "mean": -5.938729736953974e-05, |
| "std": 0.05312504991889, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.18.4.ff.2.bias": { |
| "min": -0.7097107172012329, |
| "max": 0.26584240794181824, |
| "mean": 0.0009143413626588881, |
| "std": 0.051234155893325806, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.0.weight": { |
| "min": -0.3434857726097107, |
| "max": 0.30358248949050903, |
| "mean": 1.7036518329405226e-07, |
| "std": 0.019139336422085762, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.19.1.g": { |
| "min": 0.3498973548412323, |
| "max": 0.782823920249939, |
| "mean": 0.6388742327690125, |
| "std": 0.04923625662922859, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_q.weight": { |
| "min": -0.20567476749420166, |
| "max": 0.20698602497577667, |
| "mean": -5.99086306465324e-05, |
| "std": 0.03769771382212639, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_q.bias": { |
| "min": -0.25861528515815735, |
| "max": 0.2681594491004944, |
| "mean": -0.00040319678373634815, |
| "std": 0.04461444541811943, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_k.weight": { |
| "min": -0.354155570268631, |
| "max": 0.3225230574607849, |
| "mean": -7.215602636279073e-06, |
| "std": 0.03720592334866524, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_k.bias": { |
| "min": -5.261765480041504, |
| "max": 4.204793453216553, |
| "mean": -0.026421742513775826, |
| "std": 1.0068086385726929, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_v.weight": { |
| "min": -0.23872113227844238, |
| "max": 0.24366846680641174, |
| "mean": -2.556562321842648e-05, |
| "std": 0.043214697390794754, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_v.bias": { |
| "min": -0.0623321607708931, |
| "max": 0.056722186505794525, |
| "mean": 0.0003460783918853849, |
| "std": 0.014153210446238518, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_out.0.weight": { |
| "min": -0.4375341534614563, |
| "max": 0.3737650513648987, |
| "mean": 1.4479240235232282e-05, |
| "std": 0.04412652924656868, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_out.0.bias": { |
| "min": -0.0964576005935669, |
| "max": 0.1761614829301834, |
| "mean": -0.0006592521094717085, |
| "std": 0.035152681171894073, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.3.g": { |
| "min": 0.4216594994068146, |
| "max": 1.0695232152938843, |
| "mean": 0.7485226988792419, |
| "std": 0.042068321257829666, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.4.ff.0.0.weight": { |
| "min": -0.2659566104412079, |
| "max": 0.2967792749404907, |
| "mean": -7.885666127549484e-05, |
| "std": 0.04081219807267189, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.4.ff.0.0.bias": { |
| "min": -0.18503932654857635, |
| "max": 0.04330001026391983, |
| "mean": -0.03681433945894241, |
| "std": 0.025581127032637596, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.19.4.ff.2.weight": { |
| "min": -0.4577261507511139, |
| "max": 0.4869215786457062, |
| "mean": 4.5667507947655395e-05, |
| "std": 0.05421961098909378, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.19.4.ff.2.bias": { |
| "min": -0.286339670419693, |
| "max": 0.5517974495887756, |
| "mean": -0.0008834124309942126, |
| "std": 0.047834936529397964, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.0.weight": { |
| "min": -0.2927553355693817, |
| "max": 0.32282471656799316, |
| "mean": 6.005510840623174e-06, |
| "std": 0.01997239701449871, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.20.1.g": { |
| "min": 0.2911321222782135, |
| "max": 0.7601316571235657, |
| "mean": 0.6508502960205078, |
| "std": 0.052130550146102905, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_q.weight": { |
| "min": -0.24379833042621613, |
| "max": 0.26165705919265747, |
| "mean": -5.548093668039655e-06, |
| "std": 0.03961396589875221, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_q.bias": { |
| "min": -0.267425537109375, |
| "max": 0.20018436014652252, |
| "mean": -0.0008745841332711279, |
| "std": 0.05175970122218132, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_k.weight": { |
| "min": -0.27216100692749023, |
| "max": 0.2537060081958771, |
| "mean": 4.9225500333704986e-06, |
| "std": 0.03871043771505356, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_k.bias": { |
| "min": -12.965754508972168, |
| "max": 15.947580337524414, |
| "mean": 0.03322947770357132, |
| "std": 1.9892938137054443, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_v.weight": { |
| "min": -0.20684319734573364, |
| "max": 0.22589777410030365, |
| "mean": -7.25259305909276e-05, |
| "std": 0.040558259934186935, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_v.bias": { |
| "min": -0.06933015584945679, |
| "max": 0.06318464130163193, |
| "mean": 0.00015395943773910403, |
| "std": 0.014743377454578876, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_out.0.weight": { |
| "min": -0.4654809832572937, |
| "max": 0.3203279674053192, |
| "mean": 1.985491326195188e-05, |
| "std": 0.0405937097966671, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_out.0.bias": { |
| "min": -0.06401513516902924, |
| "max": 0.11543548107147217, |
| "mean": 0.0011928649619221687, |
| "std": 0.024708228185772896, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.3.g": { |
| "min": 0.37496218085289, |
| "max": 0.9319577217102051, |
| "mean": 0.7510663270950317, |
| "std": 0.04019522666931152, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.4.ff.0.0.weight": { |
| "min": -0.27932143211364746, |
| "max": 0.2732137441635132, |
| "mean": -0.00016841731849126518, |
| "std": 0.04100305214524269, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.4.ff.0.0.bias": { |
| "min": -0.19859075546264648, |
| "max": 0.05119071155786514, |
| "mean": -0.032025426626205444, |
| "std": 0.02508244849741459, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.20.4.ff.2.weight": { |
| "min": -0.6584441065788269, |
| "max": 0.5357497930526733, |
| "mean": -4.779139635502361e-05, |
| "std": 0.05285602807998657, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.20.4.ff.2.bias": { |
| "min": -0.19279344379901886, |
| "max": 0.5823235511779785, |
| "mean": -0.0005150774959474802, |
| "std": 0.04108597710728645, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.0.weight": { |
| "min": -0.417548805475235, |
| "max": 0.3718253970146179, |
| "mean": 6.455363291024696e-06, |
| "std": 0.021627577021718025, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.21.1.g": { |
| "min": 0.2144990712404251, |
| "max": 0.7469203472137451, |
| "mean": 0.6495254039764404, |
| "std": 0.054346147924661636, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_q.weight": { |
| "min": -0.2095586657524109, |
| "max": 0.19582423567771912, |
| "mean": 4.027899194625206e-05, |
| "std": 0.039461854845285416, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_q.bias": { |
| "min": -0.3295177221298218, |
| "max": 0.25955715775489807, |
| "mean": -0.003232627874240279, |
| "std": 0.056272272020578384, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_k.weight": { |
| "min": -0.20599152147769928, |
| "max": 0.2547609806060791, |
| "mean": 5.4062355047790334e-05, |
| "std": 0.0385642908513546, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_k.bias": { |
| "min": -6.2438578605651855, |
| "max": 6.932709217071533, |
| "mean": 0.0483400858938694, |
| "std": 1.3851662874221802, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_v.weight": { |
| "min": -0.2099662721157074, |
| "max": 0.23050634562969208, |
| "mean": -4.679883659264306e-06, |
| "std": 0.04131751507520676, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_v.bias": { |
| "min": -0.04376063123345375, |
| "max": 0.03601124510169029, |
| "mean": -5.941561539657414e-06, |
| "std": 0.012793137691915035, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_out.0.weight": { |
| "min": -0.39767444133758545, |
| "max": 0.34496286511421204, |
| "mean": -5.524931111722253e-05, |
| "std": 0.04239441454410553, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_out.0.bias": { |
| "min": -0.055049996823072433, |
| "max": 0.06284762173891068, |
| "mean": 0.0003571161942090839, |
| "std": 0.018672263249754906, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.3.g": { |
| "min": 0.35070401430130005, |
| "max": 1.045300006866455, |
| "mean": 0.7896326184272766, |
| "std": 0.04874366521835327, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.4.ff.0.0.weight": { |
| "min": -0.3336896300315857, |
| "max": 0.38648444414138794, |
| "mean": -0.00016903391224332154, |
| "std": 0.04148908331990242, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.4.ff.0.0.bias": { |
| "min": -0.15745577216148376, |
| "max": 0.05912669003009796, |
| "mean": -0.03182134032249451, |
| "std": 0.02510516531765461, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.21.4.ff.2.weight": { |
| "min": -0.6961610317230225, |
| "max": 0.46920138597488403, |
| "mean": -8.453470945823938e-05, |
| "std": 0.051804590970277786, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.21.4.ff.2.bias": { |
| "min": -0.2479942888021469, |
| "max": 0.32869523763656616, |
| "mean": -0.00026210874784737825, |
| "std": 0.04145258665084839, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.0.weight": { |
| "min": -0.2870560884475708, |
| "max": 0.3504050374031067, |
| "mean": -2.7076764581579482e-06, |
| "std": 0.024242233484983444, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.22.1.g": { |
| "min": 0.19671642780303955, |
| "max": 0.779133677482605, |
| "mean": 0.6702357530593872, |
| "std": 0.058674510568380356, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_q.weight": { |
| "min": -0.2289534956216812, |
| "max": 0.23123182356357574, |
| "mean": -2.0453815523069352e-05, |
| "std": 0.040439117699861526, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_q.bias": { |
| "min": -0.22002340853214264, |
| "max": 0.24095596373081207, |
| "mean": 0.0007837469456717372, |
| "std": 0.05583859235048294, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_k.weight": { |
| "min": -0.2165220081806183, |
| "max": 0.22644749283790588, |
| "mean": -7.203388668131083e-05, |
| "std": 0.03937385976314545, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_k.bias": { |
| "min": -8.905970573425293, |
| "max": 9.068842887878418, |
| "mean": -0.001253342255949974, |
| "std": 1.848394513130188, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_v.weight": { |
| "min": -0.2694862186908722, |
| "max": 0.2589434087276459, |
| "mean": 4.364973574411124e-05, |
| "std": 0.038410402834415436, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_v.bias": { |
| "min": -0.05793758109211922, |
| "max": 0.05797392502427101, |
| "mean": 0.0003538080782163888, |
| "std": 0.01471701916307211, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_out.0.weight": { |
| "min": -0.26422733068466187, |
| "max": 0.28839007019996643, |
| "mean": -6.168079562485218e-05, |
| "std": 0.039077237248420715, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_out.0.bias": { |
| "min": -0.04391145706176758, |
| "max": 0.03739985078573227, |
| "mean": -9.783620771486312e-05, |
| "std": 0.013347266241908073, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.3.g": { |
| "min": 0.3393727242946625, |
| "max": 1.0925297737121582, |
| "mean": 0.8639394640922546, |
| "std": 0.06387537717819214, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.4.ff.0.0.weight": { |
| "min": -0.4232022762298584, |
| "max": 0.41904953122138977, |
| "mean": 0.000313526950776577, |
| "std": 0.043511807918548584, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.4.ff.0.0.bias": { |
| "min": -0.21472877264022827, |
| "max": 0.1706702560186386, |
| "mean": -0.029442301020026207, |
| "std": 0.03188013657927513, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.22.4.ff.2.weight": { |
| "min": -0.5987806916236877, |
| "max": 0.5598706007003784, |
| "mean": -0.00014896712673362345, |
| "std": 0.05345924198627472, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.22.4.ff.2.bias": { |
| "min": -0.17873696982860565, |
| "max": 0.3771279752254486, |
| "mean": 0.001353989471681416, |
| "std": 0.037307873368263245, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.0.weight": { |
| "min": -0.39440417289733887, |
| "max": 0.36891528964042664, |
| "mean": 3.757418380700983e-05, |
| "std": 0.028618069365620613, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.23.1.g": { |
| "min": 0.2904903292655945, |
| "max": 0.8274624347686768, |
| "mean": 0.7055505514144897, |
| "std": 0.06785926967859268, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_q.weight": { |
| "min": -0.9264864325523376, |
| "max": 1.0268279314041138, |
| "mean": -2.7663820219459012e-05, |
| "std": 0.04763999581336975, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_q.bias": { |
| "min": -0.8792101740837097, |
| "max": 0.8157498240470886, |
| "mean": -0.00029962146072648466, |
| "std": 0.09555412083864212, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_k.weight": { |
| "min": -0.26960939168930054, |
| "max": 0.24089379608631134, |
| "mean": -2.2403137336368673e-05, |
| "std": 0.038951266556978226, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_k.bias": { |
| "min": -23.743011474609375, |
| "max": 22.851470947265625, |
| "mean": -0.09188262373209, |
| "std": 4.07051944732666, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_v.weight": { |
| "min": -0.2278052568435669, |
| "max": 0.2454863339662552, |
| "mean": -2.561333167250268e-05, |
| "std": 0.0386415459215641, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_v.bias": { |
| "min": -0.060211505740880966, |
| "max": 0.04552706331014633, |
| "mean": -0.00013798139116261154, |
| "std": 0.014687996357679367, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_out.0.weight": { |
| "min": -0.3381482660770416, |
| "max": 0.3747510015964508, |
| "mean": 7.467011528206058e-06, |
| "std": 0.04082018882036209, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_out.0.bias": { |
| "min": -0.04627379775047302, |
| "max": 0.19550754129886627, |
| "mean": 0.00027567092911340296, |
| "std": 0.01355433464050293, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.3.g": { |
| "min": 0.3735484182834625, |
| "max": 1.130308985710144, |
| "mean": 0.8902099132537842, |
| "std": 0.06400929391384125, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.4.ff.0.0.weight": { |
| "min": -0.4475434124469757, |
| "max": 0.5425565838813782, |
| "mean": 2.4953253159765154e-05, |
| "std": 0.0455789715051651, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.4.ff.0.0.bias": { |
| "min": -0.223903626203537, |
| "max": 0.08773155510425568, |
| "mean": -0.0320122167468071, |
| "std": 0.03775562718510628, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.23.4.ff.2.weight": { |
| "min": -0.7259138822555542, |
| "max": 0.6885775923728943, |
| "mean": 3.529630339471623e-05, |
| "std": 0.05179176479578018, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.23.4.ff.2.bias": { |
| "min": -0.17448118329048157, |
| "max": 0.2181989699602127, |
| "mean": 3.60202684532851e-05, |
| "std": 0.03176648169755936, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.0.weight": { |
| "min": -0.33964094519615173, |
| "max": 0.3732447624206543, |
| "mean": 4.3327472667442635e-05, |
| "std": 0.03413660451769829, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.24.1.g": { |
| "min": 0.31786343455314636, |
| "max": 1.2872315645217896, |
| "mean": 0.6015468835830688, |
| "std": 0.08348662406206131, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_q.weight": { |
| "min": -0.2830578088760376, |
| "max": 0.26022711396217346, |
| "mean": -2.739398723861086e-06, |
| "std": 0.03598024696111679, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_q.bias": { |
| "min": -0.2355988472700119, |
| "max": 0.205682173371315, |
| "mean": 0.00023985601728782058, |
| "std": 0.05602918937802315, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_k.weight": { |
| "min": -0.4354512095451355, |
| "max": 0.3249225318431854, |
| "mean": 2.4408442186540924e-05, |
| "std": 0.034124936908483505, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_k.bias": { |
| "min": -5.546271324157715, |
| "max": 7.313862323760986, |
| "mean": -0.007370356470346451, |
| "std": 0.6993649005889893, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_v.weight": { |
| "min": -0.3440709412097931, |
| "max": 0.3629132807254791, |
| "mean": 0.00010299268615199253, |
| "std": 0.04783618077635765, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_v.bias": { |
| "min": -0.07372982054948807, |
| "max": 0.060475897043943405, |
| "mean": 0.0009333858033642173, |
| "std": 0.014939810149371624, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_out.0.weight": { |
| "min": -0.2562521696090698, |
| "max": 0.2865331768989563, |
| "mean": 4.6935901991673745e-06, |
| "std": 0.04156438633799553, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_out.0.bias": { |
| "min": -0.05538477003574371, |
| "max": 0.06286550313234329, |
| "mean": 0.00012986664660274982, |
| "std": 0.007165286689996719, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.3.g": { |
| "min": 0.49387338757514954, |
| "max": 1.2207623720169067, |
| "mean": 1.0135465860366821, |
| "std": 0.11748857796192169, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.4.ff.0.0.weight": { |
| "min": -1.0939587354660034, |
| "max": 1.0474854707717896, |
| "mean": -4.887886461801827e-05, |
| "std": 0.052416812628507614, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.4.ff.0.0.bias": { |
| "min": -0.22367814183235168, |
| "max": 0.17331884801387787, |
| "mean": -0.027228882536292076, |
| "std": 0.03631311282515526, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.24.4.ff.2.weight": { |
| "min": -0.8839902877807617, |
| "max": 0.9222039580345154, |
| "mean": -0.00014613418898079544, |
| "std": 0.0532962903380394, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.24.4.ff.2.bias": { |
| "min": -0.17102710902690887, |
| "max": 0.37978917360305786, |
| "mean": 0.0033693695440888405, |
| "std": 0.03987928107380867, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.0.weight": { |
| "min": -0.7775930762290955, |
| "max": 0.7230536341667175, |
| "mean": 1.795422940631397e-05, |
| "std": 0.04615578427910805, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.25.1.g": { |
| "min": 0.33868706226348877, |
| "max": 1.428168535232544, |
| "mean": 0.948466420173645, |
| "std": 0.206797257065773, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_q.weight": { |
| "min": -1.7458410263061523, |
| "max": 1.7044554948806763, |
| "mean": 0.00022709151380695403, |
| "std": 0.1587017923593521, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_q.bias": { |
| "min": -1.199466586112976, |
| "max": 1.1009190082550049, |
| "mean": -0.009544244036078453, |
| "std": 0.20388931035995483, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_k.weight": { |
| "min": -0.4210292100906372, |
| "max": 0.42779824137687683, |
| "mean": 6.407736509572715e-05, |
| "std": 0.04801918938755989, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_k.bias": { |
| "min": -19.747289657592773, |
| "max": 19.542404174804688, |
| "mean": -0.24833638966083527, |
| "std": 4.7769317626953125, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_v.weight": { |
| "min": -0.32385140657424927, |
| "max": 0.4385547339916229, |
| "mean": -1.1735279258573428e-05, |
| "std": 0.04616609960794449, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_v.bias": { |
| "min": -0.0338931679725647, |
| "max": 0.036946121603250504, |
| "mean": 0.0006420122808776796, |
| "std": 0.012915823608636856, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_out.0.weight": { |
| "min": -0.7037211656570435, |
| "max": 0.668102502822876, |
| "mean": 4.292663652449846e-05, |
| "std": 0.05789082497358322, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_out.0.bias": { |
| "min": -0.07235053181648254, |
| "max": 0.06769613176584244, |
| "mean": -0.0001348661899100989, |
| "std": 0.01290997676551342, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.3.g": { |
| "min": 0.38041582703590393, |
| "max": 1.3927761316299438, |
| "mean": 1.06671142578125, |
| "std": 0.21977396309375763, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.4.ff.0.0.weight": { |
| "min": -0.6164037585258484, |
| "max": 0.7183761596679688, |
| "mean": 0.00011247429210925475, |
| "std": 0.05802652984857559, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.4.ff.0.0.bias": { |
| "min": -0.21889959275722504, |
| "max": 0.22502842545509338, |
| "mean": 0.006201672367751598, |
| "std": 0.049709536135196686, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.25.4.ff.2.weight": { |
| "min": -0.6296432018280029, |
| "max": 0.8894878029823303, |
| "mean": 1.1972185347985942e-05, |
| "std": 0.02354392781853676, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.25.4.ff.2.bias": { |
| "min": -0.5068784356117249, |
| "max": 0.47380438446998596, |
| "mean": -0.0030183307826519012, |
| "std": 0.06925629079341888, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.norm_out.g": { |
| "min": 0.5380737781524658, |
| "max": 1.1801798343658447, |
| "mean": 0.7828105092048645, |
| "std": 0.09876621514558792, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.proj_out.weight": { |
| "min": -0.2670763432979584, |
| "max": 0.21297039091587067, |
| "mean": -0.0002238377055618912, |
| "std": 0.05400474742054939, |
| "sparsity": 0.0, |
| "shape": [ |
| 100, |
| 1024 |
| ] |
| }, |
| "transformer.proj_out.bias": { |
| "min": -0.23828226327896118, |
| "max": 0.014816822484135628, |
| "mean": -0.043933507055044174, |
| "std": 0.034287311136722565, |
| "sparsity": 0.0, |
| "shape": [ |
| 100 |
| ] |
| } |
| } |
| } |