| { |
| "layer_types": { |
| "transformer": 391 |
| }, |
| "parameter_counts": { |
| "transformer.time_embed.time_mlp.0.weight": 262144, |
| "transformer.time_embed.time_mlp.0.bias": 1024, |
| "transformer.time_embed.time_mlp.2.weight": 1048576, |
| "transformer.time_embed.time_mlp.2.bias": 1024, |
| "transformer.text_embed.text_embed.weight": 254600, |
| "transformer.input_embed.proj.weight": 307200, |
| "transformer.input_embed.proj.bias": 1024, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, |
| "transformer.layers.0.1.g": 1024, |
| "transformer.layers.0.2.to_q.weight": 1048576, |
| "transformer.layers.0.2.to_q.bias": 1024, |
| "transformer.layers.0.2.to_k.weight": 1048576, |
| "transformer.layers.0.2.to_k.bias": 1024, |
| "transformer.layers.0.2.to_v.weight": 1048576, |
| "transformer.layers.0.2.to_v.bias": 1024, |
| "transformer.layers.0.2.to_out.0.weight": 1048576, |
| "transformer.layers.0.2.to_out.0.bias": 1024, |
| "transformer.layers.0.3.g": 1024, |
| "transformer.layers.0.4.ff.0.0.weight": 4194304, |
| "transformer.layers.0.4.ff.0.0.bias": 4096, |
| "transformer.layers.0.4.ff.2.weight": 4194304, |
| "transformer.layers.0.4.ff.2.bias": 1024, |
| "transformer.layers.1.1.g": 1024, |
| "transformer.layers.1.2.to_q.weight": 1048576, |
| "transformer.layers.1.2.to_q.bias": 1024, |
| "transformer.layers.1.2.to_k.weight": 1048576, |
| "transformer.layers.1.2.to_k.bias": 1024, |
| "transformer.layers.1.2.to_v.weight": 1048576, |
| "transformer.layers.1.2.to_v.bias": 1024, |
| "transformer.layers.1.2.to_out.0.weight": 1048576, |
| "transformer.layers.1.2.to_out.0.bias": 1024, |
| "transformer.layers.1.3.g": 1024, |
| "transformer.layers.1.4.ff.0.0.weight": 4194304, |
| "transformer.layers.1.4.ff.0.0.bias": 4096, |
| "transformer.layers.1.4.ff.2.weight": 4194304, |
| "transformer.layers.1.4.ff.2.bias": 1024, |
| "transformer.layers.2.1.g": 1024, |
| "transformer.layers.2.2.to_q.weight": 1048576, |
| "transformer.layers.2.2.to_q.bias": 1024, |
| "transformer.layers.2.2.to_k.weight": 1048576, |
| "transformer.layers.2.2.to_k.bias": 1024, |
| "transformer.layers.2.2.to_v.weight": 1048576, |
| "transformer.layers.2.2.to_v.bias": 1024, |
| "transformer.layers.2.2.to_out.0.weight": 1048576, |
| "transformer.layers.2.2.to_out.0.bias": 1024, |
| "transformer.layers.2.3.g": 1024, |
| "transformer.layers.2.4.ff.0.0.weight": 4194304, |
| "transformer.layers.2.4.ff.0.0.bias": 4096, |
| "transformer.layers.2.4.ff.2.weight": 4194304, |
| "transformer.layers.2.4.ff.2.bias": 1024, |
| "transformer.layers.3.1.g": 1024, |
| "transformer.layers.3.2.to_q.weight": 1048576, |
| "transformer.layers.3.2.to_q.bias": 1024, |
| "transformer.layers.3.2.to_k.weight": 1048576, |
| "transformer.layers.3.2.to_k.bias": 1024, |
| "transformer.layers.3.2.to_v.weight": 1048576, |
| "transformer.layers.3.2.to_v.bias": 1024, |
| "transformer.layers.3.2.to_out.0.weight": 1048576, |
| "transformer.layers.3.2.to_out.0.bias": 1024, |
| "transformer.layers.3.3.g": 1024, |
| "transformer.layers.3.4.ff.0.0.weight": 4194304, |
| "transformer.layers.3.4.ff.0.0.bias": 4096, |
| "transformer.layers.3.4.ff.2.weight": 4194304, |
| "transformer.layers.3.4.ff.2.bias": 1024, |
| "transformer.layers.4.1.g": 1024, |
| "transformer.layers.4.2.to_q.weight": 1048576, |
| "transformer.layers.4.2.to_q.bias": 1024, |
| "transformer.layers.4.2.to_k.weight": 1048576, |
| "transformer.layers.4.2.to_k.bias": 1024, |
| "transformer.layers.4.2.to_v.weight": 1048576, |
| "transformer.layers.4.2.to_v.bias": 1024, |
| "transformer.layers.4.2.to_out.0.weight": 1048576, |
| "transformer.layers.4.2.to_out.0.bias": 1024, |
| "transformer.layers.4.3.g": 1024, |
| "transformer.layers.4.4.ff.0.0.weight": 4194304, |
| "transformer.layers.4.4.ff.0.0.bias": 4096, |
| "transformer.layers.4.4.ff.2.weight": 4194304, |
| "transformer.layers.4.4.ff.2.bias": 1024, |
| "transformer.layers.5.1.g": 1024, |
| "transformer.layers.5.2.to_q.weight": 1048576, |
| "transformer.layers.5.2.to_q.bias": 1024, |
| "transformer.layers.5.2.to_k.weight": 1048576, |
| "transformer.layers.5.2.to_k.bias": 1024, |
| "transformer.layers.5.2.to_v.weight": 1048576, |
| "transformer.layers.5.2.to_v.bias": 1024, |
| "transformer.layers.5.2.to_out.0.weight": 1048576, |
| "transformer.layers.5.2.to_out.0.bias": 1024, |
| "transformer.layers.5.3.g": 1024, |
| "transformer.layers.5.4.ff.0.0.weight": 4194304, |
| "transformer.layers.5.4.ff.0.0.bias": 4096, |
| "transformer.layers.5.4.ff.2.weight": 4194304, |
| "transformer.layers.5.4.ff.2.bias": 1024, |
| "transformer.layers.6.1.g": 1024, |
| "transformer.layers.6.2.to_q.weight": 1048576, |
| "transformer.layers.6.2.to_q.bias": 1024, |
| "transformer.layers.6.2.to_k.weight": 1048576, |
| "transformer.layers.6.2.to_k.bias": 1024, |
| "transformer.layers.6.2.to_v.weight": 1048576, |
| "transformer.layers.6.2.to_v.bias": 1024, |
| "transformer.layers.6.2.to_out.0.weight": 1048576, |
| "transformer.layers.6.2.to_out.0.bias": 1024, |
| "transformer.layers.6.3.g": 1024, |
| "transformer.layers.6.4.ff.0.0.weight": 4194304, |
| "transformer.layers.6.4.ff.0.0.bias": 4096, |
| "transformer.layers.6.4.ff.2.weight": 4194304, |
| "transformer.layers.6.4.ff.2.bias": 1024, |
| "transformer.layers.7.1.g": 1024, |
| "transformer.layers.7.2.to_q.weight": 1048576, |
| "transformer.layers.7.2.to_q.bias": 1024, |
| "transformer.layers.7.2.to_k.weight": 1048576, |
| "transformer.layers.7.2.to_k.bias": 1024, |
| "transformer.layers.7.2.to_v.weight": 1048576, |
| "transformer.layers.7.2.to_v.bias": 1024, |
| "transformer.layers.7.2.to_out.0.weight": 1048576, |
| "transformer.layers.7.2.to_out.0.bias": 1024, |
| "transformer.layers.7.3.g": 1024, |
| "transformer.layers.7.4.ff.0.0.weight": 4194304, |
| "transformer.layers.7.4.ff.0.0.bias": 4096, |
| "transformer.layers.7.4.ff.2.weight": 4194304, |
| "transformer.layers.7.4.ff.2.bias": 1024, |
| "transformer.layers.8.1.g": 1024, |
| "transformer.layers.8.2.to_q.weight": 1048576, |
| "transformer.layers.8.2.to_q.bias": 1024, |
| "transformer.layers.8.2.to_k.weight": 1048576, |
| "transformer.layers.8.2.to_k.bias": 1024, |
| "transformer.layers.8.2.to_v.weight": 1048576, |
| "transformer.layers.8.2.to_v.bias": 1024, |
| "transformer.layers.8.2.to_out.0.weight": 1048576, |
| "transformer.layers.8.2.to_out.0.bias": 1024, |
| "transformer.layers.8.3.g": 1024, |
| "transformer.layers.8.4.ff.0.0.weight": 4194304, |
| "transformer.layers.8.4.ff.0.0.bias": 4096, |
| "transformer.layers.8.4.ff.2.weight": 4194304, |
| "transformer.layers.8.4.ff.2.bias": 1024, |
| "transformer.layers.9.1.g": 1024, |
| "transformer.layers.9.2.to_q.weight": 1048576, |
| "transformer.layers.9.2.to_q.bias": 1024, |
| "transformer.layers.9.2.to_k.weight": 1048576, |
| "transformer.layers.9.2.to_k.bias": 1024, |
| "transformer.layers.9.2.to_v.weight": 1048576, |
| "transformer.layers.9.2.to_v.bias": 1024, |
| "transformer.layers.9.2.to_out.0.weight": 1048576, |
| "transformer.layers.9.2.to_out.0.bias": 1024, |
| "transformer.layers.9.3.g": 1024, |
| "transformer.layers.9.4.ff.0.0.weight": 4194304, |
| "transformer.layers.9.4.ff.0.0.bias": 4096, |
| "transformer.layers.9.4.ff.2.weight": 4194304, |
| "transformer.layers.9.4.ff.2.bias": 1024, |
| "transformer.layers.10.1.g": 1024, |
| "transformer.layers.10.2.to_q.weight": 1048576, |
| "transformer.layers.10.2.to_q.bias": 1024, |
| "transformer.layers.10.2.to_k.weight": 1048576, |
| "transformer.layers.10.2.to_k.bias": 1024, |
| "transformer.layers.10.2.to_v.weight": 1048576, |
| "transformer.layers.10.2.to_v.bias": 1024, |
| "transformer.layers.10.2.to_out.0.weight": 1048576, |
| "transformer.layers.10.2.to_out.0.bias": 1024, |
| "transformer.layers.10.3.g": 1024, |
| "transformer.layers.10.4.ff.0.0.weight": 4194304, |
| "transformer.layers.10.4.ff.0.0.bias": 4096, |
| "transformer.layers.10.4.ff.2.weight": 4194304, |
| "transformer.layers.10.4.ff.2.bias": 1024, |
| "transformer.layers.11.1.g": 1024, |
| "transformer.layers.11.2.to_q.weight": 1048576, |
| "transformer.layers.11.2.to_q.bias": 1024, |
| "transformer.layers.11.2.to_k.weight": 1048576, |
| "transformer.layers.11.2.to_k.bias": 1024, |
| "transformer.layers.11.2.to_v.weight": 1048576, |
| "transformer.layers.11.2.to_v.bias": 1024, |
| "transformer.layers.11.2.to_out.0.weight": 1048576, |
| "transformer.layers.11.2.to_out.0.bias": 1024, |
| "transformer.layers.11.3.g": 1024, |
| "transformer.layers.11.4.ff.0.0.weight": 4194304, |
| "transformer.layers.11.4.ff.0.0.bias": 4096, |
| "transformer.layers.11.4.ff.2.weight": 4194304, |
| "transformer.layers.11.4.ff.2.bias": 1024, |
| "transformer.layers.12.1.g": 1024, |
| "transformer.layers.12.2.to_q.weight": 1048576, |
| "transformer.layers.12.2.to_q.bias": 1024, |
| "transformer.layers.12.2.to_k.weight": 1048576, |
| "transformer.layers.12.2.to_k.bias": 1024, |
| "transformer.layers.12.2.to_v.weight": 1048576, |
| "transformer.layers.12.2.to_v.bias": 1024, |
| "transformer.layers.12.2.to_out.0.weight": 1048576, |
| "transformer.layers.12.2.to_out.0.bias": 1024, |
| "transformer.layers.12.3.g": 1024, |
| "transformer.layers.12.4.ff.0.0.weight": 4194304, |
| "transformer.layers.12.4.ff.0.0.bias": 4096, |
| "transformer.layers.12.4.ff.2.weight": 4194304, |
| "transformer.layers.12.4.ff.2.bias": 1024, |
| "transformer.layers.13.0.weight": 2097152, |
| "transformer.layers.13.1.g": 1024, |
| "transformer.layers.13.2.to_q.weight": 1048576, |
| "transformer.layers.13.2.to_q.bias": 1024, |
| "transformer.layers.13.2.to_k.weight": 1048576, |
| "transformer.layers.13.2.to_k.bias": 1024, |
| "transformer.layers.13.2.to_v.weight": 1048576, |
| "transformer.layers.13.2.to_v.bias": 1024, |
| "transformer.layers.13.2.to_out.0.weight": 1048576, |
| "transformer.layers.13.2.to_out.0.bias": 1024, |
| "transformer.layers.13.3.g": 1024, |
| "transformer.layers.13.4.ff.0.0.weight": 4194304, |
| "transformer.layers.13.4.ff.0.0.bias": 4096, |
| "transformer.layers.13.4.ff.2.weight": 4194304, |
| "transformer.layers.13.4.ff.2.bias": 1024, |
| "transformer.layers.14.0.weight": 2097152, |
| "transformer.layers.14.1.g": 1024, |
| "transformer.layers.14.2.to_q.weight": 1048576, |
| "transformer.layers.14.2.to_q.bias": 1024, |
| "transformer.layers.14.2.to_k.weight": 1048576, |
| "transformer.layers.14.2.to_k.bias": 1024, |
| "transformer.layers.14.2.to_v.weight": 1048576, |
| "transformer.layers.14.2.to_v.bias": 1024, |
| "transformer.layers.14.2.to_out.0.weight": 1048576, |
| "transformer.layers.14.2.to_out.0.bias": 1024, |
| "transformer.layers.14.3.g": 1024, |
| "transformer.layers.14.4.ff.0.0.weight": 4194304, |
| "transformer.layers.14.4.ff.0.0.bias": 4096, |
| "transformer.layers.14.4.ff.2.weight": 4194304, |
| "transformer.layers.14.4.ff.2.bias": 1024, |
| "transformer.layers.15.0.weight": 2097152, |
| "transformer.layers.15.1.g": 1024, |
| "transformer.layers.15.2.to_q.weight": 1048576, |
| "transformer.layers.15.2.to_q.bias": 1024, |
| "transformer.layers.15.2.to_k.weight": 1048576, |
| "transformer.layers.15.2.to_k.bias": 1024, |
| "transformer.layers.15.2.to_v.weight": 1048576, |
| "transformer.layers.15.2.to_v.bias": 1024, |
| "transformer.layers.15.2.to_out.0.weight": 1048576, |
| "transformer.layers.15.2.to_out.0.bias": 1024, |
| "transformer.layers.15.3.g": 1024, |
| "transformer.layers.15.4.ff.0.0.weight": 4194304, |
| "transformer.layers.15.4.ff.0.0.bias": 4096, |
| "transformer.layers.15.4.ff.2.weight": 4194304, |
| "transformer.layers.15.4.ff.2.bias": 1024, |
| "transformer.layers.16.0.weight": 2097152, |
| "transformer.layers.16.1.g": 1024, |
| "transformer.layers.16.2.to_q.weight": 1048576, |
| "transformer.layers.16.2.to_q.bias": 1024, |
| "transformer.layers.16.2.to_k.weight": 1048576, |
| "transformer.layers.16.2.to_k.bias": 1024, |
| "transformer.layers.16.2.to_v.weight": 1048576, |
| "transformer.layers.16.2.to_v.bias": 1024, |
| "transformer.layers.16.2.to_out.0.weight": 1048576, |
| "transformer.layers.16.2.to_out.0.bias": 1024, |
| "transformer.layers.16.3.g": 1024, |
| "transformer.layers.16.4.ff.0.0.weight": 4194304, |
| "transformer.layers.16.4.ff.0.0.bias": 4096, |
| "transformer.layers.16.4.ff.2.weight": 4194304, |
| "transformer.layers.16.4.ff.2.bias": 1024, |
| "transformer.layers.17.0.weight": 2097152, |
| "transformer.layers.17.1.g": 1024, |
| "transformer.layers.17.2.to_q.weight": 1048576, |
| "transformer.layers.17.2.to_q.bias": 1024, |
| "transformer.layers.17.2.to_k.weight": 1048576, |
| "transformer.layers.17.2.to_k.bias": 1024, |
| "transformer.layers.17.2.to_v.weight": 1048576, |
| "transformer.layers.17.2.to_v.bias": 1024, |
| "transformer.layers.17.2.to_out.0.weight": 1048576, |
| "transformer.layers.17.2.to_out.0.bias": 1024, |
| "transformer.layers.17.3.g": 1024, |
| "transformer.layers.17.4.ff.0.0.weight": 4194304, |
| "transformer.layers.17.4.ff.0.0.bias": 4096, |
| "transformer.layers.17.4.ff.2.weight": 4194304, |
| "transformer.layers.17.4.ff.2.bias": 1024, |
| "transformer.layers.18.0.weight": 2097152, |
| "transformer.layers.18.1.g": 1024, |
| "transformer.layers.18.2.to_q.weight": 1048576, |
| "transformer.layers.18.2.to_q.bias": 1024, |
| "transformer.layers.18.2.to_k.weight": 1048576, |
| "transformer.layers.18.2.to_k.bias": 1024, |
| "transformer.layers.18.2.to_v.weight": 1048576, |
| "transformer.layers.18.2.to_v.bias": 1024, |
| "transformer.layers.18.2.to_out.0.weight": 1048576, |
| "transformer.layers.18.2.to_out.0.bias": 1024, |
| "transformer.layers.18.3.g": 1024, |
| "transformer.layers.18.4.ff.0.0.weight": 4194304, |
| "transformer.layers.18.4.ff.0.0.bias": 4096, |
| "transformer.layers.18.4.ff.2.weight": 4194304, |
| "transformer.layers.18.4.ff.2.bias": 1024, |
| "transformer.layers.19.0.weight": 2097152, |
| "transformer.layers.19.1.g": 1024, |
| "transformer.layers.19.2.to_q.weight": 1048576, |
| "transformer.layers.19.2.to_q.bias": 1024, |
| "transformer.layers.19.2.to_k.weight": 1048576, |
| "transformer.layers.19.2.to_k.bias": 1024, |
| "transformer.layers.19.2.to_v.weight": 1048576, |
| "transformer.layers.19.2.to_v.bias": 1024, |
| "transformer.layers.19.2.to_out.0.weight": 1048576, |
| "transformer.layers.19.2.to_out.0.bias": 1024, |
| "transformer.layers.19.3.g": 1024, |
| "transformer.layers.19.4.ff.0.0.weight": 4194304, |
| "transformer.layers.19.4.ff.0.0.bias": 4096, |
| "transformer.layers.19.4.ff.2.weight": 4194304, |
| "transformer.layers.19.4.ff.2.bias": 1024, |
| "transformer.layers.20.0.weight": 2097152, |
| "transformer.layers.20.1.g": 1024, |
| "transformer.layers.20.2.to_q.weight": 1048576, |
| "transformer.layers.20.2.to_q.bias": 1024, |
| "transformer.layers.20.2.to_k.weight": 1048576, |
| "transformer.layers.20.2.to_k.bias": 1024, |
| "transformer.layers.20.2.to_v.weight": 1048576, |
| "transformer.layers.20.2.to_v.bias": 1024, |
| "transformer.layers.20.2.to_out.0.weight": 1048576, |
| "transformer.layers.20.2.to_out.0.bias": 1024, |
| "transformer.layers.20.3.g": 1024, |
| "transformer.layers.20.4.ff.0.0.weight": 4194304, |
| "transformer.layers.20.4.ff.0.0.bias": 4096, |
| "transformer.layers.20.4.ff.2.weight": 4194304, |
| "transformer.layers.20.4.ff.2.bias": 1024, |
| "transformer.layers.21.0.weight": 2097152, |
| "transformer.layers.21.1.g": 1024, |
| "transformer.layers.21.2.to_q.weight": 1048576, |
| "transformer.layers.21.2.to_q.bias": 1024, |
| "transformer.layers.21.2.to_k.weight": 1048576, |
| "transformer.layers.21.2.to_k.bias": 1024, |
| "transformer.layers.21.2.to_v.weight": 1048576, |
| "transformer.layers.21.2.to_v.bias": 1024, |
| "transformer.layers.21.2.to_out.0.weight": 1048576, |
| "transformer.layers.21.2.to_out.0.bias": 1024, |
| "transformer.layers.21.3.g": 1024, |
| "transformer.layers.21.4.ff.0.0.weight": 4194304, |
| "transformer.layers.21.4.ff.0.0.bias": 4096, |
| "transformer.layers.21.4.ff.2.weight": 4194304, |
| "transformer.layers.21.4.ff.2.bias": 1024, |
| "transformer.layers.22.0.weight": 2097152, |
| "transformer.layers.22.1.g": 1024, |
| "transformer.layers.22.2.to_q.weight": 1048576, |
| "transformer.layers.22.2.to_q.bias": 1024, |
| "transformer.layers.22.2.to_k.weight": 1048576, |
| "transformer.layers.22.2.to_k.bias": 1024, |
| "transformer.layers.22.2.to_v.weight": 1048576, |
| "transformer.layers.22.2.to_v.bias": 1024, |
| "transformer.layers.22.2.to_out.0.weight": 1048576, |
| "transformer.layers.22.2.to_out.0.bias": 1024, |
| "transformer.layers.22.3.g": 1024, |
| "transformer.layers.22.4.ff.0.0.weight": 4194304, |
| "transformer.layers.22.4.ff.0.0.bias": 4096, |
| "transformer.layers.22.4.ff.2.weight": 4194304, |
| "transformer.layers.22.4.ff.2.bias": 1024, |
| "transformer.layers.23.0.weight": 2097152, |
| "transformer.layers.23.1.g": 1024, |
| "transformer.layers.23.2.to_q.weight": 1048576, |
| "transformer.layers.23.2.to_q.bias": 1024, |
| "transformer.layers.23.2.to_k.weight": 1048576, |
| "transformer.layers.23.2.to_k.bias": 1024, |
| "transformer.layers.23.2.to_v.weight": 1048576, |
| "transformer.layers.23.2.to_v.bias": 1024, |
| "transformer.layers.23.2.to_out.0.weight": 1048576, |
| "transformer.layers.23.2.to_out.0.bias": 1024, |
| "transformer.layers.23.3.g": 1024, |
| "transformer.layers.23.4.ff.0.0.weight": 4194304, |
| "transformer.layers.23.4.ff.0.0.bias": 4096, |
| "transformer.layers.23.4.ff.2.weight": 4194304, |
| "transformer.layers.23.4.ff.2.bias": 1024, |
| "transformer.layers.24.0.weight": 2097152, |
| "transformer.layers.24.1.g": 1024, |
| "transformer.layers.24.2.to_q.weight": 1048576, |
| "transformer.layers.24.2.to_q.bias": 1024, |
| "transformer.layers.24.2.to_k.weight": 1048576, |
| "transformer.layers.24.2.to_k.bias": 1024, |
| "transformer.layers.24.2.to_v.weight": 1048576, |
| "transformer.layers.24.2.to_v.bias": 1024, |
| "transformer.layers.24.2.to_out.0.weight": 1048576, |
| "transformer.layers.24.2.to_out.0.bias": 1024, |
| "transformer.layers.24.3.g": 1024, |
| "transformer.layers.24.4.ff.0.0.weight": 4194304, |
| "transformer.layers.24.4.ff.0.0.bias": 4096, |
| "transformer.layers.24.4.ff.2.weight": 4194304, |
| "transformer.layers.24.4.ff.2.bias": 1024, |
| "transformer.layers.25.0.weight": 2097152, |
| "transformer.layers.25.1.g": 1024, |
| "transformer.layers.25.2.to_q.weight": 1048576, |
| "transformer.layers.25.2.to_q.bias": 1024, |
| "transformer.layers.25.2.to_k.weight": 1048576, |
| "transformer.layers.25.2.to_k.bias": 1024, |
| "transformer.layers.25.2.to_v.weight": 1048576, |
| "transformer.layers.25.2.to_v.bias": 1024, |
| "transformer.layers.25.2.to_out.0.weight": 1048576, |
| "transformer.layers.25.2.to_out.0.bias": 1024, |
| "transformer.layers.25.3.g": 1024, |
| "transformer.layers.25.4.ff.0.0.weight": 4194304, |
| "transformer.layers.25.4.ff.0.0.bias": 4096, |
| "transformer.layers.25.4.ff.2.weight": 4194304, |
| "transformer.layers.25.4.ff.2.bias": 1024, |
| "transformer.norm_out.g": 1024, |
| "transformer.proj_out.weight": 102400, |
| "transformer.proj_out.bias": 100 |
| }, |
| "important_layers": [ |
| "transformer.time_embed.time_mlp.0.weight", |
| "transformer.time_embed.time_mlp.2.weight", |
| "transformer.text_embed.text_embed.weight", |
| "transformer.input_embed.proj.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight", |
| "transformer.layers.0.2.to_q.weight", |
| "transformer.layers.0.2.to_k.weight", |
| "transformer.layers.0.2.to_v.weight", |
| "transformer.layers.0.2.to_out.0.weight", |
| "transformer.layers.0.4.ff.0.0.weight", |
| "transformer.layers.0.4.ff.2.weight", |
| "transformer.layers.1.2.to_q.weight", |
| "transformer.layers.1.2.to_k.weight", |
| "transformer.layers.1.2.to_v.weight", |
| "transformer.layers.1.2.to_out.0.weight", |
| "transformer.layers.1.4.ff.0.0.weight", |
| "transformer.layers.1.4.ff.2.weight", |
| "transformer.layers.2.2.to_q.weight", |
| "transformer.layers.2.2.to_k.weight", |
| "transformer.layers.2.2.to_v.weight", |
| "transformer.layers.2.2.to_out.0.weight", |
| "transformer.layers.2.4.ff.0.0.weight", |
| "transformer.layers.2.4.ff.2.weight", |
| "transformer.layers.3.2.to_q.weight", |
| "transformer.layers.3.2.to_k.weight", |
| "transformer.layers.3.2.to_v.weight", |
| "transformer.layers.3.2.to_out.0.weight", |
| "transformer.layers.3.4.ff.0.0.weight", |
| "transformer.layers.3.4.ff.2.weight", |
| "transformer.layers.4.2.to_q.weight", |
| "transformer.layers.4.2.to_k.weight", |
| "transformer.layers.4.2.to_v.weight", |
| "transformer.layers.4.2.to_out.0.weight", |
| "transformer.layers.4.4.ff.0.0.weight", |
| "transformer.layers.4.4.ff.2.weight", |
| "transformer.layers.5.2.to_q.weight", |
| "transformer.layers.5.2.to_k.weight", |
| "transformer.layers.5.2.to_v.weight", |
| "transformer.layers.5.2.to_out.0.weight", |
| "transformer.layers.5.4.ff.0.0.weight", |
| "transformer.layers.5.4.ff.2.weight", |
| "transformer.layers.6.2.to_q.weight", |
| "transformer.layers.6.2.to_k.weight", |
| "transformer.layers.6.2.to_v.weight", |
| "transformer.layers.6.2.to_out.0.weight", |
| "transformer.layers.6.4.ff.0.0.weight", |
| "transformer.layers.6.4.ff.2.weight", |
| "transformer.layers.7.2.to_q.weight", |
| "transformer.layers.7.2.to_k.weight", |
| "transformer.layers.7.2.to_v.weight", |
| "transformer.layers.7.2.to_out.0.weight", |
| "transformer.layers.7.4.ff.0.0.weight", |
| "transformer.layers.7.4.ff.2.weight", |
| "transformer.layers.8.4.ff.0.0.weight", |
| "transformer.layers.8.4.ff.2.weight", |
| "transformer.layers.9.4.ff.0.0.weight", |
| "transformer.layers.9.4.ff.2.weight", |
| "transformer.layers.10.4.ff.0.0.weight", |
| "transformer.layers.10.4.ff.2.weight", |
| "transformer.layers.11.4.ff.0.0.weight", |
| "transformer.layers.11.4.ff.2.weight", |
| "transformer.layers.12.4.ff.0.0.weight", |
| "transformer.layers.12.4.ff.2.weight", |
| "transformer.layers.13.0.weight", |
| "transformer.layers.13.4.ff.0.0.weight", |
| "transformer.layers.13.4.ff.2.weight", |
| "transformer.layers.14.0.weight", |
| "transformer.layers.14.4.ff.0.0.weight", |
| "transformer.layers.14.4.ff.2.weight", |
| "transformer.layers.15.0.weight", |
| "transformer.layers.15.4.ff.0.0.weight", |
| "transformer.layers.15.4.ff.2.weight", |
| "transformer.layers.16.4.ff.0.0.weight", |
| "transformer.layers.16.4.ff.2.weight", |
| "transformer.layers.17.4.ff.0.0.weight", |
| "transformer.layers.17.4.ff.2.weight", |
| "transformer.layers.18.4.ff.0.0.weight", |
| "transformer.layers.18.4.ff.2.weight", |
| "transformer.layers.19.4.ff.0.0.weight", |
| "transformer.layers.19.4.ff.2.weight", |
| "transformer.layers.20.4.ff.0.0.weight", |
| "transformer.layers.20.4.ff.2.weight", |
| "transformer.layers.21.4.ff.0.0.weight", |
| "transformer.layers.21.4.ff.2.weight", |
| "transformer.layers.22.4.ff.0.0.weight", |
| "transformer.layers.22.4.ff.2.weight", |
| "transformer.layers.23.4.ff.0.0.weight", |
| "transformer.layers.23.4.ff.2.weight", |
| "transformer.layers.24.4.ff.0.0.weight", |
| "transformer.layers.24.4.ff.2.weight", |
| "transformer.layers.25.4.ff.0.0.weight", |
| "transformer.layers.25.4.ff.2.weight" |
| ], |
| "bottleneck_layers": [], |
| "recommendations": { |
| "focus_layers": [ |
| "transformer.time_embed.time_mlp.0.weight", |
| "transformer.time_embed.time_mlp.2.weight", |
| "transformer.text_embed.text_embed.weight", |
| "transformer.input_embed.proj.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight", |
| "transformer.layers.0.2.to_q.weight", |
| "transformer.layers.0.2.to_k.weight", |
| "transformer.layers.0.2.to_v.weight", |
| "transformer.layers.0.2.to_out.0.weight", |
| "transformer.layers.0.4.ff.0.0.weight", |
| "transformer.layers.0.4.ff.2.weight", |
| "transformer.layers.1.2.to_q.weight", |
| "transformer.layers.1.2.to_k.weight", |
| "transformer.layers.1.2.to_v.weight", |
| "transformer.layers.1.2.to_out.0.weight", |
| "transformer.layers.1.4.ff.0.0.weight", |
| "transformer.layers.1.4.ff.2.weight", |
| "transformer.layers.2.2.to_q.weight", |
| "transformer.layers.2.2.to_k.weight", |
| "transformer.layers.2.2.to_v.weight", |
| "transformer.layers.2.2.to_out.0.weight", |
| "transformer.layers.2.4.ff.0.0.weight", |
| "transformer.layers.2.4.ff.2.weight", |
| "transformer.layers.3.2.to_q.weight", |
| "transformer.layers.3.2.to_k.weight", |
| "transformer.layers.3.2.to_v.weight", |
| "transformer.layers.3.2.to_out.0.weight", |
| "transformer.layers.3.4.ff.0.0.weight", |
| "transformer.layers.3.4.ff.2.weight", |
| "transformer.layers.4.2.to_q.weight", |
| "transformer.layers.4.2.to_k.weight", |
| "transformer.layers.4.2.to_v.weight", |
| "transformer.layers.4.2.to_out.0.weight", |
| "transformer.layers.4.4.ff.0.0.weight", |
| "transformer.layers.4.4.ff.2.weight", |
| "transformer.layers.5.2.to_q.weight", |
| "transformer.layers.5.2.to_k.weight", |
| "transformer.layers.5.2.to_v.weight", |
| "transformer.layers.5.2.to_out.0.weight", |
| "transformer.layers.5.4.ff.0.0.weight", |
| "transformer.layers.5.4.ff.2.weight", |
| "transformer.layers.6.2.to_q.weight", |
| "transformer.layers.6.2.to_k.weight", |
| "transformer.layers.6.2.to_v.weight", |
| "transformer.layers.6.2.to_out.0.weight", |
| "transformer.layers.6.4.ff.0.0.weight", |
| "transformer.layers.6.4.ff.2.weight", |
| "transformer.layers.7.2.to_q.weight", |
| "transformer.layers.7.2.to_k.weight", |
| "transformer.layers.7.2.to_v.weight", |
| "transformer.layers.7.2.to_out.0.weight", |
| "transformer.layers.7.4.ff.0.0.weight", |
| "transformer.layers.7.4.ff.2.weight", |
| "transformer.layers.8.4.ff.0.0.weight", |
| "transformer.layers.8.4.ff.2.weight", |
| "transformer.layers.9.4.ff.0.0.weight", |
| "transformer.layers.9.4.ff.2.weight", |
| "transformer.layers.10.4.ff.0.0.weight", |
| "transformer.layers.10.4.ff.2.weight", |
| "transformer.layers.11.4.ff.0.0.weight", |
| "transformer.layers.11.4.ff.2.weight", |
| "transformer.layers.12.4.ff.0.0.weight", |
| "transformer.layers.12.4.ff.2.weight", |
| "transformer.layers.13.0.weight", |
| "transformer.layers.13.4.ff.0.0.weight", |
| "transformer.layers.13.4.ff.2.weight", |
| "transformer.layers.14.0.weight", |
| "transformer.layers.14.4.ff.0.0.weight", |
| "transformer.layers.14.4.ff.2.weight", |
| "transformer.layers.15.0.weight", |
| "transformer.layers.15.4.ff.0.0.weight", |
| "transformer.layers.15.4.ff.2.weight", |
| "transformer.layers.16.4.ff.0.0.weight", |
| "transformer.layers.16.4.ff.2.weight", |
| "transformer.layers.17.4.ff.0.0.weight", |
| "transformer.layers.17.4.ff.2.weight", |
| "transformer.layers.18.4.ff.0.0.weight", |
| "transformer.layers.18.4.ff.2.weight", |
| "transformer.layers.19.4.ff.0.0.weight", |
| "transformer.layers.19.4.ff.2.weight", |
| "transformer.layers.20.4.ff.0.0.weight", |
| "transformer.layers.20.4.ff.2.weight", |
| "transformer.layers.21.4.ff.0.0.weight", |
| "transformer.layers.21.4.ff.2.weight", |
| "transformer.layers.22.4.ff.0.0.weight", |
| "transformer.layers.22.4.ff.2.weight", |
| "transformer.layers.23.4.ff.0.0.weight", |
| "transformer.layers.23.4.ff.2.weight", |
| "transformer.layers.24.4.ff.0.0.weight", |
| "transformer.layers.24.4.ff.2.weight", |
| "transformer.layers.25.4.ff.0.0.weight", |
| "transformer.layers.25.4.ff.2.weight" |
| ] |
| }, |
| "total_parameters": 391, |
| "total_elements": 360755948, |
| "param_ranges": { |
| "transformer.time_embed.time_mlp.0.weight": { |
| "min": -0.429890900850296, |
| "max": 0.2975340783596039, |
| "mean": -0.002528043230995536, |
| "std": 0.042567234486341476, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 256 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.0.bias": { |
| "min": -0.06285920739173889, |
| "max": 0.10713651776313782, |
| "mean": 0.0006724470877088606, |
| "std": 0.03401060774922371, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.2.weight": { |
| "min": -0.4127168655395508, |
| "max": 0.8372595310211182, |
| "mean": -0.0001970978337340057, |
| "std": 0.024115173146128654, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.2.bias": { |
| "min": -0.11470083892345428, |
| "max": 0.3203592598438263, |
| "mean": -0.0009399179834872484, |
| "std": 0.019510779529809952, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.text_embed.text_embed.weight": { |
| "min": -2.786435842514038, |
| "max": 2.8647964000701904, |
| "mean": -0.00036496162647381425, |
| "std": 0.6155204772949219, |
| "sparsity": 0.0, |
| "shape": [ |
| 2546, |
| 100 |
| ] |
| }, |
| "transformer.input_embed.proj.weight": { |
| "min": -0.2788304090499878, |
| "max": 0.38129961490631104, |
| "mean": 0.00042573572136461735, |
| "std": 0.042747072875499725, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 300 |
| ] |
| }, |
| "transformer.input_embed.proj.bias": { |
| "min": -0.22175073623657227, |
| "max": 0.208872988820076, |
| "mean": -0.0044786068610847, |
| "std": 0.040869712829589844, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { |
| "min": -0.4284111559391022, |
| "max": 0.47638577222824097, |
| "mean": 4.7679491217422765e-06, |
| "std": 0.024512330070137978, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 64, |
| 31 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { |
| "min": -0.32299283146858215, |
| "max": 0.15659146010875702, |
| "mean": -0.04666333645582199, |
| "std": 0.051485899835824966, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { |
| "min": -0.41033437848091125, |
| "max": 0.35466355085372925, |
| "mean": -0.00013342559395823628, |
| "std": 0.023606186732649803, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 64, |
| 31 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { |
| "min": -0.2283795177936554, |
| "max": 0.2609671354293823, |
| "mean": -0.029088540002703667, |
| "std": 0.04924432560801506, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.1.g": { |
| "min": 0.25455695390701294, |
| "max": 0.8167241811752319, |
| "mean": 0.5252928733825684, |
| "std": 0.08043710887432098, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_q.weight": { |
| "min": -0.29693663120269775, |
| "max": 0.26587796211242676, |
| "mean": -0.00042661806219257414, |
| "std": 0.03210223466157913, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_q.bias": { |
| "min": -0.09257981181144714, |
| "max": 0.12483392655849457, |
| "mean": 0.0006469582440331578, |
| "std": 0.02571757137775421, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_k.weight": { |
| "min": -0.29060953855514526, |
| "max": 0.281120628118515, |
| "mean": -7.341133459703997e-05, |
| "std": 0.030930932611227036, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_k.bias": { |
| "min": -5.8982954025268555, |
| "max": 5.813107013702393, |
| "mean": -0.009337348863482475, |
| "std": 1.2953522205352783, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_v.weight": { |
| "min": -0.42515280842781067, |
| "max": 0.3437501788139343, |
| "mean": 9.81355260591954e-05, |
| "std": 0.029954733327031136, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_v.bias": { |
| "min": -0.028982222080230713, |
| "max": 0.027547072619199753, |
| "mean": -0.0003299822274129838, |
| "std": 0.012570270337164402, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_out.0.weight": { |
| "min": -0.4541594088077545, |
| "max": 0.44774138927459717, |
| "mean": 2.4147137082763948e-05, |
| "std": 0.02385564148426056, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_out.0.bias": { |
| "min": -0.08854468911886215, |
| "max": 0.09074825048446655, |
| "mean": 0.0022885985672473907, |
| "std": 0.019506951794028282, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.3.g": { |
| "min": 0.2667747437953949, |
| "max": 1.0526666641235352, |
| "mean": 0.5310115814208984, |
| "std": 0.10401110351085663, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.4.ff.0.0.weight": { |
| "min": -0.5744121670722961, |
| "max": 0.6080161333084106, |
| "mean": -0.00042898603715002537, |
| "std": 0.038603950291872025, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.4.ff.0.0.bias": { |
| "min": -0.1828344166278839, |
| "max": 0.04558030515909195, |
| "mean": -0.02944895066320896, |
| "std": 0.04260854050517082, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.0.4.ff.2.weight": { |
| "min": -1.1668061017990112, |
| "max": 1.6334388256072998, |
| "mean": 0.0003250878071412444, |
| "std": 0.02769906260073185, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.0.4.ff.2.bias": { |
| "min": -0.1617957502603531, |
| "max": 0.20511887967586517, |
| "mean": -0.021121997386217117, |
| "std": 0.027915872633457184, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.1.g": { |
| "min": 0.22389063239097595, |
| "max": 0.8404398560523987, |
| "mean": 0.48753583431243896, |
| "std": 0.07487782090902328, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_q.weight": { |
| "min": -0.25540560483932495, |
| "max": 0.30576375126838684, |
| "mean": -5.286063242238015e-06, |
| "std": 0.0334775373339653, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_q.bias": { |
| "min": -0.09518040716648102, |
| "max": 0.11029241979122162, |
| "mean": 7.437964086420834e-05, |
| "std": 0.026927735656499863, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_k.weight": { |
| "min": -0.29654812812805176, |
| "max": 0.29580071568489075, |
| "mean": 5.465543654281646e-05, |
| "std": 0.03255033493041992, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_k.bias": { |
| "min": -5.152629852294922, |
| "max": 5.073052883148193, |
| "mean": -0.014528467319905758, |
| "std": 1.1556384563446045, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_v.weight": { |
| "min": -0.34482821822166443, |
| "max": 0.3431924283504486, |
| "mean": 7.847632514312863e-05, |
| "std": 0.030065450817346573, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_v.bias": { |
| "min": -0.0359608419239521, |
| "max": 0.03339020535349846, |
| "mean": -0.00013936487084720284, |
| "std": 0.013043079525232315, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_out.0.weight": { |
| "min": -0.31543099880218506, |
| "max": 0.37475085258483887, |
| "mean": -1.99221267394023e-05, |
| "std": 0.024063827469944954, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_out.0.bias": { |
| "min": -0.1053055077791214, |
| "max": 0.12205620855093002, |
| "mean": -0.0019772218074649572, |
| "std": 0.028851687908172607, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.3.g": { |
| "min": 0.31148025393486023, |
| "max": 1.1159186363220215, |
| "mean": 0.6660937070846558, |
| "std": 0.09731028974056244, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.4.ff.0.0.weight": { |
| "min": -0.8725345730781555, |
| "max": 0.6275786757469177, |
| "mean": 0.0016754826065152884, |
| "std": 0.04743966832756996, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.4.ff.0.0.bias": { |
| "min": -0.27123701572418213, |
| "max": 0.034363195300102234, |
| "mean": -0.04658954590559006, |
| "std": 0.040568556636571884, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.1.4.ff.2.weight": { |
| "min": -0.9233484268188477, |
| "max": 0.9644548296928406, |
| "mean": 0.001022880314849317, |
| "std": 0.040709808468818665, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.1.4.ff.2.bias": { |
| "min": -0.14417493343353271, |
| "max": 0.07486628741025925, |
| "mean": -0.00909160915762186, |
| "std": 0.025672299787402153, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.1.g": { |
| "min": 0.24042263627052307, |
| "max": 0.7109521627426147, |
| "mean": 0.4471237063407898, |
| "std": 0.05905117839574814, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_q.weight": { |
| "min": -0.2719106674194336, |
| "max": 0.29774755239486694, |
| "mean": 9.55516952672042e-06, |
| "std": 0.035470303148031235, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_q.bias": { |
| "min": -0.11921010911464691, |
| "max": 0.11835695803165436, |
| "mean": 0.0007637137896381319, |
| "std": 0.027623096480965614, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_k.weight": { |
| "min": -0.28068092465400696, |
| "max": 0.2797088027000427, |
| "mean": -7.736143015790731e-05, |
| "std": 0.03509894013404846, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_k.bias": { |
| "min": -2.503926992416382, |
| "max": 2.515892505645752, |
| "mean": 0.02668764814734459, |
| "std": 0.5862060785293579, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_v.weight": { |
| "min": -0.22096332907676697, |
| "max": 0.2714470624923706, |
| "mean": 3.3548758437973447e-06, |
| "std": 0.030734958127141, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_v.bias": { |
| "min": -0.0337090790271759, |
| "max": 0.03134975582361221, |
| "mean": 0.00010986338020302355, |
| "std": 0.012415189296007156, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_out.0.weight": { |
| "min": -0.2351670116186142, |
| "max": 0.23143303394317627, |
| "mean": 5.6707456678850576e-05, |
| "std": 0.025697972625494003, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_out.0.bias": { |
| "min": -0.13545046746730804, |
| "max": 0.12696555256843567, |
| "mean": -0.00549742579460144, |
| "std": 0.03995845839381218, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.3.g": { |
| "min": 0.35431793332099915, |
| "max": 1.168055772781372, |
| "mean": 0.7104406356811523, |
| "std": 0.10342107713222504, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.4.ff.0.0.weight": { |
| "min": -0.6171623468399048, |
| "max": 0.5538070201873779, |
| "mean": 0.0011603726306930184, |
| "std": 0.04612257331609726, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.4.ff.0.0.bias": { |
| "min": -0.187709778547287, |
| "max": 0.025375014171004295, |
| "mean": -0.03482068330049515, |
| "std": 0.028561368584632874, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.2.4.ff.2.weight": { |
| "min": -1.1314054727554321, |
| "max": 0.9714292287826538, |
| "mean": 0.0003602738433983177, |
| "std": 0.0423499159514904, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.2.4.ff.2.bias": { |
| "min": -0.5970888137817383, |
| "max": 0.06280609965324402, |
| "mean": -0.004877342376857996, |
| "std": 0.028585655614733696, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.1.g": { |
| "min": 0.37514442205429077, |
| "max": 0.9365863800048828, |
| "mean": 0.5923141837120056, |
| "std": 0.06635680049657822, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_q.weight": { |
| "min": -0.3909958004951477, |
| "max": 0.36877286434173584, |
| "mean": 7.174501661211252e-05, |
| "std": 0.037190962582826614, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_q.bias": { |
| "min": -0.11852732300758362, |
| "max": 0.13606122136116028, |
| "mean": 0.0009374335058964789, |
| "std": 0.02925141341984272, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_k.weight": { |
| "min": -0.6188546419143677, |
| "max": 0.508575975894928, |
| "mean": 1.5391087799798697e-05, |
| "std": 0.03644438832998276, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_k.bias": { |
| "min": -8.168816566467285, |
| "max": 8.769427299499512, |
| "mean": -0.10911353677511215, |
| "std": 1.696131944656372, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_v.weight": { |
| "min": -0.2764376997947693, |
| "max": 0.2397889643907547, |
| "mean": 5.34953796886839e-05, |
| "std": 0.03261784091591835, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_v.bias": { |
| "min": -0.05230281502008438, |
| "max": 0.03951656445860863, |
| "mean": 8.823134703561664e-05, |
| "std": 0.01295400783419609, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_out.0.weight": { |
| "min": -0.23082277178764343, |
| "max": 0.23429568111896515, |
| "mean": -2.1679703422705643e-05, |
| "std": 0.0293941181153059, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_out.0.bias": { |
| "min": -0.20415563881397247, |
| "max": 0.1055976152420044, |
| "mean": -0.004027670249342918, |
| "std": 0.03260914608836174, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.3.g": { |
| "min": 0.3398659825325012, |
| "max": 1.008574366569519, |
| "mean": 0.7007372975349426, |
| "std": 0.09649426490068436, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.4.ff.0.0.weight": { |
| "min": -0.5645706057548523, |
| "max": 0.8320877552032471, |
| "mean": 0.00041511692688800395, |
| "std": 0.042306262999773026, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.4.ff.0.0.bias": { |
| "min": -0.21099911630153656, |
| "max": 0.03097626566886902, |
| "mean": -0.032180383801460266, |
| "std": 0.026477735489606857, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.3.4.ff.2.weight": { |
| "min": -0.7537994980812073, |
| "max": 0.7179465293884277, |
| "mean": -7.129359801183455e-06, |
| "std": 0.03684566915035248, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.3.4.ff.2.bias": { |
| "min": -0.2629236578941345, |
| "max": 0.10548774898052216, |
| "mean": -0.00303501239977777, |
| "std": 0.028845027089118958, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.1.g": { |
| "min": 0.28467807173728943, |
| "max": 0.6921964883804321, |
| "mean": 0.49945610761642456, |
| "std": 0.04626332223415375, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_q.weight": { |
| "min": -0.279328316450119, |
| "max": 0.23436570167541504, |
| "mean": -0.00011136279499623924, |
| "std": 0.03876578062772751, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_q.bias": { |
| "min": -0.15460819005966187, |
| "max": 0.12665635347366333, |
| "mean": -0.002232019789516926, |
| "std": 0.03342032432556152, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_k.weight": { |
| "min": -0.41363096237182617, |
| "max": 0.6597210764884949, |
| "mean": -2.0344648874015547e-05, |
| "std": 0.03910161554813385, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_k.bias": { |
| "min": -4.231404781341553, |
| "max": 4.715085029602051, |
| "mean": -0.020485566928982735, |
| "std": 1.0069705247879028, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_v.weight": { |
| "min": -0.2449151873588562, |
| "max": 0.20747897028923035, |
| "mean": 4.346559217083268e-05, |
| "std": 0.033968474715948105, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_v.bias": { |
| "min": -0.03452696651220322, |
| "max": 0.04465686157345772, |
| "mean": -1.5960962628014386e-05, |
| "std": 0.012621430680155754, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_out.0.weight": { |
| "min": -0.20041236281394958, |
| "max": 0.20551952719688416, |
| "mean": -2.960992424050346e-05, |
| "std": 0.031025830656290054, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_out.0.bias": { |
| "min": -0.19978956878185272, |
| "max": 0.11348189413547516, |
| "mean": -0.002926791785284877, |
| "std": 0.034484151750802994, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.3.g": { |
| "min": 0.36731821298599243, |
| "max": 1.0521864891052246, |
| "mean": 0.6705360412597656, |
| "std": 0.06614020466804504, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.4.ff.0.0.weight": { |
| "min": -0.39791443943977356, |
| "max": 0.5023131966590881, |
| "mean": -3.831370850093663e-05, |
| "std": 0.04114069044589996, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.4.ff.0.0.bias": { |
| "min": -0.1279803365468979, |
| "max": 0.026696184650063515, |
| "mean": -0.030547261238098145, |
| "std": 0.021858656778931618, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.4.4.ff.2.weight": { |
| "min": -0.44846877455711365, |
| "max": 0.43229183554649353, |
| "mean": 8.759970660321414e-05, |
| "std": 0.034898921847343445, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.4.4.ff.2.bias": { |
| "min": -0.2670278549194336, |
| "max": 0.07220447063446045, |
| "mean": -0.0011172632221132517, |
| "std": 0.023101668804883957, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.1.g": { |
| "min": 0.2872157692909241, |
| "max": 0.6838868260383606, |
| "mean": 0.5244971513748169, |
| "std": 0.047394201159477234, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_q.weight": { |
| "min": -0.22190631926059723, |
| "max": 0.22351428866386414, |
| "mean": 1.5601781342411414e-05, |
| "std": 0.038955170661211014, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_q.bias": { |
| "min": -0.13637839257717133, |
| "max": 0.10904650390148163, |
| "mean": 0.0002307215763721615, |
| "std": 0.02925163321197033, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_k.weight": { |
| "min": -0.37520402669906616, |
| "max": 0.4367537200450897, |
| "mean": -9.730283636599779e-06, |
| "std": 0.03929009288549423, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_k.bias": { |
| "min": -3.8370232582092285, |
| "max": 4.988061904907227, |
| "mean": 0.0097434613853693, |
| "std": 0.8443066477775574, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_v.weight": { |
| "min": -0.22342386841773987, |
| "max": 0.21985094249248505, |
| "mean": -9.139148460235447e-08, |
| "std": 0.034415289759635925, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_v.bias": { |
| "min": -0.04353320971131325, |
| "max": 0.03576282411813736, |
| "mean": -0.0002566012553870678, |
| "std": 0.012079274281859398, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_out.0.weight": { |
| "min": -0.2132977545261383, |
| "max": 0.18884801864624023, |
| "mean": -1.671975405770354e-05, |
| "std": 0.031542494893074036, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_out.0.bias": { |
| "min": -0.1805061399936676, |
| "max": 0.12078476697206497, |
| "mean": -0.0024164910428225994, |
| "std": 0.041246652603149414, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.3.g": { |
| "min": 0.42202678322792053, |
| "max": 0.9410442113876343, |
| "mean": 0.6627340912818909, |
| "std": 0.056649643927812576, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.4.ff.0.0.weight": { |
| "min": -0.3713216483592987, |
| "max": 0.47501668334007263, |
| "mean": -8.242137118941173e-05, |
| "std": 0.04089945927262306, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.4.ff.0.0.bias": { |
| "min": -0.20792357623577118, |
| "max": 0.027002831920981407, |
| "mean": -0.03024197369813919, |
| "std": 0.02132386527955532, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.5.4.ff.2.weight": { |
| "min": -0.33984270691871643, |
| "max": 0.7327128648757935, |
| "mean": 8.53092860779725e-05, |
| "std": 0.03477407246828079, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.5.4.ff.2.bias": { |
| "min": -0.23982134461402893, |
| "max": 0.050322338938713074, |
| "mean": -0.0011965972371399403, |
| "std": 0.020453661680221558, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.1.g": { |
| "min": 0.3062271773815155, |
| "max": 0.6509252786636353, |
| "mean": 0.5250095725059509, |
| "std": 0.04592073708772659, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_q.weight": { |
| "min": -0.30402758717536926, |
| "max": 0.21729634702205658, |
| "mean": 7.005365478107706e-05, |
| "std": 0.03949893265962601, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_q.bias": { |
| "min": -0.14918896555900574, |
| "max": 0.13127601146697998, |
| "mean": 0.00036064194864593446, |
| "std": 0.030438335612416267, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_k.weight": { |
| "min": -0.25730884075164795, |
| "max": 0.20225763320922852, |
| "mean": 3.0886923923389986e-05, |
| "std": 0.03948678448796272, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_k.bias": { |
| "min": -2.334343671798706, |
| "max": 2.3739240169525146, |
| "mean": -0.02623903937637806, |
| "std": 0.4496191143989563, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_v.weight": { |
| "min": -0.1891229748725891, |
| "max": 0.21049852669239044, |
| "mean": 3.720186577993445e-05, |
| "std": 0.03480042889714241, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_v.bias": { |
| "min": -0.03178652375936508, |
| "max": 0.03553091734647751, |
| "mean": -0.0002019420498982072, |
| "std": 0.012286705896258354, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_out.0.weight": { |
| "min": -0.18846523761749268, |
| "max": 0.1703805774450302, |
| "mean": -6.774859502911568e-05, |
| "std": 0.032177072018384933, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_out.0.bias": { |
| "min": -0.13940556347370148, |
| "max": 0.13744769990444183, |
| "mean": -0.0025155385956168175, |
| "std": 0.051295846700668335, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.3.g": { |
| "min": 0.4672105014324188, |
| "max": 0.9528681039810181, |
| "mean": 0.6688433885574341, |
| "std": 0.05244635045528412, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.4.ff.0.0.weight": { |
| "min": -0.3241286277770996, |
| "max": 0.3096275329589844, |
| "mean": -1.696625076874625e-06, |
| "std": 0.04095519334077835, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.4.ff.0.0.bias": { |
| "min": -0.1246853619813919, |
| "max": 0.025154586881399155, |
| "mean": -0.03071470744907856, |
| "std": 0.019795699045062065, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.6.4.ff.2.weight": { |
| "min": -0.43982067704200745, |
| "max": 0.44470375776290894, |
| "mean": 9.459229477215558e-05, |
| "std": 0.03512655198574066, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.6.4.ff.2.bias": { |
| "min": -0.22400110960006714, |
| "max": 0.05141644552350044, |
| "mean": -0.0011801186483353376, |
| "std": 0.018454499542713165, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.1.g": { |
| "min": 0.33955061435699463, |
| "max": 0.7357662320137024, |
| "mean": 0.55861496925354, |
| "std": 0.04118064045906067, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_q.weight": { |
| "min": -0.2722431421279907, |
| "max": 0.27798357605934143, |
| "mean": 1.9865790818585083e-05, |
| "std": 0.04106421023607254, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_q.bias": { |
| "min": -0.1370246559381485, |
| "max": 0.1397887021303177, |
| "mean": 0.0004894830053672194, |
| "std": 0.026618896052241325, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_k.weight": { |
| "min": -0.4905315637588501, |
| "max": 0.3558432161808014, |
| "mean": 8.873307524481788e-05, |
| "std": 0.04070229455828667, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_k.bias": { |
| "min": -2.291904926300049, |
| "max": 1.7411547899246216, |
| "mean": -0.02105572447180748, |
| "std": 0.4997440576553345, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_v.weight": { |
| "min": -0.2170916199684143, |
| "max": 0.19797761738300323, |
| "mean": -4.09621607104782e-05, |
| "std": 0.034239448606967926, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_v.bias": { |
| "min": -0.0413656160235405, |
| "max": 0.038547735661268234, |
| "mean": -0.00015065219486132264, |
| "std": 0.012881237082183361, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_out.0.weight": { |
| "min": -0.17731794714927673, |
| "max": 0.18395833671092987, |
| "mean": 4.7481313231401145e-05, |
| "std": 0.03156236186623573, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_out.0.bias": { |
| "min": -0.17941592633724213, |
| "max": 0.18339262902736664, |
| "mean": -0.0022199342492967844, |
| "std": 0.05482170730829239, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.3.g": { |
| "min": 0.4743531346321106, |
| "max": 1.0208531618118286, |
| "mean": 0.6452549695968628, |
| "std": 0.04991196468472481, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.4.ff.0.0.weight": { |
| "min": -0.2717994153499603, |
| "max": 0.3095380365848541, |
| "mean": 0.00011231788084842265, |
| "std": 0.04069165140390396, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.4.ff.0.0.bias": { |
| "min": -0.10581093281507492, |
| "max": 0.02687394618988037, |
| "mean": -0.029505720362067223, |
| "std": 0.01791212521493435, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.7.4.ff.2.weight": { |
| "min": -0.3386741280555725, |
| "max": 0.3290008306503296, |
| "mean": 5.870793393114582e-05, |
| "std": 0.03442065790295601, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.7.4.ff.2.bias": { |
| "min": -0.18140022456645966, |
| "max": 0.041891518980264664, |
| "mean": -0.0010755020193755627, |
| "std": 0.017211386933922768, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.1.g": { |
| "min": 0.32555529475212097, |
| "max": 0.6836872696876526, |
| "mean": 0.5111882090568542, |
| "std": 0.03670286759734154, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_q.weight": { |
| "min": -0.2333182841539383, |
| "max": 0.22538095712661743, |
| "mean": -3.595184534788132e-05, |
| "std": 0.03918481990695, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_q.bias": { |
| "min": -0.11544553935527802, |
| "max": 0.13142207264900208, |
| "mean": 0.00015133176930248737, |
| "std": 0.029199015349149704, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_k.weight": { |
| "min": -0.3520807921886444, |
| "max": 0.2848276197910309, |
| "mean": 7.631589141965378e-06, |
| "std": 0.03925250843167305, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_k.bias": { |
| "min": -4.123228073120117, |
| "max": 3.5356757640838623, |
| "mean": -0.011553899385035038, |
| "std": 0.6816845536231995, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_v.weight": { |
| "min": -0.2112175077199936, |
| "max": 0.20856595039367676, |
| "mean": 3.472584648989141e-05, |
| "std": 0.03449223190546036, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_v.bias": { |
| "min": -0.03566575422883034, |
| "max": 0.0481027290225029, |
| "mean": 0.0007965473923832178, |
| "std": 0.01284803170710802, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_out.0.weight": { |
| "min": -0.21010246872901917, |
| "max": 0.19273991882801056, |
| "mean": -1.5139250990614528e-06, |
| "std": 0.031702835112810135, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_out.0.bias": { |
| "min": -0.1862909346818924, |
| "max": 0.17676132917404175, |
| "mean": -0.0028484249487519264, |
| "std": 0.0586179718375206, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.3.g": { |
| "min": 0.47472548484802246, |
| "max": 1.0383955240249634, |
| "mean": 0.6513745784759521, |
| "std": 0.049231819808483124, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.4.ff.0.0.weight": { |
| "min": -0.24837423861026764, |
| "max": 0.3289947211742401, |
| "mean": 0.00018063507741317153, |
| "std": 0.04057996720075607, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.4.ff.0.0.bias": { |
| "min": -0.1235797256231308, |
| "max": 0.024505803361535072, |
| "mean": -0.0304916650056839, |
| "std": 0.01757434755563736, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.8.4.ff.2.weight": { |
| "min": -0.4211723804473877, |
| "max": 0.48196032643318176, |
| "mean": 1.983910806302447e-06, |
| "std": 0.03540581464767456, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.8.4.ff.2.bias": { |
| "min": -0.1518622189760208, |
| "max": 0.04325510933995247, |
| "mean": 3.965849464293569e-05, |
| "std": 0.014866944402456284, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.1.g": { |
| "min": 0.31559497117996216, |
| "max": 0.6791313290596008, |
| "mean": 0.552861213684082, |
| "std": 0.040544018149375916, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_q.weight": { |
| "min": -0.20591191947460175, |
| "max": 0.21929602324962616, |
| "mean": 3.05178873531986e-05, |
| "std": 0.03830549493432045, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_q.bias": { |
| "min": -0.13762998580932617, |
| "max": 0.11262793093919754, |
| "mean": 2.1001505956519395e-05, |
| "std": 0.02581183984875679, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_k.weight": { |
| "min": -0.4020220637321472, |
| "max": 0.3705553412437439, |
| "mean": 2.6537300072959624e-05, |
| "std": 0.03818797320127487, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_k.bias": { |
| "min": -3.767557382583618, |
| "max": 2.8661978244781494, |
| "mean": 0.00114790303632617, |
| "std": 0.5165696144104004, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_v.weight": { |
| "min": -0.2021435797214508, |
| "max": 0.19701559841632843, |
| "mean": 2.942326318589039e-05, |
| "std": 0.03430229425430298, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_v.bias": { |
| "min": -0.051028795540332794, |
| "max": 0.03999846801161766, |
| "mean": -0.0004189596220385283, |
| "std": 0.01342750433832407, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_out.0.weight": { |
| "min": -0.19608062505722046, |
| "max": 0.20127296447753906, |
| "mean": -1.228029668709496e-05, |
| "std": 0.0318099670112133, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_out.0.bias": { |
| "min": -0.19270533323287964, |
| "max": 0.1945824921131134, |
| "mean": -0.0029681914020329714, |
| "std": 0.06255524605512619, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.3.g": { |
| "min": 0.34965983033180237, |
| "max": 1.0794146060943604, |
| "mean": 0.6671044826507568, |
| "std": 0.054688673466444016, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.4.ff.0.0.weight": { |
| "min": -0.22492384910583496, |
| "max": 0.2511879801750183, |
| "mean": 0.0003592889988794923, |
| "std": 0.04076888784766197, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.4.ff.0.0.bias": { |
| "min": -0.0908823236823082, |
| "max": 0.04379650950431824, |
| "mean": -0.030081426724791527, |
| "std": 0.01758776418864727, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.9.4.ff.2.weight": { |
| "min": -0.35308927297592163, |
| "max": 0.3038119673728943, |
| "mean": -4.2369181755930185e-05, |
| "std": 0.03713066130876541, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.9.4.ff.2.bias": { |
| "min": -0.16173776984214783, |
| "max": 0.06332767009735107, |
| "mean": -8.476080256514251e-05, |
| "std": 0.019383691251277924, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.1.g": { |
| "min": 0.34886276721954346, |
| "max": 0.7204337120056152, |
| "mean": 0.5423545241355896, |
| "std": 0.03890771418809891, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_q.weight": { |
| "min": -0.2189498394727707, |
| "max": 0.22237031161785126, |
| "mean": -1.0949186616926454e-05, |
| "std": 0.03923875838518143, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_q.bias": { |
| "min": -0.11818630248308182, |
| "max": 0.1705242395401001, |
| "mean": 0.0002858135849237442, |
| "std": 0.025103183463215828, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_k.weight": { |
| "min": -0.24609290063381195, |
| "max": 0.30029821395874023, |
| "mean": -3.647123230621219e-05, |
| "std": 0.03893830627202988, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_k.bias": { |
| "min": -3.5019514560699463, |
| "max": 3.711169481277466, |
| "mean": 0.015843264758586884, |
| "std": 0.7819090485572815, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_v.weight": { |
| "min": -0.21829943358898163, |
| "max": 0.23758333921432495, |
| "mean": -1.3816705177305266e-05, |
| "std": 0.03631007671356201, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_v.bias": { |
| "min": -0.04714132845401764, |
| "max": 0.051366791129112244, |
| "mean": 0.00047747697681188583, |
| "std": 0.01350868958979845, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_out.0.weight": { |
| "min": -0.21323293447494507, |
| "max": 0.2170214205980301, |
| "mean": 5.658239751937799e-05, |
| "std": 0.033622127026319504, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_out.0.bias": { |
| "min": -0.21135154366493225, |
| "max": 0.23155677318572998, |
| "mean": -0.005110344383865595, |
| "std": 0.06187622249126434, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.3.g": { |
| "min": 0.36206167936325073, |
| "max": 1.097632884979248, |
| "mean": 0.6992448568344116, |
| "std": 0.05318887159228325, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.4.ff.0.0.weight": { |
| "min": -0.23417295515537262, |
| "max": 0.2448265254497528, |
| "mean": 0.0004635582445189357, |
| "std": 0.04127749800682068, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.4.ff.0.0.bias": { |
| "min": -0.09782810509204865, |
| "max": 0.06829667091369629, |
| "mean": -0.031430259346961975, |
| "std": 0.018095970153808594, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.10.4.ff.2.weight": { |
| "min": -0.30144715309143066, |
| "max": 0.3511406481266022, |
| "mean": -8.084578439593315e-05, |
| "std": 0.04028310999274254, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.10.4.ff.2.bias": { |
| "min": -0.15208296477794647, |
| "max": 0.1494162231683731, |
| "mean": 0.0002504626754671335, |
| "std": 0.023021113127470016, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.1.g": { |
| "min": 0.9953764081001282, |
| "max": 1.0005042552947998, |
| "mean": 0.9992995858192444, |
| "std": 0.00161725003272295, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_q.weight": { |
| "min": -0.031269513070583344, |
| "max": 0.031265489757061005, |
| "mean": -1.9295868696644902e-05, |
| "std": 0.018045131117105484, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_q.bias": { |
| "min": -0.031223546713590622, |
| "max": 0.0309983491897583, |
| "mean": -0.0010843857889994979, |
| "std": 0.017954815179109573, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_k.weight": { |
| "min": -0.03126491606235504, |
| "max": 0.03126438334584236, |
| "mean": 3.5442317312117666e-06, |
| "std": 0.018045514822006226, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_k.bias": { |
| "min": -0.031160738319158554, |
| "max": 0.03118434175848961, |
| "mean": 0.00033380728564225137, |
| "std": 0.01806693710386753, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_v.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_v.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_out.0.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_out.0.bias": { |
| "min": -0.0004188704479020089, |
| "max": 0.00032652742811478674, |
| "mean": -3.7413692552945577e-06, |
| "std": 9.604167280485854e-05, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.3.g": { |
| "min": 0.9950032234191895, |
| "max": 1.000982403755188, |
| "mean": 0.9997574090957642, |
| "std": 0.0010362789034843445, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.4.ff.0.0.weight": { |
| "min": -0.03225700929760933, |
| "max": 0.032385751605033875, |
| "mean": -9.290525667893235e-06, |
| "std": 0.01804504171013832, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.4.ff.0.0.bias": { |
| "min": -0.03201417997479439, |
| "max": 0.03202167525887489, |
| "mean": 0.0002501691924408078, |
| "std": 0.018027769401669502, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.11.4.ff.2.weight": { |
| "min": -0.0008222123724408448, |
| "max": 0.0007597835501655936, |
| "mean": -1.4037771052244352e-06, |
| "std": 0.0001422762288711965, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.11.4.ff.2.bias": { |
| "min": -0.0004344022599980235, |
| "max": 0.000338842801284045, |
| "mean": -5.246626642474439e-06, |
| "std": 8.8350752776023e-05, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.1.g": { |
| "min": 0.3827516734600067, |
| "max": 0.7182729244232178, |
| "mean": 0.5806694030761719, |
| "std": 0.03871554881334305, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_q.weight": { |
| "min": -0.23742133378982544, |
| "max": 0.19636878371238708, |
| "mean": 2.6759680622490123e-05, |
| "std": 0.037471406161785126, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_q.bias": { |
| "min": -0.1184450015425682, |
| "max": 0.16545724868774414, |
| "mean": 0.0009931407403200865, |
| "std": 0.027538597583770752, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_k.weight": { |
| "min": -0.2451958954334259, |
| "max": 0.49966853857040405, |
| "mean": -5.0392896810080856e-05, |
| "std": 0.0376293808221817, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_k.bias": { |
| "min": -3.932778835296631, |
| "max": 3.76035213470459, |
| "mean": -0.003568061627447605, |
| "std": 0.6805727481842041, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_v.weight": { |
| "min": -0.22708982229232788, |
| "max": 0.2511258125305176, |
| "mean": -1.143130793934688e-05, |
| "std": 0.037441134452819824, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_v.bias": { |
| "min": -0.07165413349866867, |
| "max": 0.08049532026052475, |
| "mean": -0.0005234142299741507, |
| "std": 0.015659447759389877, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_out.0.weight": { |
| "min": -0.22785918414592743, |
| "max": 0.25734248757362366, |
| "mean": -2.8539496270241216e-05, |
| "std": 0.035427965223789215, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_out.0.bias": { |
| "min": -0.19991812109947205, |
| "max": 0.214930921792984, |
| "mean": -0.005538000259548426, |
| "std": 0.06830835342407227, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.3.g": { |
| "min": 0.40544652938842773, |
| "max": 1.1868609189987183, |
| "mean": 0.7379507422447205, |
| "std": 0.05492096021771431, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.4.ff.0.0.weight": { |
| "min": -0.22111627459526062, |
| "max": 0.2460324913263321, |
| "mean": 0.0005210894159972668, |
| "std": 0.04134552925825119, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.4.ff.0.0.bias": { |
| "min": -0.10342609882354736, |
| "max": 0.024193264544010162, |
| "mean": -0.03266071155667305, |
| "std": 0.018867699429392815, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.12.4.ff.2.weight": { |
| "min": -0.448818176984787, |
| "max": 0.4217819571495056, |
| "mean": -0.000431257882155478, |
| "std": 0.04690708965063095, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.12.4.ff.2.bias": { |
| "min": -0.2508312165737152, |
| "max": 0.46896737813949585, |
| "mean": 0.00319076469168067, |
| "std": 0.04450752213597298, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.0.weight": { |
| "min": -0.3169752359390259, |
| "max": 0.33314692974090576, |
| "mean": -2.5337005354231223e-05, |
| "std": 0.021293330937623978, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.13.1.g": { |
| "min": 0.32465165853500366, |
| "max": 0.6822460889816284, |
| "mean": 0.5709546208381653, |
| "std": 0.04454142227768898, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_q.weight": { |
| "min": -0.16416817903518677, |
| "max": 0.1733636111021042, |
| "mean": -4.858425018028356e-05, |
| "std": 0.03318599984049797, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_q.bias": { |
| "min": -0.18635453283786774, |
| "max": 0.1423773616552353, |
| "mean": 4.034899757243693e-05, |
| "std": 0.02966292016208172, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_k.weight": { |
| "min": -0.37941935658454895, |
| "max": 0.24537599086761475, |
| "mean": -1.0037202628154773e-05, |
| "std": 0.03276722505688667, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_k.bias": { |
| "min": -3.6522655487060547, |
| "max": 3.2869510650634766, |
| "mean": -0.014257419854402542, |
| "std": 0.9848745465278625, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_v.weight": { |
| "min": -0.23496949672698975, |
| "max": 0.24738511443138123, |
| "mean": -1.7606289475224912e-05, |
| "std": 0.04170484468340874, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_v.bias": { |
| "min": -0.07273464649915695, |
| "max": 0.15422259271144867, |
| "mean": 0.0006638166960328817, |
| "std": 0.025166962295770645, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_out.0.weight": { |
| "min": -0.2664797306060791, |
| "max": 0.248508021235466, |
| "mean": -1.5497178537771106e-05, |
| "std": 0.04014508053660393, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_out.0.bias": { |
| "min": -0.18958289921283722, |
| "max": 0.19478872418403625, |
| "mean": -0.0012272386811673641, |
| "std": 0.06668190658092499, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.3.g": { |
| "min": 0.32911282777786255, |
| "max": 0.9983987808227539, |
| "mean": 0.7191941142082214, |
| "std": 0.0522039495408535, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.4.ff.0.0.weight": { |
| "min": -0.23135632276535034, |
| "max": 0.24583274126052856, |
| "mean": 0.00018275347247254103, |
| "std": 0.04090878367424011, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.4.ff.0.0.bias": { |
| "min": -0.11377062648534775, |
| "max": 0.018522411584854126, |
| "mean": -0.04246858134865761, |
| "std": 0.018818210810422897, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.13.4.ff.2.weight": { |
| "min": -0.3897111713886261, |
| "max": 0.40687721967697144, |
| "mean": -2.178383874706924e-05, |
| "std": 0.04854356870055199, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.13.4.ff.2.bias": { |
| "min": -0.6922244429588318, |
| "max": 0.4119531214237213, |
| "mean": 0.0008513483917340636, |
| "std": 0.060246195644140244, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.0.weight": { |
| "min": -0.0007574164774268866, |
| "max": 1.0006382465362549, |
| "mean": 0.0004883571527898312, |
| "std": 0.022093627601861954, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.14.1.g": { |
| "min": 0.995638906955719, |
| "max": 1.000357985496521, |
| "mean": 0.9993537068367004, |
| "std": 0.001561639248393476, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_q.weight": { |
| "min": -0.03126733377575874, |
| "max": 0.031276635825634, |
| "mean": -2.102728103636764e-05, |
| "std": 0.01803644187748432, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_q.bias": { |
| "min": -0.03121519461274147, |
| "max": 0.031229794025421143, |
| "mean": -0.000677098985761404, |
| "std": 0.017830997705459595, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_k.weight": { |
| "min": -0.03127024322748184, |
| "max": 0.03126488998532295, |
| "mean": -8.836910637910478e-06, |
| "std": 0.018035493791103363, |
| "sparsity": 9.5367431640625e-07, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_k.bias": { |
| "min": -0.031232407316565514, |
| "max": 0.031246833503246307, |
| "mean": -0.0007298535201698542, |
| "std": 0.0179455429315567, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_v.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_v.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_out.0.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_out.0.bias": { |
| "min": -0.00021961150923743844, |
| "max": 0.00025036477018147707, |
| "mean": -8.001849209904321e-07, |
| "std": 8.148775668814778e-05, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.3.g": { |
| "min": 0.995234489440918, |
| "max": 1.0012273788452148, |
| "mean": 0.9999035596847534, |
| "std": 0.001056881621479988, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.4.ff.0.0.weight": { |
| "min": -0.03210779279470444, |
| "max": 0.03193911164999008, |
| "mean": 5.988833436276764e-06, |
| "std": 0.018047882243990898, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.4.ff.0.0.bias": { |
| "min": -0.031279150396585464, |
| "max": 0.031749434769153595, |
| "mean": 0.00044275011168792844, |
| "std": 0.018095213919878006, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.14.4.ff.2.weight": { |
| "min": -0.0007249970221891999, |
| "max": 0.0007807987276464701, |
| "mean": -3.5197314218748943e-07, |
| "std": 0.00014107293100096285, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.14.4.ff.2.bias": { |
| "min": -0.00022946292301639915, |
| "max": 0.00021843933791387826, |
| "mean": -1.2389690482450533e-06, |
| "std": 7.586943684145808e-05, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.0.weight": { |
| "min": -0.23457658290863037, |
| "max": 0.2724316418170929, |
| "mean": 7.120183454389917e-06, |
| "std": 0.01881435327231884, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.15.1.g": { |
| "min": 0.32128995656967163, |
| "max": 0.692602813243866, |
| "mean": 0.5816522836685181, |
| "std": 0.04586285352706909, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_q.weight": { |
| "min": -0.18137724697589874, |
| "max": 0.19706015288829803, |
| "mean": -1.1772945072152652e-05, |
| "std": 0.03318871185183525, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_q.bias": { |
| "min": -0.1606057584285736, |
| "max": 0.12942680716514587, |
| "mean": -0.0010653780773282051, |
| "std": 0.03413666784763336, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_k.weight": { |
| "min": -0.3314096927642822, |
| "max": 0.3108590841293335, |
| "mean": -1.029382929118583e-05, |
| "std": 0.03223954886198044, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_k.bias": { |
| "min": -7.800930500030518, |
| "max": 8.760626792907715, |
| "mean": 0.09345310181379318, |
| "std": 1.6193360090255737, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_v.weight": { |
| "min": -0.23322908580303192, |
| "max": 0.24158968031406403, |
| "mean": 4.1257830162066966e-05, |
| "std": 0.040864504873752594, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_v.bias": { |
| "min": -0.07589098066091537, |
| "max": 0.06572694331407547, |
| "mean": 0.00047726332559250295, |
| "std": 0.019406452775001526, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_out.0.weight": { |
| "min": -0.24502328038215637, |
| "max": 0.23352351784706116, |
| "mean": -2.668632077984512e-06, |
| "std": 0.039439182728528976, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_out.0.bias": { |
| "min": -0.16295023262500763, |
| "max": 0.16059955954551697, |
| "mean": 0.0016356806736439466, |
| "std": 0.06525918841362, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.3.g": { |
| "min": 0.556554913520813, |
| "max": 0.9408271312713623, |
| "mean": 0.7128406167030334, |
| "std": 0.039769869297742844, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.4.ff.0.0.weight": { |
| "min": -0.22860386967658997, |
| "max": 0.25511136651039124, |
| "mean": -4.539915607892908e-05, |
| "std": 0.04058451950550079, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.4.ff.0.0.bias": { |
| "min": -0.13515348732471466, |
| "max": 0.02234305441379547, |
| "mean": -0.04134881868958473, |
| "std": 0.01836741715669632, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.15.4.ff.2.weight": { |
| "min": -0.4212746024131775, |
| "max": 0.39222264289855957, |
| "mean": -4.234017978888005e-06, |
| "std": 0.047794174402952194, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.15.4.ff.2.bias": { |
| "min": -0.6065890789031982, |
| "max": 0.6503084897994995, |
| "mean": 0.0015799436951056123, |
| "std": 0.056790802627801895, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.0.weight": { |
| "min": -0.25144556164741516, |
| "max": 0.3204054832458496, |
| "mean": -5.961472197668627e-06, |
| "std": 0.019617972895503044, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.16.1.g": { |
| "min": 0.36011484265327454, |
| "max": 0.6801881790161133, |
| "mean": 0.5707067251205444, |
| "std": 0.04279083386063576, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_q.weight": { |
| "min": -0.22022095322608948, |
| "max": 0.17668727040290833, |
| "mean": -3.4830391086870804e-05, |
| "std": 0.034304577857255936, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_q.bias": { |
| "min": -0.16363094747066498, |
| "max": 0.2328542321920395, |
| "mean": 0.0003622955409809947, |
| "std": 0.03286634013056755, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_k.weight": { |
| "min": -0.26301464438438416, |
| "max": 0.23922747373580933, |
| "mean": -5.2115137805230916e-05, |
| "std": 0.03390384837985039, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_k.bias": { |
| "min": -4.843376159667969, |
| "max": 5.079013824462891, |
| "mean": 0.043839357793331146, |
| "std": 1.2277964353561401, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_v.weight": { |
| "min": -0.24616090953350067, |
| "max": 0.24996501207351685, |
| "mean": 7.23035482224077e-05, |
| "std": 0.04399650916457176, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_v.bias": { |
| "min": -0.06268942356109619, |
| "max": 0.054509397596120834, |
| "mean": 0.0006487497594207525, |
| "std": 0.017188087105751038, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_out.0.weight": { |
| "min": -0.2859387695789337, |
| "max": 0.27142879366874695, |
| "mean": -4.999006341677159e-05, |
| "std": 0.04299502447247505, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_out.0.bias": { |
| "min": -0.16028116643428802, |
| "max": 0.1701924204826355, |
| "mean": -0.00288166431710124, |
| "std": 0.05925562232732773, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.3.g": { |
| "min": 0.5195892453193665, |
| "max": 0.9285021424293518, |
| "mean": 0.71345454454422, |
| "std": 0.03798013553023338, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.4.ff.0.0.weight": { |
| "min": -0.23824341595172882, |
| "max": 0.24957609176635742, |
| "mean": 0.0004649516486097127, |
| "std": 0.040465425699949265, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.4.ff.0.0.bias": { |
| "min": -0.1440071016550064, |
| "max": 0.041583579033613205, |
| "mean": -0.03968297317624092, |
| "std": 0.020529083907604218, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.16.4.ff.2.weight": { |
| "min": -0.5325517058372498, |
| "max": 0.5824555158615112, |
| "mean": 5.4546726460102946e-06, |
| "std": 0.04887215048074722, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.16.4.ff.2.bias": { |
| "min": -0.5182770490646362, |
| "max": 0.4927639365196228, |
| "mean": 0.002359384670853615, |
| "std": 0.05340024083852768, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.0.weight": { |
| "min": -0.27337488532066345, |
| "max": 0.3148258626461029, |
| "mean": 1.8105949948221678e-06, |
| "std": 0.020055659115314484, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.17.1.g": { |
| "min": 0.36668556928634644, |
| "max": 0.7091761827468872, |
| "mean": 0.5931493639945984, |
| "std": 0.04574775695800781, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_q.weight": { |
| "min": -0.2106715887784958, |
| "max": 0.1992705911397934, |
| "mean": 3.0829094612272456e-05, |
| "std": 0.03486945852637291, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_q.bias": { |
| "min": -0.18688145279884338, |
| "max": 0.2038576900959015, |
| "mean": 0.0009574516443535686, |
| "std": 0.03150374814867973, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_k.weight": { |
| "min": -0.2888670563697815, |
| "max": 0.33895108103752136, |
| "mean": -4.766129131894559e-05, |
| "std": 0.03459092602133751, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_k.bias": { |
| "min": -3.8705790042877197, |
| "max": 3.3815643787384033, |
| "mean": 0.014464044943451881, |
| "std": 0.8578398823738098, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_v.weight": { |
| "min": -0.2241480052471161, |
| "max": 0.24975183606147766, |
| "mean": -4.014226306026103e-06, |
| "std": 0.04223877936601639, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_v.bias": { |
| "min": -0.0549103245139122, |
| "max": 0.04695763811469078, |
| "mean": -1.4065793948248029e-05, |
| "std": 0.015847966074943542, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_out.0.weight": { |
| "min": -0.2923896610736847, |
| "max": 0.2908935844898224, |
| "mean": -7.1035901783034205e-06, |
| "std": 0.04195380210876465, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_out.0.bias": { |
| "min": -0.12478198111057281, |
| "max": 0.2591152787208557, |
| "mean": -0.003229282796382904, |
| "std": 0.053138162940740585, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.3.g": { |
| "min": 0.45623326301574707, |
| "max": 0.8426384925842285, |
| "mean": 0.7055743336677551, |
| "std": 0.034994304180145264, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.4.ff.0.0.weight": { |
| "min": -0.5110324621200562, |
| "max": 0.3488520383834839, |
| "mean": 0.00034251363831572235, |
| "std": 0.04021010175347328, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.4.ff.0.0.bias": { |
| "min": -0.18705947697162628, |
| "max": 0.03953401744365692, |
| "mean": -0.03937750309705734, |
| "std": 0.02131262607872486, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.17.4.ff.2.weight": { |
| "min": -0.5440298318862915, |
| "max": 0.5563207864761353, |
| "mean": -7.213428762042895e-05, |
| "std": 0.050746381282806396, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.17.4.ff.2.bias": { |
| "min": -0.5106754302978516, |
| "max": 0.662798285484314, |
| "mean": 0.002447732724249363, |
| "std": 0.04947002977132797, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.0.weight": { |
| "min": -0.33220773935317993, |
| "max": 0.2652227580547333, |
| "mean": 3.882123110088287e-06, |
| "std": 0.01939382590353489, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.18.1.g": { |
| "min": 0.32238951325416565, |
| "max": 0.764789879322052, |
| "mean": 0.6509858965873718, |
| "std": 0.0451430045068264, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_q.weight": { |
| "min": -0.24893951416015625, |
| "max": 0.219136044383049, |
| "mean": -2.739794126682682e-06, |
| "std": 0.036503732204437256, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_q.bias": { |
| "min": -0.32658451795578003, |
| "max": 0.28703945875167847, |
| "mean": -0.0006784016732126474, |
| "std": 0.038509681820869446, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_k.weight": { |
| "min": -0.3096793591976166, |
| "max": 0.3693031072616577, |
| "mean": 6.47535634925589e-05, |
| "std": 0.036244187504053116, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_k.bias": { |
| "min": -4.706123352050781, |
| "max": 5.793623447418213, |
| "mean": 0.03790595382452011, |
| "std": 1.4113690853118896, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_v.weight": { |
| "min": -0.22146277129650116, |
| "max": 0.20545163750648499, |
| "mean": -7.498646300518885e-05, |
| "std": 0.042494479566812515, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_v.bias": { |
| "min": -0.07756227254867554, |
| "max": 0.05129515379667282, |
| "mean": -0.0009279022924602032, |
| "std": 0.016406826674938202, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_out.0.weight": { |
| "min": -0.33102676272392273, |
| "max": 0.3289909064769745, |
| "mean": -5.028288796893321e-06, |
| "std": 0.042801517993211746, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_out.0.bias": { |
| "min": -0.28435027599334717, |
| "max": 0.111260324716568, |
| "mean": -0.001205979730002582, |
| "std": 0.04699746519327164, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.3.g": { |
| "min": 0.4868572950363159, |
| "max": 0.8827712535858154, |
| "mean": 0.7374467849731445, |
| "std": 0.03787440061569214, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.4.ff.0.0.weight": { |
| "min": -0.3608104884624481, |
| "max": 0.2736315429210663, |
| "mean": 5.1337454351596534e-05, |
| "std": 0.04065750911831856, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.4.ff.0.0.bias": { |
| "min": -0.24695155024528503, |
| "max": 0.04662873595952988, |
| "mean": -0.039258524775505066, |
| "std": 0.023203320801258087, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.18.4.ff.2.weight": { |
| "min": -0.6257067322731018, |
| "max": 0.5967472195625305, |
| "mean": -6.336745718726888e-05, |
| "std": 0.05312981456518173, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.18.4.ff.2.bias": { |
| "min": -0.7091463208198547, |
| "max": 0.26562684774398804, |
| "mean": 0.0009212760487571359, |
| "std": 0.051211755722761154, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.0.weight": { |
| "min": -0.34325337409973145, |
| "max": 0.30324116349220276, |
| "mean": 1.430171323590912e-07, |
| "std": 0.019143851473927498, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.19.1.g": { |
| "min": 0.34994906187057495, |
| "max": 0.7801994681358337, |
| "mean": 0.6388012170791626, |
| "std": 0.04902452602982521, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_q.weight": { |
| "min": -0.20566730201244354, |
| "max": 0.2065981775522232, |
| "mean": -6.0025900893379e-05, |
| "std": 0.03770073875784874, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_q.bias": { |
| "min": -0.25845062732696533, |
| "max": 0.268261194229126, |
| "mean": -0.00040606403490528464, |
| "std": 0.04461587592959404, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_k.weight": { |
| "min": -0.3532998263835907, |
| "max": 0.3217300474643707, |
| "mean": -7.498586455767509e-06, |
| "std": 0.037208717316389084, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_k.bias": { |
| "min": -5.249058246612549, |
| "max": 4.194725036621094, |
| "mean": -0.02638459950685501, |
| "std": 1.005539894104004, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_v.weight": { |
| "min": -0.2386980652809143, |
| "max": 0.24372872710227966, |
| "mean": -2.586210030131042e-05, |
| "std": 0.04321879521012306, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_v.bias": { |
| "min": -0.062367696315050125, |
| "max": 0.05657341331243515, |
| "mean": 0.0003560591721907258, |
| "std": 0.01414806954562664, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_out.0.weight": { |
| "min": -0.43753641843795776, |
| "max": 0.37365373969078064, |
| "mean": 1.460490602767095e-05, |
| "std": 0.044131483882665634, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_out.0.bias": { |
| "min": -0.09578664600849152, |
| "max": 0.17602641880512238, |
| "mean": -0.0006584142101928592, |
| "std": 0.0351262167096138, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.3.g": { |
| "min": 0.42189696431159973, |
| "max": 1.0643466711044312, |
| "mean": 0.7485300302505493, |
| "std": 0.04179271310567856, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.4.ff.0.0.weight": { |
| "min": -0.265593945980072, |
| "max": 0.29676973819732666, |
| "mean": -7.866104715503752e-05, |
| "std": 0.04081883281469345, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.4.ff.0.0.bias": { |
| "min": -0.18380795419216156, |
| "max": 0.04289933666586876, |
| "mean": -0.036790553480386734, |
| "std": 0.02553965151309967, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.19.4.ff.2.weight": { |
| "min": -0.4579704999923706, |
| "max": 0.4863548278808594, |
| "mean": 4.272036676411517e-05, |
| "std": 0.05422580987215042, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.19.4.ff.2.bias": { |
| "min": -0.2855266034603119, |
| "max": 0.5506117939949036, |
| "mean": -0.0008784987148828804, |
| "std": 0.047787394374608994, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.0.weight": { |
| "min": -0.2924049496650696, |
| "max": 0.32256847620010376, |
| "mean": 5.68283303437056e-06, |
| "std": 0.01997658796608448, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.20.1.g": { |
| "min": 0.29146960377693176, |
| "max": 0.7568098902702332, |
| "mean": 0.6507450938224792, |
| "std": 0.05195383355021477, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_q.weight": { |
| "min": -0.2434154599905014, |
| "max": 0.26121068000793457, |
| "mean": -5.642844371323008e-06, |
| "std": 0.039615679532289505, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_q.bias": { |
| "min": -0.2669755518436432, |
| "max": 0.19996695220470428, |
| "mean": -0.0008783398079685867, |
| "std": 0.051739659160375595, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_k.weight": { |
| "min": -0.27164191007614136, |
| "max": 0.25313133001327515, |
| "mean": 5.889336534892209e-06, |
| "std": 0.03871198371052742, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_k.bias": { |
| "min": -12.952698707580566, |
| "max": 15.9312744140625, |
| "mean": 0.03322799503803253, |
| "std": 1.9877989292144775, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_v.weight": { |
| "min": -0.20647653937339783, |
| "max": 0.2256641685962677, |
| "mean": -7.246333552757278e-05, |
| "std": 0.040561433881521225, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_v.bias": { |
| "min": -0.06935624778270721, |
| "max": 0.06306472420692444, |
| "mean": 0.00016317634435836226, |
| "std": 0.014748629182577133, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_out.0.weight": { |
| "min": -0.4654642939567566, |
| "max": 0.31973931193351746, |
| "mean": 1.960094778041821e-05, |
| "std": 0.04059756174683571, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_out.0.bias": { |
| "min": -0.06414826959371567, |
| "max": 0.11558651179075241, |
| "mean": 0.0012002706062048674, |
| "std": 0.024707410484552383, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.3.g": { |
| "min": 0.3750652074813843, |
| "max": 0.9275709390640259, |
| "mean": 0.7511184215545654, |
| "std": 0.03999503329396248, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.4.ff.0.0.weight": { |
| "min": -0.2787969410419464, |
| "max": 0.2728310525417328, |
| "mean": -0.00016816731658764184, |
| "std": 0.0410102978348732, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.4.ff.0.0.bias": { |
| "min": -0.19773395359516144, |
| "max": 0.05162842571735382, |
| "mean": -0.03201429173350334, |
| "std": 0.025033777579665184, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.20.4.ff.2.weight": { |
| "min": -0.6583139300346375, |
| "max": 0.5351659655570984, |
| "mean": -5.119909474160522e-05, |
| "std": 0.05286192148923874, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.20.4.ff.2.bias": { |
| "min": -0.1919519156217575, |
| "max": 0.5808603763580322, |
| "mean": -0.0005111135542392731, |
| "std": 0.04104519635438919, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.0.weight": { |
| "min": -0.4174348711967468, |
| "max": 0.3718706965446472, |
| "mean": 6.703614417347126e-06, |
| "std": 0.021633952856063843, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.21.1.g": { |
| "min": 0.21479681134223938, |
| "max": 0.7478918433189392, |
| "mean": 0.6493618488311768, |
| "std": 0.054201409220695496, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_q.weight": { |
| "min": -0.20870910584926605, |
| "max": 0.1947445124387741, |
| "mean": 4.020327469334006e-05, |
| "std": 0.03945876285433769, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_q.bias": { |
| "min": -0.32888734340667725, |
| "max": 0.25908946990966797, |
| "mean": -0.003229741007089615, |
| "std": 0.05623537674546242, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_k.weight": { |
| "min": -0.2056186944246292, |
| "max": 0.2540878653526306, |
| "mean": 5.3863834182266146e-05, |
| "std": 0.03856115788221359, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_k.bias": { |
| "min": -6.228662967681885, |
| "max": 6.915782928466797, |
| "mean": 0.04823269695043564, |
| "std": 1.3832472562789917, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_v.weight": { |
| "min": -0.20932167768478394, |
| "max": 0.22993139922618866, |
| "mean": -4.4988796616962645e-06, |
| "std": 0.04132062569260597, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_v.bias": { |
| "min": -0.04368359223008156, |
| "max": 0.035936541855335236, |
| "mean": -1.0926916729658842e-05, |
| "std": 0.012798542156815529, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_out.0.weight": { |
| "min": -0.3968988060951233, |
| "max": 0.34478238224983215, |
| "mean": -5.5305037676589563e-05, |
| "std": 0.04239818826317787, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_out.0.bias": { |
| "min": -0.05508316308259964, |
| "max": 0.06261169910430908, |
| "mean": 0.0003532343253027648, |
| "std": 0.018669025972485542, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.3.g": { |
| "min": 0.3511422276496887, |
| "max": 1.0404622554779053, |
| "mean": 0.7897100448608398, |
| "std": 0.048514608293771744, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.4.ff.0.0.weight": { |
| "min": -0.3338225483894348, |
| "max": 0.38620951771736145, |
| "mean": -0.00016899823094718158, |
| "std": 0.04149709641933441, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.4.ff.0.0.bias": { |
| "min": -0.15740133821964264, |
| "max": 0.058948904275894165, |
| "mean": -0.0318116769194603, |
| "std": 0.025069545954465866, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.21.4.ff.2.weight": { |
| "min": -0.6960089206695557, |
| "max": 0.46894899010658264, |
| "mean": -8.237230940721929e-05, |
| "std": 0.05181308463215828, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.21.4.ff.2.bias": { |
| "min": -0.24741840362548828, |
| "max": 0.3286932408809662, |
| "mean": -0.00026996995438821614, |
| "std": 0.04144337400794029, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.0.weight": { |
| "min": -0.28653645515441895, |
| "max": 0.35008078813552856, |
| "mean": -2.9175917006796226e-06, |
| "std": 0.024247299879789352, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.22.1.g": { |
| "min": 0.19693201780319214, |
| "max": 0.7785046696662903, |
| "mean": 0.670115053653717, |
| "std": 0.058539655059576035, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_q.weight": { |
| "min": -0.228579580783844, |
| "max": 0.23089821636676788, |
| "mean": -2.1206951714702882e-05, |
| "std": 0.040444690734148026, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_q.bias": { |
| "min": -0.22008375823497772, |
| "max": 0.24102427065372467, |
| "mean": 0.0007767346687614918, |
| "std": 0.055866289883852005, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_k.weight": { |
| "min": -0.21646404266357422, |
| "max": 0.2256259322166443, |
| "mean": -7.261607970576733e-05, |
| "std": 0.03937656059861183, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_k.bias": { |
| "min": -8.884381294250488, |
| "max": 9.046843528747559, |
| "mean": -0.0012065814808011055, |
| "std": 1.8454406261444092, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_v.weight": { |
| "min": -0.2685357332229614, |
| "max": 0.2581280469894409, |
| "mean": 4.3568383262027055e-05, |
| "std": 0.03841337561607361, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_v.bias": { |
| "min": -0.057995330542325974, |
| "max": 0.05802358686923981, |
| "mean": 0.00035532776382751763, |
| "std": 0.014707793481647968, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_out.0.weight": { |
| "min": -0.2625483274459839, |
| "max": 0.2874881625175476, |
| "mean": -6.166227103676647e-05, |
| "std": 0.039080966264009476, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_out.0.bias": { |
| "min": -0.0441780760884285, |
| "max": 0.03726305067539215, |
| "mean": -0.00010403832129668444, |
| "std": 0.013333701528608799, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.3.g": { |
| "min": 0.3392186760902405, |
| "max": 1.088745355606079, |
| "mean": 0.8640130758285522, |
| "std": 0.06376548111438751, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.4.ff.0.0.weight": { |
| "min": -0.42300641536712646, |
| "max": 0.41883379220962524, |
| "mean": 0.00031391510856337845, |
| "std": 0.04352227598428726, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.4.ff.0.0.bias": { |
| "min": -0.21468287706375122, |
| "max": 0.1707322746515274, |
| "mean": -0.02942698448896408, |
| "std": 0.03183940798044205, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.22.4.ff.2.weight": { |
| "min": -0.5976030826568604, |
| "max": 0.559415340423584, |
| "mean": -0.00014561890566255897, |
| "std": 0.05347010865807533, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.22.4.ff.2.bias": { |
| "min": -0.17889779806137085, |
| "max": 0.3772771656513214, |
| "mean": 0.001343069365248084, |
| "std": 0.03730209544301033, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.0.weight": { |
| "min": -0.39426180720329285, |
| "max": 0.36868590116500854, |
| "mean": 3.8257519918261096e-05, |
| "std": 0.0286222156137228, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.23.1.g": { |
| "min": 0.2908227741718292, |
| "max": 0.8264791369438171, |
| "mean": 0.7054398655891418, |
| "std": 0.0677274614572525, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_q.weight": { |
| "min": -0.926691472530365, |
| "max": 1.0270028114318848, |
| "mean": -2.8848577130702324e-05, |
| "std": 0.04765753820538521, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_q.bias": { |
| "min": -0.878186821937561, |
| "max": 0.8147233724594116, |
| "mean": -0.0002844139817170799, |
| "std": 0.09543365985155106, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_k.weight": { |
| "min": -0.27030670642852783, |
| "max": 0.24055372178554535, |
| "mean": -2.2271982743404806e-05, |
| "std": 0.038951653987169266, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_k.bias": { |
| "min": -23.68506622314453, |
| "max": 22.795772552490234, |
| "mean": -0.09177836775779724, |
| "std": 4.062017440795898, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_v.weight": { |
| "min": -0.22721800208091736, |
| "max": 0.24524104595184326, |
| "mean": -2.5419916710234247e-05, |
| "std": 0.038644734770059586, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_v.bias": { |
| "min": -0.059977784752845764, |
| "max": 0.04509967938065529, |
| "mean": -0.00013076608593109995, |
| "std": 0.01468411460518837, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_out.0.weight": { |
| "min": -0.3371436893939972, |
| "max": 0.3742288053035736, |
| "mean": 7.546843335148878e-06, |
| "std": 0.04082665964961052, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_out.0.bias": { |
| "min": -0.04609467089176178, |
| "max": 0.19514600932598114, |
| "mean": 0.00027449309709481895, |
| "std": 0.013541752472519875, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.3.g": { |
| "min": 0.37357744574546814, |
| "max": 1.125421166419983, |
| "mean": 0.8902103900909424, |
| "std": 0.06386467814445496, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.4.ff.0.0.weight": { |
| "min": -0.447258859872818, |
| "max": 0.5423630475997925, |
| "mean": 2.548232805565931e-05, |
| "std": 0.045591775327920914, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.4.ff.0.0.bias": { |
| "min": -0.22343683242797852, |
| "max": 0.08690512925386429, |
| "mean": -0.03200257197022438, |
| "std": 0.03771420195698738, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.23.4.ff.2.weight": { |
| "min": -0.7260164022445679, |
| "max": 0.6879873275756836, |
| "mean": 3.631926665548235e-05, |
| "std": 0.05180613696575165, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.23.4.ff.2.bias": { |
| "min": -0.17385190725326538, |
| "max": 0.21751302480697632, |
| "mean": 3.567736712284386e-05, |
| "std": 0.03174319490790367, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.0.weight": { |
| "min": -0.3385016918182373, |
| "max": 0.37161216139793396, |
| "mean": 4.3165768147446215e-05, |
| "std": 0.0341353677213192, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.24.1.g": { |
| "min": 0.31760096549987793, |
| "max": 1.2830872535705566, |
| "mean": 0.6014329195022583, |
| "std": 0.08317635953426361, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_q.weight": { |
| "min": -0.28283271193504333, |
| "max": 0.26012101769447327, |
| "mean": -2.921331542893313e-06, |
| "std": 0.035985857248306274, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_q.bias": { |
| "min": -0.23526135087013245, |
| "max": 0.20543411374092102, |
| "mean": 0.00024757458595559, |
| "std": 0.05601666867733002, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_k.weight": { |
| "min": -0.4347652792930603, |
| "max": 0.32389530539512634, |
| "mean": 2.395988121861592e-05, |
| "std": 0.03412287309765816, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_k.bias": { |
| "min": -5.541207790374756, |
| "max": 7.30653190612793, |
| "mean": -0.00736255943775177, |
| "std": 0.6987443566322327, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_v.weight": { |
| "min": -0.3433501720428467, |
| "max": 0.361217200756073, |
| "mean": 0.0001032147411024198, |
| "std": 0.04784071072936058, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_v.bias": { |
| "min": -0.07378581166267395, |
| "max": 0.060352873057127, |
| "mean": 0.0009383288561366498, |
| "std": 0.01492984127253294, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_out.0.weight": { |
| "min": -0.2561882436275482, |
| "max": 0.28616371750831604, |
| "mean": 5.244153726380318e-06, |
| "std": 0.04157177358865738, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_out.0.bias": { |
| "min": -0.05515698716044426, |
| "max": 0.062612384557724, |
| "mean": 0.00012199293996673077, |
| "std": 0.007132581900805235, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.3.g": { |
| "min": 0.49441853165626526, |
| "max": 1.2188090085983276, |
| "mean": 1.013464331626892, |
| "std": 0.11732637882232666, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.4.ff.0.0.weight": { |
| "min": -1.0939745903015137, |
| "max": 1.0476189851760864, |
| "mean": -4.830169564229436e-05, |
| "std": 0.05242462456226349, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.4.ff.0.0.bias": { |
| "min": -0.22291964292526245, |
| "max": 0.17299318313598633, |
| "mean": -0.027209078893065453, |
| "std": 0.03627277910709381, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.24.4.ff.2.weight": { |
| "min": -0.8831630349159241, |
| "max": 0.9219300150871277, |
| "mean": -0.00014596671098843217, |
| "std": 0.05330995097756386, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.24.4.ff.2.bias": { |
| "min": -0.17071670293807983, |
| "max": 0.3785896301269531, |
| "mean": 0.0033629729878157377, |
| "std": 0.03981942683458328, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.0.weight": { |
| "min": -0.7773581147193909, |
| "max": 0.721552848815918, |
| "mean": 1.7906297216541134e-05, |
| "std": 0.0461493544280529, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.25.1.g": { |
| "min": 0.33866649866104126, |
| "max": 1.4223623275756836, |
| "mean": 0.9482957124710083, |
| "std": 0.20650897920131683, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_q.weight": { |
| "min": -1.7458724975585938, |
| "max": 1.7043527364730835, |
| "mean": 0.0002272979763802141, |
| "std": 0.1587107926607132, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_q.bias": { |
| "min": -1.1964622735977173, |
| "max": 1.0986626148223877, |
| "mean": -0.009530629962682724, |
| "std": 0.20347940921783447, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_k.weight": { |
| "min": -0.420305460691452, |
| "max": 0.42840367555618286, |
| "mean": 6.361818668665364e-05, |
| "std": 0.04802125319838524, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_k.bias": { |
| "min": -19.700023651123047, |
| "max": 19.49565315246582, |
| "mean": -0.24793246388435364, |
| "std": 4.7666015625, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_v.weight": { |
| "min": -0.3232991695404053, |
| "max": 0.4378996789455414, |
| "mean": -1.1727358469215687e-05, |
| "std": 0.04616958647966385, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_v.bias": { |
| "min": -0.033631421625614166, |
| "max": 0.03664267063140869, |
| "mean": 0.0006392866489477456, |
| "std": 0.012905232608318329, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_out.0.weight": { |
| "min": -0.7025362849235535, |
| "max": 0.6701837778091431, |
| "mean": 4.212657222524285e-05, |
| "std": 0.057898350059986115, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_out.0.bias": { |
| "min": -0.07234562933444977, |
| "max": 0.06781232357025146, |
| "mean": -0.00013423134805634618, |
| "std": 0.012877929955720901, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.3.g": { |
| "min": 0.3804936408996582, |
| "max": 1.3917937278747559, |
| "mean": 1.0666232109069824, |
| "std": 0.21957866847515106, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.4.ff.0.0.weight": { |
| "min": -0.6164886951446533, |
| "max": 0.7186930179595947, |
| "mean": 0.00011397639173083007, |
| "std": 0.05803186818957329, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.4.ff.0.0.bias": { |
| "min": -0.21819192171096802, |
| "max": 0.22446297109127045, |
| "mean": 0.006146667059510946, |
| "std": 0.04965293034911156, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.25.4.ff.2.weight": { |
| "min": -0.6298643350601196, |
| "max": 0.8897628784179688, |
| "mean": 1.269071981369052e-05, |
| "std": 0.023556767031550407, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.25.4.ff.2.bias": { |
| "min": -0.50624680519104, |
| "max": 0.4730708599090576, |
| "mean": -0.0030176215805113316, |
| "std": 0.06914978474378586, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.norm_out.g": { |
| "min": 0.5384271144866943, |
| "max": 1.1763767004013062, |
| "mean": 0.7825473546981812, |
| "std": 0.09825034439563751, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.proj_out.weight": { |
| "min": -0.26688942313194275, |
| "max": 0.21287617087364197, |
| "mean": -0.00022272299975156784, |
| "std": 0.0540103055536747, |
| "sparsity": 0.0, |
| "shape": [ |
| 100, |
| 1024 |
| ] |
| }, |
| "transformer.proj_out.bias": { |
| "min": -0.23796546459197998, |
| "max": 0.014876163564622402, |
| "mean": -0.04389083757996559, |
| "std": 0.03420323133468628, |
| "sparsity": 0.0, |
| "shape": [ |
| 100 |
| ] |
| } |
| } |
| } |