| { |
| "layer_types": { |
| "transformer": 391 |
| }, |
| "parameter_counts": { |
| "transformer.time_embed.time_mlp.0.weight": 262144, |
| "transformer.time_embed.time_mlp.0.bias": 1024, |
| "transformer.time_embed.time_mlp.2.weight": 1048576, |
| "transformer.time_embed.time_mlp.2.bias": 1024, |
| "transformer.text_embed.text_embed.weight": 254600, |
| "transformer.input_embed.proj.weight": 307200, |
| "transformer.input_embed.proj.bias": 1024, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, |
| "transformer.layers.0.1.g": 1024, |
| "transformer.layers.0.2.to_q.weight": 1048576, |
| "transformer.layers.0.2.to_q.bias": 1024, |
| "transformer.layers.0.2.to_k.weight": 1048576, |
| "transformer.layers.0.2.to_k.bias": 1024, |
| "transformer.layers.0.2.to_v.weight": 1048576, |
| "transformer.layers.0.2.to_v.bias": 1024, |
| "transformer.layers.0.2.to_out.0.weight": 1048576, |
| "transformer.layers.0.2.to_out.0.bias": 1024, |
| "transformer.layers.0.3.g": 1024, |
| "transformer.layers.0.4.ff.0.0.weight": 4194304, |
| "transformer.layers.0.4.ff.0.0.bias": 4096, |
| "transformer.layers.0.4.ff.2.weight": 4194304, |
| "transformer.layers.0.4.ff.2.bias": 1024, |
| "transformer.layers.1.1.g": 1024, |
| "transformer.layers.1.2.to_q.weight": 1048576, |
| "transformer.layers.1.2.to_q.bias": 1024, |
| "transformer.layers.1.2.to_k.weight": 1048576, |
| "transformer.layers.1.2.to_k.bias": 1024, |
| "transformer.layers.1.2.to_v.weight": 1048576, |
| "transformer.layers.1.2.to_v.bias": 1024, |
| "transformer.layers.1.2.to_out.0.weight": 1048576, |
| "transformer.layers.1.2.to_out.0.bias": 1024, |
| "transformer.layers.1.3.g": 1024, |
| "transformer.layers.1.4.ff.0.0.weight": 4194304, |
| "transformer.layers.1.4.ff.0.0.bias": 4096, |
| "transformer.layers.1.4.ff.2.weight": 4194304, |
| "transformer.layers.1.4.ff.2.bias": 1024, |
| "transformer.layers.2.1.g": 1024, |
| "transformer.layers.2.2.to_q.weight": 1048576, |
| "transformer.layers.2.2.to_q.bias": 1024, |
| "transformer.layers.2.2.to_k.weight": 1048576, |
| "transformer.layers.2.2.to_k.bias": 1024, |
| "transformer.layers.2.2.to_v.weight": 1048576, |
| "transformer.layers.2.2.to_v.bias": 1024, |
| "transformer.layers.2.2.to_out.0.weight": 1048576, |
| "transformer.layers.2.2.to_out.0.bias": 1024, |
| "transformer.layers.2.3.g": 1024, |
| "transformer.layers.2.4.ff.0.0.weight": 4194304, |
| "transformer.layers.2.4.ff.0.0.bias": 4096, |
| "transformer.layers.2.4.ff.2.weight": 4194304, |
| "transformer.layers.2.4.ff.2.bias": 1024, |
| "transformer.layers.3.1.g": 1024, |
| "transformer.layers.3.2.to_q.weight": 1048576, |
| "transformer.layers.3.2.to_q.bias": 1024, |
| "transformer.layers.3.2.to_k.weight": 1048576, |
| "transformer.layers.3.2.to_k.bias": 1024, |
| "transformer.layers.3.2.to_v.weight": 1048576, |
| "transformer.layers.3.2.to_v.bias": 1024, |
| "transformer.layers.3.2.to_out.0.weight": 1048576, |
| "transformer.layers.3.2.to_out.0.bias": 1024, |
| "transformer.layers.3.3.g": 1024, |
| "transformer.layers.3.4.ff.0.0.weight": 4194304, |
| "transformer.layers.3.4.ff.0.0.bias": 4096, |
| "transformer.layers.3.4.ff.2.weight": 4194304, |
| "transformer.layers.3.4.ff.2.bias": 1024, |
| "transformer.layers.4.1.g": 1024, |
| "transformer.layers.4.2.to_q.weight": 1048576, |
| "transformer.layers.4.2.to_q.bias": 1024, |
| "transformer.layers.4.2.to_k.weight": 1048576, |
| "transformer.layers.4.2.to_k.bias": 1024, |
| "transformer.layers.4.2.to_v.weight": 1048576, |
| "transformer.layers.4.2.to_v.bias": 1024, |
| "transformer.layers.4.2.to_out.0.weight": 1048576, |
| "transformer.layers.4.2.to_out.0.bias": 1024, |
| "transformer.layers.4.3.g": 1024, |
| "transformer.layers.4.4.ff.0.0.weight": 4194304, |
| "transformer.layers.4.4.ff.0.0.bias": 4096, |
| "transformer.layers.4.4.ff.2.weight": 4194304, |
| "transformer.layers.4.4.ff.2.bias": 1024, |
| "transformer.layers.5.1.g": 1024, |
| "transformer.layers.5.2.to_q.weight": 1048576, |
| "transformer.layers.5.2.to_q.bias": 1024, |
| "transformer.layers.5.2.to_k.weight": 1048576, |
| "transformer.layers.5.2.to_k.bias": 1024, |
| "transformer.layers.5.2.to_v.weight": 1048576, |
| "transformer.layers.5.2.to_v.bias": 1024, |
| "transformer.layers.5.2.to_out.0.weight": 1048576, |
| "transformer.layers.5.2.to_out.0.bias": 1024, |
| "transformer.layers.5.3.g": 1024, |
| "transformer.layers.5.4.ff.0.0.weight": 4194304, |
| "transformer.layers.5.4.ff.0.0.bias": 4096, |
| "transformer.layers.5.4.ff.2.weight": 4194304, |
| "transformer.layers.5.4.ff.2.bias": 1024, |
| "transformer.layers.6.1.g": 1024, |
| "transformer.layers.6.2.to_q.weight": 1048576, |
| "transformer.layers.6.2.to_q.bias": 1024, |
| "transformer.layers.6.2.to_k.weight": 1048576, |
| "transformer.layers.6.2.to_k.bias": 1024, |
| "transformer.layers.6.2.to_v.weight": 1048576, |
| "transformer.layers.6.2.to_v.bias": 1024, |
| "transformer.layers.6.2.to_out.0.weight": 1048576, |
| "transformer.layers.6.2.to_out.0.bias": 1024, |
| "transformer.layers.6.3.g": 1024, |
| "transformer.layers.6.4.ff.0.0.weight": 4194304, |
| "transformer.layers.6.4.ff.0.0.bias": 4096, |
| "transformer.layers.6.4.ff.2.weight": 4194304, |
| "transformer.layers.6.4.ff.2.bias": 1024, |
| "transformer.layers.7.1.g": 1024, |
| "transformer.layers.7.2.to_q.weight": 1048576, |
| "transformer.layers.7.2.to_q.bias": 1024, |
| "transformer.layers.7.2.to_k.weight": 1048576, |
| "transformer.layers.7.2.to_k.bias": 1024, |
| "transformer.layers.7.2.to_v.weight": 1048576, |
| "transformer.layers.7.2.to_v.bias": 1024, |
| "transformer.layers.7.2.to_out.0.weight": 1048576, |
| "transformer.layers.7.2.to_out.0.bias": 1024, |
| "transformer.layers.7.3.g": 1024, |
| "transformer.layers.7.4.ff.0.0.weight": 4194304, |
| "transformer.layers.7.4.ff.0.0.bias": 4096, |
| "transformer.layers.7.4.ff.2.weight": 4194304, |
| "transformer.layers.7.4.ff.2.bias": 1024, |
| "transformer.layers.8.1.g": 1024, |
| "transformer.layers.8.2.to_q.weight": 1048576, |
| "transformer.layers.8.2.to_q.bias": 1024, |
| "transformer.layers.8.2.to_k.weight": 1048576, |
| "transformer.layers.8.2.to_k.bias": 1024, |
| "transformer.layers.8.2.to_v.weight": 1048576, |
| "transformer.layers.8.2.to_v.bias": 1024, |
| "transformer.layers.8.2.to_out.0.weight": 1048576, |
| "transformer.layers.8.2.to_out.0.bias": 1024, |
| "transformer.layers.8.3.g": 1024, |
| "transformer.layers.8.4.ff.0.0.weight": 4194304, |
| "transformer.layers.8.4.ff.0.0.bias": 4096, |
| "transformer.layers.8.4.ff.2.weight": 4194304, |
| "transformer.layers.8.4.ff.2.bias": 1024, |
| "transformer.layers.9.1.g": 1024, |
| "transformer.layers.9.2.to_q.weight": 1048576, |
| "transformer.layers.9.2.to_q.bias": 1024, |
| "transformer.layers.9.2.to_k.weight": 1048576, |
| "transformer.layers.9.2.to_k.bias": 1024, |
| "transformer.layers.9.2.to_v.weight": 1048576, |
| "transformer.layers.9.2.to_v.bias": 1024, |
| "transformer.layers.9.2.to_out.0.weight": 1048576, |
| "transformer.layers.9.2.to_out.0.bias": 1024, |
| "transformer.layers.9.3.g": 1024, |
| "transformer.layers.9.4.ff.0.0.weight": 4194304, |
| "transformer.layers.9.4.ff.0.0.bias": 4096, |
| "transformer.layers.9.4.ff.2.weight": 4194304, |
| "transformer.layers.9.4.ff.2.bias": 1024, |
| "transformer.layers.10.1.g": 1024, |
| "transformer.layers.10.2.to_q.weight": 1048576, |
| "transformer.layers.10.2.to_q.bias": 1024, |
| "transformer.layers.10.2.to_k.weight": 1048576, |
| "transformer.layers.10.2.to_k.bias": 1024, |
| "transformer.layers.10.2.to_v.weight": 1048576, |
| "transformer.layers.10.2.to_v.bias": 1024, |
| "transformer.layers.10.2.to_out.0.weight": 1048576, |
| "transformer.layers.10.2.to_out.0.bias": 1024, |
| "transformer.layers.10.3.g": 1024, |
| "transformer.layers.10.4.ff.0.0.weight": 4194304, |
| "transformer.layers.10.4.ff.0.0.bias": 4096, |
| "transformer.layers.10.4.ff.2.weight": 4194304, |
| "transformer.layers.10.4.ff.2.bias": 1024, |
| "transformer.layers.11.1.g": 1024, |
| "transformer.layers.11.2.to_q.weight": 1048576, |
| "transformer.layers.11.2.to_q.bias": 1024, |
| "transformer.layers.11.2.to_k.weight": 1048576, |
| "transformer.layers.11.2.to_k.bias": 1024, |
| "transformer.layers.11.2.to_v.weight": 1048576, |
| "transformer.layers.11.2.to_v.bias": 1024, |
| "transformer.layers.11.2.to_out.0.weight": 1048576, |
| "transformer.layers.11.2.to_out.0.bias": 1024, |
| "transformer.layers.11.3.g": 1024, |
| "transformer.layers.11.4.ff.0.0.weight": 4194304, |
| "transformer.layers.11.4.ff.0.0.bias": 4096, |
| "transformer.layers.11.4.ff.2.weight": 4194304, |
| "transformer.layers.11.4.ff.2.bias": 1024, |
| "transformer.layers.12.1.g": 1024, |
| "transformer.layers.12.2.to_q.weight": 1048576, |
| "transformer.layers.12.2.to_q.bias": 1024, |
| "transformer.layers.12.2.to_k.weight": 1048576, |
| "transformer.layers.12.2.to_k.bias": 1024, |
| "transformer.layers.12.2.to_v.weight": 1048576, |
| "transformer.layers.12.2.to_v.bias": 1024, |
| "transformer.layers.12.2.to_out.0.weight": 1048576, |
| "transformer.layers.12.2.to_out.0.bias": 1024, |
| "transformer.layers.12.3.g": 1024, |
| "transformer.layers.12.4.ff.0.0.weight": 4194304, |
| "transformer.layers.12.4.ff.0.0.bias": 4096, |
| "transformer.layers.12.4.ff.2.weight": 4194304, |
| "transformer.layers.12.4.ff.2.bias": 1024, |
| "transformer.layers.13.0.weight": 2097152, |
| "transformer.layers.13.1.g": 1024, |
| "transformer.layers.13.2.to_q.weight": 1048576, |
| "transformer.layers.13.2.to_q.bias": 1024, |
| "transformer.layers.13.2.to_k.weight": 1048576, |
| "transformer.layers.13.2.to_k.bias": 1024, |
| "transformer.layers.13.2.to_v.weight": 1048576, |
| "transformer.layers.13.2.to_v.bias": 1024, |
| "transformer.layers.13.2.to_out.0.weight": 1048576, |
| "transformer.layers.13.2.to_out.0.bias": 1024, |
| "transformer.layers.13.3.g": 1024, |
| "transformer.layers.13.4.ff.0.0.weight": 4194304, |
| "transformer.layers.13.4.ff.0.0.bias": 4096, |
| "transformer.layers.13.4.ff.2.weight": 4194304, |
| "transformer.layers.13.4.ff.2.bias": 1024, |
| "transformer.layers.14.0.weight": 2097152, |
| "transformer.layers.14.1.g": 1024, |
| "transformer.layers.14.2.to_q.weight": 1048576, |
| "transformer.layers.14.2.to_q.bias": 1024, |
| "transformer.layers.14.2.to_k.weight": 1048576, |
| "transformer.layers.14.2.to_k.bias": 1024, |
| "transformer.layers.14.2.to_v.weight": 1048576, |
| "transformer.layers.14.2.to_v.bias": 1024, |
| "transformer.layers.14.2.to_out.0.weight": 1048576, |
| "transformer.layers.14.2.to_out.0.bias": 1024, |
| "transformer.layers.14.3.g": 1024, |
| "transformer.layers.14.4.ff.0.0.weight": 4194304, |
| "transformer.layers.14.4.ff.0.0.bias": 4096, |
| "transformer.layers.14.4.ff.2.weight": 4194304, |
| "transformer.layers.14.4.ff.2.bias": 1024, |
| "transformer.layers.15.0.weight": 2097152, |
| "transformer.layers.15.1.g": 1024, |
| "transformer.layers.15.2.to_q.weight": 1048576, |
| "transformer.layers.15.2.to_q.bias": 1024, |
| "transformer.layers.15.2.to_k.weight": 1048576, |
| "transformer.layers.15.2.to_k.bias": 1024, |
| "transformer.layers.15.2.to_v.weight": 1048576, |
| "transformer.layers.15.2.to_v.bias": 1024, |
| "transformer.layers.15.2.to_out.0.weight": 1048576, |
| "transformer.layers.15.2.to_out.0.bias": 1024, |
| "transformer.layers.15.3.g": 1024, |
| "transformer.layers.15.4.ff.0.0.weight": 4194304, |
| "transformer.layers.15.4.ff.0.0.bias": 4096, |
| "transformer.layers.15.4.ff.2.weight": 4194304, |
| "transformer.layers.15.4.ff.2.bias": 1024, |
| "transformer.layers.16.0.weight": 2097152, |
| "transformer.layers.16.1.g": 1024, |
| "transformer.layers.16.2.to_q.weight": 1048576, |
| "transformer.layers.16.2.to_q.bias": 1024, |
| "transformer.layers.16.2.to_k.weight": 1048576, |
| "transformer.layers.16.2.to_k.bias": 1024, |
| "transformer.layers.16.2.to_v.weight": 1048576, |
| "transformer.layers.16.2.to_v.bias": 1024, |
| "transformer.layers.16.2.to_out.0.weight": 1048576, |
| "transformer.layers.16.2.to_out.0.bias": 1024, |
| "transformer.layers.16.3.g": 1024, |
| "transformer.layers.16.4.ff.0.0.weight": 4194304, |
| "transformer.layers.16.4.ff.0.0.bias": 4096, |
| "transformer.layers.16.4.ff.2.weight": 4194304, |
| "transformer.layers.16.4.ff.2.bias": 1024, |
| "transformer.layers.17.0.weight": 2097152, |
| "transformer.layers.17.1.g": 1024, |
| "transformer.layers.17.2.to_q.weight": 1048576, |
| "transformer.layers.17.2.to_q.bias": 1024, |
| "transformer.layers.17.2.to_k.weight": 1048576, |
| "transformer.layers.17.2.to_k.bias": 1024, |
| "transformer.layers.17.2.to_v.weight": 1048576, |
| "transformer.layers.17.2.to_v.bias": 1024, |
| "transformer.layers.17.2.to_out.0.weight": 1048576, |
| "transformer.layers.17.2.to_out.0.bias": 1024, |
| "transformer.layers.17.3.g": 1024, |
| "transformer.layers.17.4.ff.0.0.weight": 4194304, |
| "transformer.layers.17.4.ff.0.0.bias": 4096, |
| "transformer.layers.17.4.ff.2.weight": 4194304, |
| "transformer.layers.17.4.ff.2.bias": 1024, |
| "transformer.layers.18.0.weight": 2097152, |
| "transformer.layers.18.1.g": 1024, |
| "transformer.layers.18.2.to_q.weight": 1048576, |
| "transformer.layers.18.2.to_q.bias": 1024, |
| "transformer.layers.18.2.to_k.weight": 1048576, |
| "transformer.layers.18.2.to_k.bias": 1024, |
| "transformer.layers.18.2.to_v.weight": 1048576, |
| "transformer.layers.18.2.to_v.bias": 1024, |
| "transformer.layers.18.2.to_out.0.weight": 1048576, |
| "transformer.layers.18.2.to_out.0.bias": 1024, |
| "transformer.layers.18.3.g": 1024, |
| "transformer.layers.18.4.ff.0.0.weight": 4194304, |
| "transformer.layers.18.4.ff.0.0.bias": 4096, |
| "transformer.layers.18.4.ff.2.weight": 4194304, |
| "transformer.layers.18.4.ff.2.bias": 1024, |
| "transformer.layers.19.0.weight": 2097152, |
| "transformer.layers.19.1.g": 1024, |
| "transformer.layers.19.2.to_q.weight": 1048576, |
| "transformer.layers.19.2.to_q.bias": 1024, |
| "transformer.layers.19.2.to_k.weight": 1048576, |
| "transformer.layers.19.2.to_k.bias": 1024, |
| "transformer.layers.19.2.to_v.weight": 1048576, |
| "transformer.layers.19.2.to_v.bias": 1024, |
| "transformer.layers.19.2.to_out.0.weight": 1048576, |
| "transformer.layers.19.2.to_out.0.bias": 1024, |
| "transformer.layers.19.3.g": 1024, |
| "transformer.layers.19.4.ff.0.0.weight": 4194304, |
| "transformer.layers.19.4.ff.0.0.bias": 4096, |
| "transformer.layers.19.4.ff.2.weight": 4194304, |
| "transformer.layers.19.4.ff.2.bias": 1024, |
| "transformer.layers.20.0.weight": 2097152, |
| "transformer.layers.20.1.g": 1024, |
| "transformer.layers.20.2.to_q.weight": 1048576, |
| "transformer.layers.20.2.to_q.bias": 1024, |
| "transformer.layers.20.2.to_k.weight": 1048576, |
| "transformer.layers.20.2.to_k.bias": 1024, |
| "transformer.layers.20.2.to_v.weight": 1048576, |
| "transformer.layers.20.2.to_v.bias": 1024, |
| "transformer.layers.20.2.to_out.0.weight": 1048576, |
| "transformer.layers.20.2.to_out.0.bias": 1024, |
| "transformer.layers.20.3.g": 1024, |
| "transformer.layers.20.4.ff.0.0.weight": 4194304, |
| "transformer.layers.20.4.ff.0.0.bias": 4096, |
| "transformer.layers.20.4.ff.2.weight": 4194304, |
| "transformer.layers.20.4.ff.2.bias": 1024, |
| "transformer.layers.21.0.weight": 2097152, |
| "transformer.layers.21.1.g": 1024, |
| "transformer.layers.21.2.to_q.weight": 1048576, |
| "transformer.layers.21.2.to_q.bias": 1024, |
| "transformer.layers.21.2.to_k.weight": 1048576, |
| "transformer.layers.21.2.to_k.bias": 1024, |
| "transformer.layers.21.2.to_v.weight": 1048576, |
| "transformer.layers.21.2.to_v.bias": 1024, |
| "transformer.layers.21.2.to_out.0.weight": 1048576, |
| "transformer.layers.21.2.to_out.0.bias": 1024, |
| "transformer.layers.21.3.g": 1024, |
| "transformer.layers.21.4.ff.0.0.weight": 4194304, |
| "transformer.layers.21.4.ff.0.0.bias": 4096, |
| "transformer.layers.21.4.ff.2.weight": 4194304, |
| "transformer.layers.21.4.ff.2.bias": 1024, |
| "transformer.layers.22.0.weight": 2097152, |
| "transformer.layers.22.1.g": 1024, |
| "transformer.layers.22.2.to_q.weight": 1048576, |
| "transformer.layers.22.2.to_q.bias": 1024, |
| "transformer.layers.22.2.to_k.weight": 1048576, |
| "transformer.layers.22.2.to_k.bias": 1024, |
| "transformer.layers.22.2.to_v.weight": 1048576, |
| "transformer.layers.22.2.to_v.bias": 1024, |
| "transformer.layers.22.2.to_out.0.weight": 1048576, |
| "transformer.layers.22.2.to_out.0.bias": 1024, |
| "transformer.layers.22.3.g": 1024, |
| "transformer.layers.22.4.ff.0.0.weight": 4194304, |
| "transformer.layers.22.4.ff.0.0.bias": 4096, |
| "transformer.layers.22.4.ff.2.weight": 4194304, |
| "transformer.layers.22.4.ff.2.bias": 1024, |
| "transformer.layers.23.0.weight": 2097152, |
| "transformer.layers.23.1.g": 1024, |
| "transformer.layers.23.2.to_q.weight": 1048576, |
| "transformer.layers.23.2.to_q.bias": 1024, |
| "transformer.layers.23.2.to_k.weight": 1048576, |
| "transformer.layers.23.2.to_k.bias": 1024, |
| "transformer.layers.23.2.to_v.weight": 1048576, |
| "transformer.layers.23.2.to_v.bias": 1024, |
| "transformer.layers.23.2.to_out.0.weight": 1048576, |
| "transformer.layers.23.2.to_out.0.bias": 1024, |
| "transformer.layers.23.3.g": 1024, |
| "transformer.layers.23.4.ff.0.0.weight": 4194304, |
| "transformer.layers.23.4.ff.0.0.bias": 4096, |
| "transformer.layers.23.4.ff.2.weight": 4194304, |
| "transformer.layers.23.4.ff.2.bias": 1024, |
| "transformer.layers.24.0.weight": 2097152, |
| "transformer.layers.24.1.g": 1024, |
| "transformer.layers.24.2.to_q.weight": 1048576, |
| "transformer.layers.24.2.to_q.bias": 1024, |
| "transformer.layers.24.2.to_k.weight": 1048576, |
| "transformer.layers.24.2.to_k.bias": 1024, |
| "transformer.layers.24.2.to_v.weight": 1048576, |
| "transformer.layers.24.2.to_v.bias": 1024, |
| "transformer.layers.24.2.to_out.0.weight": 1048576, |
| "transformer.layers.24.2.to_out.0.bias": 1024, |
| "transformer.layers.24.3.g": 1024, |
| "transformer.layers.24.4.ff.0.0.weight": 4194304, |
| "transformer.layers.24.4.ff.0.0.bias": 4096, |
| "transformer.layers.24.4.ff.2.weight": 4194304, |
| "transformer.layers.24.4.ff.2.bias": 1024, |
| "transformer.layers.25.0.weight": 2097152, |
| "transformer.layers.25.1.g": 1024, |
| "transformer.layers.25.2.to_q.weight": 1048576, |
| "transformer.layers.25.2.to_q.bias": 1024, |
| "transformer.layers.25.2.to_k.weight": 1048576, |
| "transformer.layers.25.2.to_k.bias": 1024, |
| "transformer.layers.25.2.to_v.weight": 1048576, |
| "transformer.layers.25.2.to_v.bias": 1024, |
| "transformer.layers.25.2.to_out.0.weight": 1048576, |
| "transformer.layers.25.2.to_out.0.bias": 1024, |
| "transformer.layers.25.3.g": 1024, |
| "transformer.layers.25.4.ff.0.0.weight": 4194304, |
| "transformer.layers.25.4.ff.0.0.bias": 4096, |
| "transformer.layers.25.4.ff.2.weight": 4194304, |
| "transformer.layers.25.4.ff.2.bias": 1024, |
| "transformer.norm_out.g": 1024, |
| "transformer.proj_out.weight": 102400, |
| "transformer.proj_out.bias": 100 |
| }, |
| "important_layers": [ |
| "transformer.time_embed.time_mlp.0.weight", |
| "transformer.time_embed.time_mlp.2.weight", |
| "transformer.text_embed.text_embed.weight", |
| "transformer.input_embed.proj.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight", |
| "transformer.layers.0.2.to_q.weight", |
| "transformer.layers.0.2.to_k.weight", |
| "transformer.layers.0.2.to_v.weight", |
| "transformer.layers.0.2.to_out.0.weight", |
| "transformer.layers.0.4.ff.0.0.weight", |
| "transformer.layers.0.4.ff.2.weight", |
| "transformer.layers.1.2.to_q.weight", |
| "transformer.layers.1.2.to_k.weight", |
| "transformer.layers.1.2.to_v.weight", |
| "transformer.layers.1.2.to_out.0.weight", |
| "transformer.layers.1.4.ff.0.0.weight", |
| "transformer.layers.1.4.ff.2.weight", |
| "transformer.layers.2.2.to_q.weight", |
| "transformer.layers.2.2.to_k.weight", |
| "transformer.layers.2.2.to_v.weight", |
| "transformer.layers.2.2.to_out.0.weight", |
| "transformer.layers.2.4.ff.0.0.weight", |
| "transformer.layers.2.4.ff.2.weight", |
| "transformer.layers.3.2.to_q.weight", |
| "transformer.layers.3.2.to_k.weight", |
| "transformer.layers.3.2.to_v.weight", |
| "transformer.layers.3.2.to_out.0.weight", |
| "transformer.layers.3.4.ff.0.0.weight", |
| "transformer.layers.3.4.ff.2.weight", |
| "transformer.layers.4.2.to_q.weight", |
| "transformer.layers.4.2.to_k.weight", |
| "transformer.layers.4.2.to_v.weight", |
| "transformer.layers.4.2.to_out.0.weight", |
| "transformer.layers.4.4.ff.0.0.weight", |
| "transformer.layers.4.4.ff.2.weight", |
| "transformer.layers.5.2.to_q.weight", |
| "transformer.layers.5.2.to_k.weight", |
| "transformer.layers.5.2.to_v.weight", |
| "transformer.layers.5.2.to_out.0.weight", |
| "transformer.layers.5.4.ff.0.0.weight", |
| "transformer.layers.5.4.ff.2.weight", |
| "transformer.layers.6.2.to_q.weight", |
| "transformer.layers.6.2.to_k.weight", |
| "transformer.layers.6.2.to_v.weight", |
| "transformer.layers.6.2.to_out.0.weight", |
| "transformer.layers.6.4.ff.0.0.weight", |
| "transformer.layers.6.4.ff.2.weight", |
| "transformer.layers.7.2.to_q.weight", |
| "transformer.layers.7.2.to_k.weight", |
| "transformer.layers.7.2.to_v.weight", |
| "transformer.layers.7.2.to_out.0.weight", |
| "transformer.layers.7.4.ff.0.0.weight", |
| "transformer.layers.7.4.ff.2.weight", |
| "transformer.layers.8.4.ff.0.0.weight", |
| "transformer.layers.8.4.ff.2.weight", |
| "transformer.layers.9.4.ff.0.0.weight", |
| "transformer.layers.9.4.ff.2.weight", |
| "transformer.layers.10.4.ff.0.0.weight", |
| "transformer.layers.10.4.ff.2.weight", |
| "transformer.layers.11.4.ff.0.0.weight", |
| "transformer.layers.11.4.ff.2.weight", |
| "transformer.layers.12.4.ff.0.0.weight", |
| "transformer.layers.12.4.ff.2.weight", |
| "transformer.layers.13.0.weight", |
| "transformer.layers.13.4.ff.0.0.weight", |
| "transformer.layers.13.4.ff.2.weight", |
| "transformer.layers.14.0.weight", |
| "transformer.layers.14.4.ff.0.0.weight", |
| "transformer.layers.14.4.ff.2.weight", |
| "transformer.layers.15.0.weight", |
| "transformer.layers.15.4.ff.0.0.weight", |
| "transformer.layers.15.4.ff.2.weight", |
| "transformer.layers.16.4.ff.0.0.weight", |
| "transformer.layers.16.4.ff.2.weight", |
| "transformer.layers.17.4.ff.0.0.weight", |
| "transformer.layers.17.4.ff.2.weight", |
| "transformer.layers.18.4.ff.0.0.weight", |
| "transformer.layers.18.4.ff.2.weight", |
| "transformer.layers.19.4.ff.0.0.weight", |
| "transformer.layers.19.4.ff.2.weight", |
| "transformer.layers.20.4.ff.0.0.weight", |
| "transformer.layers.20.4.ff.2.weight", |
| "transformer.layers.21.4.ff.0.0.weight", |
| "transformer.layers.21.4.ff.2.weight", |
| "transformer.layers.22.4.ff.0.0.weight", |
| "transformer.layers.22.4.ff.2.weight", |
| "transformer.layers.23.4.ff.0.0.weight", |
| "transformer.layers.23.4.ff.2.weight", |
| "transformer.layers.24.4.ff.0.0.weight", |
| "transformer.layers.24.4.ff.2.weight", |
| "transformer.layers.25.4.ff.0.0.weight", |
| "transformer.layers.25.4.ff.2.weight" |
| ], |
| "bottleneck_layers": [], |
| "recommendations": { |
| "focus_layers": [ |
| "transformer.time_embed.time_mlp.0.weight", |
| "transformer.time_embed.time_mlp.2.weight", |
| "transformer.text_embed.text_embed.weight", |
| "transformer.input_embed.proj.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight", |
| "transformer.layers.0.2.to_q.weight", |
| "transformer.layers.0.2.to_k.weight", |
| "transformer.layers.0.2.to_v.weight", |
| "transformer.layers.0.2.to_out.0.weight", |
| "transformer.layers.0.4.ff.0.0.weight", |
| "transformer.layers.0.4.ff.2.weight", |
| "transformer.layers.1.2.to_q.weight", |
| "transformer.layers.1.2.to_k.weight", |
| "transformer.layers.1.2.to_v.weight", |
| "transformer.layers.1.2.to_out.0.weight", |
| "transformer.layers.1.4.ff.0.0.weight", |
| "transformer.layers.1.4.ff.2.weight", |
| "transformer.layers.2.2.to_q.weight", |
| "transformer.layers.2.2.to_k.weight", |
| "transformer.layers.2.2.to_v.weight", |
| "transformer.layers.2.2.to_out.0.weight", |
| "transformer.layers.2.4.ff.0.0.weight", |
| "transformer.layers.2.4.ff.2.weight", |
| "transformer.layers.3.2.to_q.weight", |
| "transformer.layers.3.2.to_k.weight", |
| "transformer.layers.3.2.to_v.weight", |
| "transformer.layers.3.2.to_out.0.weight", |
| "transformer.layers.3.4.ff.0.0.weight", |
| "transformer.layers.3.4.ff.2.weight", |
| "transformer.layers.4.2.to_q.weight", |
| "transformer.layers.4.2.to_k.weight", |
| "transformer.layers.4.2.to_v.weight", |
| "transformer.layers.4.2.to_out.0.weight", |
| "transformer.layers.4.4.ff.0.0.weight", |
| "transformer.layers.4.4.ff.2.weight", |
| "transformer.layers.5.2.to_q.weight", |
| "transformer.layers.5.2.to_k.weight", |
| "transformer.layers.5.2.to_v.weight", |
| "transformer.layers.5.2.to_out.0.weight", |
| "transformer.layers.5.4.ff.0.0.weight", |
| "transformer.layers.5.4.ff.2.weight", |
| "transformer.layers.6.2.to_q.weight", |
| "transformer.layers.6.2.to_k.weight", |
| "transformer.layers.6.2.to_v.weight", |
| "transformer.layers.6.2.to_out.0.weight", |
| "transformer.layers.6.4.ff.0.0.weight", |
| "transformer.layers.6.4.ff.2.weight", |
| "transformer.layers.7.2.to_q.weight", |
| "transformer.layers.7.2.to_k.weight", |
| "transformer.layers.7.2.to_v.weight", |
| "transformer.layers.7.2.to_out.0.weight", |
| "transformer.layers.7.4.ff.0.0.weight", |
| "transformer.layers.7.4.ff.2.weight", |
| "transformer.layers.8.4.ff.0.0.weight", |
| "transformer.layers.8.4.ff.2.weight", |
| "transformer.layers.9.4.ff.0.0.weight", |
| "transformer.layers.9.4.ff.2.weight", |
| "transformer.layers.10.4.ff.0.0.weight", |
| "transformer.layers.10.4.ff.2.weight", |
| "transformer.layers.11.4.ff.0.0.weight", |
| "transformer.layers.11.4.ff.2.weight", |
| "transformer.layers.12.4.ff.0.0.weight", |
| "transformer.layers.12.4.ff.2.weight", |
| "transformer.layers.13.0.weight", |
| "transformer.layers.13.4.ff.0.0.weight", |
| "transformer.layers.13.4.ff.2.weight", |
| "transformer.layers.14.0.weight", |
| "transformer.layers.14.4.ff.0.0.weight", |
| "transformer.layers.14.4.ff.2.weight", |
| "transformer.layers.15.0.weight", |
| "transformer.layers.15.4.ff.0.0.weight", |
| "transformer.layers.15.4.ff.2.weight", |
| "transformer.layers.16.4.ff.0.0.weight", |
| "transformer.layers.16.4.ff.2.weight", |
| "transformer.layers.17.4.ff.0.0.weight", |
| "transformer.layers.17.4.ff.2.weight", |
| "transformer.layers.18.4.ff.0.0.weight", |
| "transformer.layers.18.4.ff.2.weight", |
| "transformer.layers.19.4.ff.0.0.weight", |
| "transformer.layers.19.4.ff.2.weight", |
| "transformer.layers.20.4.ff.0.0.weight", |
| "transformer.layers.20.4.ff.2.weight", |
| "transformer.layers.21.4.ff.0.0.weight", |
| "transformer.layers.21.4.ff.2.weight", |
| "transformer.layers.22.4.ff.0.0.weight", |
| "transformer.layers.22.4.ff.2.weight", |
| "transformer.layers.23.4.ff.0.0.weight", |
| "transformer.layers.23.4.ff.2.weight", |
| "transformer.layers.24.4.ff.0.0.weight", |
| "transformer.layers.24.4.ff.2.weight", |
| "transformer.layers.25.4.ff.0.0.weight", |
| "transformer.layers.25.4.ff.2.weight" |
| ] |
| }, |
| "total_parameters": 391, |
| "total_elements": 360755948, |
| "param_ranges": { |
| "transformer.time_embed.time_mlp.0.weight": { |
| "min": -0.43045347929000854, |
| "max": 0.2989708483219147, |
| "mean": -0.002559528686106205, |
| "std": 0.042551685124635696, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 256 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.0.bias": { |
| "min": -0.06319475919008255, |
| "max": 0.10763752460479736, |
| "mean": 0.0005878363735973835, |
| "std": 0.0341116227209568, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.2.weight": { |
| "min": -0.4125802516937256, |
| "max": 0.8362879157066345, |
| "mean": -0.00021037086844444275, |
| "std": 0.024107296019792557, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.2.bias": { |
| "min": -0.11548256129026413, |
| "max": 0.3214675784111023, |
| "mean": -0.0009404525626450777, |
| "std": 0.01957694999873638, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.text_embed.text_embed.weight": { |
| "min": -2.7917673587799072, |
| "max": 2.87048602104187, |
| "mean": -0.000364800012903288, |
| "std": 0.6153724193572998, |
| "sparsity": 0.0, |
| "shape": [ |
| 2546, |
| 100 |
| ] |
| }, |
| "transformer.input_embed.proj.weight": { |
| "min": -0.2789378762245178, |
| "max": 0.38190650939941406, |
| "mean": 0.00042029444011859596, |
| "std": 0.04275033250451088, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 300 |
| ] |
| }, |
| "transformer.input_embed.proj.bias": { |
| "min": -0.22229844331741333, |
| "max": 0.20966938138008118, |
| "mean": -0.004494193941354752, |
| "std": 0.04090972617268562, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { |
| "min": -0.42792314291000366, |
| "max": 0.4753040671348572, |
| "mean": 2.5448428004892776e-06, |
| "std": 0.02450907975435257, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 64, |
| 31 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { |
| "min": -0.3254566490650177, |
| "max": 0.15697774291038513, |
| "mean": -0.046701110899448395, |
| "std": 0.05157899484038353, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { |
| "min": -0.41040217876434326, |
| "max": 0.3545200824737549, |
| "mean": -0.00012632929428946227, |
| "std": 0.023601176217198372, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 64, |
| 31 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { |
| "min": -0.22976312041282654, |
| "max": 0.26262250542640686, |
| "mean": -0.029148582369089127, |
| "std": 0.049347616732120514, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.1.g": { |
| "min": 0.25461843609809875, |
| "max": 0.8200721740722656, |
| "mean": 0.5254405736923218, |
| "std": 0.08080819994211197, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_q.weight": { |
| "min": -0.2969217598438263, |
| "max": 0.2653011679649353, |
| "mean": -0.00042407598812133074, |
| "std": 0.03210418298840523, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_q.bias": { |
| "min": -0.09274514019489288, |
| "max": 0.12481185793876648, |
| "mean": 0.0006486732745543122, |
| "std": 0.025742683559656143, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_k.weight": { |
| "min": -0.29045799374580383, |
| "max": 0.28142276406288147, |
| "mean": -7.696857210248709e-05, |
| "std": 0.03093627467751503, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_k.bias": { |
| "min": -5.8994598388671875, |
| "max": 5.814236164093018, |
| "mean": -0.009332070127129555, |
| "std": 1.2954570055007935, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_v.weight": { |
| "min": -0.4248283803462982, |
| "max": 0.3437764346599579, |
| "mean": 9.760602551978081e-05, |
| "std": 0.029952971264719963, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_v.bias": { |
| "min": -0.028973544016480446, |
| "max": 0.027646001428365707, |
| "mean": -0.000311461859382689, |
| "std": 0.01257230993360281, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_out.0.weight": { |
| "min": -0.45393574237823486, |
| "max": 0.4486967921257019, |
| "mean": 2.2734935555490665e-05, |
| "std": 0.023855067789554596, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_out.0.bias": { |
| "min": -0.08869241178035736, |
| "max": 0.09115342795848846, |
| "mean": 0.0022729213815182447, |
| "std": 0.019511748105287552, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.3.g": { |
| "min": 0.26661837100982666, |
| "max": 1.0562738180160522, |
| "mean": 0.5311292409896851, |
| "std": 0.10441415756940842, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.4.ff.0.0.weight": { |
| "min": -0.5744583010673523, |
| "max": 0.6083983182907104, |
| "mean": -0.0004310230724513531, |
| "std": 0.03859498351812363, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.4.ff.0.0.bias": { |
| "min": -0.18180975317955017, |
| "max": 0.04576439782977104, |
| "mean": -0.029441392049193382, |
| "std": 0.0425901859998703, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.0.4.ff.2.weight": { |
| "min": -1.1666754484176636, |
| "max": 1.6346206665039062, |
| "mean": 0.00031845836201682687, |
| "std": 0.027693821117281914, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.0.4.ff.2.bias": { |
| "min": -0.16254130005836487, |
| "max": 0.20572608709335327, |
| "mean": -0.021116478368639946, |
| "std": 0.02794043906033039, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.1.g": { |
| "min": 0.22449950873851776, |
| "max": 0.8436615467071533, |
| "mean": 0.48752841353416443, |
| "std": 0.07519911974668503, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_q.weight": { |
| "min": -0.25530511140823364, |
| "max": 0.30584144592285156, |
| "mean": -9.390279956278391e-06, |
| "std": 0.03347048908472061, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_q.bias": { |
| "min": -0.09549093246459961, |
| "max": 0.1104247123003006, |
| "mean": 5.642877658829093e-05, |
| "std": 0.02698560617864132, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_k.weight": { |
| "min": -0.29746732115745544, |
| "max": 0.29597631096839905, |
| "mean": 5.020098251407035e-05, |
| "std": 0.03253835067152977, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_k.bias": { |
| "min": -5.164289474487305, |
| "max": 5.084513187408447, |
| "mean": -0.014594512060284615, |
| "std": 1.157379150390625, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_v.weight": { |
| "min": -0.34489384293556213, |
| "max": 0.34349551796913147, |
| "mean": 7.88411489338614e-05, |
| "std": 0.030058156698942184, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_v.bias": { |
| "min": -0.03615685552358627, |
| "max": 0.033247072249650955, |
| "mean": -0.0001437932369299233, |
| "std": 0.0130230151116848, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_out.0.weight": { |
| "min": -0.31528598070144653, |
| "max": 0.3752017617225647, |
| "mean": -2.1658630430465564e-05, |
| "std": 0.02405543439090252, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_out.0.bias": { |
| "min": -0.10527704656124115, |
| "max": 0.12188438326120377, |
| "mean": -0.001954131992533803, |
| "std": 0.028842832893133163, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.3.g": { |
| "min": 0.3118007183074951, |
| "max": 1.1209547519683838, |
| "mean": 0.6662399172782898, |
| "std": 0.09774922579526901, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.4.ff.0.0.weight": { |
| "min": -0.8724845051765442, |
| "max": 0.6275652050971985, |
| "mean": 0.0016756996046751738, |
| "std": 0.04743832349777222, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.4.ff.0.0.bias": { |
| "min": -0.2710001766681671, |
| "max": 0.034087300300598145, |
| "mean": -0.04660267010331154, |
| "std": 0.040595393627882004, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.1.4.ff.2.weight": { |
| "min": -0.9202765226364136, |
| "max": 0.964392364025116, |
| "mean": 0.0010208573658019304, |
| "std": 0.040701836347579956, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.1.4.ff.2.bias": { |
| "min": -0.14455123245716095, |
| "max": 0.07482488453388214, |
| "mean": -0.009084297344088554, |
| "std": 0.025694943964481354, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.1.g": { |
| "min": 0.2397618293762207, |
| "max": 0.7124034762382507, |
| "mean": 0.4472024440765381, |
| "std": 0.0593235045671463, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_q.weight": { |
| "min": -0.2730177044868469, |
| "max": 0.29747670888900757, |
| "mean": 8.653647455503233e-06, |
| "std": 0.03547436371445656, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_q.bias": { |
| "min": -0.11902837455272675, |
| "max": 0.1184682548046112, |
| "mean": 0.0007503863889724016, |
| "std": 0.027607794851064682, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_k.weight": { |
| "min": -0.28101518750190735, |
| "max": 0.27942103147506714, |
| "mean": -7.649646431673318e-05, |
| "std": 0.03510240092873573, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_k.bias": { |
| "min": -2.509594440460205, |
| "max": 2.5215904712677, |
| "mean": 0.026745371520519257, |
| "std": 0.5867790579795837, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_v.weight": { |
| "min": -0.2210964858531952, |
| "max": 0.2716039717197418, |
| "mean": 2.442306140437722e-06, |
| "std": 0.030731501057744026, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_v.bias": { |
| "min": -0.03315361589193344, |
| "max": 0.031151030212640762, |
| "mean": 0.00011695168359437957, |
| "std": 0.012393992394208908, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_out.0.weight": { |
| "min": -0.23539169132709503, |
| "max": 0.23184844851493835, |
| "mean": 5.725533628719859e-05, |
| "std": 0.025697585195302963, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_out.0.bias": { |
| "min": -0.13603144884109497, |
| "max": 0.12801550328731537, |
| "mean": -0.005497873295098543, |
| "std": 0.039962731301784515, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.3.g": { |
| "min": 0.35472768545150757, |
| "max": 1.1723560094833374, |
| "mean": 0.7105388641357422, |
| "std": 0.10377441346645355, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.4.ff.0.0.weight": { |
| "min": -0.6173874735832214, |
| "max": 0.5556294322013855, |
| "mean": 0.0011603377060964704, |
| "std": 0.04611397534608841, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.4.ff.0.0.bias": { |
| "min": -0.18947651982307434, |
| "max": 0.024928653612732887, |
| "mean": -0.03484659641981125, |
| "std": 0.028622934594750404, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.2.4.ff.2.weight": { |
| "min": -1.1309525966644287, |
| "max": 0.9703920483589172, |
| "mean": 0.0003591428976505995, |
| "std": 0.04234250634908676, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.2.4.ff.2.bias": { |
| "min": -0.59785395860672, |
| "max": 0.0627356544137001, |
| "mean": -0.004881600849330425, |
| "std": 0.028621168807148933, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.1.g": { |
| "min": 0.3753371834754944, |
| "max": 0.9404803514480591, |
| "mean": 0.5924646854400635, |
| "std": 0.06694936007261276, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_q.weight": { |
| "min": -0.3917739689350128, |
| "max": 0.36935487389564514, |
| "mean": 7.001077028689906e-05, |
| "std": 0.03718659654259682, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_q.bias": { |
| "min": -0.11900685727596283, |
| "max": 0.1365460306406021, |
| "mean": 0.0009158444590866566, |
| "std": 0.029187751933932304, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_k.weight": { |
| "min": -0.6190850138664246, |
| "max": 0.5087974667549133, |
| "mean": 1.5220098248391878e-05, |
| "std": 0.036439333111047745, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_k.bias": { |
| "min": -8.186792373657227, |
| "max": 8.788902282714844, |
| "mean": -0.10927547514438629, |
| "std": 1.698854923248291, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_v.weight": { |
| "min": -0.2765258252620697, |
| "max": 0.23972086608409882, |
| "mean": 5.2279683586675674e-05, |
| "std": 0.03261309862136841, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_v.bias": { |
| "min": -0.051504429429769516, |
| "max": 0.0394677110016346, |
| "mean": 9.376452362630516e-05, |
| "std": 0.012969755567610264, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_out.0.weight": { |
| "min": -0.23076090216636658, |
| "max": 0.23486877977848053, |
| "mean": -2.2034959329175763e-05, |
| "std": 0.02938973717391491, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_out.0.bias": { |
| "min": -0.20423753559589386, |
| "max": 0.10524258017539978, |
| "mean": -0.004020648077130318, |
| "std": 0.03263989835977554, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.3.g": { |
| "min": 0.3396383821964264, |
| "max": 1.0124459266662598, |
| "mean": 0.7007039785385132, |
| "std": 0.09675922244787216, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.4.ff.0.0.weight": { |
| "min": -0.5647669434547424, |
| "max": 0.8336009979248047, |
| "mean": 0.00041507231071591377, |
| "std": 0.042294517159461975, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.4.ff.0.0.bias": { |
| "min": -0.21213513612747192, |
| "max": 0.029952630400657654, |
| "mean": -0.03217371925711632, |
| "std": 0.026498902589082718, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.3.4.ff.2.weight": { |
| "min": -0.7548895478248596, |
| "max": 0.7191285490989685, |
| "mean": -1.5825342416064814e-05, |
| "std": 0.03683512657880783, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.3.4.ff.2.bias": { |
| "min": -0.26342180371284485, |
| "max": 0.106303870677948, |
| "mean": -0.0030142769683152437, |
| "std": 0.028873054310679436, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.1.g": { |
| "min": 0.2839457094669342, |
| "max": 0.695040225982666, |
| "mean": 0.4993869960308075, |
| "std": 0.04653431475162506, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_q.weight": { |
| "min": -0.27824723720550537, |
| "max": 0.23382486402988434, |
| "mean": -0.00011091126361861825, |
| "std": 0.03875747323036194, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_q.bias": { |
| "min": -0.1535824090242386, |
| "max": 0.12643294036388397, |
| "mean": -0.0022276602685451508, |
| "std": 0.03332621976733208, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_k.weight": { |
| "min": -0.4143897294998169, |
| "max": 0.6594640016555786, |
| "mean": -1.8512728274799883e-05, |
| "std": 0.03909672051668167, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_k.bias": { |
| "min": -4.237905979156494, |
| "max": 4.722469329833984, |
| "mean": -0.020456835627555847, |
| "std": 1.0076903104782104, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_v.weight": { |
| "min": -0.24504739046096802, |
| "max": 0.2075919508934021, |
| "mean": 4.4300948502495885e-05, |
| "std": 0.033962640911340714, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_v.bias": { |
| "min": -0.03446675091981888, |
| "max": 0.04485952481627464, |
| "mean": -2.2283929865807295e-05, |
| "std": 0.01263953372836113, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_out.0.weight": { |
| "min": -0.20111070573329926, |
| "max": 0.2064419686794281, |
| "mean": -2.9351647754083388e-05, |
| "std": 0.031020889058709145, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_out.0.bias": { |
| "min": -0.1998225450515747, |
| "max": 0.11318594217300415, |
| "mean": -0.002895027631893754, |
| "std": 0.034535519778728485, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.3.g": { |
| "min": 0.3667519986629486, |
| "max": 1.0576496124267578, |
| "mean": 0.6704938411712646, |
| "std": 0.06640732288360596, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.4.ff.0.0.weight": { |
| "min": -0.3984535038471222, |
| "max": 0.5021195411682129, |
| "mean": -3.873988316627219e-05, |
| "std": 0.04113014414906502, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.4.ff.0.0.bias": { |
| "min": -0.12865276634693146, |
| "max": 0.02695303224027157, |
| "mean": -0.0305329579859972, |
| "std": 0.021882230415940285, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.4.4.ff.2.weight": { |
| "min": -0.44948050379753113, |
| "max": 0.43325671553611755, |
| "mean": 7.534700125688687e-05, |
| "std": 0.03489053621888161, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.4.4.ff.2.bias": { |
| "min": -0.26749807596206665, |
| "max": 0.07307979464530945, |
| "mean": -0.0010903773363679647, |
| "std": 0.023135719820857048, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.1.g": { |
| "min": 0.28755003213882446, |
| "max": 0.6852815747261047, |
| "mean": 0.5245311260223389, |
| "std": 0.047535065561532974, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_q.weight": { |
| "min": -0.22266581654548645, |
| "max": 0.22331343591213226, |
| "mean": 1.5911335140117444e-05, |
| "std": 0.038949206471443176, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_q.bias": { |
| "min": -0.13634715974330902, |
| "max": 0.10933983325958252, |
| "mean": 0.00024775456404313445, |
| "std": 0.02920820191502571, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_k.weight": { |
| "min": -0.37493303418159485, |
| "max": 0.43759024143218994, |
| "mean": -9.405484888702631e-06, |
| "std": 0.03928741440176964, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_k.bias": { |
| "min": -3.8458516597747803, |
| "max": 4.99931526184082, |
| "mean": 0.0097417663782835, |
| "std": 0.8452187180519104, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_v.weight": { |
| "min": -0.22269685566425323, |
| "max": 0.22029872238636017, |
| "mean": -3.309251042082906e-07, |
| "std": 0.03441028296947479, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_v.bias": { |
| "min": -0.043786074966192245, |
| "max": 0.03593028709292412, |
| "mean": -0.0002595169935375452, |
| "std": 0.012078601866960526, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_out.0.weight": { |
| "min": -0.2127062827348709, |
| "max": 0.18842767179012299, |
| "mean": -1.7018646758515388e-05, |
| "std": 0.03153670206665993, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_out.0.bias": { |
| "min": -0.18093673884868622, |
| "max": 0.12075397372245789, |
| "mean": -0.0023954270873218775, |
| "std": 0.0412798747420311, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.3.g": { |
| "min": 0.4229053258895874, |
| "max": 0.9417746663093567, |
| "mean": 0.6626519560813904, |
| "std": 0.05681704729795456, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.4.ff.0.0.weight": { |
| "min": -0.3708723485469818, |
| "max": 0.4765413999557495, |
| "mean": -8.208492363337427e-05, |
| "std": 0.040889330208301544, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.4.ff.0.0.bias": { |
| "min": -0.2084851861000061, |
| "max": 0.02737521566450596, |
| "mean": -0.03023434244096279, |
| "std": 0.021364057436585426, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.5.4.ff.2.weight": { |
| "min": -0.3406664729118347, |
| "max": 0.7341601848602295, |
| "mean": 8.241336036007851e-05, |
| "std": 0.03476617485284805, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.5.4.ff.2.bias": { |
| "min": -0.24016013741493225, |
| "max": 0.05046252906322479, |
| "mean": -0.0011865145061165094, |
| "std": 0.02045980468392372, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.1.g": { |
| "min": 0.3058834671974182, |
| "max": 0.6534616947174072, |
| "mean": 0.5251225829124451, |
| "std": 0.04612237960100174, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_q.weight": { |
| "min": -0.3043194115161896, |
| "max": 0.2172033190727234, |
| "mean": 6.997850869083777e-05, |
| "std": 0.039497096091508865, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_q.bias": { |
| "min": -0.14911659061908722, |
| "max": 0.1309829205274582, |
| "mean": 0.00032657815609127283, |
| "std": 0.030455630272626877, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_k.weight": { |
| "min": -0.2569442689418793, |
| "max": 0.2018917053937912, |
| "mean": 3.1276180379791185e-05, |
| "std": 0.039488255977630615, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_k.bias": { |
| "min": -2.3362653255462646, |
| "max": 2.3758890628814697, |
| "mean": -0.026241008192300797, |
| "std": 0.44977059960365295, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_v.weight": { |
| "min": -0.18858182430267334, |
| "max": 0.21028441190719604, |
| "mean": 3.710644523380324e-05, |
| "std": 0.034793708473443985, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_v.bias": { |
| "min": -0.03167951852083206, |
| "max": 0.03567720949649811, |
| "mean": -0.0001978189975488931, |
| "std": 0.012288851663470268, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_out.0.weight": { |
| "min": -0.1882917732000351, |
| "max": 0.1702534258365631, |
| "mean": -6.83729158481583e-05, |
| "std": 0.03217038884758949, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_out.0.bias": { |
| "min": -0.13942022621631622, |
| "max": 0.1372338831424713, |
| "mean": -0.0025149777065962553, |
| "std": 0.05129906162619591, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.3.g": { |
| "min": 0.4670557677745819, |
| "max": 0.9555894136428833, |
| "mean": 0.668860912322998, |
| "std": 0.052772559225559235, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.4.ff.0.0.weight": { |
| "min": -0.32439276576042175, |
| "max": 0.30925771594047546, |
| "mean": -1.0448575267218985e-06, |
| "std": 0.04094531387090683, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.4.ff.0.0.bias": { |
| "min": -0.12486255913972855, |
| "max": 0.025668619200587273, |
| "mean": -0.030689772218465805, |
| "std": 0.019822947680950165, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.6.4.ff.2.weight": { |
| "min": -0.4394906163215637, |
| "max": 0.4453367292881012, |
| "mean": 9.582463098922744e-05, |
| "std": 0.03511909395456314, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.6.4.ff.2.bias": { |
| "min": -0.22461967170238495, |
| "max": 0.051830437034368515, |
| "mean": -0.0011815722100436687, |
| "std": 0.018466372042894363, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.1.g": { |
| "min": 0.3391477167606354, |
| "max": 0.739862322807312, |
| "mean": 0.558701753616333, |
| "std": 0.04139617085456848, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_q.weight": { |
| "min": -0.27299270033836365, |
| "max": 0.27884820103645325, |
| "mean": 2.0352346837171353e-05, |
| "std": 0.04105763137340546, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_q.bias": { |
| "min": -0.13679315149784088, |
| "max": 0.13977941870689392, |
| "mean": 0.0004920524079352617, |
| "std": 0.026632016524672508, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_k.weight": { |
| "min": -0.4905266761779785, |
| "max": 0.35576674342155457, |
| "mean": 8.910118776839226e-05, |
| "std": 0.04069532826542854, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_k.bias": { |
| "min": -2.297072649002075, |
| "max": 1.7451610565185547, |
| "mean": -0.02107967808842659, |
| "std": 0.5001281499862671, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_v.weight": { |
| "min": -0.21811611950397491, |
| "max": 0.19743309915065765, |
| "mean": -4.0164730307878926e-05, |
| "std": 0.034233368933200836, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_v.bias": { |
| "min": -0.04114250838756561, |
| "max": 0.03886367008090019, |
| "mean": -0.0001361201866529882, |
| "std": 0.01288355328142643, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_out.0.weight": { |
| "min": -0.17762865126132965, |
| "max": 0.1828955113887787, |
| "mean": 4.802473995368928e-05, |
| "std": 0.031556740403175354, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_out.0.bias": { |
| "min": -0.17992889881134033, |
| "max": 0.18389376997947693, |
| "mean": -0.002214584732428193, |
| "std": 0.054829709231853485, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.3.g": { |
| "min": 0.4741877317428589, |
| "max": 1.025841474533081, |
| "mean": 0.6452314257621765, |
| "std": 0.050352681428194046, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.4.ff.0.0.weight": { |
| "min": -0.27164527773857117, |
| "max": 0.30913278460502625, |
| "mean": 0.00011245411587879062, |
| "std": 0.04068151116371155, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.4.ff.0.0.bias": { |
| "min": -0.10526741296052933, |
| "max": 0.0267398189753294, |
| "mean": -0.029518909752368927, |
| "std": 0.017934836447238922, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.7.4.ff.2.weight": { |
| "min": -0.33933401107788086, |
| "max": 0.3291725814342499, |
| "mean": 5.2628944104071707e-05, |
| "std": 0.034412726759910583, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.7.4.ff.2.bias": { |
| "min": -0.18180128931999207, |
| "max": 0.04250966012477875, |
| "mean": -0.0010595148196443915, |
| "std": 0.017209524288773537, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.1.g": { |
| "min": 0.3251764476299286, |
| "max": 0.686564564704895, |
| "mean": 0.5111627578735352, |
| "std": 0.03695236146450043, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_q.weight": { |
| "min": -0.23392871022224426, |
| "max": 0.22538572549819946, |
| "mean": -3.6134006222710013e-05, |
| "std": 0.03917535021901131, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_q.bias": { |
| "min": -0.11511560529470444, |
| "max": 0.13181880116462708, |
| "mean": 0.0001504624669905752, |
| "std": 0.029160819947719574, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_k.weight": { |
| "min": -0.3522934317588806, |
| "max": 0.28486883640289307, |
| "mean": 6.553360890393378e-06, |
| "std": 0.03924445062875748, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_k.bias": { |
| "min": -4.132338523864746, |
| "max": 3.5437686443328857, |
| "mean": -0.011590493842959404, |
| "std": 0.6826138496398926, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_v.weight": { |
| "min": -0.21074621379375458, |
| "max": 0.20937031507492065, |
| "mean": 3.468795330263674e-05, |
| "std": 0.03448443114757538, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_v.bias": { |
| "min": -0.03586054965853691, |
| "max": 0.04796382784843445, |
| "mean": 0.0007884950027801096, |
| "std": 0.012871338985860348, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_out.0.weight": { |
| "min": -0.21027511358261108, |
| "max": 0.1930612176656723, |
| "mean": -9.818363650992978e-07, |
| "std": 0.03169528394937515, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_out.0.bias": { |
| "min": -0.18642070889472961, |
| "max": 0.1772109568119049, |
| "mean": -0.0028416060376912355, |
| "std": 0.058615587651729584, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.3.g": { |
| "min": 0.4746147096157074, |
| "max": 1.0414643287658691, |
| "mean": 0.6513273119926453, |
| "std": 0.04965711012482643, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.4.ff.0.0.weight": { |
| "min": -0.24834445118904114, |
| "max": 0.3291000425815582, |
| "mean": 0.00018075655680149794, |
| "std": 0.04056985676288605, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.4.ff.0.0.bias": { |
| "min": -0.12541179358959198, |
| "max": 0.02496136911213398, |
| "mean": -0.030498577281832695, |
| "std": 0.017614111304283142, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.8.4.ff.2.weight": { |
| "min": -0.42039719223976135, |
| "max": 0.48143431544303894, |
| "mean": 1.1528718459885567e-06, |
| "std": 0.03539694473147392, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.8.4.ff.2.bias": { |
| "min": -0.15133655071258545, |
| "max": 0.04343574121594429, |
| "mean": 4.278856431483291e-05, |
| "std": 0.014885962940752506, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.1.g": { |
| "min": 0.3155560791492462, |
| "max": 0.6816220879554749, |
| "mean": 0.5528930425643921, |
| "std": 0.04069439694285393, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_q.weight": { |
| "min": -0.20635411143302917, |
| "max": 0.21984520554542542, |
| "mean": 3.190069764968939e-05, |
| "std": 0.038299400359392166, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_q.bias": { |
| "min": -0.13771876692771912, |
| "max": 0.1125807911157608, |
| "mean": 2.632014366099611e-05, |
| "std": 0.025809206068515778, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_k.weight": { |
| "min": -0.4028305411338806, |
| "max": 0.3708246946334839, |
| "mean": 2.552652767917607e-05, |
| "std": 0.03817948326468468, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_k.bias": { |
| "min": -3.770878791809082, |
| "max": 2.8686978816986084, |
| "mean": 0.001155341975390911, |
| "std": 0.5168278217315674, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_v.weight": { |
| "min": -0.2037316858768463, |
| "max": 0.1975933313369751, |
| "mean": 2.9730301321251318e-05, |
| "std": 0.03429727256298065, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_v.bias": { |
| "min": -0.05053940787911415, |
| "max": 0.039879124611616135, |
| "mean": -0.00042120314901694655, |
| "std": 0.013415130786597729, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_out.0.weight": { |
| "min": -0.19603155553340912, |
| "max": 0.20171792805194855, |
| "mean": -1.2456664080673363e-05, |
| "std": 0.0318053737282753, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_out.0.bias": { |
| "min": -0.19293668866157532, |
| "max": 0.19509124755859375, |
| "mean": -0.0029669972136616707, |
| "std": 0.06252549588680267, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.3.g": { |
| "min": 0.348905086517334, |
| "max": 1.0837733745574951, |
| "mean": 0.6670998334884644, |
| "std": 0.05524366348981857, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.4.ff.0.0.weight": { |
| "min": -0.22561387717723846, |
| "max": 0.25142621994018555, |
| "mean": 0.00035854580346494913, |
| "std": 0.04075940325856209, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.4.ff.0.0.bias": { |
| "min": -0.09107953310012817, |
| "max": 0.04363439604640007, |
| "mean": -0.030079854652285576, |
| "std": 0.017611680552363396, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.9.4.ff.2.weight": { |
| "min": -0.353360116481781, |
| "max": 0.30395275354385376, |
| "mean": -4.4715885451296344e-05, |
| "std": 0.03712251037359238, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.9.4.ff.2.bias": { |
| "min": -0.16167744994163513, |
| "max": 0.06346611678600311, |
| "mean": -7.887817628215998e-05, |
| "std": 0.019426995888352394, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.1.g": { |
| "min": 0.3487050533294678, |
| "max": 0.7219327092170715, |
| "mean": 0.5423474907875061, |
| "std": 0.0390637181699276, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_q.weight": { |
| "min": -0.21929427981376648, |
| "max": 0.22339415550231934, |
| "mean": -1.152800177806057e-05, |
| "std": 0.039230845868587494, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_q.bias": { |
| "min": -0.11837491393089294, |
| "max": 0.17054983973503113, |
| "mean": 0.0002821336966007948, |
| "std": 0.025116898119449615, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_k.weight": { |
| "min": -0.24647969007492065, |
| "max": 0.3006535768508911, |
| "mean": -3.7006771890446544e-05, |
| "std": 0.038930293172597885, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_k.bias": { |
| "min": -3.505005121231079, |
| "max": 3.7144510746002197, |
| "mean": 0.01584703102707863, |
| "std": 0.782384991645813, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_v.weight": { |
| "min": -0.21911682188510895, |
| "max": 0.237393319606781, |
| "mean": -1.3131610103300773e-05, |
| "std": 0.03630334511399269, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_v.bias": { |
| "min": -0.04719853773713112, |
| "max": 0.051371362060308456, |
| "mean": 0.00048090319614857435, |
| "std": 0.013523470610380173, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_out.0.weight": { |
| "min": -0.21416644752025604, |
| "max": 0.21722018718719482, |
| "mean": 5.635957859340124e-05, |
| "std": 0.033615801483392715, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_out.0.bias": { |
| "min": -0.2113313376903534, |
| "max": 0.2312089204788208, |
| "mean": -0.005099226720631123, |
| "std": 0.06185970827937126, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.3.g": { |
| "min": 0.3619273602962494, |
| "max": 1.1010714769363403, |
| "mean": 0.699254035949707, |
| "std": 0.053593844175338745, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.4.ff.0.0.weight": { |
| "min": -0.23512133955955505, |
| "max": 0.24475844204425812, |
| "mean": 0.00046337698586285114, |
| "std": 0.04126880317926407, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.4.ff.0.0.bias": { |
| "min": -0.09808072447776794, |
| "max": 0.06809643656015396, |
| "mean": -0.03143021836876869, |
| "std": 0.01812811754643917, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.10.4.ff.2.weight": { |
| "min": -0.3017187714576721, |
| "max": 0.3516466021537781, |
| "mean": -8.262180926976725e-05, |
| "std": 0.040274444967508316, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.10.4.ff.2.bias": { |
| "min": -0.15225963294506073, |
| "max": 0.149653360247612, |
| "mean": 0.00026317729498259723, |
| "std": 0.023038743063807487, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.1.g": { |
| "min": 0.9992579817771912, |
| "max": 1.0015391111373901, |
| "mean": 1.0000743865966797, |
| "std": 0.0006371568888425827, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_q.weight": { |
| "min": -0.03125908225774765, |
| "max": 0.0312553308904171, |
| "mean": -1.9290733689558692e-05, |
| "std": 0.01804095134139061, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_q.bias": { |
| "min": -0.03122831881046295, |
| "max": 0.030987922102212906, |
| "mean": -0.001084161689504981, |
| "std": 0.017950566485524178, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_k.weight": { |
| "min": -0.031255997717380524, |
| "max": 0.031259775161743164, |
| "mean": 3.548155291355215e-06, |
| "std": 0.01804135926067829, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_k.bias": { |
| "min": -0.031154237687587738, |
| "max": 0.03117459826171398, |
| "mean": 0.0003339198010507971, |
| "std": 0.018062766641378403, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_v.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_v.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_out.0.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_out.0.bias": { |
| "min": -0.000624487001914531, |
| "max": 0.0007099520298652351, |
| "mean": 4.385071406431962e-06, |
| "std": 0.00018961619934998453, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.3.g": { |
| "min": 0.99758380651474, |
| "max": 1.0029877424240112, |
| "mean": 0.9999918341636658, |
| "std": 0.0008515770896337926, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.4.ff.0.0.weight": { |
| "min": -0.0335661917924881, |
| "max": 0.03370394930243492, |
| "mean": -6.065281013434287e-06, |
| "std": 0.018047738820314407, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.4.ff.0.0.bias": { |
| "min": -0.03307846933603287, |
| "max": 0.033399470150470734, |
| "mean": -0.00018566125072538853, |
| "std": 0.017954055219888687, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.11.4.ff.2.weight": { |
| "min": -0.001481670537032187, |
| "max": 0.001570003922097385, |
| "mean": 1.885646042865119e-06, |
| "std": 0.0002906274457927793, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.11.4.ff.2.bias": { |
| "min": -0.0005839330260641873, |
| "max": 0.0007720313151367009, |
| "mean": 7.4740901254699565e-06, |
| "std": 0.00017145519086625427, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.1.g": { |
| "min": 0.3833076059818268, |
| "max": 0.7191433310508728, |
| "mean": 0.5806823968887329, |
| "std": 0.03885458782315254, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_q.weight": { |
| "min": -0.23893095552921295, |
| "max": 0.19658136367797852, |
| "mean": 2.6083449483849108e-05, |
| "std": 0.03746617212891579, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_q.bias": { |
| "min": -0.11882374435663223, |
| "max": 0.16677531599998474, |
| "mean": 0.0009812903590500355, |
| "std": 0.027557166293263435, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_k.weight": { |
| "min": -0.24655194580554962, |
| "max": 0.49992480874061584, |
| "mean": -5.045527359470725e-05, |
| "std": 0.0376235656440258, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_k.bias": { |
| "min": -3.941847801208496, |
| "max": 3.7689895629882812, |
| "mean": -0.0035720239393413067, |
| "std": 0.6813404560089111, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_v.weight": { |
| "min": -0.22746945917606354, |
| "max": 0.25183355808258057, |
| "mean": -1.1859048754558899e-05, |
| "std": 0.037434790283441544, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_v.bias": { |
| "min": -0.07158222794532776, |
| "max": 0.08058217912912369, |
| "mean": -0.0005094742518849671, |
| "std": 0.01565464586019516, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_out.0.weight": { |
| "min": -0.22813726961612701, |
| "max": 0.2576807737350464, |
| "mean": -2.8760241548297927e-05, |
| "std": 0.03542162850499153, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_out.0.bias": { |
| "min": -0.20052045583724976, |
| "max": 0.21483510732650757, |
| "mean": -0.005527016241103411, |
| "std": 0.06832844763994217, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.3.g": { |
| "min": 0.40501996874809265, |
| "max": 1.1893715858459473, |
| "mean": 0.7378885746002197, |
| "std": 0.055228959769010544, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.4.ff.0.0.weight": { |
| "min": -0.22087574005126953, |
| "max": 0.2456100732088089, |
| "mean": 0.0005211896495893598, |
| "std": 0.04133577644824982, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.4.ff.0.0.bias": { |
| "min": -0.1032254695892334, |
| "max": 0.024186622351408005, |
| "mean": -0.03266698122024536, |
| "std": 0.018890688195824623, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.12.4.ff.2.weight": { |
| "min": -0.44966718554496765, |
| "max": 0.4224751591682434, |
| "mean": -0.00043509487295523286, |
| "std": 0.04689602553844452, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.12.4.ff.2.bias": { |
| "min": -0.2515262961387634, |
| "max": 0.47013524174690247, |
| "mean": 0.0032045203261077404, |
| "std": 0.04452691972255707, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.0.weight": { |
| "min": -0.31688347458839417, |
| "max": 0.33314356207847595, |
| "mean": -2.516225868021138e-05, |
| "std": 0.021287811920046806, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.13.1.g": { |
| "min": 0.32447901368141174, |
| "max": 0.6856404542922974, |
| "mean": 0.5710100531578064, |
| "std": 0.04470637068152428, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_q.weight": { |
| "min": -0.1645602136850357, |
| "max": 0.17448709905147552, |
| "mean": -4.871720739174634e-05, |
| "std": 0.033182382583618164, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_q.bias": { |
| "min": -0.1869296431541443, |
| "max": 0.14326152205467224, |
| "mean": 3.4562835935503244e-05, |
| "std": 0.029701465740799904, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_k.weight": { |
| "min": -0.3810564875602722, |
| "max": 0.24595260620117188, |
| "mean": -9.857794793788344e-06, |
| "std": 0.032763585448265076, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_k.bias": { |
| "min": -3.6554818153381348, |
| "max": 3.289768695831299, |
| "mean": -0.014251366257667542, |
| "std": 0.9850791096687317, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_v.weight": { |
| "min": -0.2347586303949356, |
| "max": 0.24735252559185028, |
| "mean": -1.8151138647226617e-05, |
| "std": 0.041698191314935684, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_v.bias": { |
| "min": -0.07252755761146545, |
| "max": 0.154456228017807, |
| "mean": 0.0006656115292571485, |
| "std": 0.025164911523461342, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_out.0.weight": { |
| "min": -0.2663172781467438, |
| "max": 0.24813731014728546, |
| "mean": -1.5164550859481096e-05, |
| "std": 0.04013926163315773, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_out.0.bias": { |
| "min": -0.18959401547908783, |
| "max": 0.19463232159614563, |
| "mean": -0.0012374802026897669, |
| "std": 0.06668464839458466, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.3.g": { |
| "min": 0.32920119166374207, |
| "max": 0.99962317943573, |
| "mean": 0.7191556692123413, |
| "std": 0.052332110702991486, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.4.ff.0.0.weight": { |
| "min": -0.2317013144493103, |
| "max": 0.24530917406082153, |
| "mean": 0.00018264415848534554, |
| "std": 0.04090017080307007, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.4.ff.0.0.bias": { |
| "min": -0.11453195661306381, |
| "max": 0.01904553547501564, |
| "mean": -0.04247689247131348, |
| "std": 0.01886470802128315, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.13.4.ff.2.weight": { |
| "min": -0.38964197039604187, |
| "max": 0.4074561595916748, |
| "mean": -2.184425829909742e-05, |
| "std": 0.048533279448747635, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.13.4.ff.2.bias": { |
| "min": -0.692954957485199, |
| "max": 0.41268306970596313, |
| "mean": 0.0008480865508317947, |
| "std": 0.060282234102487564, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.0.weight": { |
| "min": -0.0014125935267657042, |
| "max": 1.0007404088974, |
| "mean": 0.00048819734365679324, |
| "std": 0.02208949252963066, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.14.1.g": { |
| "min": 0.9992830157279968, |
| "max": 1.0015500783920288, |
| "mean": 1.0000728368759155, |
| "std": 0.0006243661628104746, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_q.weight": { |
| "min": -0.03125380352139473, |
| "max": 0.03125770390033722, |
| "mean": -2.1020379790570587e-05, |
| "std": 0.018032291904091835, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_q.bias": { |
| "min": -0.031215310096740723, |
| "max": 0.031232142820954323, |
| "mean": -0.0006769997999072075, |
| "std": 0.017826829105615616, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_k.weight": { |
| "min": -0.03125686198472977, |
| "max": 0.03126228600740433, |
| "mean": -8.83147367858328e-06, |
| "std": 0.018031319603323936, |
| "sparsity": 9.5367431640625e-07, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_k.bias": { |
| "min": -0.031232059001922607, |
| "max": 0.031244752928614616, |
| "mean": -0.0007297524134628475, |
| "std": 0.017941756173968315, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_v.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_v.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_out.0.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_out.0.bias": { |
| "min": -0.0005121154244989157, |
| "max": 0.000419745163526386, |
| "mean": -3.856697276205523e-06, |
| "std": 0.00015613996947649866, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.3.g": { |
| "min": 0.9973074197769165, |
| "max": 1.0023618936538696, |
| "mean": 0.9995496869087219, |
| "std": 0.0008333163568750024, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.4.ff.0.0.weight": { |
| "min": -0.03326094523072243, |
| "max": 0.03284362331032753, |
| "mean": -2.9510356398532167e-06, |
| "std": 0.018027810379862785, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.4.ff.0.0.bias": { |
| "min": -0.03245115652680397, |
| "max": 0.03129417076706886, |
| "mean": -0.0005187825299799442, |
| "std": 0.018035637214779854, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.14.4.ff.2.weight": { |
| "min": -0.001710034441202879, |
| "max": 0.001517186756245792, |
| "mean": -1.1187451036676066e-06, |
| "std": 0.00028821235173381865, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.14.4.ff.2.bias": { |
| "min": -0.0004745775950141251, |
| "max": 0.00038665023748762906, |
| "mean": -3.4791635243891506e-06, |
| "std": 0.00014281406765803695, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.0.weight": { |
| "min": -0.23430979251861572, |
| "max": 0.27249982953071594, |
| "mean": 6.625029982387787e-06, |
| "std": 0.018810328096151352, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.15.1.g": { |
| "min": 0.32144901156425476, |
| "max": 0.6939529180526733, |
| "mean": 0.5816143751144409, |
| "std": 0.04593788832426071, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_q.weight": { |
| "min": -0.18192411959171295, |
| "max": 0.19777271151542664, |
| "mean": -1.1577552868402563e-05, |
| "std": 0.03318414464592934, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_q.bias": { |
| "min": -0.16048845648765564, |
| "max": 0.12929441034793854, |
| "mean": -0.0010730556678026915, |
| "std": 0.03413493558764458, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_k.weight": { |
| "min": -0.3323971629142761, |
| "max": 0.31116729974746704, |
| "mean": -1.0262037903885357e-05, |
| "std": 0.032234691083431244, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_k.bias": { |
| "min": -7.802551746368408, |
| "max": 8.761726379394531, |
| "mean": 0.0934542790055275, |
| "std": 1.6194651126861572, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_v.weight": { |
| "min": -0.23397472500801086, |
| "max": 0.24182309210300446, |
| "mean": 4.162585537414998e-05, |
| "std": 0.040856119245290756, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_v.bias": { |
| "min": -0.07595551013946533, |
| "max": 0.06575819849967957, |
| "mean": 0.00048204767517745495, |
| "std": 0.019416553899645805, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_out.0.weight": { |
| "min": -0.24591538310050964, |
| "max": 0.23388886451721191, |
| "mean": -3.2548523449804634e-06, |
| "std": 0.039430882781744, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_out.0.bias": { |
| "min": -0.16298307478427887, |
| "max": 0.16088849306106567, |
| "mean": 0.0016233095666393638, |
| "std": 0.06529011577367783, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.3.g": { |
| "min": 0.5571500658988953, |
| "max": 0.9436134696006775, |
| "mean": 0.7128155827522278, |
| "std": 0.0401235930621624, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.4.ff.0.0.weight": { |
| "min": -0.22800666093826294, |
| "max": 0.2548002004623413, |
| "mean": -4.557950160233304e-05, |
| "std": 0.0405743233859539, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.4.ff.0.0.bias": { |
| "min": -0.13472457230091095, |
| "max": 0.022118322551250458, |
| "mean": -0.04135219752788544, |
| "std": 0.01838735118508339, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.15.4.ff.2.weight": { |
| "min": -0.42162197828292847, |
| "max": 0.39239510893821716, |
| "mean": -4.3281570469844155e-06, |
| "std": 0.0477834977209568, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.15.4.ff.2.bias": { |
| "min": -0.6071848273277283, |
| "max": 0.6512866020202637, |
| "mean": 0.0015846553724259138, |
| "std": 0.05683678016066551, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.0.weight": { |
| "min": -0.25181835889816284, |
| "max": 0.32083579897880554, |
| "mean": -6.167530045786407e-06, |
| "std": 0.01961352303624153, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.16.1.g": { |
| "min": 0.35955187678337097, |
| "max": 0.6821539998054504, |
| "mean": 0.5706835389137268, |
| "std": 0.04298859089612961, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_q.weight": { |
| "min": -0.22016161680221558, |
| "max": 0.17701253294944763, |
| "mean": -3.445023321546614e-05, |
| "std": 0.03429866582155228, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_q.bias": { |
| "min": -0.16314493119716644, |
| "max": 0.23276831209659576, |
| "mean": 0.000363295606803149, |
| "std": 0.032813575118780136, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_k.weight": { |
| "min": -0.26391661167144775, |
| "max": 0.23982854187488556, |
| "mean": -5.2968603995395824e-05, |
| "std": 0.03389734774827957, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_k.bias": { |
| "min": -4.854297161102295, |
| "max": 5.090524673461914, |
| "mean": 0.04387897625565529, |
| "std": 1.229095458984375, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_v.weight": { |
| "min": -0.24643158912658691, |
| "max": 0.2503342926502228, |
| "mean": 7.21608375897631e-05, |
| "std": 0.04398628696799278, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_v.bias": { |
| "min": -0.06249094381928444, |
| "max": 0.05441959202289581, |
| "mean": 0.0006457456620410085, |
| "std": 0.017188476398587227, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_out.0.weight": { |
| "min": -0.28642886877059937, |
| "max": 0.2721048593521118, |
| "mean": -5.0093196477973834e-05, |
| "std": 0.04298442229628563, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_out.0.bias": { |
| "min": -0.16100700199604034, |
| "max": 0.1703459769487381, |
| "mean": -0.002886796835809946, |
| "std": 0.05929969996213913, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.3.g": { |
| "min": 0.5198073983192444, |
| "max": 0.9330060482025146, |
| "mean": 0.7133970260620117, |
| "std": 0.03842265531420708, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.4.ff.0.0.weight": { |
| "min": -0.2378769814968109, |
| "max": 0.2487393021583557, |
| "mean": 0.00046459035365842283, |
| "std": 0.04045308753848076, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.4.ff.0.0.bias": { |
| "min": -0.1450035721063614, |
| "max": 0.0410858653485775, |
| "mean": -0.03969570994377136, |
| "std": 0.020541729405522346, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.16.4.ff.2.weight": { |
| "min": -0.5323667526245117, |
| "max": 0.5824663043022156, |
| "mean": 5.913888344366569e-06, |
| "std": 0.048858821392059326, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.16.4.ff.2.bias": { |
| "min": -0.5189786553382874, |
| "max": 0.49333813786506653, |
| "mean": 0.0023667975328862667, |
| "std": 0.0534440316259861, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.0.weight": { |
| "min": -0.2737034261226654, |
| "max": 0.31558021903038025, |
| "mean": 1.935112777573522e-06, |
| "std": 0.02005006931722164, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.17.1.g": { |
| "min": 0.36589479446411133, |
| "max": 0.7117040157318115, |
| "mean": 0.5931321382522583, |
| "std": 0.0459616482257843, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_q.weight": { |
| "min": -0.21081827580928802, |
| "max": 0.19904154539108276, |
| "mean": 3.062835457967594e-05, |
| "std": 0.03486720845103264, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_q.bias": { |
| "min": -0.18713217973709106, |
| "max": 0.20344023406505585, |
| "mean": 0.000952105619944632, |
| "std": 0.031497493386268616, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_k.weight": { |
| "min": -0.28968340158462524, |
| "max": 0.33981209993362427, |
| "mean": -4.6875291445758194e-05, |
| "std": 0.03458764776587486, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_k.bias": { |
| "min": -3.876854181289673, |
| "max": 3.3869495391845703, |
| "mean": 0.014455719850957394, |
| "std": 0.8583089709281921, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_v.weight": { |
| "min": -0.22449138760566711, |
| "max": 0.2498161643743515, |
| "mean": -3.885651949531166e-06, |
| "std": 0.04222925379872322, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_v.bias": { |
| "min": -0.05526581034064293, |
| "max": 0.04652895778417587, |
| "mean": -2.1849831682629883e-05, |
| "std": 0.015840303152799606, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_out.0.weight": { |
| "min": -0.2932976484298706, |
| "max": 0.29035061597824097, |
| "mean": -7.6227315730648115e-06, |
| "std": 0.041944343596696854, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_out.0.bias": { |
| "min": -0.12483495473861694, |
| "max": 0.2589971721172333, |
| "mean": -0.003243764629587531, |
| "std": 0.05317297205328941, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.3.g": { |
| "min": 0.45624497532844543, |
| "max": 0.8444257378578186, |
| "mean": 0.705470621585846, |
| "std": 0.03522758185863495, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.4.ff.0.0.weight": { |
| "min": -0.5120490789413452, |
| "max": 0.3481951355934143, |
| "mean": 0.00034297475940547884, |
| "std": 0.040198490023612976, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.4.ff.0.0.bias": { |
| "min": -0.18573546409606934, |
| "max": 0.03953690081834793, |
| "mean": -0.03938683122396469, |
| "std": 0.021360911428928375, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.17.4.ff.2.weight": { |
| "min": -0.5439703464508057, |
| "max": 0.5556368231773376, |
| "mean": -7.127778371796012e-05, |
| "std": 0.05073383450508118, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.17.4.ff.2.bias": { |
| "min": -0.5116579532623291, |
| "max": 0.6641839742660522, |
| "mean": 0.0024420106783509254, |
| "std": 0.04951965808868408, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.0.weight": { |
| "min": -0.33250588178634644, |
| "max": 0.2653454840183258, |
| "mean": 3.314120021968847e-06, |
| "std": 0.019387103617191315, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.18.1.g": { |
| "min": 0.32199233770370483, |
| "max": 0.7664577960968018, |
| "mean": 0.6510406136512756, |
| "std": 0.04532792791724205, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_q.weight": { |
| "min": -0.24981507658958435, |
| "max": 0.21987095475196838, |
| "mean": -1.8786176951834932e-06, |
| "std": 0.03650160878896713, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_q.bias": { |
| "min": -0.32696181535720825, |
| "max": 0.286738783121109, |
| "mean": -0.0006850577774457633, |
| "std": 0.038556959480047226, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_k.weight": { |
| "min": -0.310026079416275, |
| "max": 0.3700660765171051, |
| "mean": 6.51663140160963e-05, |
| "std": 0.03624221682548523, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_k.bias": { |
| "min": -4.716763496398926, |
| "max": 5.807004928588867, |
| "mean": 0.03795414790511131, |
| "std": 1.4130035638809204, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_v.weight": { |
| "min": -0.22150921821594238, |
| "max": 0.20585696399211884, |
| "mean": -7.512117736041546e-05, |
| "std": 0.0424848347902298, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_v.bias": { |
| "min": -0.07765647768974304, |
| "max": 0.05150295048952103, |
| "mean": -0.0009257810888811946, |
| "std": 0.01641261577606201, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_out.0.weight": { |
| "min": -0.3305484354496002, |
| "max": 0.3292558491230011, |
| "mean": -4.674302545026876e-06, |
| "std": 0.042791128158569336, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_out.0.bias": { |
| "min": -0.2847578823566437, |
| "max": 0.11202681809663773, |
| "mean": -0.0012038333807140589, |
| "std": 0.04701409116387367, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.3.g": { |
| "min": 0.4860534965991974, |
| "max": 0.8868187069892883, |
| "mean": 0.7373650074005127, |
| "std": 0.03824280574917793, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.4.ff.0.0.weight": { |
| "min": -0.3623279929161072, |
| "max": 0.2745623290538788, |
| "mean": 5.109083213028498e-05, |
| "std": 0.04064391553401947, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.4.ff.0.0.bias": { |
| "min": -0.24753618240356445, |
| "max": 0.046382758766412735, |
| "mean": -0.039263010025024414, |
| "std": 0.023289302363991737, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.18.4.ff.2.weight": { |
| "min": -0.6261420249938965, |
| "max": 0.5965140461921692, |
| "mean": -5.986806354485452e-05, |
| "std": 0.05311597138643265, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.18.4.ff.2.bias": { |
| "min": -0.7094455361366272, |
| "max": 0.2657928168773651, |
| "mean": 0.0009170880075544119, |
| "std": 0.05122483894228935, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.0.weight": { |
| "min": -0.3433697819709778, |
| "max": 0.30368947982788086, |
| "mean": 2.3889015210443176e-07, |
| "std": 0.019135670736432076, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.19.1.g": { |
| "min": 0.3497507870197296, |
| "max": 0.7829343676567078, |
| "mean": 0.638809323310852, |
| "std": 0.04924893379211426, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_q.weight": { |
| "min": -0.20543725788593292, |
| "max": 0.20679403841495514, |
| "mean": -5.990585486870259e-05, |
| "std": 0.037696100771427155, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_q.bias": { |
| "min": -0.25862252712249756, |
| "max": 0.26803287863731384, |
| "mean": -0.00040157014154829085, |
| "std": 0.04459596797823906, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_k.weight": { |
| "min": -0.3540649712085724, |
| "max": 0.32237085700035095, |
| "mean": -6.968005436647218e-06, |
| "std": 0.03720472380518913, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_k.bias": { |
| "min": -5.260965824127197, |
| "max": 4.203993797302246, |
| "mean": -0.026412349194288254, |
| "std": 1.006641149520874, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_v.weight": { |
| "min": -0.23861756920814514, |
| "max": 0.24335098266601562, |
| "mean": -2.5078054022742435e-05, |
| "std": 0.043209534138441086, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_v.bias": { |
| "min": -0.06233251839876175, |
| "max": 0.056672900915145874, |
| "mean": 0.00034255694481544197, |
| "std": 0.014151446521282196, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_out.0.weight": { |
| "min": -0.4369187653064728, |
| "max": 0.373432457447052, |
| "mean": 1.4437458048632834e-05, |
| "std": 0.044120825827121735, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_out.0.bias": { |
| "min": -0.09643160551786423, |
| "max": 0.1759035885334015, |
| "mean": -0.0006591043202206492, |
| "std": 0.035157084465026855, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.3.g": { |
| "min": 0.4216560423374176, |
| "max": 1.0694262981414795, |
| "mean": 0.7483175992965698, |
| "std": 0.04205932468175888, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.4.ff.0.0.weight": { |
| "min": -0.2665790617465973, |
| "max": 0.29692915081977844, |
| "mean": -7.955127512104809e-05, |
| "std": 0.04080403223633766, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.4.ff.0.0.bias": { |
| "min": -0.18574701249599457, |
| "max": 0.043912798166275024, |
| "mean": -0.03681863471865654, |
| "std": 0.025608953088521957, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.19.4.ff.2.weight": { |
| "min": -0.45691967010498047, |
| "max": 0.486579954624176, |
| "mean": 4.3823405576404184e-05, |
| "std": 0.05420882627367973, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.19.4.ff.2.bias": { |
| "min": -0.28651049733161926, |
| "max": 0.5512732267379761, |
| "mean": -0.0008804658427834511, |
| "std": 0.04782622680068016, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.0.weight": { |
| "min": -0.2928602397441864, |
| "max": 0.3227991461753845, |
| "mean": 6.5394251578254625e-06, |
| "std": 0.019969874992966652, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.20.1.g": { |
| "min": 0.2909410297870636, |
| "max": 0.7601505517959595, |
| "mean": 0.6508233547210693, |
| "std": 0.05213586986064911, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_q.weight": { |
| "min": -0.2434667944908142, |
| "max": 0.2616351246833801, |
| "mean": -6.0445322560553905e-06, |
| "std": 0.039612967520952225, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_q.bias": { |
| "min": -0.2675444483757019, |
| "max": 0.1998518854379654, |
| "mean": -0.0008808361599221826, |
| "std": 0.05175328254699707, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_k.weight": { |
| "min": -0.2721408009529114, |
| "max": 0.2537347078323364, |
| "mean": 4.015575541416183e-06, |
| "std": 0.03871006891131401, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_k.bias": { |
| "min": -12.963685989379883, |
| "max": 15.945606231689453, |
| "mean": 0.033225029706954956, |
| "std": 1.9889812469482422, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_v.weight": { |
| "min": -0.20711851119995117, |
| "max": 0.22583316266536713, |
| "mean": -7.227503374451771e-05, |
| "std": 0.04055361449718475, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_v.bias": { |
| "min": -0.06934971362352371, |
| "max": 0.06323137879371643, |
| "mean": 0.00015275523765012622, |
| "std": 0.014742234721779823, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_out.0.weight": { |
| "min": -0.4650252163410187, |
| "max": 0.3206908702850342, |
| "mean": 1.950068872247357e-05, |
| "std": 0.04058856889605522, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_out.0.bias": { |
| "min": -0.06406640261411667, |
| "max": 0.11521138995885849, |
| "mean": 0.0011922243284061551, |
| "std": 0.02470523677766323, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.3.g": { |
| "min": 0.3746338486671448, |
| "max": 0.9322722554206848, |
| "mean": 0.7508488893508911, |
| "std": 0.040187884122133255, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.4.ff.0.0.weight": { |
| "min": -0.2793060839176178, |
| "max": 0.2731705904006958, |
| "mean": -0.00016857523587532341, |
| "std": 0.04099458083510399, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.4.ff.0.0.bias": { |
| "min": -0.198820099234581, |
| "max": 0.05085344612598419, |
| "mean": -0.03202417492866516, |
| "std": 0.025111379101872444, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.20.4.ff.2.weight": { |
| "min": -0.6573337912559509, |
| "max": 0.5352881550788879, |
| "mean": -4.8675712605472654e-05, |
| "std": 0.05284544453024864, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.20.4.ff.2.bias": { |
| "min": -0.19310522079467773, |
| "max": 0.5820621848106384, |
| "mean": -0.000515035935677588, |
| "std": 0.04106917232275009, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.0.weight": { |
| "min": -0.4177176356315613, |
| "max": 0.37193918228149414, |
| "mean": 6.035062597220531e-06, |
| "std": 0.02162161096930504, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.21.1.g": { |
| "min": 0.21426703035831451, |
| "max": 0.7471129894256592, |
| "mean": 0.649559497833252, |
| "std": 0.05437251552939415, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_q.weight": { |
| "min": -0.20954684913158417, |
| "max": 0.19578267633914948, |
| "mean": 4.0035050915321335e-05, |
| "std": 0.03946496546268463, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_q.bias": { |
| "min": -0.3292764723300934, |
| "max": 0.2593560516834259, |
| "mean": -0.0032243705354630947, |
| "std": 0.056255340576171875, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_k.weight": { |
| "min": -0.20562483370304108, |
| "max": 0.2547135651111603, |
| "mean": 5.434878767118789e-05, |
| "std": 0.038567062467336655, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_k.bias": { |
| "min": -6.242823600769043, |
| "max": 6.931674957275391, |
| "mean": 0.04833440110087395, |
| "std": 1.384947657585144, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_v.weight": { |
| "min": -0.20960542559623718, |
| "max": 0.2301599383354187, |
| "mean": -5.232992862147512e-06, |
| "std": 0.04131288081407547, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_v.bias": { |
| "min": -0.04387415945529938, |
| "max": 0.03594405576586723, |
| "mean": 4.847475793212652e-06, |
| "std": 0.012800573371350765, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_out.0.weight": { |
| "min": -0.3978384733200073, |
| "max": 0.34482401609420776, |
| "mean": -5.554188828682527e-05, |
| "std": 0.04238930344581604, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_out.0.bias": { |
| "min": -0.05505719780921936, |
| "max": 0.06286165118217468, |
| "mean": 0.00037010322557762265, |
| "std": 0.018672354519367218, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.3.g": { |
| "min": 0.3501102924346924, |
| "max": 1.0451011657714844, |
| "mean": 0.7893368601799011, |
| "std": 0.04874463006854057, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.4.ff.0.0.weight": { |
| "min": -0.33344367146492004, |
| "max": 0.3858579397201538, |
| "mean": -0.00016948734992183745, |
| "std": 0.041480328887701035, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.4.ff.0.0.bias": { |
| "min": -0.15724380314350128, |
| "max": 0.05914618447422981, |
| "mean": -0.03183374181389809, |
| "std": 0.0251409150660038, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.21.4.ff.2.weight": { |
| "min": -0.6964119076728821, |
| "max": 0.4686836302280426, |
| "mean": -9.159947512671351e-05, |
| "std": 0.05179150402545929, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.21.4.ff.2.bias": { |
| "min": -0.24826322495937347, |
| "max": 0.3285461962223053, |
| "mean": -0.00024742598179727793, |
| "std": 0.0414327010512352, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.0.weight": { |
| "min": -0.2872468829154968, |
| "max": 0.35023656487464905, |
| "mean": -2.1327541617210954e-06, |
| "std": 0.024238986894488335, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.22.1.g": { |
| "min": 0.19656415283679962, |
| "max": 0.7792240381240845, |
| "mean": 0.6702939867973328, |
| "std": 0.05869279056787491, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_q.weight": { |
| "min": -0.22861742973327637, |
| "max": 0.2311892956495285, |
| "mean": -1.9813087419606745e-05, |
| "std": 0.04044097661972046, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_q.bias": { |
| "min": -0.2196549028158188, |
| "max": 0.24067850410938263, |
| "mean": 0.0007784939371049404, |
| "std": 0.055799830704927444, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_k.weight": { |
| "min": -0.21562136709690094, |
| "max": 0.22666974365711212, |
| "mean": -7.154869672376662e-05, |
| "std": 0.039377160370349884, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_k.bias": { |
| "min": -8.90437126159668, |
| "max": 9.067243576049805, |
| "mean": -0.0012503080070018768, |
| "std": 1.8481035232543945, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_v.weight": { |
| "min": -0.2693117558956146, |
| "max": 0.2589534521102905, |
| "mean": 4.357095167506486e-05, |
| "std": 0.038407646119594574, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_v.bias": { |
| "min": -0.05761706829071045, |
| "max": 0.05768207088112831, |
| "mean": 0.0003497683210298419, |
| "std": 0.01472416240721941, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_out.0.weight": { |
| "min": -0.2650063633918762, |
| "max": 0.2886802554130554, |
| "mean": -6.175818271003664e-05, |
| "std": 0.039074014872312546, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_out.0.bias": { |
| "min": -0.04376920685172081, |
| "max": 0.03731464967131615, |
| "mean": -8.56523183756508e-05, |
| "std": 0.013365812599658966, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.3.g": { |
| "min": 0.33950644731521606, |
| "max": 1.0926629304885864, |
| "mean": 0.8637055158615112, |
| "std": 0.06385361403226852, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.4.ff.0.0.weight": { |
| "min": -0.42327165603637695, |
| "max": 0.41919341683387756, |
| "mean": 0.00031273282365873456, |
| "std": 0.0435028038918972, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.4.ff.0.0.bias": { |
| "min": -0.21477100253105164, |
| "max": 0.17062197625637054, |
| "mean": -0.02948208898305893, |
| "std": 0.0319497250020504, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.22.4.ff.2.weight": { |
| "min": -0.5996779799461365, |
| "max": 0.5596659183502197, |
| "mean": -0.00015256987535394728, |
| "std": 0.05344602093100548, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.22.4.ff.2.bias": { |
| "min": -0.17847254872322083, |
| "max": 0.37667688727378845, |
| "mean": 0.0013643621932715178, |
| "std": 0.03730973228812218, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.0.weight": { |
| "min": -0.3942721486091614, |
| "max": 0.36895284056663513, |
| "mean": 3.6433208151720464e-05, |
| "std": 0.028621351346373558, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.23.1.g": { |
| "min": 0.2903022766113281, |
| "max": 0.826566219329834, |
| "mean": 0.7055737376213074, |
| "std": 0.06789274513721466, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_q.weight": { |
| "min": -0.9261494278907776, |
| "max": 1.0264488458633423, |
| "mean": -2.5618217478040606e-05, |
| "std": 0.04762551560997963, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_q.bias": { |
| "min": -0.8783111572265625, |
| "max": 0.81496262550354, |
| "mean": -0.0003140262851957232, |
| "std": 0.09553777426481247, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_k.weight": { |
| "min": -0.269389808177948, |
| "max": 0.24095474183559418, |
| "mean": -2.2922709831618704e-05, |
| "std": 0.0389564111828804, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_k.bias": { |
| "min": -23.739809036254883, |
| "max": 22.848268508911133, |
| "mean": -0.0918719619512558, |
| "std": 4.069859504699707, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_v.weight": { |
| "min": -0.22777613997459412, |
| "max": 0.24508334696292877, |
| "mean": -2.581250009825453e-05, |
| "std": 0.038639314472675323, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_v.bias": { |
| "min": -0.06041834130883217, |
| "max": 0.04605862498283386, |
| "mean": -0.00014601447037421167, |
| "std": 0.014698855578899384, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_out.0.weight": { |
| "min": -0.3384549617767334, |
| "max": 0.37450915575027466, |
| "mean": 7.243736035889015e-06, |
| "std": 0.04081535339355469, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_out.0.bias": { |
| "min": -0.046464741230010986, |
| "max": 0.19570393860340118, |
| "mean": 0.0002726423554122448, |
| "std": 0.013569480739533901, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.3.g": { |
| "min": 0.37450751662254333, |
| "max": 1.1300209760665894, |
| "mean": 0.8900179862976074, |
| "std": 0.06398562341928482, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.4.ff.0.0.weight": { |
| "min": -0.4477962851524353, |
| "max": 0.5424686074256897, |
| "mean": 2.4588229280197993e-05, |
| "std": 0.04556749016046524, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.4.ff.0.0.bias": { |
| "min": -0.22407831251621246, |
| "max": 0.08827000111341476, |
| "mean": -0.032015662640333176, |
| "std": 0.03776349499821663, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.23.4.ff.2.weight": { |
| "min": -0.7253148555755615, |
| "max": 0.6892704367637634, |
| "mean": 3.4532837162259966e-05, |
| "std": 0.051778074353933334, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.23.4.ff.2.bias": { |
| "min": -0.174549400806427, |
| "max": 0.21855904161930084, |
| "mean": 3.998563624918461e-05, |
| "std": 0.03177855163812637, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.0.weight": { |
| "min": -0.34027042984962463, |
| "max": 0.37425076961517334, |
| "mean": 4.2934465454891324e-05, |
| "std": 0.03414500877261162, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.24.1.g": { |
| "min": 0.31756624579429626, |
| "max": 1.2868921756744385, |
| "mean": 0.6014678478240967, |
| "std": 0.08346211910247803, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_q.weight": { |
| "min": -0.2833472490310669, |
| "max": 0.26022085547447205, |
| "mean": -3.076446546401712e-06, |
| "std": 0.03598489984869957, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_q.bias": { |
| "min": -0.2355138659477234, |
| "max": 0.2053714245557785, |
| "mean": 0.0002318831393495202, |
| "std": 0.05601060390472412, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_k.weight": { |
| "min": -0.43542587757110596, |
| "max": 0.32521018385887146, |
| "mean": 2.451425461913459e-05, |
| "std": 0.034135881811380386, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_k.bias": { |
| "min": -5.544894218444824, |
| "max": 7.312623977661133, |
| "mean": -0.007366415113210678, |
| "std": 0.6992328763008118, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_v.weight": { |
| "min": -0.34384819865226746, |
| "max": 0.3634955585002899, |
| "mean": 0.00010338952415622771, |
| "std": 0.047827959060668945, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_v.bias": { |
| "min": -0.07375156134366989, |
| "max": 0.06036222726106644, |
| "mean": 0.0009326444123871624, |
| "std": 0.014949453994631767, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_out.0.weight": { |
| "min": -0.25554174184799194, |
| "max": 0.28655222058296204, |
| "mean": 4.425931365403812e-06, |
| "std": 0.04155518114566803, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_out.0.bias": { |
| "min": -0.05532686412334442, |
| "max": 0.06282556056976318, |
| "mean": 0.00014147879846859723, |
| "std": 0.0071739982813596725, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.3.g": { |
| "min": 0.49367740750312805, |
| "max": 1.2208529710769653, |
| "mean": 1.0134257078170776, |
| "std": 0.11743961274623871, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.4.ff.0.0.weight": { |
| "min": -1.0936145782470703, |
| "max": 1.0469423532485962, |
| "mean": -4.9777743697632104e-05, |
| "std": 0.05241077393293381, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.4.ff.0.0.bias": { |
| "min": -0.2236752212047577, |
| "max": 0.1727852076292038, |
| "mean": -0.027246128767728806, |
| "std": 0.03635065257549286, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.24.4.ff.2.weight": { |
| "min": -0.8845650553703308, |
| "max": 0.9224934577941895, |
| "mean": -0.00014609616482630372, |
| "std": 0.05328214913606644, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.24.4.ff.2.bias": { |
| "min": -0.17102202773094177, |
| "max": 0.3799096643924713, |
| "mean": 0.0033686563838273287, |
| "std": 0.039898186922073364, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.0.weight": { |
| "min": -0.7772517800331116, |
| "max": 0.7235067486763, |
| "mean": 1.9145372789353132e-05, |
| "std": 0.04616532474756241, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.25.1.g": { |
| "min": 0.3385593295097351, |
| "max": 1.4277493953704834, |
| "mean": 0.948319673538208, |
| "std": 0.20673821866512299, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_q.weight": { |
| "min": -1.7455862760543823, |
| "max": 1.7045449018478394, |
| "mean": 0.00022695529332850128, |
| "std": 0.1586858183145523, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_q.bias": { |
| "min": -1.1996264457702637, |
| "max": 1.0995841026306152, |
| "mean": -0.009535307995975018, |
| "std": 0.20383313298225403, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_k.weight": { |
| "min": -0.42129284143447876, |
| "max": 0.42636537551879883, |
| "mean": 6.450986256822944e-05, |
| "std": 0.04801839217543602, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_k.bias": { |
| "min": -19.74388885498047, |
| "max": 19.53899383544922, |
| "mean": -0.24829958379268646, |
| "std": 4.776181221008301, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_v.weight": { |
| "min": -0.32387763261795044, |
| "max": 0.43839961290359497, |
| "mean": -1.2020052054140251e-05, |
| "std": 0.04616132006049156, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_v.bias": { |
| "min": -0.03405960276722908, |
| "max": 0.03712477907538414, |
| "mean": 0.000642063794657588, |
| "std": 0.012921381741762161, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_out.0.weight": { |
| "min": -0.7034934759140015, |
| "max": 0.6645202040672302, |
| "mean": 4.349739174358547e-05, |
| "std": 0.05788357928395271, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_out.0.bias": { |
| "min": -0.07222776859998703, |
| "max": 0.06750176101922989, |
| "mean": -0.00013276952086016536, |
| "std": 0.012919425964355469, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.3.g": { |
| "min": 0.38018617033958435, |
| "max": 1.3909327983856201, |
| "mean": 1.0665558576583862, |
| "std": 0.21971333026885986, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.4.ff.0.0.weight": { |
| "min": -0.616411030292511, |
| "max": 0.71701979637146, |
| "mean": 0.00011130145139759406, |
| "std": 0.05802119895815849, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.4.ff.0.0.bias": { |
| "min": -0.2196073830127716, |
| "max": 0.22519457340240479, |
| "mean": 0.006242883857339621, |
| "std": 0.049728427082300186, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.25.4.ff.2.weight": { |
| "min": -0.6296560168266296, |
| "max": 0.889208972454071, |
| "mean": 1.1700575669237878e-05, |
| "std": 0.023527782410383224, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.25.4.ff.2.bias": { |
| "min": -0.5068318247795105, |
| "max": 0.47398847341537476, |
| "mean": -0.0030159649904817343, |
| "std": 0.06930278241634369, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.norm_out.g": { |
| "min": 0.5377185344696045, |
| "max": 1.1807185411453247, |
| "mean": 0.782741904258728, |
| "std": 0.09885998070240021, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.proj_out.weight": { |
| "min": -0.2669522166252136, |
| "max": 0.2126760631799698, |
| "mean": -0.00022303443984128535, |
| "std": 0.053996436297893524, |
| "sparsity": 0.0, |
| "shape": [ |
| 100, |
| 1024 |
| ] |
| }, |
| "transformer.proj_out.bias": { |
| "min": -0.23790661990642548, |
| "max": 0.01483356487005949, |
| "mean": -0.043959345668554306, |
| "std": 0.03433229774236679, |
| "sparsity": 0.0, |
| "shape": [ |
| 100 |
| ] |
| } |
| } |
| } |