| { |
| "layer_types": { |
| "transformer": 391 |
| }, |
| "parameter_counts": { |
| "transformer.time_embed.time_mlp.0.weight": 262144, |
| "transformer.time_embed.time_mlp.0.bias": 1024, |
| "transformer.time_embed.time_mlp.2.weight": 1048576, |
| "transformer.time_embed.time_mlp.2.bias": 1024, |
| "transformer.text_embed.text_embed.weight": 254600, |
| "transformer.input_embed.proj.weight": 307200, |
| "transformer.input_embed.proj.bias": 1024, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, |
| "transformer.layers.0.1.g": 1024, |
| "transformer.layers.0.2.to_q.weight": 1048576, |
| "transformer.layers.0.2.to_q.bias": 1024, |
| "transformer.layers.0.2.to_k.weight": 1048576, |
| "transformer.layers.0.2.to_k.bias": 1024, |
| "transformer.layers.0.2.to_v.weight": 1048576, |
| "transformer.layers.0.2.to_v.bias": 1024, |
| "transformer.layers.0.2.to_out.0.weight": 1048576, |
| "transformer.layers.0.2.to_out.0.bias": 1024, |
| "transformer.layers.0.3.g": 1024, |
| "transformer.layers.0.4.ff.0.0.weight": 4194304, |
| "transformer.layers.0.4.ff.0.0.bias": 4096, |
| "transformer.layers.0.4.ff.2.weight": 4194304, |
| "transformer.layers.0.4.ff.2.bias": 1024, |
| "transformer.layers.1.1.g": 1024, |
| "transformer.layers.1.2.to_q.weight": 1048576, |
| "transformer.layers.1.2.to_q.bias": 1024, |
| "transformer.layers.1.2.to_k.weight": 1048576, |
| "transformer.layers.1.2.to_k.bias": 1024, |
| "transformer.layers.1.2.to_v.weight": 1048576, |
| "transformer.layers.1.2.to_v.bias": 1024, |
| "transformer.layers.1.2.to_out.0.weight": 1048576, |
| "transformer.layers.1.2.to_out.0.bias": 1024, |
| "transformer.layers.1.3.g": 1024, |
| "transformer.layers.1.4.ff.0.0.weight": 4194304, |
| "transformer.layers.1.4.ff.0.0.bias": 4096, |
| "transformer.layers.1.4.ff.2.weight": 4194304, |
| "transformer.layers.1.4.ff.2.bias": 1024, |
| "transformer.layers.2.1.g": 1024, |
| "transformer.layers.2.2.to_q.weight": 1048576, |
| "transformer.layers.2.2.to_q.bias": 1024, |
| "transformer.layers.2.2.to_k.weight": 1048576, |
| "transformer.layers.2.2.to_k.bias": 1024, |
| "transformer.layers.2.2.to_v.weight": 1048576, |
| "transformer.layers.2.2.to_v.bias": 1024, |
| "transformer.layers.2.2.to_out.0.weight": 1048576, |
| "transformer.layers.2.2.to_out.0.bias": 1024, |
| "transformer.layers.2.3.g": 1024, |
| "transformer.layers.2.4.ff.0.0.weight": 4194304, |
| "transformer.layers.2.4.ff.0.0.bias": 4096, |
| "transformer.layers.2.4.ff.2.weight": 4194304, |
| "transformer.layers.2.4.ff.2.bias": 1024, |
| "transformer.layers.3.1.g": 1024, |
| "transformer.layers.3.2.to_q.weight": 1048576, |
| "transformer.layers.3.2.to_q.bias": 1024, |
| "transformer.layers.3.2.to_k.weight": 1048576, |
| "transformer.layers.3.2.to_k.bias": 1024, |
| "transformer.layers.3.2.to_v.weight": 1048576, |
| "transformer.layers.3.2.to_v.bias": 1024, |
| "transformer.layers.3.2.to_out.0.weight": 1048576, |
| "transformer.layers.3.2.to_out.0.bias": 1024, |
| "transformer.layers.3.3.g": 1024, |
| "transformer.layers.3.4.ff.0.0.weight": 4194304, |
| "transformer.layers.3.4.ff.0.0.bias": 4096, |
| "transformer.layers.3.4.ff.2.weight": 4194304, |
| "transformer.layers.3.4.ff.2.bias": 1024, |
| "transformer.layers.4.1.g": 1024, |
| "transformer.layers.4.2.to_q.weight": 1048576, |
| "transformer.layers.4.2.to_q.bias": 1024, |
| "transformer.layers.4.2.to_k.weight": 1048576, |
| "transformer.layers.4.2.to_k.bias": 1024, |
| "transformer.layers.4.2.to_v.weight": 1048576, |
| "transformer.layers.4.2.to_v.bias": 1024, |
| "transformer.layers.4.2.to_out.0.weight": 1048576, |
| "transformer.layers.4.2.to_out.0.bias": 1024, |
| "transformer.layers.4.3.g": 1024, |
| "transformer.layers.4.4.ff.0.0.weight": 4194304, |
| "transformer.layers.4.4.ff.0.0.bias": 4096, |
| "transformer.layers.4.4.ff.2.weight": 4194304, |
| "transformer.layers.4.4.ff.2.bias": 1024, |
| "transformer.layers.5.1.g": 1024, |
| "transformer.layers.5.2.to_q.weight": 1048576, |
| "transformer.layers.5.2.to_q.bias": 1024, |
| "transformer.layers.5.2.to_k.weight": 1048576, |
| "transformer.layers.5.2.to_k.bias": 1024, |
| "transformer.layers.5.2.to_v.weight": 1048576, |
| "transformer.layers.5.2.to_v.bias": 1024, |
| "transformer.layers.5.2.to_out.0.weight": 1048576, |
| "transformer.layers.5.2.to_out.0.bias": 1024, |
| "transformer.layers.5.3.g": 1024, |
| "transformer.layers.5.4.ff.0.0.weight": 4194304, |
| "transformer.layers.5.4.ff.0.0.bias": 4096, |
| "transformer.layers.5.4.ff.2.weight": 4194304, |
| "transformer.layers.5.4.ff.2.bias": 1024, |
| "transformer.layers.6.1.g": 1024, |
| "transformer.layers.6.2.to_q.weight": 1048576, |
| "transformer.layers.6.2.to_q.bias": 1024, |
| "transformer.layers.6.2.to_k.weight": 1048576, |
| "transformer.layers.6.2.to_k.bias": 1024, |
| "transformer.layers.6.2.to_v.weight": 1048576, |
| "transformer.layers.6.2.to_v.bias": 1024, |
| "transformer.layers.6.2.to_out.0.weight": 1048576, |
| "transformer.layers.6.2.to_out.0.bias": 1024, |
| "transformer.layers.6.3.g": 1024, |
| "transformer.layers.6.4.ff.0.0.weight": 4194304, |
| "transformer.layers.6.4.ff.0.0.bias": 4096, |
| "transformer.layers.6.4.ff.2.weight": 4194304, |
| "transformer.layers.6.4.ff.2.bias": 1024, |
| "transformer.layers.7.1.g": 1024, |
| "transformer.layers.7.2.to_q.weight": 1048576, |
| "transformer.layers.7.2.to_q.bias": 1024, |
| "transformer.layers.7.2.to_k.weight": 1048576, |
| "transformer.layers.7.2.to_k.bias": 1024, |
| "transformer.layers.7.2.to_v.weight": 1048576, |
| "transformer.layers.7.2.to_v.bias": 1024, |
| "transformer.layers.7.2.to_out.0.weight": 1048576, |
| "transformer.layers.7.2.to_out.0.bias": 1024, |
| "transformer.layers.7.3.g": 1024, |
| "transformer.layers.7.4.ff.0.0.weight": 4194304, |
| "transformer.layers.7.4.ff.0.0.bias": 4096, |
| "transformer.layers.7.4.ff.2.weight": 4194304, |
| "transformer.layers.7.4.ff.2.bias": 1024, |
| "transformer.layers.8.1.g": 1024, |
| "transformer.layers.8.2.to_q.weight": 1048576, |
| "transformer.layers.8.2.to_q.bias": 1024, |
| "transformer.layers.8.2.to_k.weight": 1048576, |
| "transformer.layers.8.2.to_k.bias": 1024, |
| "transformer.layers.8.2.to_v.weight": 1048576, |
| "transformer.layers.8.2.to_v.bias": 1024, |
| "transformer.layers.8.2.to_out.0.weight": 1048576, |
| "transformer.layers.8.2.to_out.0.bias": 1024, |
| "transformer.layers.8.3.g": 1024, |
| "transformer.layers.8.4.ff.0.0.weight": 4194304, |
| "transformer.layers.8.4.ff.0.0.bias": 4096, |
| "transformer.layers.8.4.ff.2.weight": 4194304, |
| "transformer.layers.8.4.ff.2.bias": 1024, |
| "transformer.layers.9.1.g": 1024, |
| "transformer.layers.9.2.to_q.weight": 1048576, |
| "transformer.layers.9.2.to_q.bias": 1024, |
| "transformer.layers.9.2.to_k.weight": 1048576, |
| "transformer.layers.9.2.to_k.bias": 1024, |
| "transformer.layers.9.2.to_v.weight": 1048576, |
| "transformer.layers.9.2.to_v.bias": 1024, |
| "transformer.layers.9.2.to_out.0.weight": 1048576, |
| "transformer.layers.9.2.to_out.0.bias": 1024, |
| "transformer.layers.9.3.g": 1024, |
| "transformer.layers.9.4.ff.0.0.weight": 4194304, |
| "transformer.layers.9.4.ff.0.0.bias": 4096, |
| "transformer.layers.9.4.ff.2.weight": 4194304, |
| "transformer.layers.9.4.ff.2.bias": 1024, |
| "transformer.layers.10.1.g": 1024, |
| "transformer.layers.10.2.to_q.weight": 1048576, |
| "transformer.layers.10.2.to_q.bias": 1024, |
| "transformer.layers.10.2.to_k.weight": 1048576, |
| "transformer.layers.10.2.to_k.bias": 1024, |
| "transformer.layers.10.2.to_v.weight": 1048576, |
| "transformer.layers.10.2.to_v.bias": 1024, |
| "transformer.layers.10.2.to_out.0.weight": 1048576, |
| "transformer.layers.10.2.to_out.0.bias": 1024, |
| "transformer.layers.10.3.g": 1024, |
| "transformer.layers.10.4.ff.0.0.weight": 4194304, |
| "transformer.layers.10.4.ff.0.0.bias": 4096, |
| "transformer.layers.10.4.ff.2.weight": 4194304, |
| "transformer.layers.10.4.ff.2.bias": 1024, |
| "transformer.layers.11.1.g": 1024, |
| "transformer.layers.11.2.to_q.weight": 1048576, |
| "transformer.layers.11.2.to_q.bias": 1024, |
| "transformer.layers.11.2.to_k.weight": 1048576, |
| "transformer.layers.11.2.to_k.bias": 1024, |
| "transformer.layers.11.2.to_v.weight": 1048576, |
| "transformer.layers.11.2.to_v.bias": 1024, |
| "transformer.layers.11.2.to_out.0.weight": 1048576, |
| "transformer.layers.11.2.to_out.0.bias": 1024, |
| "transformer.layers.11.3.g": 1024, |
| "transformer.layers.11.4.ff.0.0.weight": 4194304, |
| "transformer.layers.11.4.ff.0.0.bias": 4096, |
| "transformer.layers.11.4.ff.2.weight": 4194304, |
| "transformer.layers.11.4.ff.2.bias": 1024, |
| "transformer.layers.12.1.g": 1024, |
| "transformer.layers.12.2.to_q.weight": 1048576, |
| "transformer.layers.12.2.to_q.bias": 1024, |
| "transformer.layers.12.2.to_k.weight": 1048576, |
| "transformer.layers.12.2.to_k.bias": 1024, |
| "transformer.layers.12.2.to_v.weight": 1048576, |
| "transformer.layers.12.2.to_v.bias": 1024, |
| "transformer.layers.12.2.to_out.0.weight": 1048576, |
| "transformer.layers.12.2.to_out.0.bias": 1024, |
| "transformer.layers.12.3.g": 1024, |
| "transformer.layers.12.4.ff.0.0.weight": 4194304, |
| "transformer.layers.12.4.ff.0.0.bias": 4096, |
| "transformer.layers.12.4.ff.2.weight": 4194304, |
| "transformer.layers.12.4.ff.2.bias": 1024, |
| "transformer.layers.13.0.weight": 2097152, |
| "transformer.layers.13.1.g": 1024, |
| "transformer.layers.13.2.to_q.weight": 1048576, |
| "transformer.layers.13.2.to_q.bias": 1024, |
| "transformer.layers.13.2.to_k.weight": 1048576, |
| "transformer.layers.13.2.to_k.bias": 1024, |
| "transformer.layers.13.2.to_v.weight": 1048576, |
| "transformer.layers.13.2.to_v.bias": 1024, |
| "transformer.layers.13.2.to_out.0.weight": 1048576, |
| "transformer.layers.13.2.to_out.0.bias": 1024, |
| "transformer.layers.13.3.g": 1024, |
| "transformer.layers.13.4.ff.0.0.weight": 4194304, |
| "transformer.layers.13.4.ff.0.0.bias": 4096, |
| "transformer.layers.13.4.ff.2.weight": 4194304, |
| "transformer.layers.13.4.ff.2.bias": 1024, |
| "transformer.layers.14.0.weight": 2097152, |
| "transformer.layers.14.1.g": 1024, |
| "transformer.layers.14.2.to_q.weight": 1048576, |
| "transformer.layers.14.2.to_q.bias": 1024, |
| "transformer.layers.14.2.to_k.weight": 1048576, |
| "transformer.layers.14.2.to_k.bias": 1024, |
| "transformer.layers.14.2.to_v.weight": 1048576, |
| "transformer.layers.14.2.to_v.bias": 1024, |
| "transformer.layers.14.2.to_out.0.weight": 1048576, |
| "transformer.layers.14.2.to_out.0.bias": 1024, |
| "transformer.layers.14.3.g": 1024, |
| "transformer.layers.14.4.ff.0.0.weight": 4194304, |
| "transformer.layers.14.4.ff.0.0.bias": 4096, |
| "transformer.layers.14.4.ff.2.weight": 4194304, |
| "transformer.layers.14.4.ff.2.bias": 1024, |
| "transformer.layers.15.0.weight": 2097152, |
| "transformer.layers.15.1.g": 1024, |
| "transformer.layers.15.2.to_q.weight": 1048576, |
| "transformer.layers.15.2.to_q.bias": 1024, |
| "transformer.layers.15.2.to_k.weight": 1048576, |
| "transformer.layers.15.2.to_k.bias": 1024, |
| "transformer.layers.15.2.to_v.weight": 1048576, |
| "transformer.layers.15.2.to_v.bias": 1024, |
| "transformer.layers.15.2.to_out.0.weight": 1048576, |
| "transformer.layers.15.2.to_out.0.bias": 1024, |
| "transformer.layers.15.3.g": 1024, |
| "transformer.layers.15.4.ff.0.0.weight": 4194304, |
| "transformer.layers.15.4.ff.0.0.bias": 4096, |
| "transformer.layers.15.4.ff.2.weight": 4194304, |
| "transformer.layers.15.4.ff.2.bias": 1024, |
| "transformer.layers.16.0.weight": 2097152, |
| "transformer.layers.16.1.g": 1024, |
| "transformer.layers.16.2.to_q.weight": 1048576, |
| "transformer.layers.16.2.to_q.bias": 1024, |
| "transformer.layers.16.2.to_k.weight": 1048576, |
| "transformer.layers.16.2.to_k.bias": 1024, |
| "transformer.layers.16.2.to_v.weight": 1048576, |
| "transformer.layers.16.2.to_v.bias": 1024, |
| "transformer.layers.16.2.to_out.0.weight": 1048576, |
| "transformer.layers.16.2.to_out.0.bias": 1024, |
| "transformer.layers.16.3.g": 1024, |
| "transformer.layers.16.4.ff.0.0.weight": 4194304, |
| "transformer.layers.16.4.ff.0.0.bias": 4096, |
| "transformer.layers.16.4.ff.2.weight": 4194304, |
| "transformer.layers.16.4.ff.2.bias": 1024, |
| "transformer.layers.17.0.weight": 2097152, |
| "transformer.layers.17.1.g": 1024, |
| "transformer.layers.17.2.to_q.weight": 1048576, |
| "transformer.layers.17.2.to_q.bias": 1024, |
| "transformer.layers.17.2.to_k.weight": 1048576, |
| "transformer.layers.17.2.to_k.bias": 1024, |
| "transformer.layers.17.2.to_v.weight": 1048576, |
| "transformer.layers.17.2.to_v.bias": 1024, |
| "transformer.layers.17.2.to_out.0.weight": 1048576, |
| "transformer.layers.17.2.to_out.0.bias": 1024, |
| "transformer.layers.17.3.g": 1024, |
| "transformer.layers.17.4.ff.0.0.weight": 4194304, |
| "transformer.layers.17.4.ff.0.0.bias": 4096, |
| "transformer.layers.17.4.ff.2.weight": 4194304, |
| "transformer.layers.17.4.ff.2.bias": 1024, |
| "transformer.layers.18.0.weight": 2097152, |
| "transformer.layers.18.1.g": 1024, |
| "transformer.layers.18.2.to_q.weight": 1048576, |
| "transformer.layers.18.2.to_q.bias": 1024, |
| "transformer.layers.18.2.to_k.weight": 1048576, |
| "transformer.layers.18.2.to_k.bias": 1024, |
| "transformer.layers.18.2.to_v.weight": 1048576, |
| "transformer.layers.18.2.to_v.bias": 1024, |
| "transformer.layers.18.2.to_out.0.weight": 1048576, |
| "transformer.layers.18.2.to_out.0.bias": 1024, |
| "transformer.layers.18.3.g": 1024, |
| "transformer.layers.18.4.ff.0.0.weight": 4194304, |
| "transformer.layers.18.4.ff.0.0.bias": 4096, |
| "transformer.layers.18.4.ff.2.weight": 4194304, |
| "transformer.layers.18.4.ff.2.bias": 1024, |
| "transformer.layers.19.0.weight": 2097152, |
| "transformer.layers.19.1.g": 1024, |
| "transformer.layers.19.2.to_q.weight": 1048576, |
| "transformer.layers.19.2.to_q.bias": 1024, |
| "transformer.layers.19.2.to_k.weight": 1048576, |
| "transformer.layers.19.2.to_k.bias": 1024, |
| "transformer.layers.19.2.to_v.weight": 1048576, |
| "transformer.layers.19.2.to_v.bias": 1024, |
| "transformer.layers.19.2.to_out.0.weight": 1048576, |
| "transformer.layers.19.2.to_out.0.bias": 1024, |
| "transformer.layers.19.3.g": 1024, |
| "transformer.layers.19.4.ff.0.0.weight": 4194304, |
| "transformer.layers.19.4.ff.0.0.bias": 4096, |
| "transformer.layers.19.4.ff.2.weight": 4194304, |
| "transformer.layers.19.4.ff.2.bias": 1024, |
| "transformer.layers.20.0.weight": 2097152, |
| "transformer.layers.20.1.g": 1024, |
| "transformer.layers.20.2.to_q.weight": 1048576, |
| "transformer.layers.20.2.to_q.bias": 1024, |
| "transformer.layers.20.2.to_k.weight": 1048576, |
| "transformer.layers.20.2.to_k.bias": 1024, |
| "transformer.layers.20.2.to_v.weight": 1048576, |
| "transformer.layers.20.2.to_v.bias": 1024, |
| "transformer.layers.20.2.to_out.0.weight": 1048576, |
| "transformer.layers.20.2.to_out.0.bias": 1024, |
| "transformer.layers.20.3.g": 1024, |
| "transformer.layers.20.4.ff.0.0.weight": 4194304, |
| "transformer.layers.20.4.ff.0.0.bias": 4096, |
| "transformer.layers.20.4.ff.2.weight": 4194304, |
| "transformer.layers.20.4.ff.2.bias": 1024, |
| "transformer.layers.21.0.weight": 2097152, |
| "transformer.layers.21.1.g": 1024, |
| "transformer.layers.21.2.to_q.weight": 1048576, |
| "transformer.layers.21.2.to_q.bias": 1024, |
| "transformer.layers.21.2.to_k.weight": 1048576, |
| "transformer.layers.21.2.to_k.bias": 1024, |
| "transformer.layers.21.2.to_v.weight": 1048576, |
| "transformer.layers.21.2.to_v.bias": 1024, |
| "transformer.layers.21.2.to_out.0.weight": 1048576, |
| "transformer.layers.21.2.to_out.0.bias": 1024, |
| "transformer.layers.21.3.g": 1024, |
| "transformer.layers.21.4.ff.0.0.weight": 4194304, |
| "transformer.layers.21.4.ff.0.0.bias": 4096, |
| "transformer.layers.21.4.ff.2.weight": 4194304, |
| "transformer.layers.21.4.ff.2.bias": 1024, |
| "transformer.layers.22.0.weight": 2097152, |
| "transformer.layers.22.1.g": 1024, |
| "transformer.layers.22.2.to_q.weight": 1048576, |
| "transformer.layers.22.2.to_q.bias": 1024, |
| "transformer.layers.22.2.to_k.weight": 1048576, |
| "transformer.layers.22.2.to_k.bias": 1024, |
| "transformer.layers.22.2.to_v.weight": 1048576, |
| "transformer.layers.22.2.to_v.bias": 1024, |
| "transformer.layers.22.2.to_out.0.weight": 1048576, |
| "transformer.layers.22.2.to_out.0.bias": 1024, |
| "transformer.layers.22.3.g": 1024, |
| "transformer.layers.22.4.ff.0.0.weight": 4194304, |
| "transformer.layers.22.4.ff.0.0.bias": 4096, |
| "transformer.layers.22.4.ff.2.weight": 4194304, |
| "transformer.layers.22.4.ff.2.bias": 1024, |
| "transformer.layers.23.0.weight": 2097152, |
| "transformer.layers.23.1.g": 1024, |
| "transformer.layers.23.2.to_q.weight": 1048576, |
| "transformer.layers.23.2.to_q.bias": 1024, |
| "transformer.layers.23.2.to_k.weight": 1048576, |
| "transformer.layers.23.2.to_k.bias": 1024, |
| "transformer.layers.23.2.to_v.weight": 1048576, |
| "transformer.layers.23.2.to_v.bias": 1024, |
| "transformer.layers.23.2.to_out.0.weight": 1048576, |
| "transformer.layers.23.2.to_out.0.bias": 1024, |
| "transformer.layers.23.3.g": 1024, |
| "transformer.layers.23.4.ff.0.0.weight": 4194304, |
| "transformer.layers.23.4.ff.0.0.bias": 4096, |
| "transformer.layers.23.4.ff.2.weight": 4194304, |
| "transformer.layers.23.4.ff.2.bias": 1024, |
| "transformer.layers.24.0.weight": 2097152, |
| "transformer.layers.24.1.g": 1024, |
| "transformer.layers.24.2.to_q.weight": 1048576, |
| "transformer.layers.24.2.to_q.bias": 1024, |
| "transformer.layers.24.2.to_k.weight": 1048576, |
| "transformer.layers.24.2.to_k.bias": 1024, |
| "transformer.layers.24.2.to_v.weight": 1048576, |
| "transformer.layers.24.2.to_v.bias": 1024, |
| "transformer.layers.24.2.to_out.0.weight": 1048576, |
| "transformer.layers.24.2.to_out.0.bias": 1024, |
| "transformer.layers.24.3.g": 1024, |
| "transformer.layers.24.4.ff.0.0.weight": 4194304, |
| "transformer.layers.24.4.ff.0.0.bias": 4096, |
| "transformer.layers.24.4.ff.2.weight": 4194304, |
| "transformer.layers.24.4.ff.2.bias": 1024, |
| "transformer.layers.25.0.weight": 2097152, |
| "transformer.layers.25.1.g": 1024, |
| "transformer.layers.25.2.to_q.weight": 1048576, |
| "transformer.layers.25.2.to_q.bias": 1024, |
| "transformer.layers.25.2.to_k.weight": 1048576, |
| "transformer.layers.25.2.to_k.bias": 1024, |
| "transformer.layers.25.2.to_v.weight": 1048576, |
| "transformer.layers.25.2.to_v.bias": 1024, |
| "transformer.layers.25.2.to_out.0.weight": 1048576, |
| "transformer.layers.25.2.to_out.0.bias": 1024, |
| "transformer.layers.25.3.g": 1024, |
| "transformer.layers.25.4.ff.0.0.weight": 4194304, |
| "transformer.layers.25.4.ff.0.0.bias": 4096, |
| "transformer.layers.25.4.ff.2.weight": 4194304, |
| "transformer.layers.25.4.ff.2.bias": 1024, |
| "transformer.norm_out.g": 1024, |
| "transformer.proj_out.weight": 102400, |
| "transformer.proj_out.bias": 100 |
| }, |
| "important_layers": [ |
| "transformer.time_embed.time_mlp.0.weight", |
| "transformer.time_embed.time_mlp.2.weight", |
| "transformer.text_embed.text_embed.weight", |
| "transformer.input_embed.proj.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight", |
| "transformer.layers.0.2.to_q.weight", |
| "transformer.layers.0.2.to_k.weight", |
| "transformer.layers.0.2.to_v.weight", |
| "transformer.layers.0.2.to_out.0.weight", |
| "transformer.layers.0.4.ff.0.0.weight", |
| "transformer.layers.0.4.ff.2.weight", |
| "transformer.layers.1.2.to_q.weight", |
| "transformer.layers.1.2.to_k.weight", |
| "transformer.layers.1.2.to_v.weight", |
| "transformer.layers.1.2.to_out.0.weight", |
| "transformer.layers.1.4.ff.0.0.weight", |
| "transformer.layers.1.4.ff.2.weight", |
| "transformer.layers.2.2.to_q.weight", |
| "transformer.layers.2.2.to_k.weight", |
| "transformer.layers.2.2.to_v.weight", |
| "transformer.layers.2.2.to_out.0.weight", |
| "transformer.layers.2.4.ff.0.0.weight", |
| "transformer.layers.2.4.ff.2.weight", |
| "transformer.layers.3.2.to_q.weight", |
| "transformer.layers.3.2.to_k.weight", |
| "transformer.layers.3.2.to_v.weight", |
| "transformer.layers.3.2.to_out.0.weight", |
| "transformer.layers.3.4.ff.0.0.weight", |
| "transformer.layers.3.4.ff.2.weight", |
| "transformer.layers.4.2.to_q.weight", |
| "transformer.layers.4.2.to_k.weight", |
| "transformer.layers.4.2.to_v.weight", |
| "transformer.layers.4.2.to_out.0.weight", |
| "transformer.layers.4.4.ff.0.0.weight", |
| "transformer.layers.4.4.ff.2.weight", |
| "transformer.layers.5.2.to_q.weight", |
| "transformer.layers.5.2.to_k.weight", |
| "transformer.layers.5.2.to_v.weight", |
| "transformer.layers.5.2.to_out.0.weight", |
| "transformer.layers.5.4.ff.0.0.weight", |
| "transformer.layers.5.4.ff.2.weight", |
| "transformer.layers.6.2.to_q.weight", |
| "transformer.layers.6.2.to_k.weight", |
| "transformer.layers.6.2.to_v.weight", |
| "transformer.layers.6.2.to_out.0.weight", |
| "transformer.layers.6.4.ff.0.0.weight", |
| "transformer.layers.6.4.ff.2.weight", |
| "transformer.layers.7.2.to_q.weight", |
| "transformer.layers.7.2.to_k.weight", |
| "transformer.layers.7.2.to_v.weight", |
| "transformer.layers.7.2.to_out.0.weight", |
| "transformer.layers.7.4.ff.0.0.weight", |
| "transformer.layers.7.4.ff.2.weight", |
| "transformer.layers.8.4.ff.0.0.weight", |
| "transformer.layers.8.4.ff.2.weight", |
| "transformer.layers.9.4.ff.0.0.weight", |
| "transformer.layers.9.4.ff.2.weight", |
| "transformer.layers.10.4.ff.0.0.weight", |
| "transformer.layers.10.4.ff.2.weight", |
| "transformer.layers.11.4.ff.0.0.weight", |
| "transformer.layers.11.4.ff.2.weight", |
| "transformer.layers.12.4.ff.0.0.weight", |
| "transformer.layers.12.4.ff.2.weight", |
| "transformer.layers.13.0.weight", |
| "transformer.layers.13.4.ff.0.0.weight", |
| "transformer.layers.13.4.ff.2.weight", |
| "transformer.layers.14.0.weight", |
| "transformer.layers.14.4.ff.0.0.weight", |
| "transformer.layers.14.4.ff.2.weight", |
| "transformer.layers.15.0.weight", |
| "transformer.layers.15.4.ff.0.0.weight", |
| "transformer.layers.15.4.ff.2.weight", |
| "transformer.layers.16.4.ff.0.0.weight", |
| "transformer.layers.16.4.ff.2.weight", |
| "transformer.layers.17.4.ff.0.0.weight", |
| "transformer.layers.17.4.ff.2.weight", |
| "transformer.layers.18.4.ff.0.0.weight", |
| "transformer.layers.18.4.ff.2.weight", |
| "transformer.layers.19.4.ff.0.0.weight", |
| "transformer.layers.19.4.ff.2.weight", |
| "transformer.layers.20.4.ff.0.0.weight", |
| "transformer.layers.20.4.ff.2.weight", |
| "transformer.layers.21.4.ff.0.0.weight", |
| "transformer.layers.21.4.ff.2.weight", |
| "transformer.layers.22.4.ff.0.0.weight", |
| "transformer.layers.22.4.ff.2.weight", |
| "transformer.layers.23.4.ff.0.0.weight", |
| "transformer.layers.23.4.ff.2.weight", |
| "transformer.layers.24.4.ff.0.0.weight", |
| "transformer.layers.24.4.ff.2.weight", |
| "transformer.layers.25.4.ff.0.0.weight", |
| "transformer.layers.25.4.ff.2.weight" |
| ], |
| "bottleneck_layers": [], |
| "recommendations": { |
| "focus_layers": [ |
| "transformer.time_embed.time_mlp.0.weight", |
| "transformer.time_embed.time_mlp.2.weight", |
| "transformer.text_embed.text_embed.weight", |
| "transformer.input_embed.proj.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight", |
| "transformer.layers.0.2.to_q.weight", |
| "transformer.layers.0.2.to_k.weight", |
| "transformer.layers.0.2.to_v.weight", |
| "transformer.layers.0.2.to_out.0.weight", |
| "transformer.layers.0.4.ff.0.0.weight", |
| "transformer.layers.0.4.ff.2.weight", |
| "transformer.layers.1.2.to_q.weight", |
| "transformer.layers.1.2.to_k.weight", |
| "transformer.layers.1.2.to_v.weight", |
| "transformer.layers.1.2.to_out.0.weight", |
| "transformer.layers.1.4.ff.0.0.weight", |
| "transformer.layers.1.4.ff.2.weight", |
| "transformer.layers.2.2.to_q.weight", |
| "transformer.layers.2.2.to_k.weight", |
| "transformer.layers.2.2.to_v.weight", |
| "transformer.layers.2.2.to_out.0.weight", |
| "transformer.layers.2.4.ff.0.0.weight", |
| "transformer.layers.2.4.ff.2.weight", |
| "transformer.layers.3.2.to_q.weight", |
| "transformer.layers.3.2.to_k.weight", |
| "transformer.layers.3.2.to_v.weight", |
| "transformer.layers.3.2.to_out.0.weight", |
| "transformer.layers.3.4.ff.0.0.weight", |
| "transformer.layers.3.4.ff.2.weight", |
| "transformer.layers.4.2.to_q.weight", |
| "transformer.layers.4.2.to_k.weight", |
| "transformer.layers.4.2.to_v.weight", |
| "transformer.layers.4.2.to_out.0.weight", |
| "transformer.layers.4.4.ff.0.0.weight", |
| "transformer.layers.4.4.ff.2.weight", |
| "transformer.layers.5.2.to_q.weight", |
| "transformer.layers.5.2.to_k.weight", |
| "transformer.layers.5.2.to_v.weight", |
| "transformer.layers.5.2.to_out.0.weight", |
| "transformer.layers.5.4.ff.0.0.weight", |
| "transformer.layers.5.4.ff.2.weight", |
| "transformer.layers.6.2.to_q.weight", |
| "transformer.layers.6.2.to_k.weight", |
| "transformer.layers.6.2.to_v.weight", |
| "transformer.layers.6.2.to_out.0.weight", |
| "transformer.layers.6.4.ff.0.0.weight", |
| "transformer.layers.6.4.ff.2.weight", |
| "transformer.layers.7.2.to_q.weight", |
| "transformer.layers.7.2.to_k.weight", |
| "transformer.layers.7.2.to_v.weight", |
| "transformer.layers.7.2.to_out.0.weight", |
| "transformer.layers.7.4.ff.0.0.weight", |
| "transformer.layers.7.4.ff.2.weight", |
| "transformer.layers.8.4.ff.0.0.weight", |
| "transformer.layers.8.4.ff.2.weight", |
| "transformer.layers.9.4.ff.0.0.weight", |
| "transformer.layers.9.4.ff.2.weight", |
| "transformer.layers.10.4.ff.0.0.weight", |
| "transformer.layers.10.4.ff.2.weight", |
| "transformer.layers.11.4.ff.0.0.weight", |
| "transformer.layers.11.4.ff.2.weight", |
| "transformer.layers.12.4.ff.0.0.weight", |
| "transformer.layers.12.4.ff.2.weight", |
| "transformer.layers.13.0.weight", |
| "transformer.layers.13.4.ff.0.0.weight", |
| "transformer.layers.13.4.ff.2.weight", |
| "transformer.layers.14.0.weight", |
| "transformer.layers.14.4.ff.0.0.weight", |
| "transformer.layers.14.4.ff.2.weight", |
| "transformer.layers.15.0.weight", |
| "transformer.layers.15.4.ff.0.0.weight", |
| "transformer.layers.15.4.ff.2.weight", |
| "transformer.layers.16.4.ff.0.0.weight", |
| "transformer.layers.16.4.ff.2.weight", |
| "transformer.layers.17.4.ff.0.0.weight", |
| "transformer.layers.17.4.ff.2.weight", |
| "transformer.layers.18.4.ff.0.0.weight", |
| "transformer.layers.18.4.ff.2.weight", |
| "transformer.layers.19.4.ff.0.0.weight", |
| "transformer.layers.19.4.ff.2.weight", |
| "transformer.layers.20.4.ff.0.0.weight", |
| "transformer.layers.20.4.ff.2.weight", |
| "transformer.layers.21.4.ff.0.0.weight", |
| "transformer.layers.21.4.ff.2.weight", |
| "transformer.layers.22.4.ff.0.0.weight", |
| "transformer.layers.22.4.ff.2.weight", |
| "transformer.layers.23.4.ff.0.0.weight", |
| "transformer.layers.23.4.ff.2.weight", |
| "transformer.layers.24.4.ff.0.0.weight", |
| "transformer.layers.24.4.ff.2.weight", |
| "transformer.layers.25.4.ff.0.0.weight", |
| "transformer.layers.25.4.ff.2.weight" |
| ] |
| }, |
| "total_parameters": 391, |
| "total_elements": 360755948, |
| "param_ranges": { |
| "transformer.time_embed.time_mlp.0.weight": { |
| "min": -0.43058744072914124, |
| "max": 0.29903075098991394, |
| "mean": -0.0025567002594470978, |
| "std": 0.04255249723792076, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 256 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.0.bias": { |
| "min": -0.06321248412132263, |
| "max": 0.107655830681324, |
| "mean": 0.0005928671453148127, |
| "std": 0.03411800414323807, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.2.weight": { |
| "min": -0.4126332402229309, |
| "max": 0.8362816572189331, |
| "mean": -0.00021067322813905776, |
| "std": 0.024107061326503754, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.2.bias": { |
| "min": -0.11544923484325409, |
| "max": 0.3215144872665405, |
| "mean": -0.0009406265453435481, |
| "std": 0.01957659050822258, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.text_embed.text_embed.weight": { |
| "min": -2.791715383529663, |
| "max": 2.870434045791626, |
| "mean": -0.0003647833364084363, |
| "std": 0.6153609752655029, |
| "sparsity": 0.0, |
| "shape": [ |
| 2546, |
| 100 |
| ] |
| }, |
| "transformer.input_embed.proj.weight": { |
| "min": -0.27896371483802795, |
| "max": 0.3819044828414917, |
| "mean": 0.0004220041155349463, |
| "std": 0.04275014251470566, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 300 |
| ] |
| }, |
| "transformer.input_embed.proj.bias": { |
| "min": -0.22224494814872742, |
| "max": 0.20959755778312683, |
| "mean": -0.004497884772717953, |
| "std": 0.040913522243499756, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { |
| "min": -0.4279385209083557, |
| "max": 0.4752762019634247, |
| "mean": 2.009033551075845e-06, |
| "std": 0.024508582428097725, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 64, |
| 31 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { |
| "min": -0.32550832629203796, |
| "max": 0.1569339483976364, |
| "mean": -0.046702392399311066, |
| "std": 0.0515773706138134, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { |
| "min": -0.4104919135570526, |
| "max": 0.3544883131980896, |
| "mean": -0.00012644486560020596, |
| "std": 0.02360026352107525, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 64, |
| 31 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { |
| "min": -0.229718416929245, |
| "max": 0.26262396574020386, |
| "mean": -0.02914787270128727, |
| "std": 0.04934746399521828, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.1.g": { |
| "min": 0.2545970380306244, |
| "max": 0.8200467824935913, |
| "mean": 0.5254305601119995, |
| "std": 0.08080543577671051, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_q.weight": { |
| "min": -0.29690292477607727, |
| "max": 0.26533740758895874, |
| "mean": -0.00042425302672199905, |
| "std": 0.0321030355989933, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_q.bias": { |
| "min": -0.09272623807191849, |
| "max": 0.12487658858299255, |
| "mean": 0.0006494724657386541, |
| "std": 0.025737110525369644, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_k.weight": { |
| "min": -0.29031693935394287, |
| "max": 0.2813326120376587, |
| "mean": -7.68666504882276e-05, |
| "std": 0.03093528188765049, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_k.bias": { |
| "min": -5.899355888366699, |
| "max": 5.814132213592529, |
| "mean": -0.00933213159441948, |
| "std": 1.29543137550354, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_v.weight": { |
| "min": -0.42477670311927795, |
| "max": 0.3437301814556122, |
| "mean": 9.746497380547225e-05, |
| "std": 0.029952634125947952, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_v.bias": { |
| "min": -0.028919341042637825, |
| "max": 0.027677638456225395, |
| "mean": -0.00031004834454506636, |
| "std": 0.012572667561471462, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_out.0.weight": { |
| "min": -0.4539007246494293, |
| "max": 0.4487650692462921, |
| "mean": 2.293557918164879e-05, |
| "std": 0.023855043575167656, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_out.0.bias": { |
| "min": -0.08868313580751419, |
| "max": 0.09119853377342224, |
| "mean": 0.0022740147542208433, |
| "std": 0.019512386992573738, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.3.g": { |
| "min": 0.2666647434234619, |
| "max": 1.0563400983810425, |
| "mean": 0.5311195850372314, |
| "std": 0.10441721975803375, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.4.ff.0.0.weight": { |
| "min": -0.5746223330497742, |
| "max": 0.6085677742958069, |
| "mean": -0.0004311846860218793, |
| "std": 0.038594383746385574, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.4.ff.0.0.bias": { |
| "min": -0.18177427351474762, |
| "max": 0.04579279571771622, |
| "mean": -0.029445737600326538, |
| "std": 0.04258440434932709, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.0.4.ff.2.weight": { |
| "min": -1.1666346788406372, |
| "max": 1.6346005201339722, |
| "mean": 0.0003186643880326301, |
| "std": 0.027693353593349457, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.0.4.ff.2.bias": { |
| "min": -0.16253599524497986, |
| "max": 0.20575034618377686, |
| "mean": -0.02111678197979927, |
| "std": 0.027937985956668854, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.1.g": { |
| "min": 0.22444167733192444, |
| "max": 0.8436422944068909, |
| "mean": 0.4875181317329407, |
| "std": 0.07519698888063431, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_q.weight": { |
| "min": -0.25531357526779175, |
| "max": 0.3059065341949463, |
| "mean": -9.770956239663064e-06, |
| "std": 0.03346950560808182, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_q.bias": { |
| "min": -0.0954207256436348, |
| "max": 0.11047575622797012, |
| "mean": 5.4158546845428646e-05, |
| "std": 0.026984980329871178, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_k.weight": { |
| "min": -0.2974885404109955, |
| "max": 0.29604607820510864, |
| "mean": 5.041498661739752e-05, |
| "std": 0.03253797069191933, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_k.bias": { |
| "min": -5.164185523986816, |
| "max": 5.084409236907959, |
| "mean": -0.014593909494578838, |
| "std": 1.1573563814163208, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_v.weight": { |
| "min": -0.34487831592559814, |
| "max": 0.34348052740097046, |
| "mean": 7.885653030825779e-05, |
| "std": 0.030057402327656746, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_v.bias": { |
| "min": -0.03615832328796387, |
| "max": 0.03314381092786789, |
| "mean": -0.00014287084923125803, |
| "std": 0.01301794033497572, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_out.0.weight": { |
| "min": -0.31527891755104065, |
| "max": 0.3751768469810486, |
| "mean": -2.1734818801633082e-05, |
| "std": 0.02405463345348835, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_out.0.bias": { |
| "min": -0.10528924316167831, |
| "max": 0.12185486406087875, |
| "mean": -0.0019566768314689398, |
| "std": 0.028841182589530945, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.3.g": { |
| "min": 0.3117589056491852, |
| "max": 1.1208702325820923, |
| "mean": 0.6662365198135376, |
| "std": 0.09775208681821823, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.4.ff.0.0.weight": { |
| "min": -0.872468888759613, |
| "max": 0.6275586485862732, |
| "mean": 0.0016758753918111324, |
| "std": 0.047438040375709534, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.4.ff.0.0.bias": { |
| "min": -0.2710355520248413, |
| "max": 0.03406016156077385, |
| "mean": -0.04659765958786011, |
| "std": 0.04059656709432602, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.1.4.ff.2.weight": { |
| "min": -0.9201626181602478, |
| "max": 0.9643434882164001, |
| "mean": 0.0010215931106358767, |
| "std": 0.04070163145661354, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.1.4.ff.2.bias": { |
| "min": -0.14462199807167053, |
| "max": 0.07486966252326965, |
| "mean": -0.009085646830499172, |
| "std": 0.02570141665637493, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.1.g": { |
| "min": 0.23963269591331482, |
| "max": 0.7123461365699768, |
| "mean": 0.4472006559371948, |
| "std": 0.05932367965579033, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_q.weight": { |
| "min": -0.2729354500770569, |
| "max": 0.29745981097221375, |
| "mean": 8.72666532814037e-06, |
| "std": 0.03547453135251999, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_q.bias": { |
| "min": -0.11902111023664474, |
| "max": 0.1184910237789154, |
| "mean": 0.0007516429759562016, |
| "std": 0.02761562168598175, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_k.weight": { |
| "min": -0.28102290630340576, |
| "max": 0.27947571873664856, |
| "mean": -7.658830872969702e-05, |
| "std": 0.03510264679789543, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_k.bias": { |
| "min": -2.509542465209961, |
| "max": 2.521538496017456, |
| "mean": 0.026744995266199112, |
| "std": 0.5867680311203003, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_v.weight": { |
| "min": -0.2209818959236145, |
| "max": 0.2715614438056946, |
| "mean": 2.5364215616718866e-06, |
| "std": 0.0307310800999403, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_v.bias": { |
| "min": -0.03315867856144905, |
| "max": 0.0312359519302845, |
| "mean": 0.00011449654994066805, |
| "std": 0.012396099045872688, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_out.0.weight": { |
| "min": -0.23535357415676117, |
| "max": 0.23171932995319366, |
| "mean": 5.724863876821473e-05, |
| "std": 0.025697464123368263, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_out.0.bias": { |
| "min": -0.13585864007472992, |
| "max": 0.12803053855895996, |
| "mean": -0.0054976665414869785, |
| "std": 0.039962489157915115, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.3.g": { |
| "min": 0.3546965718269348, |
| "max": 1.1723699569702148, |
| "mean": 0.7105212211608887, |
| "std": 0.10377959161996841, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.4.ff.0.0.weight": { |
| "min": -0.6174826622009277, |
| "max": 0.5556296706199646, |
| "mean": 0.001160400453954935, |
| "std": 0.04611344262957573, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.4.ff.0.0.bias": { |
| "min": -0.18955032527446747, |
| "max": 0.024929288774728775, |
| "mean": -0.03484814986586571, |
| "std": 0.02862328663468361, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.2.4.ff.2.weight": { |
| "min": -1.130905032157898, |
| "max": 0.970402181148529, |
| "mean": 0.00035809652763418853, |
| "std": 0.04234178364276886, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.2.4.ff.2.bias": { |
| "min": -0.5977792143821716, |
| "max": 0.06286704540252686, |
| "mean": -0.004878203850239515, |
| "std": 0.028615841642022133, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.1.g": { |
| "min": 0.3753129839897156, |
| "max": 0.9404288530349731, |
| "mean": 0.5924519896507263, |
| "std": 0.06695062667131424, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_q.weight": { |
| "min": -0.3918393850326538, |
| "max": 0.3694100081920624, |
| "mean": 7.003510108916089e-05, |
| "std": 0.03718580678105354, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_q.bias": { |
| "min": -0.11892382800579071, |
| "max": 0.1364460289478302, |
| "mean": 0.0009139248286373913, |
| "std": 0.02918536402285099, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_k.weight": { |
| "min": -0.6189467310905457, |
| "max": 0.5086581707000732, |
| "mean": 1.522459842817625e-05, |
| "std": 0.036438774317502975, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_k.bias": { |
| "min": -8.18658447265625, |
| "max": 8.788694381713867, |
| "mean": -0.10927355289459229, |
| "std": 1.6988238096237183, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_v.weight": { |
| "min": -0.27650272846221924, |
| "max": 0.2397344559431076, |
| "mean": 5.2208531997166574e-05, |
| "std": 0.03261270374059677, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_v.bias": { |
| "min": -0.051591187715530396, |
| "max": 0.039499007165431976, |
| "mean": 9.101108298636973e-05, |
| "std": 0.01296647172421217, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_out.0.weight": { |
| "min": -0.2308182418346405, |
| "max": 0.23492185771465302, |
| "mean": -2.198125366703607e-05, |
| "std": 0.0293892789632082, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_out.0.bias": { |
| "min": -0.20422494411468506, |
| "max": 0.10520327836275101, |
| "mean": -0.004020952619612217, |
| "std": 0.032637566328048706, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.3.g": { |
| "min": 0.3395068645477295, |
| "max": 1.0124397277832031, |
| "mean": 0.7006875872612, |
| "std": 0.09675538539886475, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.4.ff.0.0.weight": { |
| "min": -0.5645881295204163, |
| "max": 0.8335761427879333, |
| "mean": 0.00041510065784677863, |
| "std": 0.04229363799095154, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.4.ff.0.0.bias": { |
| "min": -0.2121758759021759, |
| "max": 0.0300263874232769, |
| "mean": -0.032174285501241684, |
| "std": 0.026499440893530846, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.3.4.ff.2.weight": { |
| "min": -0.7549118995666504, |
| "max": 0.7191137671470642, |
| "mean": -1.6272973880404606e-05, |
| "std": 0.03683432564139366, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.3.4.ff.2.bias": { |
| "min": -0.2633835971355438, |
| "max": 0.10630631446838379, |
| "mean": -0.00301279011182487, |
| "std": 0.028871648013591766, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.1.g": { |
| "min": 0.2839854061603546, |
| "max": 0.695024311542511, |
| "mean": 0.49937066435813904, |
| "std": 0.04653334617614746, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_q.weight": { |
| "min": -0.2781727910041809, |
| "max": 0.23389220237731934, |
| "mean": -0.00011100011033704504, |
| "std": 0.0387568399310112, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_q.bias": { |
| "min": -0.15358875691890717, |
| "max": 0.12641564011573792, |
| "mean": -0.0022295925300568342, |
| "std": 0.03333538770675659, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_k.weight": { |
| "min": -0.41443270444869995, |
| "max": 0.6594027280807495, |
| "mean": -1.858997711678967e-05, |
| "std": 0.03909648209810257, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_k.bias": { |
| "min": -4.237802028656006, |
| "max": 4.722365379333496, |
| "mean": -0.020456280559301376, |
| "std": 1.0076717138290405, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_v.weight": { |
| "min": -0.24511729180812836, |
| "max": 0.20752397179603577, |
| "mean": 4.432153218658641e-05, |
| "std": 0.03396220877766609, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_v.bias": { |
| "min": -0.03445148468017578, |
| "max": 0.044871583580970764, |
| "mean": -1.9065962987951934e-05, |
| "std": 0.012637496925890446, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_out.0.weight": { |
| "min": -0.20115934312343597, |
| "max": 0.20639759302139282, |
| "mean": -2.9241522497613914e-05, |
| "std": 0.031020423397421837, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_out.0.bias": { |
| "min": -0.19977232813835144, |
| "max": 0.1132478341460228, |
| "mean": -0.002891883021220565, |
| "std": 0.03452973812818527, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.3.g": { |
| "min": 0.3667006194591522, |
| "max": 1.0575865507125854, |
| "mean": 0.6704831123352051, |
| "std": 0.06640235334634781, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.4.ff.0.0.weight": { |
| "min": -0.39832764863967896, |
| "max": 0.5020085573196411, |
| "mean": -3.8792531995568424e-05, |
| "std": 0.041129473596811295, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.4.ff.0.0.bias": { |
| "min": -0.12865175306797028, |
| "max": 0.02696564421057701, |
| "mean": -0.030531559139490128, |
| "std": 0.021883869543671608, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.4.4.ff.2.weight": { |
| "min": -0.44955554604530334, |
| "max": 0.4331819415092468, |
| "mean": 7.46890582377091e-05, |
| "std": 0.034889888018369675, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.4.4.ff.2.bias": { |
| "min": -0.26744911074638367, |
| "max": 0.07309805601835251, |
| "mean": -0.0010887861717492342, |
| "std": 0.023132896050810814, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.1.g": { |
| "min": 0.28746652603149414, |
| "max": 0.6852710843086243, |
| "mean": 0.5245163440704346, |
| "std": 0.04753531143069267, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_q.weight": { |
| "min": -0.2225414365530014, |
| "max": 0.2233862727880478, |
| "mean": 1.5953022739267908e-05, |
| "std": 0.038948602974414825, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_q.bias": { |
| "min": -0.13633988797664642, |
| "max": 0.10930000245571136, |
| "mean": 0.00024919791030697525, |
| "std": 0.029206812381744385, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_k.weight": { |
| "min": -0.3749636113643646, |
| "max": 0.43756094574928284, |
| "mean": -9.44960629567504e-06, |
| "std": 0.03928674757480621, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_k.bias": { |
| "min": -3.845799684524536, |
| "max": 4.999211311340332, |
| "mean": 0.009741385467350483, |
| "std": 0.8452029228210449, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_v.weight": { |
| "min": -0.22279420495033264, |
| "max": 0.22023756802082062, |
| "mean": -3.8509870137204416e-07, |
| "std": 0.03440963104367256, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_v.bias": { |
| "min": -0.04381667822599411, |
| "max": 0.03586551547050476, |
| "mean": -0.0002609736402519047, |
| "std": 0.012077639810740948, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_out.0.weight": { |
| "min": -0.21273735165596008, |
| "max": 0.18841038644313812, |
| "mean": -1.714246354822535e-05, |
| "std": 0.031536102294921875, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_out.0.bias": { |
| "min": -0.18087971210479736, |
| "max": 0.12077755481004715, |
| "mean": -0.0023926026187837124, |
| "std": 0.04127210006117821, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.3.g": { |
| "min": 0.4229143261909485, |
| "max": 0.941786527633667, |
| "mean": 0.6626389026641846, |
| "std": 0.056811243295669556, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.4.ff.0.0.weight": { |
| "min": -0.37079188227653503, |
| "max": 0.47652140259742737, |
| "mean": -8.189280197257176e-05, |
| "std": 0.040888600051403046, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.4.ff.0.0.bias": { |
| "min": -0.20858491957187653, |
| "max": 0.027342500165104866, |
| "mean": -0.03023093193769455, |
| "std": 0.021366029977798462, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.5.4.ff.2.weight": { |
| "min": -0.3407646119594574, |
| "max": 0.7343085408210754, |
| "mean": 8.227993384934962e-05, |
| "std": 0.03476560488343239, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.5.4.ff.2.bias": { |
| "min": -0.2401275634765625, |
| "max": 0.05064300820231438, |
| "mean": -0.0011859382502734661, |
| "std": 0.020460018888115883, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.1.g": { |
| "min": 0.3059234321117401, |
| "max": 0.6536071300506592, |
| "mean": 0.5251041054725647, |
| "std": 0.046117961406707764, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_q.weight": { |
| "min": -0.30434539914131165, |
| "max": 0.21718497574329376, |
| "mean": 6.997769378358498e-05, |
| "std": 0.03949679434299469, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_q.bias": { |
| "min": -0.1491607427597046, |
| "max": 0.1309996247291565, |
| "mean": 0.00032534098136238754, |
| "std": 0.030453510582447052, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_k.weight": { |
| "min": -0.25696200132369995, |
| "max": 0.20183700323104858, |
| "mean": 3.1303323339670897e-05, |
| "std": 0.0394880436360836, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_k.bias": { |
| "min": -2.3362133502960205, |
| "max": 2.3758370876312256, |
| "mean": -0.026241015642881393, |
| "std": 0.4497620761394501, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_v.weight": { |
| "min": -0.1885133534669876, |
| "max": 0.21026504039764404, |
| "mean": 3.72500107914675e-05, |
| "std": 0.03479313850402832, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_v.bias": { |
| "min": -0.03166966885328293, |
| "max": 0.035711731761693954, |
| "mean": -0.00019632275507319719, |
| "std": 0.012291603721678257, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_out.0.weight": { |
| "min": -0.18826794624328613, |
| "max": 0.17029285430908203, |
| "mean": -6.840371497673914e-05, |
| "std": 0.03216983750462532, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_out.0.bias": { |
| "min": -0.13950176537036896, |
| "max": 0.13710856437683105, |
| "mean": -0.002513276878744364, |
| "std": 0.05129357427358627, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.3.g": { |
| "min": 0.46702930331230164, |
| "max": 0.9555635452270508, |
| "mean": 0.6688482761383057, |
| "std": 0.05276886373758316, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.4.ff.0.0.weight": { |
| "min": -0.3244642913341522, |
| "max": 0.30925843119621277, |
| "mean": -9.10853486857377e-07, |
| "std": 0.04094461724162102, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.4.ff.0.0.bias": { |
| "min": -0.12482384592294693, |
| "max": 0.02569793164730072, |
| "mean": -0.03068721666932106, |
| "std": 0.019822420552372932, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.6.4.ff.2.weight": { |
| "min": -0.43951860070228577, |
| "max": 0.4452158510684967, |
| "mean": 9.512923134025186e-05, |
| "std": 0.03511851280927658, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.6.4.ff.2.bias": { |
| "min": -0.22458022832870483, |
| "max": 0.051897209137678146, |
| "mean": -0.0011794487945735455, |
| "std": 0.018467247486114502, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.1.g": { |
| "min": 0.3391944468021393, |
| "max": 0.7399035096168518, |
| "mean": 0.558688759803772, |
| "std": 0.04139659181237221, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_q.weight": { |
| "min": -0.27298545837402344, |
| "max": 0.2789517045021057, |
| "mean": 2.041603875113651e-05, |
| "std": 0.041056908667087555, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_q.bias": { |
| "min": -0.13676847517490387, |
| "max": 0.1398179680109024, |
| "mean": 0.0004908779519610107, |
| "std": 0.026629263535141945, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_k.weight": { |
| "min": -0.49038437008857727, |
| "max": 0.35562369227409363, |
| "mean": 8.908439485821873e-05, |
| "std": 0.04069468006491661, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_k.bias": { |
| "min": -2.297020673751831, |
| "max": 1.7451350688934326, |
| "mean": -0.02108073979616165, |
| "std": 0.5001184940338135, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_v.weight": { |
| "min": -0.2181541919708252, |
| "max": 0.19748014211654663, |
| "mean": -4.031343632959761e-05, |
| "std": 0.034232787787914276, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_v.bias": { |
| "min": -0.0411330908536911, |
| "max": 0.03885316848754883, |
| "mean": -0.00013403715274762362, |
| "std": 0.012882057577371597, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_out.0.weight": { |
| "min": -0.17773869633674622, |
| "max": 0.18285222351551056, |
| "mean": 4.8017449444159865e-05, |
| "std": 0.03155619651079178, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_out.0.bias": { |
| "min": -0.18002314865589142, |
| "max": 0.18396146595478058, |
| "mean": -0.0022139688953757286, |
| "std": 0.05483314022421837, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.3.g": { |
| "min": 0.474223792552948, |
| "max": 1.025842308998108, |
| "mean": 0.6452140212059021, |
| "std": 0.05035461485385895, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.4.ff.0.0.weight": { |
| "min": -0.2715917229652405, |
| "max": 0.30928391218185425, |
| "mean": 0.00011250950046814978, |
| "std": 0.04068081080913544, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.4.ff.0.0.bias": { |
| "min": -0.10539427399635315, |
| "max": 0.026698507368564606, |
| "mean": -0.02951802872121334, |
| "std": 0.017934730276465416, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.7.4.ff.2.weight": { |
| "min": -0.3393958806991577, |
| "max": 0.3293214440345764, |
| "mean": 5.262523700366728e-05, |
| "std": 0.03441222757101059, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.7.4.ff.2.bias": { |
| "min": -0.18173128366470337, |
| "max": 0.04261557012796402, |
| "mean": -0.001059417612850666, |
| "std": 0.017207711935043335, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.1.g": { |
| "min": 0.32517319917678833, |
| "max": 0.6865989565849304, |
| "mean": 0.5111718773841858, |
| "std": 0.03694766014814377, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_q.weight": { |
| "min": -0.2340274453163147, |
| "max": 0.22541004419326782, |
| "mean": -3.624596502049826e-05, |
| "std": 0.039175089448690414, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_q.bias": { |
| "min": -0.11520740389823914, |
| "max": 0.1319286823272705, |
| "mean": 0.00015029977657832205, |
| "std": 0.029165174812078476, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_k.weight": { |
| "min": -0.3522850573062897, |
| "max": 0.28482842445373535, |
| "mean": 6.6099587456847075e-06, |
| "std": 0.03924406319856644, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_k.bias": { |
| "min": -4.132234573364258, |
| "max": 3.5437166690826416, |
| "mean": -0.011590443551540375, |
| "std": 0.6826013326644897, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_v.weight": { |
| "min": -0.21073928475379944, |
| "max": 0.20945559442043304, |
| "mean": 3.4624928957782686e-05, |
| "std": 0.03448405861854553, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_v.bias": { |
| "min": -0.035892292857170105, |
| "max": 0.0479779876768589, |
| "mean": 0.0007904525264166296, |
| "std": 0.012872384861111641, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_out.0.weight": { |
| "min": -0.21030081808567047, |
| "max": 0.19305069744586945, |
| "mean": -9.318873708252795e-07, |
| "std": 0.03169514983892441, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_out.0.bias": { |
| "min": -0.18656854331493378, |
| "max": 0.17726241052150726, |
| "mean": -0.002840438624843955, |
| "std": 0.0586128756403923, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.3.g": { |
| "min": 0.4746079444885254, |
| "max": 1.041317105293274, |
| "mean": 0.6513123512268066, |
| "std": 0.04965612292289734, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.4.ff.0.0.weight": { |
| "min": -0.24824990332126617, |
| "max": 0.32916077971458435, |
| "mean": 0.0001809034583857283, |
| "std": 0.04056909307837486, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.4.ff.0.0.bias": { |
| "min": -0.1252717822790146, |
| "max": 0.024853328242897987, |
| "mean": -0.03049679473042488, |
| "std": 0.01761467382311821, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.8.4.ff.2.weight": { |
| "min": -0.4204847514629364, |
| "max": 0.4814334511756897, |
| "mean": 1.0858502719202079e-06, |
| "std": 0.03539634868502617, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.8.4.ff.2.bias": { |
| "min": -0.1512894481420517, |
| "max": 0.0435330905020237, |
| "mean": 4.2967651097569615e-05, |
| "std": 0.014878639951348305, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.1.g": { |
| "min": 0.31564587354660034, |
| "max": 0.6816184520721436, |
| "mean": 0.5528937578201294, |
| "std": 0.04068783298134804, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_q.weight": { |
| "min": -0.20636627078056335, |
| "max": 0.2197655737400055, |
| "mean": 3.1909676181385294e-05, |
| "std": 0.038298994302749634, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_q.bias": { |
| "min": -0.13777659833431244, |
| "max": 0.11261031776666641, |
| "mean": 2.2643122065346688e-05, |
| "std": 0.025812044739723206, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_k.weight": { |
| "min": -0.40279680490493774, |
| "max": 0.3708725571632385, |
| "mean": 2.5475083020864986e-05, |
| "std": 0.03817913681268692, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_k.bias": { |
| "min": -3.770826816558838, |
| "max": 2.8686459064483643, |
| "mean": 0.001154756173491478, |
| "std": 0.5168185234069824, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_v.weight": { |
| "min": -0.20366711914539337, |
| "max": 0.1976872831583023, |
| "mean": 2.9746484869974665e-05, |
| "std": 0.03429698571562767, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_v.bias": { |
| "min": -0.050587497651576996, |
| "max": 0.039878759533166885, |
| "mean": -0.00042467116145417094, |
| "std": 0.013416356407105923, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_out.0.weight": { |
| "min": -0.19594806432724, |
| "max": 0.20180270075798035, |
| "mean": -1.2511954992078245e-05, |
| "std": 0.031805265694856644, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_out.0.bias": { |
| "min": -0.1929892897605896, |
| "max": 0.19512949883937836, |
| "mean": -0.002963980659842491, |
| "std": 0.06252874433994293, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.3.g": { |
| "min": 0.3488827645778656, |
| "max": 1.0837209224700928, |
| "mean": 0.6670882701873779, |
| "std": 0.05524449050426483, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.4.ff.0.0.weight": { |
| "min": -0.22563330829143524, |
| "max": 0.25133612751960754, |
| "mean": 0.00035861917422153056, |
| "std": 0.040758710354566574, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.4.ff.0.0.bias": { |
| "min": -0.09100860357284546, |
| "max": 0.04368036612868309, |
| "mean": -0.03007863275706768, |
| "std": 0.01761433854699135, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.9.4.ff.2.weight": { |
| "min": -0.35325002670288086, |
| "max": 0.3038857877254486, |
| "mean": -4.542069655144587e-05, |
| "std": 0.037121765315532684, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.9.4.ff.2.bias": { |
| "min": -0.16173334419727325, |
| "max": 0.06341976672410965, |
| "mean": -7.59128452045843e-05, |
| "std": 0.019423963502049446, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.1.g": { |
| "min": 0.348746657371521, |
| "max": 0.7219499945640564, |
| "mean": 0.5423322916030884, |
| "std": 0.03906194120645523, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_q.weight": { |
| "min": -0.21932680904865265, |
| "max": 0.22335435450077057, |
| "mean": -1.1452927537902724e-05, |
| "std": 0.03923005238175392, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_q.bias": { |
| "min": -0.11840008199214935, |
| "max": 0.1704910695552826, |
| "mean": 0.00028676993679255247, |
| "std": 0.025109266862273216, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_k.weight": { |
| "min": -0.24656711518764496, |
| "max": 0.30068346858024597, |
| "mean": -3.68916334991809e-05, |
| "std": 0.03892939165234566, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_k.bias": { |
| "min": -3.504953145980835, |
| "max": 3.7143990993499756, |
| "mean": 0.015847017988562584, |
| "std": 0.7823704481124878, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_v.weight": { |
| "min": -0.21910522878170013, |
| "max": 0.23737633228302002, |
| "mean": -1.3034959920332767e-05, |
| "std": 0.036302801221609116, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_v.bias": { |
| "min": -0.04721483215689659, |
| "max": 0.051370855420827866, |
| "mean": 0.00048040057299658656, |
| "std": 0.013522167690098286, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_out.0.weight": { |
| "min": -0.2142011672258377, |
| "max": 0.21717870235443115, |
| "mean": 5.644252087222412e-05, |
| "std": 0.03361529856920242, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_out.0.bias": { |
| "min": -0.21134759485721588, |
| "max": 0.23112934827804565, |
| "mean": -0.005099965259432793, |
| "std": 0.061861325055360794, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.3.g": { |
| "min": 0.361937016248703, |
| "max": 1.1009857654571533, |
| "mean": 0.6992422342300415, |
| "std": 0.053594909608364105, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.4.ff.0.0.weight": { |
| "min": -0.2350708544254303, |
| "max": 0.24471336603164673, |
| "mean": 0.00046341665438376367, |
| "std": 0.041268061846494675, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.4.ff.0.0.bias": { |
| "min": -0.0980960875749588, |
| "max": 0.06807035952806473, |
| "mean": -0.03142966330051422, |
| "std": 0.018127702176570892, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.10.4.ff.2.weight": { |
| "min": -0.30174583196640015, |
| "max": 0.3516803979873657, |
| "mean": -8.28510383144021e-05, |
| "std": 0.04027377441525459, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.10.4.ff.2.bias": { |
| "min": -0.1523003727197647, |
| "max": 0.1496732383966446, |
| "mean": 0.00026386568788439035, |
| "std": 0.023037536069750786, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.1.g": { |
| "min": 0.9992449879646301, |
| "max": 1.001513123512268, |
| "mean": 1.0000585317611694, |
| "std": 0.0006324834539555013, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_q.weight": { |
| "min": -0.031258270144462585, |
| "max": 0.031254518777132034, |
| "mean": -1.929036807268858e-05, |
| "std": 0.018040649592876434, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_q.bias": { |
| "min": -0.03122791275382042, |
| "max": 0.030987516045570374, |
| "mean": -0.0010841463226824999, |
| "std": 0.01795026659965515, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_k.weight": { |
| "min": -0.03125518560409546, |
| "max": 0.0312589630484581, |
| "mean": 3.5481098166201264e-06, |
| "std": 0.018041057512164116, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_k.bias": { |
| "min": -0.031153831630945206, |
| "max": 0.03117419220507145, |
| "mean": 0.00033391290344297886, |
| "std": 0.018062464892864227, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_v.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_v.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_out.0.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_out.0.bias": { |
| "min": -0.0006552772247232497, |
| "max": 0.0007129037985578179, |
| "mean": 5.131376383360475e-06, |
| "std": 0.0001946619595400989, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.3.g": { |
| "min": 0.997419536113739, |
| "max": 1.0028407573699951, |
| "mean": 0.9999656081199646, |
| "std": 0.000851841235999018, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.4.ff.0.0.weight": { |
| "min": -0.03356073051691055, |
| "max": 0.03384723141789436, |
| "mean": -5.6891162785177585e-06, |
| "std": 0.018047483637928963, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.4.ff.0.0.bias": { |
| "min": -0.03327289596199989, |
| "max": 0.03337877616286278, |
| "mean": -0.00020134463557042181, |
| "std": 0.017954064533114433, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.11.4.ff.2.weight": { |
| "min": -0.001495416508987546, |
| "max": 0.0016743302112445235, |
| "mean": 2.175480403820984e-06, |
| "std": 0.00029829132836312056, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.11.4.ff.2.bias": { |
| "min": -0.0005666155484504998, |
| "max": 0.0007540585356764495, |
| "mean": 8.17788895801641e-06, |
| "std": 0.00017612945521250367, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.1.g": { |
| "min": 0.3832930624485016, |
| "max": 0.7191212773323059, |
| "mean": 0.5806662440299988, |
| "std": 0.03885548189282417, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_q.weight": { |
| "min": -0.239033043384552, |
| "max": 0.19648200273513794, |
| "mean": 2.5991641450673342e-05, |
| "std": 0.03746527060866356, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_q.bias": { |
| "min": -0.11883819848299026, |
| "max": 0.1667412370443344, |
| "mean": 0.0009821474086493254, |
| "std": 0.02755241096019745, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_k.weight": { |
| "min": -0.24662744998931885, |
| "max": 0.4999285340309143, |
| "mean": -5.0414026190992445e-05, |
| "std": 0.037622544914484024, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_k.bias": { |
| "min": -3.941795825958252, |
| "max": 3.768937587738037, |
| "mean": -0.0035722628235816956, |
| "std": 0.681327760219574, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_v.weight": { |
| "min": -0.22736430168151855, |
| "max": 0.25185492634773254, |
| "mean": -1.1772199286497198e-05, |
| "std": 0.037433888763189316, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_v.bias": { |
| "min": -0.07156982272863388, |
| "max": 0.08060310035943985, |
| "mean": -0.0005125089664943516, |
| "std": 0.01565583609044552, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_out.0.weight": { |
| "min": -0.22800227999687195, |
| "max": 0.25769373774528503, |
| "mean": -2.863763802452013e-05, |
| "std": 0.035420775413513184, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_out.0.bias": { |
| "min": -0.20050473511219025, |
| "max": 0.2148960828781128, |
| "mean": -0.005524474661797285, |
| "std": 0.06832842528820038, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.3.g": { |
| "min": 0.405087411403656, |
| "max": 1.1892733573913574, |
| "mean": 0.7378814816474915, |
| "std": 0.05523177236318588, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.4.ff.0.0.weight": { |
| "min": -0.2209046483039856, |
| "max": 0.24561487138271332, |
| "mean": 0.000521098030731082, |
| "std": 0.041335128247737885, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.4.ff.0.0.bias": { |
| "min": -0.1032090112566948, |
| "max": 0.02416798658668995, |
| "mean": -0.032665450125932693, |
| "std": 0.018891815096139908, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.12.4.ff.2.weight": { |
| "min": -0.4496724605560303, |
| "max": 0.4224262237548828, |
| "mean": -0.0004358820151537657, |
| "std": 0.04689519852399826, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.12.4.ff.2.bias": { |
| "min": -0.2515088617801666, |
| "max": 0.47011902928352356, |
| "mean": 0.003207466099411249, |
| "std": 0.044524550437927246, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.0.weight": { |
| "min": -0.3168761134147644, |
| "max": 0.3331414461135864, |
| "mean": -2.506819146219641e-05, |
| "std": 0.02128741703927517, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.13.1.g": { |
| "min": 0.3245299160480499, |
| "max": 0.6855776906013489, |
| "mean": 0.5709930658340454, |
| "std": 0.04470643773674965, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_q.weight": { |
| "min": -0.1645486205816269, |
| "max": 0.1745065301656723, |
| "mean": -4.8789879656396806e-05, |
| "std": 0.03318168222904205, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_q.bias": { |
| "min": -0.18692335486412048, |
| "max": 0.14329002797603607, |
| "mean": 3.758035018108785e-05, |
| "std": 0.029700448736548424, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_k.weight": { |
| "min": -0.3810470402240753, |
| "max": 0.24586895108222961, |
| "mean": -9.737135769682936e-06, |
| "std": 0.03276293724775314, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_k.bias": { |
| "min": -3.6554298400878906, |
| "max": 3.2897167205810547, |
| "mean": -0.014251163229346275, |
| "std": 0.9850608110427856, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_v.weight": { |
| "min": -0.23475222289562225, |
| "max": 0.2473384439945221, |
| "mean": -1.814275310607627e-05, |
| "std": 0.041697416454553604, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_v.bias": { |
| "min": -0.0725652277469635, |
| "max": 0.15448249876499176, |
| "mean": 0.0006658083875663579, |
| "std": 0.02517012506723404, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_out.0.weight": { |
| "min": -0.2663499712944031, |
| "max": 0.2480984330177307, |
| "mean": -1.5296925994334742e-05, |
| "std": 0.04013863205909729, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_out.0.bias": { |
| "min": -0.18960432708263397, |
| "max": 0.194618359208107, |
| "mean": -0.0012379353865981102, |
| "std": 0.06668508052825928, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.3.g": { |
| "min": 0.32916781306266785, |
| "max": 0.9996783137321472, |
| "mean": 0.7191422581672668, |
| "std": 0.0523388646543026, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.4.ff.0.0.weight": { |
| "min": -0.23172040283679962, |
| "max": 0.2451343685388565, |
| "mean": 0.00018265256949234754, |
| "std": 0.04089942201972008, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.4.ff.0.0.bias": { |
| "min": -0.11451541632413864, |
| "max": 0.01910208724439144, |
| "mean": -0.04247751086950302, |
| "std": 0.0188636165112257, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.13.4.ff.2.weight": { |
| "min": -0.38971978425979614, |
| "max": 0.40751317143440247, |
| "mean": -2.1620868210447952e-05, |
| "std": 0.04853251948952675, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.13.4.ff.2.bias": { |
| "min": -0.6930332779884338, |
| "max": 0.4125932455062866, |
| "mean": 0.0008482532575726509, |
| "std": 0.06028350815176964, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.0.weight": { |
| "min": -0.0015386008890345693, |
| "max": 1.0007996559143066, |
| "mean": 0.00048813552712090313, |
| "std": 0.022089246660470963, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.14.1.g": { |
| "min": 0.9992700219154358, |
| "max": 1.0015240907669067, |
| "mean": 1.0000568628311157, |
| "std": 0.000619773636572063, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_q.weight": { |
| "min": -0.031252991408109665, |
| "max": 0.031256891787052155, |
| "mean": -2.1020092390244827e-05, |
| "std": 0.01803199015557766, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_q.bias": { |
| "min": -0.03121490404009819, |
| "max": 0.03123173676431179, |
| "mean": -0.0006769870524294674, |
| "std": 0.01782653108239174, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_k.weight": { |
| "min": -0.0312560498714447, |
| "max": 0.03126147389411926, |
| "mean": -8.831357263261452e-06, |
| "std": 0.01803101785480976, |
| "sparsity": 9.5367431640625e-07, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_k.bias": { |
| "min": -0.031231652945280075, |
| "max": 0.031244346871972084, |
| "mean": -0.0007297407719306648, |
| "std": 0.01794145628809929, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_v.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_v.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_out.0.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_out.0.bias": { |
| "min": -0.0005350728752091527, |
| "max": 0.0004281355068087578, |
| "mean": -3.930799721274525e-06, |
| "std": 0.00015574153803754598, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.3.g": { |
| "min": 0.9972792267799377, |
| "max": 1.0023835897445679, |
| "mean": 0.9995018243789673, |
| "std": 0.0008350047282874584, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.4.ff.0.0.weight": { |
| "min": -0.03338531777262688, |
| "max": 0.03282884135842323, |
| "mean": -2.971738467749674e-06, |
| "std": 0.018026772886514664, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.4.ff.0.0.bias": { |
| "min": -0.03250397369265556, |
| "max": 0.031224608421325684, |
| "mean": -0.0005561817670240998, |
| "std": 0.01803283393383026, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.14.4.ff.2.weight": { |
| "min": -0.001761053572408855, |
| "max": 0.0016201753169298172, |
| "mean": -9.977067065847223e-07, |
| "std": 0.00029509843443520367, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.14.4.ff.2.bias": { |
| "min": -0.0005179685540497303, |
| "max": 0.00046010586083866656, |
| "mean": -3.1889690035313834e-06, |
| "std": 0.00014008936705067754, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.0.weight": { |
| "min": -0.23426799476146698, |
| "max": 0.2724316120147705, |
| "mean": 6.618206498387735e-06, |
| "std": 0.01881008967757225, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.15.1.g": { |
| "min": 0.32140958309173584, |
| "max": 0.6938180923461914, |
| "mean": 0.58160400390625, |
| "std": 0.045936692506074905, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_q.weight": { |
| "min": -0.18184486031532288, |
| "max": 0.19783763587474823, |
| "mean": -1.1537180398590863e-05, |
| "std": 0.03318366780877113, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_q.bias": { |
| "min": -0.16044476628303528, |
| "max": 0.12933249771595, |
| "mean": -0.001071967650204897, |
| "std": 0.03413407504558563, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_k.weight": { |
| "min": -0.33228737115859985, |
| "max": 0.31113728880882263, |
| "mean": -1.0175894203712232e-05, |
| "std": 0.03223416581749916, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_k.bias": { |
| "min": -7.80244779586792, |
| "max": 8.761518478393555, |
| "mean": 0.093451589345932, |
| "std": 1.619434118270874, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_v.weight": { |
| "min": -0.23388099670410156, |
| "max": 0.2418091893196106, |
| "mean": 4.1715411498444155e-05, |
| "std": 0.04085543006658554, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_v.bias": { |
| "min": -0.07592413574457169, |
| "max": 0.06573085486888885, |
| "mean": 0.00048532572691328824, |
| "std": 0.019415952265262604, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_out.0.weight": { |
| "min": -0.2459113746881485, |
| "max": 0.23399382829666138, |
| "mean": -3.2584175642114133e-06, |
| "std": 0.039430178701877594, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_out.0.bias": { |
| "min": -0.1629519760608673, |
| "max": 0.16087952256202698, |
| "mean": 0.0016248535830527544, |
| "std": 0.06528551876544952, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.3.g": { |
| "min": 0.5571001172065735, |
| "max": 0.9435561299324036, |
| "mean": 0.712803840637207, |
| "std": 0.040119532495737076, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.4.ff.0.0.weight": { |
| "min": -0.2279409021139145, |
| "max": 0.25474709272384644, |
| "mean": -4.549993900582194e-05, |
| "std": 0.040573619306087494, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.4.ff.0.0.bias": { |
| "min": -0.13481706380844116, |
| "max": 0.02219359762966633, |
| "mean": -0.041350673884153366, |
| "std": 0.018385522067546844, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.15.4.ff.2.weight": { |
| "min": -0.42158395051956177, |
| "max": 0.3924521505832672, |
| "mean": -4.16895818489138e-06, |
| "std": 0.047782838344573975, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.15.4.ff.2.bias": { |
| "min": -0.607164204120636, |
| "max": 0.6512984037399292, |
| "mean": 0.0015855339588597417, |
| "std": 0.056834105402231216, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.0.weight": { |
| "min": -0.25181475281715393, |
| "max": 0.32078737020492554, |
| "mean": -6.139540346339345e-06, |
| "std": 0.019613103941082954, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.16.1.g": { |
| "min": 0.3595266342163086, |
| "max": 0.6821960806846619, |
| "mean": 0.5706722140312195, |
| "std": 0.042985353618860245, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_q.weight": { |
| "min": -0.2202295958995819, |
| "max": 0.177076518535614, |
| "mean": -3.443878813413903e-05, |
| "std": 0.03429801017045975, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_q.bias": { |
| "min": -0.16317804157733917, |
| "max": 0.23287786543369293, |
| "mean": 0.00035837513860315084, |
| "std": 0.03280922770500183, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_k.weight": { |
| "min": -0.2639525532722473, |
| "max": 0.23980671167373657, |
| "mean": -5.297175084706396e-05, |
| "std": 0.03389657661318779, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_k.bias": { |
| "min": -4.854193210601807, |
| "max": 5.090420722961426, |
| "mean": 0.043878111988306046, |
| "std": 1.2290726900100708, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_v.weight": { |
| "min": -0.24640515446662903, |
| "max": 0.250241219997406, |
| "mean": 7.21166143193841e-05, |
| "std": 0.043985553085803986, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_v.bias": { |
| "min": -0.06247914582490921, |
| "max": 0.054487086832523346, |
| "mean": 0.0006464287871494889, |
| "std": 0.017190182581543922, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_out.0.weight": { |
| "min": -0.2863953709602356, |
| "max": 0.27215418219566345, |
| "mean": -5.014354974264279e-05, |
| "std": 0.0429837629199028, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_out.0.bias": { |
| "min": -0.16105736792087555, |
| "max": 0.17032958567142487, |
| "mean": -0.0028887835796922445, |
| "std": 0.05930224433541298, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.3.g": { |
| "min": 0.5198467373847961, |
| "max": 0.9329147338867188, |
| "mean": 0.7133820652961731, |
| "std": 0.03842068091034889, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.4.ff.0.0.weight": { |
| "min": -0.23785468935966492, |
| "max": 0.2487422525882721, |
| "mean": 0.00046461093006655574, |
| "std": 0.04045235738158226, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.4.ff.0.0.bias": { |
| "min": -0.14500072598457336, |
| "max": 0.04102769121527672, |
| "mean": -0.039694253355264664, |
| "std": 0.020542506128549576, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.16.4.ff.2.weight": { |
| "min": -0.532442569732666, |
| "max": 0.5823614597320557, |
| "mean": 6.013309757690877e-06, |
| "std": 0.04885788634419441, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.16.4.ff.2.bias": { |
| "min": -0.5189021229743958, |
| "max": 0.4934021234512329, |
| "mean": 0.0023652694653719664, |
| "std": 0.05344180017709732, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.0.weight": { |
| "min": -0.2737113833427429, |
| "max": 0.3155929148197174, |
| "mean": 1.988332769542467e-06, |
| "std": 0.020049693062901497, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.17.1.g": { |
| "min": 0.3658766746520996, |
| "max": 0.7116788029670715, |
| "mean": 0.5931248664855957, |
| "std": 0.04595986381173134, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_q.weight": { |
| "min": -0.21088893711566925, |
| "max": 0.19901061058044434, |
| "mean": 3.061449388042092e-05, |
| "std": 0.0348670557141304, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_q.bias": { |
| "min": -0.18707768619060516, |
| "max": 0.20344795286655426, |
| "mean": 0.0009536991128697991, |
| "std": 0.03149910271167755, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_k.weight": { |
| "min": -0.2897132933139801, |
| "max": 0.3398728668689728, |
| "mean": -4.695481766248122e-05, |
| "std": 0.034587565809488297, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_k.bias": { |
| "min": -3.8768022060394287, |
| "max": 3.386897563934326, |
| "mean": 0.014455738477408886, |
| "std": 0.8582935929298401, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_v.weight": { |
| "min": -0.22446562349796295, |
| "max": 0.24974551796913147, |
| "mean": -3.865096914523747e-06, |
| "std": 0.042228855192661285, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_v.bias": { |
| "min": -0.055283673107624054, |
| "max": 0.046579472720623016, |
| "mean": -2.0229621441103518e-05, |
| "std": 0.015845011919736862, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_out.0.weight": { |
| "min": -0.2932588756084442, |
| "max": 0.29019662737846375, |
| "mean": -7.67192614148371e-06, |
| "std": 0.04194393754005432, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_out.0.bias": { |
| "min": -0.12487518787384033, |
| "max": 0.2589555084705353, |
| "mean": -0.0032450095750391483, |
| "std": 0.053175244480371475, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.3.g": { |
| "min": 0.45627039670944214, |
| "max": 0.8444806933403015, |
| "mean": 0.7054478526115417, |
| "std": 0.03522774204611778, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.4.ff.0.0.weight": { |
| "min": -0.512130856513977, |
| "max": 0.34817978739738464, |
| "mean": 0.00034297071397304535, |
| "std": 0.040197573602199554, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.4.ff.0.0.bias": { |
| "min": -0.18561507761478424, |
| "max": 0.039553456008434296, |
| "mean": -0.039388205856084824, |
| "std": 0.02135956473648548, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.17.4.ff.2.weight": { |
| "min": -0.5439714193344116, |
| "max": 0.5556594729423523, |
| "mean": -7.099103095242754e-05, |
| "std": 0.050732966512441635, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.17.4.ff.2.bias": { |
| "min": -0.5116639137268066, |
| "max": 0.6642246842384338, |
| "mean": 0.002442360855638981, |
| "std": 0.04952433332800865, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.0.weight": { |
| "min": -0.33249908685684204, |
| "max": 0.2653781771659851, |
| "mean": 3.2569464565312956e-06, |
| "std": 0.019386788830161095, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.18.1.g": { |
| "min": 0.3219698965549469, |
| "max": 0.766376256942749, |
| "mean": 0.651033878326416, |
| "std": 0.04532676190137863, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_q.weight": { |
| "min": -0.2498074471950531, |
| "max": 0.21987499296665192, |
| "mean": -1.9507724573486485e-06, |
| "std": 0.036501552909612656, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_q.bias": { |
| "min": -0.3268783390522003, |
| "max": 0.2866748869419098, |
| "mean": -0.0006870508659631014, |
| "std": 0.03855406492948532, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_k.weight": { |
| "min": -0.3101723790168762, |
| "max": 0.37016358971595764, |
| "mean": 6.504941848106682e-05, |
| "std": 0.03624220937490463, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_k.bias": { |
| "min": -4.7166595458984375, |
| "max": 5.806900978088379, |
| "mean": 0.03795350342988968, |
| "std": 1.4129759073257446, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_v.weight": { |
| "min": -0.22155693173408508, |
| "max": 0.2057628631591797, |
| "mean": -7.524936518166214e-05, |
| "std": 0.042484089732170105, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_v.bias": { |
| "min": -0.07764487713575363, |
| "max": 0.051462698727846146, |
| "mean": -0.000925063737668097, |
| "std": 0.0164109468460083, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_out.0.weight": { |
| "min": -0.33050650358200073, |
| "max": 0.329324871301651, |
| "mean": -4.5611386667587794e-06, |
| "std": 0.042790405452251434, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_out.0.bias": { |
| "min": -0.2847575545310974, |
| "max": 0.11197607964277267, |
| "mean": -0.0012040773872286081, |
| "std": 0.04701252654194832, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.3.g": { |
| "min": 0.48601120710372925, |
| "max": 0.8868346214294434, |
| "mean": 0.7373513579368591, |
| "std": 0.038241803646087646, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.4.ff.0.0.weight": { |
| "min": -0.3624517619609833, |
| "max": 0.27458682656288147, |
| "mean": 5.118873013998382e-05, |
| "std": 0.040643129497766495, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.4.ff.0.0.bias": { |
| "min": -0.24757687747478485, |
| "max": 0.046393755823373795, |
| "mean": -0.039262838661670685, |
| "std": 0.023290209472179413, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.18.4.ff.2.weight": { |
| "min": -0.626139223575592, |
| "max": 0.5965114235877991, |
| "mean": -6.056673373677768e-05, |
| "std": 0.0531148836016655, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.18.4.ff.2.bias": { |
| "min": -0.7093748450279236, |
| "max": 0.2657814621925354, |
| "mean": 0.0009187416289933026, |
| "std": 0.05122179910540581, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.0.weight": { |
| "min": -0.3433896005153656, |
| "max": 0.3037145733833313, |
| "mean": 3.0547948881576303e-07, |
| "std": 0.019135164096951485, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.19.1.g": { |
| "min": 0.34973248839378357, |
| "max": 0.7829060554504395, |
| "mean": 0.6387954354286194, |
| "std": 0.049250222742557526, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_q.weight": { |
| "min": -0.20535039901733398, |
| "max": 0.20685911178588867, |
| "mean": -5.973261431790888e-05, |
| "std": 0.03769532963633537, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_q.bias": { |
| "min": -0.25850412249565125, |
| "max": 0.2679128050804138, |
| "mean": -0.00040441699093207717, |
| "std": 0.044591374695301056, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_k.weight": { |
| "min": -0.354056179523468, |
| "max": 0.3223519027233124, |
| "mean": -6.86804014549125e-06, |
| "std": 0.03720388934016228, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_k.bias": { |
| "min": -5.260861873626709, |
| "max": 4.203889846801758, |
| "mean": -0.02641155757009983, |
| "std": 1.0066218376159668, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_v.weight": { |
| "min": -0.23860004544258118, |
| "max": 0.24336647987365723, |
| "mean": -2.503740142856259e-05, |
| "std": 0.043208908289670944, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_v.bias": { |
| "min": -0.06237001344561577, |
| "max": 0.05677289888262749, |
| "mean": 0.0003429377684369683, |
| "std": 0.014151404611766338, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_out.0.weight": { |
| "min": -0.43683916330337524, |
| "max": 0.37347522377967834, |
| "mean": 1.453105596738169e-05, |
| "std": 0.04412021487951279, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_out.0.bias": { |
| "min": -0.096480593085289, |
| "max": 0.17590999603271484, |
| "mean": -0.0006604294758290052, |
| "std": 0.03515587002038956, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.3.g": { |
| "min": 0.4216778874397278, |
| "max": 1.0693583488464355, |
| "mean": 0.7482997179031372, |
| "std": 0.04205985367298126, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.4.ff.0.0.weight": { |
| "min": -0.2665577530860901, |
| "max": 0.2968434989452362, |
| "mean": -7.962346717249602e-05, |
| "std": 0.040803126990795135, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.4.ff.0.0.bias": { |
| "min": -0.18574897944927216, |
| "max": 0.04386778548359871, |
| "mean": -0.036819178611040115, |
| "std": 0.02561137080192566, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.19.4.ff.2.weight": { |
| "min": -0.45699048042297363, |
| "max": 0.4864794611930847, |
| "mean": 4.341273597674444e-05, |
| "std": 0.05420761927962303, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.19.4.ff.2.bias": { |
| "min": -0.28645777702331543, |
| "max": 0.5512458086013794, |
| "mean": -0.0008799894712865353, |
| "std": 0.04782594367861748, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.0.weight": { |
| "min": -0.29278504848480225, |
| "max": 0.32276028394699097, |
| "mean": 6.534221029141918e-06, |
| "std": 0.019969386979937553, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.20.1.g": { |
| "min": 0.29091978073120117, |
| "max": 0.760124921798706, |
| "mean": 0.6508240699768066, |
| "std": 0.05213485658168793, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_q.weight": { |
| "min": -0.24355527758598328, |
| "max": 0.2617471516132355, |
| "mean": -6.045864211046137e-06, |
| "std": 0.03961271047592163, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_q.bias": { |
| "min": -0.2675487995147705, |
| "max": 0.19986717402935028, |
| "mean": -0.0008803302189335227, |
| "std": 0.051758527755737305, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_k.weight": { |
| "min": -0.2720382511615753, |
| "max": 0.25365304946899414, |
| "mean": 3.97135409002658e-06, |
| "std": 0.03870992362499237, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_k.bias": { |
| "min": -12.963478088378906, |
| "max": 15.945467948913574, |
| "mean": 0.03322439640760422, |
| "std": 1.988944411277771, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_v.weight": { |
| "min": -0.20726989209651947, |
| "max": 0.2258823961019516, |
| "mean": -7.221873966045678e-05, |
| "std": 0.04055318236351013, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_v.bias": { |
| "min": -0.06934336572885513, |
| "max": 0.06329023838043213, |
| "mean": 0.00015188338875304908, |
| "std": 0.014744000509381294, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_out.0.weight": { |
| "min": -0.46502813696861267, |
| "max": 0.3207668662071228, |
| "mean": 1.9557133782655e-05, |
| "std": 0.04058815911412239, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_out.0.bias": { |
| "min": -0.06403840333223343, |
| "max": 0.11518330872058868, |
| "mean": 0.001191072165966034, |
| "std": 0.02470429427921772, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.3.g": { |
| "min": 0.3746289610862732, |
| "max": 0.9322671294212341, |
| "mean": 0.7508296370506287, |
| "std": 0.040182456374168396, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.4.ff.0.0.weight": { |
| "min": -0.2793700397014618, |
| "max": 0.27312716841697693, |
| "mean": -0.00016854800924193114, |
| "std": 0.040993720293045044, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.4.ff.0.0.bias": { |
| "min": -0.19878964126110077, |
| "max": 0.050874363631010056, |
| "mean": -0.03202495723962784, |
| "std": 0.02511216513812542, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.20.4.ff.2.weight": { |
| "min": -0.6572921276092529, |
| "max": 0.5353701114654541, |
| "mean": -4.860567787545733e-05, |
| "std": 0.052844274789094925, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.20.4.ff.2.bias": { |
| "min": -0.19308353960514069, |
| "max": 0.5820099115371704, |
| "mean": -0.0005148603231646121, |
| "std": 0.04106666147708893, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.0.weight": { |
| "min": -0.41772764921188354, |
| "max": 0.3719545602798462, |
| "mean": 6.02346335654147e-06, |
| "std": 0.021620826795697212, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.21.1.g": { |
| "min": 0.21424666047096252, |
| "max": 0.7470943331718445, |
| "mean": 0.6495506763458252, |
| "std": 0.05437405779957771, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_q.weight": { |
| "min": -0.2095523476600647, |
| "max": 0.19568544626235962, |
| "mean": 4.010393604403362e-05, |
| "std": 0.03946491330862045, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_q.bias": { |
| "min": -0.32928818464279175, |
| "max": 0.2594093382358551, |
| "mean": -0.0032241325825452805, |
| "std": 0.05625630542635918, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_k.weight": { |
| "min": -0.2056845873594284, |
| "max": 0.254710853099823, |
| "mean": 5.4258445743471384e-05, |
| "std": 0.038567040115594864, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_k.bias": { |
| "min": -6.242719650268555, |
| "max": 6.931571006774902, |
| "mean": 0.04833323508501053, |
| "std": 1.384921908378601, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_v.weight": { |
| "min": -0.20961354672908783, |
| "max": 0.2300715446472168, |
| "mean": -5.3330231821746565e-06, |
| "std": 0.04131212830543518, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_v.bias": { |
| "min": -0.04391402378678322, |
| "max": 0.03599291667342186, |
| "mean": 3.6780984373763204e-06, |
| "std": 0.012800832279026508, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_out.0.weight": { |
| "min": -0.39794921875, |
| "max": 0.34475040435791016, |
| "mean": -5.557174881687388e-05, |
| "std": 0.0423884317278862, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_out.0.bias": { |
| "min": -0.055058449506759644, |
| "max": 0.06288675218820572, |
| "mean": 0.0003690638695843518, |
| "std": 0.018671618774533272, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.3.g": { |
| "min": 0.3500124216079712, |
| "max": 1.0451101064682007, |
| "mean": 0.789310097694397, |
| "std": 0.048743680119514465, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.4.ff.0.0.weight": { |
| "min": -0.33340734243392944, |
| "max": 0.3858667314052582, |
| "mean": -0.00016963679809123278, |
| "std": 0.04147941246628761, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.4.ff.0.0.bias": { |
| "min": -0.15730711817741394, |
| "max": 0.05913476645946503, |
| "mean": -0.031834498047828674, |
| "std": 0.025142161175608635, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.21.4.ff.2.weight": { |
| "min": -0.6963925361633301, |
| "max": 0.46865832805633545, |
| "mean": -9.133096318691969e-05, |
| "std": 0.05179010331630707, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.21.4.ff.2.bias": { |
| "min": -0.248288094997406, |
| "max": 0.3285192847251892, |
| "mean": -0.0002480646944604814, |
| "std": 0.04143183305859566, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.0.weight": { |
| "min": -0.2872416079044342, |
| "max": 0.35022279620170593, |
| "mean": -2.109378556269803e-06, |
| "std": 0.024238325655460358, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.22.1.g": { |
| "min": 0.19658105075359344, |
| "max": 0.7791422605514526, |
| "mean": 0.6702942848205566, |
| "std": 0.0586935319006443, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_q.weight": { |
| "min": -0.22860872745513916, |
| "max": 0.2311849147081375, |
| "mean": -1.9817682186840102e-05, |
| "std": 0.04044090211391449, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_q.bias": { |
| "min": -0.21965830028057098, |
| "max": 0.2406904250383377, |
| "mean": 0.0007772702374495566, |
| "std": 0.05579812079668045, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_k.weight": { |
| "min": -0.21554625034332275, |
| "max": 0.2266112118959427, |
| "mean": -7.155907223932445e-05, |
| "std": 0.03937710076570511, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_k.bias": { |
| "min": -8.904163360595703, |
| "max": 9.067035675048828, |
| "mean": -0.001250317320227623, |
| "std": 1.848069429397583, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_v.weight": { |
| "min": -0.26928046345710754, |
| "max": 0.2589084208011627, |
| "mean": 4.358497244538739e-05, |
| "std": 0.03840699419379234, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_v.bias": { |
| "min": -0.05760843679308891, |
| "max": 0.057633914053440094, |
| "mean": 0.0003498811274766922, |
| "std": 0.014721624553203583, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_out.0.weight": { |
| "min": -0.265085905790329, |
| "max": 0.2886793613433838, |
| "mean": -6.175917224027216e-05, |
| "std": 0.03907330706715584, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_out.0.bias": { |
| "min": -0.043753523379564285, |
| "max": 0.03726416453719139, |
| "mean": -8.701729530002922e-05, |
| "std": 0.013365592807531357, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.3.g": { |
| "min": 0.3394947946071625, |
| "max": 1.092633843421936, |
| "mean": 0.8636797666549683, |
| "std": 0.06384899467229843, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.4.ff.0.0.weight": { |
| "min": -0.42328590154647827, |
| "max": 0.4191039204597473, |
| "mean": 0.0003126378287561238, |
| "std": 0.043501876294612885, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.4.ff.0.0.bias": { |
| "min": -0.2147369235754013, |
| "max": 0.17059248685836792, |
| "mean": -0.029485618695616722, |
| "std": 0.03195330873131752, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.22.4.ff.2.weight": { |
| "min": -0.5996397733688354, |
| "max": 0.5595637559890747, |
| "mean": -0.00015250420256052166, |
| "std": 0.05344444885849953, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.22.4.ff.2.bias": { |
| "min": -0.17845340073108673, |
| "max": 0.37662389874458313, |
| "mean": 0.0013645882718265057, |
| "std": 0.037309858947992325, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.0.weight": { |
| "min": -0.3942789137363434, |
| "max": 0.36899739503860474, |
| "mean": 3.645062679424882e-05, |
| "std": 0.028621336445212364, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.23.1.g": { |
| "min": 0.2902868390083313, |
| "max": 0.8265326619148254, |
| "mean": 0.7055679559707642, |
| "std": 0.0678958147764206, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_q.weight": { |
| "min": -0.926041305065155, |
| "max": 1.026432991027832, |
| "mean": -2.5475666916463524e-05, |
| "std": 0.0476241335272789, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_q.bias": { |
| "min": -0.87814861536026, |
| "max": 0.8150070905685425, |
| "mean": -0.00031320619746111333, |
| "std": 0.09553563594818115, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_k.weight": { |
| "min": -0.2693868577480316, |
| "max": 0.24089287221431732, |
| "mean": -2.29374309128616e-05, |
| "std": 0.03895637392997742, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_k.bias": { |
| "min": -23.73939323425293, |
| "max": 22.84785270690918, |
| "mean": -0.0918712168931961, |
| "std": 4.0697784423828125, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_v.weight": { |
| "min": -0.22775250673294067, |
| "max": 0.24510256946086884, |
| "mean": -2.5825131160672754e-05, |
| "std": 0.03863884136080742, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_v.bias": { |
| "min": -0.06045493483543396, |
| "max": 0.04607832431793213, |
| "mean": -0.00014694462879560888, |
| "std": 0.01469829585403204, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_out.0.weight": { |
| "min": -0.33846479654312134, |
| "max": 0.37447792291641235, |
| "mean": 7.293592716450803e-06, |
| "std": 0.04081470146775246, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_out.0.bias": { |
| "min": -0.04649795591831207, |
| "max": 0.19573213160037994, |
| "mean": 0.00027208085521124303, |
| "std": 0.013573010452091694, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.3.g": { |
| "min": 0.37458330392837524, |
| "max": 1.1300410032272339, |
| "mean": 0.8900002241134644, |
| "std": 0.06398438662290573, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.4.ff.0.0.weight": { |
| "min": -0.4478272497653961, |
| "max": 0.5424814224243164, |
| "mean": 2.45622759393882e-05, |
| "std": 0.045566376298666, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.4.ff.0.0.bias": { |
| "min": -0.22404029965400696, |
| "max": 0.08835332095623016, |
| "mean": -0.032017190009355545, |
| "std": 0.03776315227150917, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.23.4.ff.2.weight": { |
| "min": -0.7251995801925659, |
| "max": 0.6892821788787842, |
| "mean": 3.438512794673443e-05, |
| "std": 0.05177679285407066, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.23.4.ff.2.bias": { |
| "min": -0.1745474934577942, |
| "max": 0.2185421884059906, |
| "mean": 4.038875340484083e-05, |
| "std": 0.03178102895617485, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.0.weight": { |
| "min": -0.3403666019439697, |
| "max": 0.3743104040622711, |
| "mean": 4.2970114009222016e-05, |
| "std": 0.03414527699351311, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.24.1.g": { |
| "min": 0.31756705045700073, |
| "max": 1.2868698835372925, |
| "mean": 0.6014533042907715, |
| "std": 0.08345934003591537, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_q.weight": { |
| "min": -0.28337857127189636, |
| "max": 0.26026472449302673, |
| "mean": -3.1064557788340608e-06, |
| "std": 0.03598480299115181, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_q.bias": { |
| "min": -0.23555569350719452, |
| "max": 0.2053573727607727, |
| "mean": 0.0002324726083315909, |
| "std": 0.05600997060537338, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_k.weight": { |
| "min": -0.4354943335056305, |
| "max": 0.3252315819263458, |
| "mean": 2.4552073227823712e-05, |
| "std": 0.03413620963692665, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_k.bias": { |
| "min": -5.544710159301758, |
| "max": 7.31260871887207, |
| "mean": -0.007366638630628586, |
| "std": 0.6992178559303284, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_v.weight": { |
| "min": -0.34383082389831543, |
| "max": 0.3635445833206177, |
| "mean": 0.00010339185246266425, |
| "std": 0.04782695323228836, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_v.bias": { |
| "min": -0.07375096529722214, |
| "max": 0.06034737080335617, |
| "mean": 0.000933139817789197, |
| "std": 0.014950517565011978, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_out.0.weight": { |
| "min": -0.2554619610309601, |
| "max": 0.28651097416877747, |
| "mean": 4.460267518879846e-06, |
| "std": 0.04155408963561058, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_out.0.bias": { |
| "min": -0.055337581783533096, |
| "max": 0.06284268200397491, |
| "mean": 0.00014179576828610152, |
| "std": 0.007177725899964571, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.3.g": { |
| "min": 0.4937240481376648, |
| "max": 1.2209070920944214, |
| "mean": 1.01340913772583, |
| "std": 0.11743401736021042, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.4.ff.0.0.weight": { |
| "min": -1.0935479402542114, |
| "max": 1.0468977689743042, |
| "mean": -4.9845290050143376e-05, |
| "std": 0.05240994319319725, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.4.ff.0.0.bias": { |
| "min": -0.22365206480026245, |
| "max": 0.17271095514297485, |
| "mean": -0.027249177917838097, |
| "std": 0.03635435923933983, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.24.4.ff.2.weight": { |
| "min": -0.8846310973167419, |
| "max": 0.9225372672080994, |
| "mean": -0.00014597778499592096, |
| "std": 0.053280774503946304, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.24.4.ff.2.bias": { |
| "min": -0.17102883756160736, |
| "max": 0.3799268901348114, |
| "mean": 0.0033686391543596983, |
| "std": 0.039900682866573334, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.0.weight": { |
| "min": -0.7772161960601807, |
| "max": 0.7236161828041077, |
| "mean": 1.9240971596445888e-05, |
| "std": 0.04616595432162285, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.25.1.g": { |
| "min": 0.33854806423187256, |
| "max": 1.4277222156524658, |
| "mean": 0.9483012557029724, |
| "std": 0.20673148334026337, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_q.weight": { |
| "min": -1.7455986738204956, |
| "max": 1.7045377492904663, |
| "mean": 0.00022702554997522384, |
| "std": 0.15868352353572845, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_q.bias": { |
| "min": -1.199636459350586, |
| "max": 1.0996308326721191, |
| "mean": -0.009536425583064556, |
| "std": 0.20382796227931976, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_k.weight": { |
| "min": -0.4213047921657562, |
| "max": 0.4262976348400116, |
| "mean": 6.459288124460727e-05, |
| "std": 0.04801792651414871, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_k.bias": { |
| "min": -19.743492126464844, |
| "max": 19.538597106933594, |
| "mean": -0.24829509854316711, |
| "std": 4.776083946228027, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_v.weight": { |
| "min": -0.3239092528820038, |
| "max": 0.43836328387260437, |
| "mean": -1.204050931846723e-05, |
| "std": 0.046160612255334854, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_v.bias": { |
| "min": -0.0340789370238781, |
| "max": 0.03713114559650421, |
| "mean": 0.0006417044205591083, |
| "std": 0.012921737506985664, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_out.0.weight": { |
| "min": -0.7034957408905029, |
| "max": 0.664257287979126, |
| "mean": 4.352344694780186e-05, |
| "std": 0.05788278207182884, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_out.0.bias": { |
| "min": -0.07222186028957367, |
| "max": 0.06749024242162704, |
| "mean": -0.00013264152221381664, |
| "std": 0.012920759618282318, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.3.g": { |
| "min": 0.38012510538101196, |
| "max": 1.3909755945205688, |
| "mean": 1.0665355920791626, |
| "std": 0.21970459818840027, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.4.ff.0.0.weight": { |
| "min": -0.6164048314094543, |
| "max": 0.7170195579528809, |
| "mean": 0.00011136491957586259, |
| "std": 0.05802035331726074, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.4.ff.0.0.bias": { |
| "min": -0.21974356472492218, |
| "max": 0.22506725788116455, |
| "mean": 0.006242978852242231, |
| "std": 0.04973088204860687, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.25.4.ff.2.weight": { |
| "min": -0.6296619176864624, |
| "max": 0.8891851902008057, |
| "mean": 1.1489293683553115e-05, |
| "std": 0.023526353761553764, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.25.4.ff.2.bias": { |
| "min": -0.5068330764770508, |
| "max": 0.4739985764026642, |
| "mean": -0.0030159270390868187, |
| "std": 0.06930534541606903, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.norm_out.g": { |
| "min": 0.5377116799354553, |
| "max": 1.180783748626709, |
| "mean": 0.7827296257019043, |
| "std": 0.09886873513460159, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.proj_out.weight": { |
| "min": -0.2669491767883301, |
| "max": 0.21265925467014313, |
| "mean": -0.00022343886666931212, |
| "std": 0.05399514362215996, |
| "sparsity": 0.0, |
| "shape": [ |
| 100, |
| 1024 |
| ] |
| }, |
| "transformer.proj_out.bias": { |
| "min": -0.23786094784736633, |
| "max": 0.014840648509562016, |
| "mean": -0.04396260902285576, |
| "std": 0.034334905445575714, |
| "sparsity": 0.0, |
| "shape": [ |
| 100 |
| ] |
| } |
| } |
| } |