| { |
| "layer_types": { |
| "transformer": 391 |
| }, |
| "parameter_counts": { |
| "transformer.time_embed.time_mlp.0.weight": 262144, |
| "transformer.time_embed.time_mlp.0.bias": 1024, |
| "transformer.time_embed.time_mlp.2.weight": 1048576, |
| "transformer.time_embed.time_mlp.2.bias": 1024, |
| "transformer.text_embed.text_embed.weight": 254600, |
| "transformer.input_embed.proj.weight": 307200, |
| "transformer.input_embed.proj.bias": 1024, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, |
| "transformer.layers.0.1.g": 1024, |
| "transformer.layers.0.2.to_q.weight": 1048576, |
| "transformer.layers.0.2.to_q.bias": 1024, |
| "transformer.layers.0.2.to_k.weight": 1048576, |
| "transformer.layers.0.2.to_k.bias": 1024, |
| "transformer.layers.0.2.to_v.weight": 1048576, |
| "transformer.layers.0.2.to_v.bias": 1024, |
| "transformer.layers.0.2.to_out.0.weight": 1048576, |
| "transformer.layers.0.2.to_out.0.bias": 1024, |
| "transformer.layers.0.3.g": 1024, |
| "transformer.layers.0.4.ff.0.0.weight": 4194304, |
| "transformer.layers.0.4.ff.0.0.bias": 4096, |
| "transformer.layers.0.4.ff.2.weight": 4194304, |
| "transformer.layers.0.4.ff.2.bias": 1024, |
| "transformer.layers.1.1.g": 1024, |
| "transformer.layers.1.2.to_q.weight": 1048576, |
| "transformer.layers.1.2.to_q.bias": 1024, |
| "transformer.layers.1.2.to_k.weight": 1048576, |
| "transformer.layers.1.2.to_k.bias": 1024, |
| "transformer.layers.1.2.to_v.weight": 1048576, |
| "transformer.layers.1.2.to_v.bias": 1024, |
| "transformer.layers.1.2.to_out.0.weight": 1048576, |
| "transformer.layers.1.2.to_out.0.bias": 1024, |
| "transformer.layers.1.3.g": 1024, |
| "transformer.layers.1.4.ff.0.0.weight": 4194304, |
| "transformer.layers.1.4.ff.0.0.bias": 4096, |
| "transformer.layers.1.4.ff.2.weight": 4194304, |
| "transformer.layers.1.4.ff.2.bias": 1024, |
| "transformer.layers.2.1.g": 1024, |
| "transformer.layers.2.2.to_q.weight": 1048576, |
| "transformer.layers.2.2.to_q.bias": 1024, |
| "transformer.layers.2.2.to_k.weight": 1048576, |
| "transformer.layers.2.2.to_k.bias": 1024, |
| "transformer.layers.2.2.to_v.weight": 1048576, |
| "transformer.layers.2.2.to_v.bias": 1024, |
| "transformer.layers.2.2.to_out.0.weight": 1048576, |
| "transformer.layers.2.2.to_out.0.bias": 1024, |
| "transformer.layers.2.3.g": 1024, |
| "transformer.layers.2.4.ff.0.0.weight": 4194304, |
| "transformer.layers.2.4.ff.0.0.bias": 4096, |
| "transformer.layers.2.4.ff.2.weight": 4194304, |
| "transformer.layers.2.4.ff.2.bias": 1024, |
| "transformer.layers.3.1.g": 1024, |
| "transformer.layers.3.2.to_q.weight": 1048576, |
| "transformer.layers.3.2.to_q.bias": 1024, |
| "transformer.layers.3.2.to_k.weight": 1048576, |
| "transformer.layers.3.2.to_k.bias": 1024, |
| "transformer.layers.3.2.to_v.weight": 1048576, |
| "transformer.layers.3.2.to_v.bias": 1024, |
| "transformer.layers.3.2.to_out.0.weight": 1048576, |
| "transformer.layers.3.2.to_out.0.bias": 1024, |
| "transformer.layers.3.3.g": 1024, |
| "transformer.layers.3.4.ff.0.0.weight": 4194304, |
| "transformer.layers.3.4.ff.0.0.bias": 4096, |
| "transformer.layers.3.4.ff.2.weight": 4194304, |
| "transformer.layers.3.4.ff.2.bias": 1024, |
| "transformer.layers.4.1.g": 1024, |
| "transformer.layers.4.2.to_q.weight": 1048576, |
| "transformer.layers.4.2.to_q.bias": 1024, |
| "transformer.layers.4.2.to_k.weight": 1048576, |
| "transformer.layers.4.2.to_k.bias": 1024, |
| "transformer.layers.4.2.to_v.weight": 1048576, |
| "transformer.layers.4.2.to_v.bias": 1024, |
| "transformer.layers.4.2.to_out.0.weight": 1048576, |
| "transformer.layers.4.2.to_out.0.bias": 1024, |
| "transformer.layers.4.3.g": 1024, |
| "transformer.layers.4.4.ff.0.0.weight": 4194304, |
| "transformer.layers.4.4.ff.0.0.bias": 4096, |
| "transformer.layers.4.4.ff.2.weight": 4194304, |
| "transformer.layers.4.4.ff.2.bias": 1024, |
| "transformer.layers.5.1.g": 1024, |
| "transformer.layers.5.2.to_q.weight": 1048576, |
| "transformer.layers.5.2.to_q.bias": 1024, |
| "transformer.layers.5.2.to_k.weight": 1048576, |
| "transformer.layers.5.2.to_k.bias": 1024, |
| "transformer.layers.5.2.to_v.weight": 1048576, |
| "transformer.layers.5.2.to_v.bias": 1024, |
| "transformer.layers.5.2.to_out.0.weight": 1048576, |
| "transformer.layers.5.2.to_out.0.bias": 1024, |
| "transformer.layers.5.3.g": 1024, |
| "transformer.layers.5.4.ff.0.0.weight": 4194304, |
| "transformer.layers.5.4.ff.0.0.bias": 4096, |
| "transformer.layers.5.4.ff.2.weight": 4194304, |
| "transformer.layers.5.4.ff.2.bias": 1024, |
| "transformer.layers.6.1.g": 1024, |
| "transformer.layers.6.2.to_q.weight": 1048576, |
| "transformer.layers.6.2.to_q.bias": 1024, |
| "transformer.layers.6.2.to_k.weight": 1048576, |
| "transformer.layers.6.2.to_k.bias": 1024, |
| "transformer.layers.6.2.to_v.weight": 1048576, |
| "transformer.layers.6.2.to_v.bias": 1024, |
| "transformer.layers.6.2.to_out.0.weight": 1048576, |
| "transformer.layers.6.2.to_out.0.bias": 1024, |
| "transformer.layers.6.3.g": 1024, |
| "transformer.layers.6.4.ff.0.0.weight": 4194304, |
| "transformer.layers.6.4.ff.0.0.bias": 4096, |
| "transformer.layers.6.4.ff.2.weight": 4194304, |
| "transformer.layers.6.4.ff.2.bias": 1024, |
| "transformer.layers.7.1.g": 1024, |
| "transformer.layers.7.2.to_q.weight": 1048576, |
| "transformer.layers.7.2.to_q.bias": 1024, |
| "transformer.layers.7.2.to_k.weight": 1048576, |
| "transformer.layers.7.2.to_k.bias": 1024, |
| "transformer.layers.7.2.to_v.weight": 1048576, |
| "transformer.layers.7.2.to_v.bias": 1024, |
| "transformer.layers.7.2.to_out.0.weight": 1048576, |
| "transformer.layers.7.2.to_out.0.bias": 1024, |
| "transformer.layers.7.3.g": 1024, |
| "transformer.layers.7.4.ff.0.0.weight": 4194304, |
| "transformer.layers.7.4.ff.0.0.bias": 4096, |
| "transformer.layers.7.4.ff.2.weight": 4194304, |
| "transformer.layers.7.4.ff.2.bias": 1024, |
| "transformer.layers.8.1.g": 1024, |
| "transformer.layers.8.2.to_q.weight": 1048576, |
| "transformer.layers.8.2.to_q.bias": 1024, |
| "transformer.layers.8.2.to_k.weight": 1048576, |
| "transformer.layers.8.2.to_k.bias": 1024, |
| "transformer.layers.8.2.to_v.weight": 1048576, |
| "transformer.layers.8.2.to_v.bias": 1024, |
| "transformer.layers.8.2.to_out.0.weight": 1048576, |
| "transformer.layers.8.2.to_out.0.bias": 1024, |
| "transformer.layers.8.3.g": 1024, |
| "transformer.layers.8.4.ff.0.0.weight": 4194304, |
| "transformer.layers.8.4.ff.0.0.bias": 4096, |
| "transformer.layers.8.4.ff.2.weight": 4194304, |
| "transformer.layers.8.4.ff.2.bias": 1024, |
| "transformer.layers.9.1.g": 1024, |
| "transformer.layers.9.2.to_q.weight": 1048576, |
| "transformer.layers.9.2.to_q.bias": 1024, |
| "transformer.layers.9.2.to_k.weight": 1048576, |
| "transformer.layers.9.2.to_k.bias": 1024, |
| "transformer.layers.9.2.to_v.weight": 1048576, |
| "transformer.layers.9.2.to_v.bias": 1024, |
| "transformer.layers.9.2.to_out.0.weight": 1048576, |
| "transformer.layers.9.2.to_out.0.bias": 1024, |
| "transformer.layers.9.3.g": 1024, |
| "transformer.layers.9.4.ff.0.0.weight": 4194304, |
| "transformer.layers.9.4.ff.0.0.bias": 4096, |
| "transformer.layers.9.4.ff.2.weight": 4194304, |
| "transformer.layers.9.4.ff.2.bias": 1024, |
| "transformer.layers.10.1.g": 1024, |
| "transformer.layers.10.2.to_q.weight": 1048576, |
| "transformer.layers.10.2.to_q.bias": 1024, |
| "transformer.layers.10.2.to_k.weight": 1048576, |
| "transformer.layers.10.2.to_k.bias": 1024, |
| "transformer.layers.10.2.to_v.weight": 1048576, |
| "transformer.layers.10.2.to_v.bias": 1024, |
| "transformer.layers.10.2.to_out.0.weight": 1048576, |
| "transformer.layers.10.2.to_out.0.bias": 1024, |
| "transformer.layers.10.3.g": 1024, |
| "transformer.layers.10.4.ff.0.0.weight": 4194304, |
| "transformer.layers.10.4.ff.0.0.bias": 4096, |
| "transformer.layers.10.4.ff.2.weight": 4194304, |
| "transformer.layers.10.4.ff.2.bias": 1024, |
| "transformer.layers.11.1.g": 1024, |
| "transformer.layers.11.2.to_q.weight": 1048576, |
| "transformer.layers.11.2.to_q.bias": 1024, |
| "transformer.layers.11.2.to_k.weight": 1048576, |
| "transformer.layers.11.2.to_k.bias": 1024, |
| "transformer.layers.11.2.to_v.weight": 1048576, |
| "transformer.layers.11.2.to_v.bias": 1024, |
| "transformer.layers.11.2.to_out.0.weight": 1048576, |
| "transformer.layers.11.2.to_out.0.bias": 1024, |
| "transformer.layers.11.3.g": 1024, |
| "transformer.layers.11.4.ff.0.0.weight": 4194304, |
| "transformer.layers.11.4.ff.0.0.bias": 4096, |
| "transformer.layers.11.4.ff.2.weight": 4194304, |
| "transformer.layers.11.4.ff.2.bias": 1024, |
| "transformer.layers.12.1.g": 1024, |
| "transformer.layers.12.2.to_q.weight": 1048576, |
| "transformer.layers.12.2.to_q.bias": 1024, |
| "transformer.layers.12.2.to_k.weight": 1048576, |
| "transformer.layers.12.2.to_k.bias": 1024, |
| "transformer.layers.12.2.to_v.weight": 1048576, |
| "transformer.layers.12.2.to_v.bias": 1024, |
| "transformer.layers.12.2.to_out.0.weight": 1048576, |
| "transformer.layers.12.2.to_out.0.bias": 1024, |
| "transformer.layers.12.3.g": 1024, |
| "transformer.layers.12.4.ff.0.0.weight": 4194304, |
| "transformer.layers.12.4.ff.0.0.bias": 4096, |
| "transformer.layers.12.4.ff.2.weight": 4194304, |
| "transformer.layers.12.4.ff.2.bias": 1024, |
| "transformer.layers.13.0.weight": 2097152, |
| "transformer.layers.13.1.g": 1024, |
| "transformer.layers.13.2.to_q.weight": 1048576, |
| "transformer.layers.13.2.to_q.bias": 1024, |
| "transformer.layers.13.2.to_k.weight": 1048576, |
| "transformer.layers.13.2.to_k.bias": 1024, |
| "transformer.layers.13.2.to_v.weight": 1048576, |
| "transformer.layers.13.2.to_v.bias": 1024, |
| "transformer.layers.13.2.to_out.0.weight": 1048576, |
| "transformer.layers.13.2.to_out.0.bias": 1024, |
| "transformer.layers.13.3.g": 1024, |
| "transformer.layers.13.4.ff.0.0.weight": 4194304, |
| "transformer.layers.13.4.ff.0.0.bias": 4096, |
| "transformer.layers.13.4.ff.2.weight": 4194304, |
| "transformer.layers.13.4.ff.2.bias": 1024, |
| "transformer.layers.14.0.weight": 2097152, |
| "transformer.layers.14.1.g": 1024, |
| "transformer.layers.14.2.to_q.weight": 1048576, |
| "transformer.layers.14.2.to_q.bias": 1024, |
| "transformer.layers.14.2.to_k.weight": 1048576, |
| "transformer.layers.14.2.to_k.bias": 1024, |
| "transformer.layers.14.2.to_v.weight": 1048576, |
| "transformer.layers.14.2.to_v.bias": 1024, |
| "transformer.layers.14.2.to_out.0.weight": 1048576, |
| "transformer.layers.14.2.to_out.0.bias": 1024, |
| "transformer.layers.14.3.g": 1024, |
| "transformer.layers.14.4.ff.0.0.weight": 4194304, |
| "transformer.layers.14.4.ff.0.0.bias": 4096, |
| "transformer.layers.14.4.ff.2.weight": 4194304, |
| "transformer.layers.14.4.ff.2.bias": 1024, |
| "transformer.layers.15.0.weight": 2097152, |
| "transformer.layers.15.1.g": 1024, |
| "transformer.layers.15.2.to_q.weight": 1048576, |
| "transformer.layers.15.2.to_q.bias": 1024, |
| "transformer.layers.15.2.to_k.weight": 1048576, |
| "transformer.layers.15.2.to_k.bias": 1024, |
| "transformer.layers.15.2.to_v.weight": 1048576, |
| "transformer.layers.15.2.to_v.bias": 1024, |
| "transformer.layers.15.2.to_out.0.weight": 1048576, |
| "transformer.layers.15.2.to_out.0.bias": 1024, |
| "transformer.layers.15.3.g": 1024, |
| "transformer.layers.15.4.ff.0.0.weight": 4194304, |
| "transformer.layers.15.4.ff.0.0.bias": 4096, |
| "transformer.layers.15.4.ff.2.weight": 4194304, |
| "transformer.layers.15.4.ff.2.bias": 1024, |
| "transformer.layers.16.0.weight": 2097152, |
| "transformer.layers.16.1.g": 1024, |
| "transformer.layers.16.2.to_q.weight": 1048576, |
| "transformer.layers.16.2.to_q.bias": 1024, |
| "transformer.layers.16.2.to_k.weight": 1048576, |
| "transformer.layers.16.2.to_k.bias": 1024, |
| "transformer.layers.16.2.to_v.weight": 1048576, |
| "transformer.layers.16.2.to_v.bias": 1024, |
| "transformer.layers.16.2.to_out.0.weight": 1048576, |
| "transformer.layers.16.2.to_out.0.bias": 1024, |
| "transformer.layers.16.3.g": 1024, |
| "transformer.layers.16.4.ff.0.0.weight": 4194304, |
| "transformer.layers.16.4.ff.0.0.bias": 4096, |
| "transformer.layers.16.4.ff.2.weight": 4194304, |
| "transformer.layers.16.4.ff.2.bias": 1024, |
| "transformer.layers.17.0.weight": 2097152, |
| "transformer.layers.17.1.g": 1024, |
| "transformer.layers.17.2.to_q.weight": 1048576, |
| "transformer.layers.17.2.to_q.bias": 1024, |
| "transformer.layers.17.2.to_k.weight": 1048576, |
| "transformer.layers.17.2.to_k.bias": 1024, |
| "transformer.layers.17.2.to_v.weight": 1048576, |
| "transformer.layers.17.2.to_v.bias": 1024, |
| "transformer.layers.17.2.to_out.0.weight": 1048576, |
| "transformer.layers.17.2.to_out.0.bias": 1024, |
| "transformer.layers.17.3.g": 1024, |
| "transformer.layers.17.4.ff.0.0.weight": 4194304, |
| "transformer.layers.17.4.ff.0.0.bias": 4096, |
| "transformer.layers.17.4.ff.2.weight": 4194304, |
| "transformer.layers.17.4.ff.2.bias": 1024, |
| "transformer.layers.18.0.weight": 2097152, |
| "transformer.layers.18.1.g": 1024, |
| "transformer.layers.18.2.to_q.weight": 1048576, |
| "transformer.layers.18.2.to_q.bias": 1024, |
| "transformer.layers.18.2.to_k.weight": 1048576, |
| "transformer.layers.18.2.to_k.bias": 1024, |
| "transformer.layers.18.2.to_v.weight": 1048576, |
| "transformer.layers.18.2.to_v.bias": 1024, |
| "transformer.layers.18.2.to_out.0.weight": 1048576, |
| "transformer.layers.18.2.to_out.0.bias": 1024, |
| "transformer.layers.18.3.g": 1024, |
| "transformer.layers.18.4.ff.0.0.weight": 4194304, |
| "transformer.layers.18.4.ff.0.0.bias": 4096, |
| "transformer.layers.18.4.ff.2.weight": 4194304, |
| "transformer.layers.18.4.ff.2.bias": 1024, |
| "transformer.layers.19.0.weight": 2097152, |
| "transformer.layers.19.1.g": 1024, |
| "transformer.layers.19.2.to_q.weight": 1048576, |
| "transformer.layers.19.2.to_q.bias": 1024, |
| "transformer.layers.19.2.to_k.weight": 1048576, |
| "transformer.layers.19.2.to_k.bias": 1024, |
| "transformer.layers.19.2.to_v.weight": 1048576, |
| "transformer.layers.19.2.to_v.bias": 1024, |
| "transformer.layers.19.2.to_out.0.weight": 1048576, |
| "transformer.layers.19.2.to_out.0.bias": 1024, |
| "transformer.layers.19.3.g": 1024, |
| "transformer.layers.19.4.ff.0.0.weight": 4194304, |
| "transformer.layers.19.4.ff.0.0.bias": 4096, |
| "transformer.layers.19.4.ff.2.weight": 4194304, |
| "transformer.layers.19.4.ff.2.bias": 1024, |
| "transformer.layers.20.0.weight": 2097152, |
| "transformer.layers.20.1.g": 1024, |
| "transformer.layers.20.2.to_q.weight": 1048576, |
| "transformer.layers.20.2.to_q.bias": 1024, |
| "transformer.layers.20.2.to_k.weight": 1048576, |
| "transformer.layers.20.2.to_k.bias": 1024, |
| "transformer.layers.20.2.to_v.weight": 1048576, |
| "transformer.layers.20.2.to_v.bias": 1024, |
| "transformer.layers.20.2.to_out.0.weight": 1048576, |
| "transformer.layers.20.2.to_out.0.bias": 1024, |
| "transformer.layers.20.3.g": 1024, |
| "transformer.layers.20.4.ff.0.0.weight": 4194304, |
| "transformer.layers.20.4.ff.0.0.bias": 4096, |
| "transformer.layers.20.4.ff.2.weight": 4194304, |
| "transformer.layers.20.4.ff.2.bias": 1024, |
| "transformer.layers.21.0.weight": 2097152, |
| "transformer.layers.21.1.g": 1024, |
| "transformer.layers.21.2.to_q.weight": 1048576, |
| "transformer.layers.21.2.to_q.bias": 1024, |
| "transformer.layers.21.2.to_k.weight": 1048576, |
| "transformer.layers.21.2.to_k.bias": 1024, |
| "transformer.layers.21.2.to_v.weight": 1048576, |
| "transformer.layers.21.2.to_v.bias": 1024, |
| "transformer.layers.21.2.to_out.0.weight": 1048576, |
| "transformer.layers.21.2.to_out.0.bias": 1024, |
| "transformer.layers.21.3.g": 1024, |
| "transformer.layers.21.4.ff.0.0.weight": 4194304, |
| "transformer.layers.21.4.ff.0.0.bias": 4096, |
| "transformer.layers.21.4.ff.2.weight": 4194304, |
| "transformer.layers.21.4.ff.2.bias": 1024, |
| "transformer.layers.22.0.weight": 2097152, |
| "transformer.layers.22.1.g": 1024, |
| "transformer.layers.22.2.to_q.weight": 1048576, |
| "transformer.layers.22.2.to_q.bias": 1024, |
| "transformer.layers.22.2.to_k.weight": 1048576, |
| "transformer.layers.22.2.to_k.bias": 1024, |
| "transformer.layers.22.2.to_v.weight": 1048576, |
| "transformer.layers.22.2.to_v.bias": 1024, |
| "transformer.layers.22.2.to_out.0.weight": 1048576, |
| "transformer.layers.22.2.to_out.0.bias": 1024, |
| "transformer.layers.22.3.g": 1024, |
| "transformer.layers.22.4.ff.0.0.weight": 4194304, |
| "transformer.layers.22.4.ff.0.0.bias": 4096, |
| "transformer.layers.22.4.ff.2.weight": 4194304, |
| "transformer.layers.22.4.ff.2.bias": 1024, |
| "transformer.layers.23.0.weight": 2097152, |
| "transformer.layers.23.1.g": 1024, |
| "transformer.layers.23.2.to_q.weight": 1048576, |
| "transformer.layers.23.2.to_q.bias": 1024, |
| "transformer.layers.23.2.to_k.weight": 1048576, |
| "transformer.layers.23.2.to_k.bias": 1024, |
| "transformer.layers.23.2.to_v.weight": 1048576, |
| "transformer.layers.23.2.to_v.bias": 1024, |
| "transformer.layers.23.2.to_out.0.weight": 1048576, |
| "transformer.layers.23.2.to_out.0.bias": 1024, |
| "transformer.layers.23.3.g": 1024, |
| "transformer.layers.23.4.ff.0.0.weight": 4194304, |
| "transformer.layers.23.4.ff.0.0.bias": 4096, |
| "transformer.layers.23.4.ff.2.weight": 4194304, |
| "transformer.layers.23.4.ff.2.bias": 1024, |
| "transformer.layers.24.0.weight": 2097152, |
| "transformer.layers.24.1.g": 1024, |
| "transformer.layers.24.2.to_q.weight": 1048576, |
| "transformer.layers.24.2.to_q.bias": 1024, |
| "transformer.layers.24.2.to_k.weight": 1048576, |
| "transformer.layers.24.2.to_k.bias": 1024, |
| "transformer.layers.24.2.to_v.weight": 1048576, |
| "transformer.layers.24.2.to_v.bias": 1024, |
| "transformer.layers.24.2.to_out.0.weight": 1048576, |
| "transformer.layers.24.2.to_out.0.bias": 1024, |
| "transformer.layers.24.3.g": 1024, |
| "transformer.layers.24.4.ff.0.0.weight": 4194304, |
| "transformer.layers.24.4.ff.0.0.bias": 4096, |
| "transformer.layers.24.4.ff.2.weight": 4194304, |
| "transformer.layers.24.4.ff.2.bias": 1024, |
| "transformer.layers.25.0.weight": 2097152, |
| "transformer.layers.25.1.g": 1024, |
| "transformer.layers.25.2.to_q.weight": 1048576, |
| "transformer.layers.25.2.to_q.bias": 1024, |
| "transformer.layers.25.2.to_k.weight": 1048576, |
| "transformer.layers.25.2.to_k.bias": 1024, |
| "transformer.layers.25.2.to_v.weight": 1048576, |
| "transformer.layers.25.2.to_v.bias": 1024, |
| "transformer.layers.25.2.to_out.0.weight": 1048576, |
| "transformer.layers.25.2.to_out.0.bias": 1024, |
| "transformer.layers.25.3.g": 1024, |
| "transformer.layers.25.4.ff.0.0.weight": 4194304, |
| "transformer.layers.25.4.ff.0.0.bias": 4096, |
| "transformer.layers.25.4.ff.2.weight": 4194304, |
| "transformer.layers.25.4.ff.2.bias": 1024, |
| "transformer.norm_out.g": 1024, |
| "transformer.proj_out.weight": 102400, |
| "transformer.proj_out.bias": 100 |
| }, |
| "important_layers": [ |
| "transformer.time_embed.time_mlp.0.weight", |
| "transformer.time_embed.time_mlp.2.weight", |
| "transformer.text_embed.text_embed.weight", |
| "transformer.input_embed.proj.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight", |
| "transformer.layers.0.2.to_q.weight", |
| "transformer.layers.0.2.to_k.weight", |
| "transformer.layers.0.2.to_v.weight", |
| "transformer.layers.0.2.to_out.0.weight", |
| "transformer.layers.0.4.ff.0.0.weight", |
| "transformer.layers.0.4.ff.2.weight", |
| "transformer.layers.1.2.to_q.weight", |
| "transformer.layers.1.2.to_k.weight", |
| "transformer.layers.1.2.to_v.weight", |
| "transformer.layers.1.2.to_out.0.weight", |
| "transformer.layers.1.4.ff.0.0.weight", |
| "transformer.layers.1.4.ff.2.weight", |
| "transformer.layers.2.2.to_q.weight", |
| "transformer.layers.2.2.to_k.weight", |
| "transformer.layers.2.2.to_v.weight", |
| "transformer.layers.2.2.to_out.0.weight", |
| "transformer.layers.2.4.ff.0.0.weight", |
| "transformer.layers.2.4.ff.2.weight", |
| "transformer.layers.3.2.to_q.weight", |
| "transformer.layers.3.2.to_k.weight", |
| "transformer.layers.3.2.to_v.weight", |
| "transformer.layers.3.2.to_out.0.weight", |
| "transformer.layers.3.4.ff.0.0.weight", |
| "transformer.layers.3.4.ff.2.weight", |
| "transformer.layers.4.2.to_q.weight", |
| "transformer.layers.4.2.to_k.weight", |
| "transformer.layers.4.2.to_v.weight", |
| "transformer.layers.4.2.to_out.0.weight", |
| "transformer.layers.4.4.ff.0.0.weight", |
| "transformer.layers.4.4.ff.2.weight", |
| "transformer.layers.5.2.to_q.weight", |
| "transformer.layers.5.2.to_k.weight", |
| "transformer.layers.5.2.to_v.weight", |
| "transformer.layers.5.2.to_out.0.weight", |
| "transformer.layers.5.4.ff.0.0.weight", |
| "transformer.layers.5.4.ff.2.weight", |
| "transformer.layers.6.2.to_q.weight", |
| "transformer.layers.6.2.to_k.weight", |
| "transformer.layers.6.2.to_v.weight", |
| "transformer.layers.6.2.to_out.0.weight", |
| "transformer.layers.6.4.ff.0.0.weight", |
| "transformer.layers.6.4.ff.2.weight", |
| "transformer.layers.7.2.to_q.weight", |
| "transformer.layers.7.2.to_k.weight", |
| "transformer.layers.7.2.to_v.weight", |
| "transformer.layers.7.2.to_out.0.weight", |
| "transformer.layers.7.4.ff.0.0.weight", |
| "transformer.layers.7.4.ff.2.weight", |
| "transformer.layers.8.4.ff.0.0.weight", |
| "transformer.layers.8.4.ff.2.weight", |
| "transformer.layers.9.4.ff.0.0.weight", |
| "transformer.layers.9.4.ff.2.weight", |
| "transformer.layers.10.4.ff.0.0.weight", |
| "transformer.layers.10.4.ff.2.weight", |
| "transformer.layers.11.4.ff.0.0.weight", |
| "transformer.layers.11.4.ff.2.weight", |
| "transformer.layers.12.4.ff.0.0.weight", |
| "transformer.layers.12.4.ff.2.weight", |
| "transformer.layers.13.0.weight", |
| "transformer.layers.13.4.ff.0.0.weight", |
| "transformer.layers.13.4.ff.2.weight", |
| "transformer.layers.14.0.weight", |
| "transformer.layers.14.4.ff.0.0.weight", |
| "transformer.layers.14.4.ff.2.weight", |
| "transformer.layers.15.0.weight", |
| "transformer.layers.15.4.ff.0.0.weight", |
| "transformer.layers.15.4.ff.2.weight", |
| "transformer.layers.16.4.ff.0.0.weight", |
| "transformer.layers.16.4.ff.2.weight", |
| "transformer.layers.17.4.ff.0.0.weight", |
| "transformer.layers.17.4.ff.2.weight", |
| "transformer.layers.18.4.ff.0.0.weight", |
| "transformer.layers.18.4.ff.2.weight", |
| "transformer.layers.19.4.ff.0.0.weight", |
| "transformer.layers.19.4.ff.2.weight", |
| "transformer.layers.20.4.ff.0.0.weight", |
| "transformer.layers.20.4.ff.2.weight", |
| "transformer.layers.21.4.ff.0.0.weight", |
| "transformer.layers.21.4.ff.2.weight", |
| "transformer.layers.22.4.ff.0.0.weight", |
| "transformer.layers.22.4.ff.2.weight", |
| "transformer.layers.23.4.ff.0.0.weight", |
| "transformer.layers.23.4.ff.2.weight", |
| "transformer.layers.24.4.ff.0.0.weight", |
| "transformer.layers.24.4.ff.2.weight", |
| "transformer.layers.25.4.ff.0.0.weight", |
| "transformer.layers.25.4.ff.2.weight" |
| ], |
| "bottleneck_layers": [], |
| "recommendations": { |
| "focus_layers": [ |
| "transformer.time_embed.time_mlp.0.weight", |
| "transformer.time_embed.time_mlp.2.weight", |
| "transformer.text_embed.text_embed.weight", |
| "transformer.input_embed.proj.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight", |
| "transformer.layers.0.2.to_q.weight", |
| "transformer.layers.0.2.to_k.weight", |
| "transformer.layers.0.2.to_v.weight", |
| "transformer.layers.0.2.to_out.0.weight", |
| "transformer.layers.0.4.ff.0.0.weight", |
| "transformer.layers.0.4.ff.2.weight", |
| "transformer.layers.1.2.to_q.weight", |
| "transformer.layers.1.2.to_k.weight", |
| "transformer.layers.1.2.to_v.weight", |
| "transformer.layers.1.2.to_out.0.weight", |
| "transformer.layers.1.4.ff.0.0.weight", |
| "transformer.layers.1.4.ff.2.weight", |
| "transformer.layers.2.2.to_q.weight", |
| "transformer.layers.2.2.to_k.weight", |
| "transformer.layers.2.2.to_v.weight", |
| "transformer.layers.2.2.to_out.0.weight", |
| "transformer.layers.2.4.ff.0.0.weight", |
| "transformer.layers.2.4.ff.2.weight", |
| "transformer.layers.3.2.to_q.weight", |
| "transformer.layers.3.2.to_k.weight", |
| "transformer.layers.3.2.to_v.weight", |
| "transformer.layers.3.2.to_out.0.weight", |
| "transformer.layers.3.4.ff.0.0.weight", |
| "transformer.layers.3.4.ff.2.weight", |
| "transformer.layers.4.2.to_q.weight", |
| "transformer.layers.4.2.to_k.weight", |
| "transformer.layers.4.2.to_v.weight", |
| "transformer.layers.4.2.to_out.0.weight", |
| "transformer.layers.4.4.ff.0.0.weight", |
| "transformer.layers.4.4.ff.2.weight", |
| "transformer.layers.5.2.to_q.weight", |
| "transformer.layers.5.2.to_k.weight", |
| "transformer.layers.5.2.to_v.weight", |
| "transformer.layers.5.2.to_out.0.weight", |
| "transformer.layers.5.4.ff.0.0.weight", |
| "transformer.layers.5.4.ff.2.weight", |
| "transformer.layers.6.2.to_q.weight", |
| "transformer.layers.6.2.to_k.weight", |
| "transformer.layers.6.2.to_v.weight", |
| "transformer.layers.6.2.to_out.0.weight", |
| "transformer.layers.6.4.ff.0.0.weight", |
| "transformer.layers.6.4.ff.2.weight", |
| "transformer.layers.7.2.to_q.weight", |
| "transformer.layers.7.2.to_k.weight", |
| "transformer.layers.7.2.to_v.weight", |
| "transformer.layers.7.2.to_out.0.weight", |
| "transformer.layers.7.4.ff.0.0.weight", |
| "transformer.layers.7.4.ff.2.weight", |
| "transformer.layers.8.4.ff.0.0.weight", |
| "transformer.layers.8.4.ff.2.weight", |
| "transformer.layers.9.4.ff.0.0.weight", |
| "transformer.layers.9.4.ff.2.weight", |
| "transformer.layers.10.4.ff.0.0.weight", |
| "transformer.layers.10.4.ff.2.weight", |
| "transformer.layers.11.4.ff.0.0.weight", |
| "transformer.layers.11.4.ff.2.weight", |
| "transformer.layers.12.4.ff.0.0.weight", |
| "transformer.layers.12.4.ff.2.weight", |
| "transformer.layers.13.0.weight", |
| "transformer.layers.13.4.ff.0.0.weight", |
| "transformer.layers.13.4.ff.2.weight", |
| "transformer.layers.14.0.weight", |
| "transformer.layers.14.4.ff.0.0.weight", |
| "transformer.layers.14.4.ff.2.weight", |
| "transformer.layers.15.0.weight", |
| "transformer.layers.15.4.ff.0.0.weight", |
| "transformer.layers.15.4.ff.2.weight", |
| "transformer.layers.16.4.ff.0.0.weight", |
| "transformer.layers.16.4.ff.2.weight", |
| "transformer.layers.17.4.ff.0.0.weight", |
| "transformer.layers.17.4.ff.2.weight", |
| "transformer.layers.18.4.ff.0.0.weight", |
| "transformer.layers.18.4.ff.2.weight", |
| "transformer.layers.19.4.ff.0.0.weight", |
| "transformer.layers.19.4.ff.2.weight", |
| "transformer.layers.20.4.ff.0.0.weight", |
| "transformer.layers.20.4.ff.2.weight", |
| "transformer.layers.21.4.ff.0.0.weight", |
| "transformer.layers.21.4.ff.2.weight", |
| "transformer.layers.22.4.ff.0.0.weight", |
| "transformer.layers.22.4.ff.2.weight", |
| "transformer.layers.23.4.ff.0.0.weight", |
| "transformer.layers.23.4.ff.2.weight", |
| "transformer.layers.24.4.ff.0.0.weight", |
| "transformer.layers.24.4.ff.2.weight", |
| "transformer.layers.25.4.ff.0.0.weight", |
| "transformer.layers.25.4.ff.2.weight" |
| ] |
| }, |
| "total_parameters": 391, |
| "total_elements": 360755948, |
| "param_ranges": { |
| "transformer.time_embed.time_mlp.0.weight": { |
| "min": -0.43036678433418274, |
| "max": 0.2982814610004425, |
| "mean": -0.0025639168452471495, |
| "std": 0.04256023094058037, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 256 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.0.bias": { |
| "min": -0.06307890266180038, |
| "max": 0.10733882337808609, |
| "mean": 0.000591748976148665, |
| "std": 0.034078747034072876, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.2.weight": { |
| "min": -0.41281235218048096, |
| "max": 0.8368205428123474, |
| "mean": -0.00020580022828653455, |
| "std": 0.02411011978983879, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.2.bias": { |
| "min": -0.11508890986442566, |
| "max": 0.3209010660648346, |
| "mean": -0.0009312849142588675, |
| "std": 0.01954229176044464, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.text_embed.text_embed.weight": { |
| "min": -2.7886247634887695, |
| "max": 2.8676700592041016, |
| "mean": -0.0003673843457363546, |
| "std": 0.6154846549034119, |
| "sparsity": 0.0, |
| "shape": [ |
| 2546, |
| 100 |
| ] |
| }, |
| "transformer.input_embed.proj.weight": { |
| "min": -0.27876999974250793, |
| "max": 0.3816433846950531, |
| "mean": 0.00041971245082095265, |
| "std": 0.0427577942609787, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 300 |
| ] |
| }, |
| "transformer.input_embed.proj.bias": { |
| "min": -0.22179193794727325, |
| "max": 0.20910178124904633, |
| "mean": -0.00449436716735363, |
| "std": 0.0408766008913517, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { |
| "min": -0.4284907877445221, |
| "max": 0.4762955904006958, |
| "mean": 1.3556076510212733e-06, |
| "std": 0.024511976167559624, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 64, |
| 31 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { |
| "min": -0.32450857758522034, |
| "max": 0.15602749586105347, |
| "mean": -0.04666242375969887, |
| "std": 0.05150512233376503, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { |
| "min": -0.4105537235736847, |
| "max": 0.35443225502967834, |
| "mean": -0.00012739744852297008, |
| "std": 0.023602385073900223, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 64, |
| 31 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { |
| "min": -0.22917909920215607, |
| "max": 0.2621273994445801, |
| "mean": -0.029117178171873093, |
| "std": 0.049283698201179504, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.1.g": { |
| "min": 0.2544216215610504, |
| "max": 0.8185670971870422, |
| "mean": 0.5252723693847656, |
| "std": 0.08049405366182327, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_q.weight": { |
| "min": -0.2967362403869629, |
| "max": 0.26540544629096985, |
| "mean": -0.0004257934633642435, |
| "std": 0.032104942947626114, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_q.bias": { |
| "min": -0.09282971918582916, |
| "max": 0.12431935220956802, |
| "mean": 0.000645699561573565, |
| "std": 0.02571764960885048, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_k.weight": { |
| "min": -0.2909117043018341, |
| "max": 0.28097161650657654, |
| "mean": -7.593112241011113e-05, |
| "std": 0.030932165682315826, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_k.bias": { |
| "min": -5.890472888946533, |
| "max": 5.805418491363525, |
| "mean": -0.009322225116193295, |
| "std": 1.2942466735839844, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_v.weight": { |
| "min": -0.42496761679649353, |
| "max": 0.3436029851436615, |
| "mean": 9.743953705765307e-05, |
| "std": 0.029953880235552788, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_v.bias": { |
| "min": -0.028933702036738396, |
| "max": 0.027695059776306152, |
| "mean": -0.00032178848050534725, |
| "std": 0.012570273131132126, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_out.0.weight": { |
| "min": -0.45337191224098206, |
| "max": 0.44843629002571106, |
| "mean": 2.4102073439280502e-05, |
| "std": 0.023851700127124786, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_out.0.bias": { |
| "min": -0.08852554112672806, |
| "max": 0.09096554666757584, |
| "mean": 0.0022833123803138733, |
| "std": 0.01949877291917801, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.3.g": { |
| "min": 0.2666127681732178, |
| "max": 1.0543620586395264, |
| "mean": 0.5309467911720276, |
| "std": 0.10404026508331299, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.4.ff.0.0.weight": { |
| "min": -0.5751341581344604, |
| "max": 0.6088229417800903, |
| "mean": -0.0004320710140746087, |
| "std": 0.0386008694767952, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.4.ff.0.0.bias": { |
| "min": -0.18247970938682556, |
| "max": 0.04547928646206856, |
| "mean": -0.029448386281728745, |
| "std": 0.04255641624331474, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.0.4.ff.2.weight": { |
| "min": -1.166790246963501, |
| "max": 1.6334140300750732, |
| "mean": 0.00032607169123366475, |
| "std": 0.02769557386636734, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.0.4.ff.2.bias": { |
| "min": -0.16213519871234894, |
| "max": 0.2053978145122528, |
| "mean": -0.021131210029125214, |
| "std": 0.02792428247630596, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.1.g": { |
| "min": 0.22390854358673096, |
| "max": 0.8422228693962097, |
| "mean": 0.4874723255634308, |
| "std": 0.0749419778585434, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_q.weight": { |
| "min": -0.2551497519016266, |
| "max": 0.3057706952095032, |
| "mean": -7.631031621713191e-06, |
| "std": 0.03347672149538994, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_q.bias": { |
| "min": -0.09526324272155762, |
| "max": 0.11054196208715439, |
| "mean": 5.9016994782723486e-05, |
| "std": 0.026952214539051056, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_k.weight": { |
| "min": -0.29700320959091187, |
| "max": 0.29560279846191406, |
| "mean": 5.1945076847914606e-05, |
| "std": 0.03254617378115654, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_k.bias": { |
| "min": -5.157034397125244, |
| "max": 5.077272891998291, |
| "mean": -0.014557666145265102, |
| "std": 1.1561598777770996, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_v.weight": { |
| "min": -0.34469008445739746, |
| "max": 0.3430800437927246, |
| "mean": 7.922034274088219e-05, |
| "std": 0.03006283938884735, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_v.bias": { |
| "min": -0.03611171245574951, |
| "max": 0.03316429257392883, |
| "mean": -0.00014332182763610035, |
| "std": 0.013021831400692463, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_out.0.weight": { |
| "min": -0.3155629634857178, |
| "max": 0.3745230734348297, |
| "mean": -2.0780769773409702e-05, |
| "std": 0.024060120806097984, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_out.0.bias": { |
| "min": -0.10523121803998947, |
| "max": 0.12181323021650314, |
| "mean": -0.0019697900861501694, |
| "std": 0.028833730146288872, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.3.g": { |
| "min": 0.31127864122390747, |
| "max": 1.118981957435608, |
| "mean": 0.6661038398742676, |
| "std": 0.09739536792039871, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.4.ff.0.0.weight": { |
| "min": -0.8734181523323059, |
| "max": 0.6272271275520325, |
| "mean": 0.0016762978630140424, |
| "std": 0.04744264855980873, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.4.ff.0.0.bias": { |
| "min": -0.27110713720321655, |
| "max": 0.03433133661746979, |
| "mean": -0.04661067947745323, |
| "std": 0.04056624323129654, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.1.4.ff.2.weight": { |
| "min": -0.9215274453163147, |
| "max": 0.9644713997840881, |
| "mean": 0.0010202918201684952, |
| "std": 0.0407060943543911, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.1.4.ff.2.bias": { |
| "min": -0.1444365382194519, |
| "max": 0.07489711046218872, |
| "mean": -0.00908645335584879, |
| "std": 0.02568359486758709, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.1.g": { |
| "min": 0.23954921960830688, |
| "max": 0.7114554047584534, |
| "mean": 0.44711926579475403, |
| "std": 0.059072595089673996, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_q.weight": { |
| "min": -0.27211347222328186, |
| "max": 0.29757410287857056, |
| "mean": 9.160639820038341e-06, |
| "std": 0.03547541797161102, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_q.bias": { |
| "min": -0.11930356919765472, |
| "max": 0.1185561791062355, |
| "mean": 0.0007570894667878747, |
| "std": 0.027588583528995514, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_k.weight": { |
| "min": -0.2805509567260742, |
| "max": 0.2793390452861786, |
| "mean": -7.711815123911947e-05, |
| "std": 0.03510286659002304, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_k.bias": { |
| "min": -2.5059573650360107, |
| "max": 2.5179529190063477, |
| "mean": 0.02672126702964306, |
| "std": 0.5862834453582764, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_v.weight": { |
| "min": -0.22094596922397614, |
| "max": 0.27129310369491577, |
| "mean": 2.4950504666776396e-06, |
| "std": 0.030734829604625702, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_v.bias": { |
| "min": -0.03352592885494232, |
| "max": 0.03140881285071373, |
| "mean": 0.00011744203220587224, |
| "std": 0.012399573810398579, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_out.0.weight": { |
| "min": -0.23510752618312836, |
| "max": 0.23160243034362793, |
| "mean": 5.7065204600803554e-05, |
| "std": 0.02570049650967121, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_out.0.bias": { |
| "min": -0.13554446399211884, |
| "max": 0.1277279406785965, |
| "mean": -0.005496564321219921, |
| "std": 0.039924751967191696, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.3.g": { |
| "min": 0.3543228507041931, |
| "max": 1.169933795928955, |
| "mean": 0.7103918194770813, |
| "std": 0.10339365899562836, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.4.ff.0.0.weight": { |
| "min": -0.6172032952308655, |
| "max": 0.5551565885543823, |
| "mean": 0.0011604262981563807, |
| "std": 0.04612047225236893, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.4.ff.0.0.bias": { |
| "min": -0.18880973756313324, |
| "max": 0.02472936362028122, |
| "mean": -0.034827686846256256, |
| "std": 0.028596267104148865, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.2.4.ff.2.weight": { |
| "min": -1.1306864023208618, |
| "max": 0.9699204564094543, |
| "mean": 0.00035697812563739717, |
| "std": 0.0423479862511158, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.2.4.ff.2.bias": { |
| "min": -0.5971187949180603, |
| "max": 0.06284646689891815, |
| "mean": -0.00487535959109664, |
| "std": 0.028591454029083252, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.1.g": { |
| "min": 0.37525925040245056, |
| "max": 0.938994288444519, |
| "mean": 0.5923536419868469, |
| "std": 0.06656986474990845, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_q.weight": { |
| "min": -0.3912387192249298, |
| "max": 0.3688672184944153, |
| "mean": 7.05350175849162e-05, |
| "std": 0.03718964010477066, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_q.bias": { |
| "min": -0.11892075091600418, |
| "max": 0.13641902804374695, |
| "mean": 0.0009228037670254707, |
| "std": 0.029190916568040848, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_k.weight": { |
| "min": -0.6183786392211914, |
| "max": 0.5081523060798645, |
| "mean": 1.5137170521484222e-05, |
| "std": 0.036442697048187256, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_k.bias": { |
| "min": -8.175475120544434, |
| "max": 8.77673053741455, |
| "mean": -0.10916879773139954, |
| "std": 1.6969348192214966, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_v.weight": { |
| "min": -0.27656111121177673, |
| "max": 0.23974747955799103, |
| "mean": 5.267578671919182e-05, |
| "std": 0.03261591121554375, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_v.bias": { |
| "min": -0.051889754831790924, |
| "max": 0.03952917456626892, |
| "mean": 9.714082989376038e-05, |
| "std": 0.012956415303051472, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_out.0.weight": { |
| "min": -0.23051224648952484, |
| "max": 0.23422203958034515, |
| "mean": -2.1783589545520954e-05, |
| "std": 0.029392505064606667, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_out.0.bias": { |
| "min": -0.20386114716529846, |
| "max": 0.105349101126194, |
| "mean": -0.004017278086394072, |
| "std": 0.032608963549137115, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.3.g": { |
| "min": 0.3398902118206024, |
| "max": 1.0104986429214478, |
| "mean": 0.7006295919418335, |
| "std": 0.09645849466323853, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.4.ff.0.0.weight": { |
| "min": -0.5644850134849548, |
| "max": 0.8330016136169434, |
| "mean": 0.0004154921043664217, |
| "std": 0.04230193421244621, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.4.ff.0.0.bias": { |
| "min": -0.21176111698150635, |
| "max": 0.030274739488959312, |
| "mean": -0.03216158226132393, |
| "std": 0.02647627517580986, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.3.4.ff.2.weight": { |
| "min": -0.7536418437957764, |
| "max": 0.7178125381469727, |
| "mean": -1.392904141539475e-05, |
| "std": 0.03684176877140999, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.3.4.ff.2.bias": { |
| "min": -0.2630210220813751, |
| "max": 0.10589547455310822, |
| "mean": -0.0030209862161427736, |
| "std": 0.028848819434642792, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.1.g": { |
| "min": 0.2840619385242462, |
| "max": 0.6940633654594421, |
| "mean": 0.4993802607059479, |
| "std": 0.04630398005247116, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_q.weight": { |
| "min": -0.27834540605545044, |
| "max": 0.23377880454063416, |
| "mean": -0.00011083983554271981, |
| "std": 0.03876272216439247, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_q.bias": { |
| "min": -0.15375865995883942, |
| "max": 0.12639263272285461, |
| "mean": -0.002223189687356353, |
| "std": 0.03333896026015282, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_k.weight": { |
| "min": -0.413473516702652, |
| "max": 0.6594987511634827, |
| "mean": -1.9574425095925108e-05, |
| "std": 0.039102163165807724, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_k.bias": { |
| "min": -4.232054233551025, |
| "max": 4.715608596801758, |
| "mean": -0.020489608868956566, |
| "std": 1.0068248510360718, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_v.weight": { |
| "min": -0.24494825303554535, |
| "max": 0.20708487927913666, |
| "mean": 4.434686343302019e-05, |
| "std": 0.03396739438176155, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_v.bias": { |
| "min": -0.034493304789066315, |
| "max": 0.04486649110913277, |
| "mean": -2.654863055795431e-05, |
| "std": 0.012638254091143608, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_out.0.weight": { |
| "min": -0.2005356252193451, |
| "max": 0.2055814564228058, |
| "mean": -3.0033888833713718e-05, |
| "std": 0.031025094911456108, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_out.0.bias": { |
| "min": -0.19959698617458344, |
| "max": 0.11300574988126755, |
| "mean": -0.002902751788496971, |
| "std": 0.03449735790491104, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.3.g": { |
| "min": 0.3668424189090729, |
| "max": 1.05502188205719, |
| "mean": 0.6704874634742737, |
| "std": 0.06617505103349686, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.4.ff.0.0.weight": { |
| "min": -0.3976363241672516, |
| "max": 0.5017815828323364, |
| "mean": -3.87727704946883e-05, |
| "std": 0.041137050837278366, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.4.ff.0.0.bias": { |
| "min": -0.12772123515605927, |
| "max": 0.026762252673506737, |
| "mean": -0.03051420859992504, |
| "std": 0.021863147616386414, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.4.4.ff.2.weight": { |
| "min": -0.44920089840888977, |
| "max": 0.4333121180534363, |
| "mean": 7.599063974339515e-05, |
| "std": 0.034896738827228546, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.4.4.ff.2.bias": { |
| "min": -0.2671979069709778, |
| "max": 0.07298687100410461, |
| "mean": -0.0010975392069667578, |
| "std": 0.023116325959563255, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.1.g": { |
| "min": 0.28697913885116577, |
| "max": 0.6839067339897156, |
| "mean": 0.5244333744049072, |
| "std": 0.047293804585933685, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_q.weight": { |
| "min": -0.22255805134773254, |
| "max": 0.22290681302547455, |
| "mean": 1.621080627955962e-05, |
| "std": 0.03895403817296028, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_q.bias": { |
| "min": -0.13629747927188873, |
| "max": 0.109336718916893, |
| "mean": 0.0002461877593304962, |
| "std": 0.02917083166539669, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_k.weight": { |
| "min": -0.3738900125026703, |
| "max": 0.43744465708732605, |
| "mean": -9.668656275607646e-06, |
| "std": 0.03929208964109421, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_k.bias": { |
| "min": -3.840332269668579, |
| "max": 4.992400646209717, |
| "mean": 0.009748304262757301, |
| "std": 0.8444803953170776, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_v.weight": { |
| "min": -0.22292070090770721, |
| "max": 0.21977820992469788, |
| "mean": -4.448638719622977e-07, |
| "std": 0.03441440686583519, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_v.bias": { |
| "min": -0.04357949644327164, |
| "max": 0.03590534254908562, |
| "mean": -0.000258232990745455, |
| "std": 0.012078864499926567, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_out.0.weight": { |
| "min": -0.21297886967658997, |
| "max": 0.18814441561698914, |
| "mean": -1.71422834682744e-05, |
| "std": 0.031540658324956894, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_out.0.bias": { |
| "min": -0.1805071383714676, |
| "max": 0.12073972076177597, |
| "mean": -0.00239769509062171, |
| "std": 0.04125608131289482, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.3.g": { |
| "min": 0.4227307438850403, |
| "max": 0.9400621056556702, |
| "mean": 0.662601888179779, |
| "std": 0.056538671255111694, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.4.ff.0.0.weight": { |
| "min": -0.37151503562927246, |
| "max": 0.4761146008968353, |
| "mean": -8.195374539354816e-05, |
| "std": 0.040896203368902206, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.4.ff.0.0.bias": { |
| "min": -0.20797580480575562, |
| "max": 0.027151037007570267, |
| "mean": -0.030222713947296143, |
| "std": 0.021336952224373817, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.5.4.ff.2.weight": { |
| "min": -0.33968234062194824, |
| "max": 0.7333835959434509, |
| "mean": 8.077031816355884e-05, |
| "std": 0.034772153943777084, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.5.4.ff.2.bias": { |
| "min": -0.23987196385860443, |
| "max": 0.05037139728665352, |
| "mean": -0.0011877692304551601, |
| "std": 0.020454443991184235, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.1.g": { |
| "min": 0.30607396364212036, |
| "max": 0.652435839176178, |
| "mean": 0.5250428915023804, |
| "std": 0.04590361937880516, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_q.weight": { |
| "min": -0.3039066791534424, |
| "max": 0.21754606068134308, |
| "mean": 7.030011329334229e-05, |
| "std": 0.03950100764632225, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_q.bias": { |
| "min": -0.14914348721504211, |
| "max": 0.13110090792179108, |
| "mean": 0.00035085732815787196, |
| "std": 0.030418941751122475, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_k.weight": { |
| "min": -0.2568054795265198, |
| "max": 0.20193904638290405, |
| "mean": 3.147923416690901e-05, |
| "std": 0.03949080780148506, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_k.bias": { |
| "min": -2.3329901695251465, |
| "max": 2.3725619316101074, |
| "mean": -0.02622254565358162, |
| "std": 0.4494195282459259, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_v.weight": { |
| "min": -0.18853308260440826, |
| "max": 0.2103482335805893, |
| "mean": 3.745816502487287e-05, |
| "std": 0.03479913994669914, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_v.bias": { |
| "min": -0.03156094253063202, |
| "max": 0.035385265946388245, |
| "mean": -0.0001973491598619148, |
| "std": 0.012292337603867054, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_out.0.weight": { |
| "min": -0.1882481426000595, |
| "max": 0.17012155055999756, |
| "mean": -6.810311606386676e-05, |
| "std": 0.03217574581503868, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_out.0.bias": { |
| "min": -0.13900111615657806, |
| "max": 0.13692621886730194, |
| "mean": -0.002514890395104885, |
| "std": 0.051281191408634186, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.3.g": { |
| "min": 0.46707433462142944, |
| "max": 0.9541991353034973, |
| "mean": 0.6688030958175659, |
| "std": 0.052486222237348557, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.4.ff.0.0.weight": { |
| "min": -0.32425403594970703, |
| "max": 0.30980852246284485, |
| "mean": -1.290425643674098e-06, |
| "std": 0.040951915085315704, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.4.ff.0.0.bias": { |
| "min": -0.12465585768222809, |
| "max": 0.02537902072072029, |
| "mean": -0.030681122094392776, |
| "std": 0.0198006983846426, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.6.4.ff.2.weight": { |
| "min": -0.43958571553230286, |
| "max": 0.44490763545036316, |
| "mean": 9.539163875160739e-05, |
| "std": 0.0351250097155571, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.6.4.ff.2.bias": { |
| "min": -0.2243558019399643, |
| "max": 0.0517578125, |
| "mean": -0.0011802279623225331, |
| "std": 0.018464019522070885, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.1.g": { |
| "min": 0.33896756172180176, |
| "max": 0.7381694912910461, |
| "mean": 0.5586157441139221, |
| "std": 0.04119841381907463, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_q.weight": { |
| "min": -0.27227938175201416, |
| "max": 0.27836883068084717, |
| "mean": 1.999387313844636e-05, |
| "std": 0.041062600910663605, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_q.bias": { |
| "min": -0.13660800457000732, |
| "max": 0.1392778903245926, |
| "mean": 0.0004841584013774991, |
| "std": 0.02658114954829216, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_k.weight": { |
| "min": -0.4896349310874939, |
| "max": 0.3551800847053528, |
| "mean": 8.872073522070423e-05, |
| "std": 0.04069973900914192, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_k.bias": { |
| "min": -2.293769121170044, |
| "max": 1.742555856704712, |
| "mean": -0.02106180600821972, |
| "std": 0.49974092841148376, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_v.weight": { |
| "min": -0.2175416797399521, |
| "max": 0.19781090319156647, |
| "mean": -4.052485746797174e-05, |
| "std": 0.03423763066530228, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_v.bias": { |
| "min": -0.04145532101392746, |
| "max": 0.038727227598428726, |
| "mean": -0.00013765225594397634, |
| "std": 0.012874336913228035, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_out.0.weight": { |
| "min": -0.177314892411232, |
| "max": 0.1832207590341568, |
| "mean": 4.75629567517899e-05, |
| "std": 0.03156043216586113, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_out.0.bias": { |
| "min": -0.1798381805419922, |
| "max": 0.18348462879657745, |
| "mean": -0.002212759107351303, |
| "std": 0.054820165038108826, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.3.g": { |
| "min": 0.4742435812950134, |
| "max": 1.0238897800445557, |
| "mean": 0.6451865434646606, |
| "std": 0.05008064582943916, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.4.ff.0.0.weight": { |
| "min": -0.2714613080024719, |
| "max": 0.3092961311340332, |
| "mean": 0.00011265614011790603, |
| "std": 0.04068758338689804, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.4.ff.0.0.bias": { |
| "min": -0.1055683121085167, |
| "max": 0.026772309094667435, |
| "mean": -0.029506118968129158, |
| "std": 0.017915068194270134, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.7.4.ff.2.weight": { |
| "min": -0.33880147337913513, |
| "max": 0.3287900686264038, |
| "mean": 5.556903124670498e-05, |
| "std": 0.03441847860813141, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.7.4.ff.2.bias": { |
| "min": -0.18144568800926208, |
| "max": 0.04239530488848686, |
| "mean": -0.001068950048647821, |
| "std": 0.017201630398631096, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.1.g": { |
| "min": 0.3253972828388214, |
| "max": 0.68559730052948, |
| "mean": 0.5111000537872314, |
| "std": 0.03672371804714203, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_q.weight": { |
| "min": -0.23373860120773315, |
| "max": 0.22572296857833862, |
| "mean": -3.580976772354916e-05, |
| "std": 0.039181455969810486, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_q.bias": { |
| "min": -0.11530666053295135, |
| "max": 0.1317266821861267, |
| "mean": 0.00015847355825826526, |
| "std": 0.029152128845453262, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_k.weight": { |
| "min": -0.3521575629711151, |
| "max": 0.2847552001476288, |
| "mean": 7.120977898011915e-06, |
| "std": 0.039250005036592484, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_k.bias": { |
| "min": -4.126590728759766, |
| "max": 3.538623332977295, |
| "mean": -0.01155401673167944, |
| "std": 0.6819069385528564, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_v.weight": { |
| "min": -0.21105211973190308, |
| "max": 0.20891818404197693, |
| "mean": 3.4748343750834465e-05, |
| "std": 0.03448968380689621, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_v.bias": { |
| "min": -0.03559347987174988, |
| "max": 0.04803197458386421, |
| "mean": 0.0007964627584442496, |
| "std": 0.012855397537350655, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_out.0.weight": { |
| "min": -0.21038679778575897, |
| "max": 0.1929050087928772, |
| "mean": -1.3255728390504373e-06, |
| "std": 0.0317002572119236, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_out.0.bias": { |
| "min": -0.18651214241981506, |
| "max": 0.17674075067043304, |
| "mean": -0.002840832807123661, |
| "std": 0.05859901383519173, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.3.g": { |
| "min": 0.4748058021068573, |
| "max": 1.0396208763122559, |
| "mean": 0.6513342261314392, |
| "std": 0.049332328140735626, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.4.ff.0.0.weight": { |
| "min": -0.2482759803533554, |
| "max": 0.3290877640247345, |
| "mean": 0.00018071771773975343, |
| "std": 0.04057670012116432, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.4.ff.0.0.bias": { |
| "min": -0.12517917156219482, |
| "max": 0.02484654076397419, |
| "mean": -0.030485937371850014, |
| "std": 0.017585651949048042, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.8.4.ff.2.weight": { |
| "min": -0.42004328966140747, |
| "max": 0.48050060868263245, |
| "mean": -1.1724823707481846e-06, |
| "std": 0.03540315851569176, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.8.4.ff.2.bias": { |
| "min": -0.15136678516864777, |
| "max": 0.04356072470545769, |
| "mean": 4.775111301569268e-05, |
| "std": 0.014870403334498405, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.1.g": { |
| "min": 0.3155934810638428, |
| "max": 0.6807596683502197, |
| "mean": 0.5528346300125122, |
| "std": 0.04051977023482323, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_q.weight": { |
| "min": -0.2063884735107422, |
| "max": 0.21910899877548218, |
| "mean": 3.103859489783645e-05, |
| "std": 0.038303472101688385, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_q.bias": { |
| "min": -0.13769029080867767, |
| "max": 0.1125277578830719, |
| "mean": 1.9220009562559426e-05, |
| "std": 0.02578623965382576, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_k.weight": { |
| "min": -0.40236374735832214, |
| "max": 0.37038296461105347, |
| "mean": 2.613713513710536e-05, |
| "std": 0.03818493336439133, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_k.bias": { |
| "min": -3.7654759883880615, |
| "max": 2.864607572555542, |
| "mean": 0.0011372193694114685, |
| "std": 0.51633220911026, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_v.weight": { |
| "min": -0.20273104310035706, |
| "max": 0.1974526047706604, |
| "mean": 2.9206170438556e-05, |
| "std": 0.034301165491342545, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_v.bias": { |
| "min": -0.05080447345972061, |
| "max": 0.0398997887969017, |
| "mean": -0.00042000875691883266, |
| "std": 0.013411123305559158, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_out.0.weight": { |
| "min": -0.19611378014087677, |
| "max": 0.20161780714988708, |
| "mean": -1.2710506780422293e-05, |
| "std": 0.03180883079767227, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_out.0.bias": { |
| "min": -0.19288454949855804, |
| "max": 0.1946749985218048, |
| "mean": -0.002961306367069483, |
| "std": 0.06252170354127884, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.3.g": { |
| "min": 0.3495900332927704, |
| "max": 1.0818731784820557, |
| "mean": 0.6670873165130615, |
| "std": 0.054898131638765335, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.4.ff.0.0.weight": { |
| "min": -0.22497375309467316, |
| "max": 0.25112366676330566, |
| "mean": 0.00035900043440051377, |
| "std": 0.04076608642935753, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.4.ff.0.0.bias": { |
| "min": -0.09095952659845352, |
| "max": 0.0440162755548954, |
| "mean": -0.030070394277572632, |
| "std": 0.017598489299416542, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.9.4.ff.2.weight": { |
| "min": -0.35297849774360657, |
| "max": 0.3037008047103882, |
| "mean": -4.511567021836527e-05, |
| "std": 0.03712863847613335, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.9.4.ff.2.bias": { |
| "min": -0.1615627110004425, |
| "max": 0.06344226002693176, |
| "mean": -7.402076153084636e-05, |
| "std": 0.019400237128138542, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.1.g": { |
| "min": 0.3484867811203003, |
| "max": 0.7205584049224854, |
| "mean": 0.5422928333282471, |
| "std": 0.03884059190750122, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_q.weight": { |
| "min": -0.21938610076904297, |
| "max": 0.223092183470726, |
| "mean": -1.1128584446851164e-05, |
| "std": 0.0392366424202919, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_q.bias": { |
| "min": -0.11822070181369781, |
| "max": 0.1703757494688034, |
| "mean": 0.0002712813438847661, |
| "std": 0.025094762444496155, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_k.weight": { |
| "min": -0.2461908757686615, |
| "max": 0.3006460666656494, |
| "mean": -3.654139436548576e-05, |
| "std": 0.03893598914146423, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_k.bias": { |
| "min": -3.499889850616455, |
| "max": 3.708961009979248, |
| "mean": 0.01583799161016941, |
| "std": 0.781475305557251, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_v.weight": { |
| "min": -0.21841062605381012, |
| "max": 0.23724044859409332, |
| "mean": -1.4060610737942625e-05, |
| "std": 0.03630809485912323, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_v.bias": { |
| "min": -0.04710822552442551, |
| "max": 0.05138855054974556, |
| "mean": 0.00048449443420395255, |
| "std": 0.013518092222511768, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_out.0.weight": { |
| "min": -0.21374864876270294, |
| "max": 0.2171718180179596, |
| "mean": 5.6465847592335194e-05, |
| "std": 0.03361979499459267, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_out.0.bias": { |
| "min": -0.21103325486183167, |
| "max": 0.2311553806066513, |
| "mean": -0.005100366659462452, |
| "std": 0.06185431033372879, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.3.g": { |
| "min": 0.36209091544151306, |
| "max": 1.0989015102386475, |
| "mean": 0.6992126703262329, |
| "std": 0.053264226764440536, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.4.ff.0.0.weight": { |
| "min": -0.23423242568969727, |
| "max": 0.24471710622310638, |
| "mean": 0.00046349214971996844, |
| "std": 0.04127512127161026, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.4.ff.0.0.bias": { |
| "min": -0.09780248254537582, |
| "max": 0.06824193894863129, |
| "mean": -0.031424038112163544, |
| "std": 0.018106156960129738, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.10.4.ff.2.weight": { |
| "min": -0.301416277885437, |
| "max": 0.35142549872398376, |
| "mean": -8.288547542179003e-05, |
| "std": 0.04028111323714256, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.10.4.ff.2.bias": { |
| "min": -0.15196339786052704, |
| "max": 0.14944323897361755, |
| "mean": 0.0002634537231642753, |
| "std": 0.023027226328849792, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.1.g": { |
| "min": 1.0, |
| "max": 1.0, |
| "mean": 1.0, |
| "std": 0.0, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_q.weight": { |
| "min": -0.031249936670064926, |
| "max": 0.031249839812517166, |
| "mean": -1.9292721844976768e-05, |
| "std": 0.01804409734904766, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_q.bias": { |
| "min": -0.031226642429828644, |
| "max": 0.03100142627954483, |
| "mean": -0.0010842883493751287, |
| "std": 0.01795371063053608, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_k.weight": { |
| "min": -0.031249966472387314, |
| "max": 0.031249895691871643, |
| "mean": 3.5441100862954045e-06, |
| "std": 0.018044503405690193, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_k.bias": { |
| "min": -0.031156372278928757, |
| "max": 0.031184475868940353, |
| "mean": 0.0003338930255267769, |
| "std": 0.018065759912133217, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_v.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_v.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_out.0.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_out.0.bias": { |
| "min": -0.0003838505072053522, |
| "max": 0.00040078736492432654, |
| "mean": 7.502898370148614e-06, |
| "std": 0.00012165026419097558, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.3.g": { |
| "min": 0.9996746778488159, |
| "max": 1.0017435550689697, |
| "mean": 1.0005855560302734, |
| "std": 0.0003091032849624753, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.4.ff.0.0.weight": { |
| "min": -0.03248094022274017, |
| "max": 0.03274688497185707, |
| "mean": -1.2105063433409669e-05, |
| "std": 0.01805892214179039, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.4.ff.0.0.bias": { |
| "min": -0.031171226873993874, |
| "max": 0.03214619308710098, |
| "mean": 0.0004906345857307315, |
| "std": 0.017989112064242363, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.11.4.ff.2.weight": { |
| "min": -0.0009105296921916306, |
| "max": 0.001230148016475141, |
| "mean": 2.7432847673480865e-06, |
| "std": 0.0001725118636386469, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.11.4.ff.2.bias": { |
| "min": -0.00036263937363401055, |
| "max": 0.00041731935925781727, |
| "mean": 7.396344699373003e-06, |
| "std": 0.00011976793757639825, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.1.g": { |
| "min": 0.38287991285324097, |
| "max": 0.7182613015174866, |
| "mean": 0.5806185603141785, |
| "std": 0.03863256797194481, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_q.weight": { |
| "min": -0.23785854876041412, |
| "max": 0.19614756107330322, |
| "mean": 2.640879392856732e-05, |
| "std": 0.037470731884241104, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_q.bias": { |
| "min": -0.11855358630418777, |
| "max": 0.16578993201255798, |
| "mean": 0.0009884096216410398, |
| "std": 0.027530910447239876, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_k.weight": { |
| "min": -0.2458752989768982, |
| "max": 0.500349223613739, |
| "mean": -5.065255027147941e-05, |
| "std": 0.03762831538915634, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_k.bias": { |
| "min": -3.936182975769043, |
| "max": 3.763556957244873, |
| "mean": -0.003569458145648241, |
| "std": 0.6807414293289185, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_v.weight": { |
| "min": -0.22705353796482086, |
| "max": 0.251341313123703, |
| "mean": -1.142405926657375e-05, |
| "std": 0.03743990138173103, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_v.bias": { |
| "min": -0.07149660587310791, |
| "max": 0.08067727833986282, |
| "mean": -0.0005162369925528765, |
| "std": 0.015656527131795883, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_out.0.weight": { |
| "min": -0.22786642611026764, |
| "max": 0.2578106224536896, |
| "mean": -2.8714632207993418e-05, |
| "std": 0.035426877439022064, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_out.0.bias": { |
| "min": -0.20022797584533691, |
| "max": 0.21474605798721313, |
| "mean": -0.005530310794711113, |
| "std": 0.0683104544878006, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.3.g": { |
| "min": 0.4048909544944763, |
| "max": 1.1872107982635498, |
| "mean": 0.7378276586532593, |
| "std": 0.05486491322517395, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.4.ff.0.0.weight": { |
| "min": -0.22101044654846191, |
| "max": 0.2458520382642746, |
| "mean": 0.0005211633397266269, |
| "std": 0.04134228080511093, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.4.ff.0.0.bias": { |
| "min": -0.10363762825727463, |
| "max": 0.023918237537145615, |
| "mean": -0.03266144543886185, |
| "std": 0.018866004422307014, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.12.4.ff.2.weight": { |
| "min": -0.4487850069999695, |
| "max": 0.42181524634361267, |
| "mean": -0.00043266150169074535, |
| "std": 0.04690360650420189, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.12.4.ff.2.bias": { |
| "min": -0.25105422735214233, |
| "max": 0.46941903233528137, |
| "mean": 0.003198462538421154, |
| "std": 0.044503308832645416, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.0.weight": { |
| "min": -0.3172111511230469, |
| "max": 0.33329516649246216, |
| "mean": -2.550867066020146e-05, |
| "std": 0.021290993317961693, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.13.1.g": { |
| "min": 0.32461607456207275, |
| "max": 0.6840938329696655, |
| "mean": 0.5709556341171265, |
| "std": 0.04454263672232628, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_q.weight": { |
| "min": -0.16456733644008636, |
| "max": 0.17394505441188812, |
| "mean": -4.8416688514407724e-05, |
| "std": 0.03318499028682709, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_q.bias": { |
| "min": -0.1864674687385559, |
| "max": 0.14258594810962677, |
| "mean": 3.8281112210825086e-05, |
| "std": 0.029655346646904945, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_k.weight": { |
| "min": -0.3803539276123047, |
| "max": 0.2457817941904068, |
| "mean": -1.002950102702016e-05, |
| "std": 0.032765936106443405, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_k.bias": { |
| "min": -3.6502115726470947, |
| "max": 3.285125494003296, |
| "mean": -0.014261167496442795, |
| "std": 0.9845166206359863, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_v.weight": { |
| "min": -0.23496489226818085, |
| "max": 0.24718151986598969, |
| "mean": -1.8079399524140172e-05, |
| "std": 0.041703000664711, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_v.bias": { |
| "min": -0.07261228561401367, |
| "max": 0.15409623086452484, |
| "mean": 0.0006618116749450564, |
| "std": 0.02513669617474079, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_out.0.weight": { |
| "min": -0.26620712876319885, |
| "max": 0.24820521473884583, |
| "mean": -1.5344019629992545e-05, |
| "std": 0.04014336317777634, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_out.0.bias": { |
| "min": -0.18921570479869843, |
| "max": 0.19427257776260376, |
| "mean": -0.0012257307535037398, |
| "std": 0.0666433721780777, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.3.g": { |
| "min": 0.32903727889060974, |
| "max": 0.9973482489585876, |
| "mean": 0.7190757393836975, |
| "std": 0.051972683519124985, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.4.ff.0.0.weight": { |
| "min": -0.23141932487487793, |
| "max": 0.24504587054252625, |
| "mean": 0.0001826788648031652, |
| "std": 0.04090685769915581, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.4.ff.0.0.bias": { |
| "min": -0.11396601796150208, |
| "max": 0.01875537633895874, |
| "mean": -0.04246020317077637, |
| "std": 0.018833719193935394, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.13.4.ff.2.weight": { |
| "min": -0.38934653997421265, |
| "max": 0.4067343473434448, |
| "mean": -2.1657660909113474e-05, |
| "std": 0.04854125902056694, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.13.4.ff.2.bias": { |
| "min": -0.6919497847557068, |
| "max": 0.411848247051239, |
| "mean": 0.0008590769721195102, |
| "std": 0.06023983284831047, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.0.weight": { |
| "min": -0.000941734469961375, |
| "max": 1.0006029605865479, |
| "mean": 0.00048819385119713843, |
| "std": 0.02209211327135563, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.14.1.g": { |
| "min": 1.0, |
| "max": 1.0, |
| "mean": 1.0, |
| "std": 0.0, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_q.weight": { |
| "min": -0.031249970197677612, |
| "max": 0.031249817460775375, |
| "mean": -2.1022657165303826e-05, |
| "std": 0.018035436049103737, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_q.bias": { |
| "min": -0.03122086077928543, |
| "max": 0.031233571469783783, |
| "mean": -0.0006771883927285671, |
| "std": 0.01782997138798237, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_k.weight": { |
| "min": -0.03124987706542015, |
| "max": 0.031249921768903732, |
| "mean": -8.839062502374873e-06, |
| "std": 0.01803446188569069, |
| "sparsity": 9.5367431640625e-07, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_k.bias": { |
| "min": -0.031232360750436783, |
| "max": 0.031245984137058258, |
| "mean": -0.0007298353011719882, |
| "std": 0.017944591119885445, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_v.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_v.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_out.0.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_out.0.bias": { |
| "min": -0.0003224269312340766, |
| "max": 0.0002993023081216961, |
| "mean": 6.5217936935368925e-06, |
| "std": 0.0001044638265739195, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.3.g": { |
| "min": 0.9996813535690308, |
| "max": 1.0015599727630615, |
| "mean": 1.000339150428772, |
| "std": 0.0002295201556989923, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.4.ff.0.0.weight": { |
| "min": -0.032516807317733765, |
| "max": 0.03226118162274361, |
| "mean": 4.161014203418745e-06, |
| "std": 0.018049873411655426, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.4.ff.0.0.bias": { |
| "min": -0.031123636290431023, |
| "max": 0.03165753185749054, |
| "mean": 0.0003850722569040954, |
| "std": 0.018070610240101814, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.14.4.ff.2.weight": { |
| "min": -0.0009010994690470397, |
| "max": 0.0009490308002568781, |
| "mean": 2.8105064302508254e-06, |
| "std": 0.00016459461767226458, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.14.4.ff.2.bias": { |
| "min": -0.00032089874730445445, |
| "max": 0.00031345486058853567, |
| "mean": 6.42746908852132e-06, |
| "std": 0.00010272208601236343, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.0.weight": { |
| "min": -0.23485393822193146, |
| "max": 0.27267447113990784, |
| "mean": 6.709969511575764e-06, |
| "std": 0.018812596797943115, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.15.1.g": { |
| "min": 0.32135409116744995, |
| "max": 0.6922963857650757, |
| "mean": 0.5815727710723877, |
| "std": 0.045748595148324966, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_q.weight": { |
| "min": -0.1818080097436905, |
| "max": 0.19750945270061493, |
| "mean": -1.1748516044463031e-05, |
| "std": 0.03318887948989868, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_q.bias": { |
| "min": -0.16036057472229004, |
| "max": 0.12932586669921875, |
| "mean": -0.0010664488654583693, |
| "std": 0.03411008045077324, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_k.weight": { |
| "min": -0.33175674080848694, |
| "max": 0.31088003516197205, |
| "mean": -1.0311603546142578e-05, |
| "std": 0.0322394073009491, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_k.bias": { |
| "min": -7.791174411773682, |
| "max": 8.749550819396973, |
| "mean": 0.09336872398853302, |
| "std": 1.6178374290466309, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_v.weight": { |
| "min": -0.23367103934288025, |
| "max": 0.2417406141757965, |
| "mean": 4.146722494624555e-05, |
| "std": 0.04086144268512726, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_v.bias": { |
| "min": -0.07598260790109634, |
| "max": 0.06560970842838287, |
| "mean": 0.0004800831666216254, |
| "std": 0.019395504146814346, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_out.0.weight": { |
| "min": -0.24560654163360596, |
| "max": 0.23375561833381653, |
| "mean": -2.9877701308578253e-06, |
| "std": 0.03943600133061409, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_out.0.bias": { |
| "min": -0.1627652794122696, |
| "max": 0.16063357889652252, |
| "mean": 0.0016337584238499403, |
| "std": 0.06525594741106033, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.3.g": { |
| "min": 0.5568895936012268, |
| "max": 0.9421334266662598, |
| "mean": 0.7127605080604553, |
| "std": 0.03978221118450165, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.4.ff.0.0.weight": { |
| "min": -0.22847090661525726, |
| "max": 0.25493934750556946, |
| "mean": -4.550522498902865e-05, |
| "std": 0.040581200271844864, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.4.ff.0.0.bias": { |
| "min": -0.1344706267118454, |
| "max": 0.022221069782972336, |
| "mean": -0.04133939743041992, |
| "std": 0.01835877075791359, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.15.4.ff.2.weight": { |
| "min": -0.4210115969181061, |
| "max": 0.3920403718948364, |
| "mean": -4.534296749625355e-06, |
| "std": 0.047791384160518646, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.15.4.ff.2.bias": { |
| "min": -0.6062420010566711, |
| "max": 0.6502339243888855, |
| "mean": 0.0015842054272070527, |
| "std": 0.05679100751876831, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.0.weight": { |
| "min": -0.252038836479187, |
| "max": 0.32106301188468933, |
| "mean": -6.296660103544127e-06, |
| "std": 0.019615648314356804, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.16.1.g": { |
| "min": 0.35961171984672546, |
| "max": 0.6809778809547424, |
| "mean": 0.5706169605255127, |
| "std": 0.042782142758369446, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_q.weight": { |
| "min": -0.22040791809558868, |
| "max": 0.17709863185882568, |
| "mean": -3.522756742313504e-05, |
| "std": 0.03430448845028877, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_q.bias": { |
| "min": -0.16271811723709106, |
| "max": 0.23246890306472778, |
| "mean": 0.0003684491675812751, |
| "std": 0.03280302509665489, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_k.weight": { |
| "min": -0.26368996500968933, |
| "max": 0.23957668244838715, |
| "mean": -5.283607606543228e-05, |
| "std": 0.03390355408191681, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_k.bias": { |
| "min": -4.8473591804504395, |
| "max": 5.083388805389404, |
| "mean": 0.04383918642997742, |
| "std": 1.2279300689697266, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_v.weight": { |
| "min": -0.24628077447414398, |
| "max": 0.2501535415649414, |
| "mean": 7.219994586193934e-05, |
| "std": 0.04399203881621361, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_v.bias": { |
| "min": -0.062493205070495605, |
| "max": 0.054467517882585526, |
| "mean": 0.0006505983183160424, |
| "std": 0.01718413643538952, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_out.0.weight": { |
| "min": -0.2860679030418396, |
| "max": 0.27162545919418335, |
| "mean": -4.9951679102377966e-05, |
| "std": 0.04299019277095795, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_out.0.bias": { |
| "min": -0.16042187809944153, |
| "max": 0.1700378805398941, |
| "mean": -0.0028904015198349953, |
| "std": 0.05927493795752525, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.3.g": { |
| "min": 0.5196736454963684, |
| "max": 0.931270182132721, |
| "mean": 0.7133467197418213, |
| "std": 0.03808481991291046, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.4.ff.0.0.weight": { |
| "min": -0.2380017340183258, |
| "max": 0.24893511831760406, |
| "mean": 0.00046494320849888027, |
| "std": 0.04046032205224037, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.4.ff.0.0.bias": { |
| "min": -0.1442948430776596, |
| "max": 0.041139233857393265, |
| "mean": -0.03967897593975067, |
| "std": 0.020518682897090912, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.16.4.ff.2.weight": { |
| "min": -0.5318877696990967, |
| "max": 0.5818965435028076, |
| "mean": 6.336260412354022e-06, |
| "std": 0.048867613077163696, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.16.4.ff.2.bias": { |
| "min": -0.5183113813400269, |
| "max": 0.4925517439842224, |
| "mean": 0.0023608217015862465, |
| "std": 0.053406503051519394, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.0.weight": { |
| "min": -0.2738274037837982, |
| "max": 0.31547796726226807, |
| "mean": 1.8216255739389453e-06, |
| "std": 0.02005232311785221, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.17.1.g": { |
| "min": 0.3659067749977112, |
| "max": 0.7100387215614319, |
| "mean": 0.5930584073066711, |
| "std": 0.04572707787156105, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_q.weight": { |
| "min": -0.21076832711696625, |
| "max": 0.19927603006362915, |
| "mean": 3.0815259378869087e-05, |
| "std": 0.03487056866288185, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_q.bias": { |
| "min": -0.186960831284523, |
| "max": 0.20310287177562714, |
| "mean": 0.0009555225260555744, |
| "std": 0.03147275000810623, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_k.weight": { |
| "min": -0.28951019048690796, |
| "max": 0.33969932794570923, |
| "mean": -4.744817124446854e-05, |
| "std": 0.034591346979141235, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_k.bias": { |
| "min": -3.8711647987365723, |
| "max": 3.3820366859436035, |
| "mean": 0.01444312371313572, |
| "std": 0.8576698899269104, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_v.weight": { |
| "min": -0.2244085818529129, |
| "max": 0.249923974275589, |
| "mean": -3.961446964240167e-06, |
| "std": 0.04223531484603882, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_v.bias": { |
| "min": -0.05502909794449806, |
| "max": 0.04645157977938652, |
| "mean": -2.0665102056227624e-05, |
| "std": 0.01583181880414486, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_out.0.weight": { |
| "min": -0.2927229106426239, |
| "max": 0.2906007766723633, |
| "mean": -7.488439223379828e-06, |
| "std": 0.04195013642311096, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_out.0.bias": { |
| "min": -0.12459567189216614, |
| "max": 0.25878894329071045, |
| "mean": -0.0032436519395560026, |
| "std": 0.053140122443437576, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.3.g": { |
| "min": 0.4563407599925995, |
| "max": 0.8428970575332642, |
| "mean": 0.7054145932197571, |
| "std": 0.03490997478365898, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.4.ff.0.0.weight": { |
| "min": -0.5114501714706421, |
| "max": 0.3482079803943634, |
| "mean": 0.00034245854476466775, |
| "std": 0.04020575433969498, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.4.ff.0.0.bias": { |
| "min": -0.18575434386730194, |
| "max": 0.03953104466199875, |
| "mean": -0.03936902433633804, |
| "std": 0.021325672045350075, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.17.4.ff.2.weight": { |
| "min": -0.5437595248222351, |
| "max": 0.5556712746620178, |
| "mean": -7.024264050414786e-05, |
| "std": 0.05074309557676315, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.17.4.ff.2.bias": { |
| "min": -0.5109111666679382, |
| "max": 0.6631372570991516, |
| "mean": 0.002439212054014206, |
| "std": 0.049490757286548615, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.0.weight": { |
| "min": -0.33253294229507446, |
| "max": 0.2652721107006073, |
| "mean": 3.378802830411587e-06, |
| "std": 0.019389795139431953, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.18.1.g": { |
| "min": 0.3220270276069641, |
| "max": 0.7649413347244263, |
| "mean": 0.6509413719177246, |
| "std": 0.045111026614904404, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_q.weight": { |
| "min": -0.2494993954896927, |
| "max": 0.21881401538848877, |
| "mean": -2.360827238589991e-06, |
| "std": 0.03650495037436485, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_q.bias": { |
| "min": -0.3266308009624481, |
| "max": 0.28657323122024536, |
| "mean": -0.0006807027384638786, |
| "std": 0.038520634174346924, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_k.weight": { |
| "min": -0.30951929092407227, |
| "max": 0.36978626251220703, |
| "mean": 6.48990971967578e-05, |
| "std": 0.036245379596948624, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_k.bias": { |
| "min": -4.710280895233154, |
| "max": 5.798713684082031, |
| "mean": 0.037927284836769104, |
| "std": 1.4116240739822388, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_v.weight": { |
| "min": -0.22114244103431702, |
| "max": 0.20574785768985748, |
| "mean": -7.537077181041241e-05, |
| "std": 0.04249110445380211, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_v.bias": { |
| "min": -0.07735679298639297, |
| "max": 0.05145302414894104, |
| "mean": -0.0009192783036269248, |
| "std": 0.016400594264268875, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_out.0.weight": { |
| "min": -0.3307357728481293, |
| "max": 0.32934609055519104, |
| "mean": -4.647547484637471e-06, |
| "std": 0.042797382920980453, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_out.0.bias": { |
| "min": -0.28440576791763306, |
| "max": 0.11188910901546478, |
| "mean": -0.0012069176882505417, |
| "std": 0.0469915047287941, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.3.g": { |
| "min": 0.4862346351146698, |
| "max": 0.8851982355117798, |
| "mean": 0.7373509407043457, |
| "std": 0.03795893117785454, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.4.ff.0.0.weight": { |
| "min": -0.3612706959247589, |
| "max": 0.27453744411468506, |
| "mean": 5.114857412991114e-05, |
| "std": 0.04065178707242012, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.4.ff.0.0.bias": { |
| "min": -0.24725216627120972, |
| "max": 0.04655319079756737, |
| "mean": -0.03925145044922829, |
| "std": 0.023245742544531822, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.18.4.ff.2.weight": { |
| "min": -0.625215470790863, |
| "max": 0.5962166786193848, |
| "mean": -5.8090816310141236e-05, |
| "std": 0.05312598869204521, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.18.4.ff.2.bias": { |
| "min": -0.7085027694702148, |
| "max": 0.2653276026248932, |
| "mean": 0.0009165835799649358, |
| "std": 0.0511946901679039, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.0.weight": { |
| "min": -0.34328368306159973, |
| "max": 0.3035609722137451, |
| "mean": 1.4504064438369824e-07, |
| "std": 0.019138522446155548, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.19.1.g": { |
| "min": 0.3498779833316803, |
| "max": 0.7813707590103149, |
| "mean": 0.6387293338775635, |
| "std": 0.049000099301338196, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_q.weight": { |
| "min": -0.20522303879261017, |
| "max": 0.20651094615459442, |
| "mean": -5.9693807997973636e-05, |
| "std": 0.03769965097308159, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_q.bias": { |
| "min": -0.25792619585990906, |
| "max": 0.2676540017127991, |
| "mean": -0.0004065552493557334, |
| "std": 0.044568419456481934, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_k.weight": { |
| "min": -0.3535814583301544, |
| "max": 0.32190999388694763, |
| "mean": -7.394870408461429e-06, |
| "std": 0.037208281457424164, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_k.bias": { |
| "min": -5.253505706787109, |
| "max": 4.198240280151367, |
| "mean": -0.026390478014945984, |
| "std": 1.0056747198104858, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_v.weight": { |
| "min": -0.2384454905986786, |
| "max": 0.24342015385627747, |
| "mean": -2.5527655452606268e-05, |
| "std": 0.043215684592723846, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_v.bias": { |
| "min": -0.06227009370923042, |
| "max": 0.05663022771477699, |
| "mean": 0.0003446021000854671, |
| "std": 0.01414022222161293, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_out.0.weight": { |
| "min": -0.43697887659072876, |
| "max": 0.3737882673740387, |
| "mean": 1.4649482181994244e-05, |
| "std": 0.04412706196308136, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_out.0.bias": { |
| "min": -0.09632225334644318, |
| "max": 0.1757834255695343, |
| "mean": -0.0006590378470718861, |
| "std": 0.03513453155755997, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.3.g": { |
| "min": 0.4219363331794739, |
| "max": 1.0674819946289062, |
| "mean": 0.7483711838722229, |
| "std": 0.041829537600278854, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.4.ff.0.0.weight": { |
| "min": -0.26578643918037415, |
| "max": 0.29607900977134705, |
| "mean": -7.925635145511478e-05, |
| "std": 0.04081210494041443, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.4.ff.0.0.bias": { |
| "min": -0.18497370183467865, |
| "max": 0.04346155747771263, |
| "mean": -0.03679885342717171, |
| "std": 0.025566671043634415, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.19.4.ff.2.weight": { |
| "min": -0.45727846026420593, |
| "max": 0.48611682653427124, |
| "mean": 4.68605212518014e-05, |
| "std": 0.05422008037567139, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.19.4.ff.2.bias": { |
| "min": -0.285878986120224, |
| "max": 0.5506833791732788, |
| "mean": -0.0008855935884639621, |
| "std": 0.047791752964258194, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.0.weight": { |
| "min": -0.2927459478378296, |
| "max": 0.32270148396492004, |
| "mean": 6.155781647976255e-06, |
| "std": 0.019972333684563637, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.20.1.g": { |
| "min": 0.29097816348075867, |
| "max": 0.7588945627212524, |
| "mean": 0.6507570743560791, |
| "std": 0.05195188894867897, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_q.weight": { |
| "min": -0.24343979358673096, |
| "max": 0.2611932158470154, |
| "mean": -5.595570200966904e-06, |
| "std": 0.039616428315639496, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_q.bias": { |
| "min": -0.2672193646430969, |
| "max": 0.19968828558921814, |
| "mean": -0.0008741158526390791, |
| "std": 0.051719244569540024, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_k.weight": { |
| "min": -0.2713148593902588, |
| "max": 0.25280529260635376, |
| "mean": 4.686854481406044e-06, |
| "std": 0.03871333599090576, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_k.bias": { |
| "min": -12.945391654968262, |
| "max": 15.922587394714355, |
| "mean": 0.0331900492310524, |
| "std": 1.9867922067642212, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_v.weight": { |
| "min": -0.20660938322544098, |
| "max": 0.22584253549575806, |
| "mean": -7.262543658725917e-05, |
| "std": 0.04055970162153244, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_v.bias": { |
| "min": -0.06933361291885376, |
| "max": 0.06314393132925034, |
| "mean": 0.00014905043644830585, |
| "std": 0.014740395359694958, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_out.0.weight": { |
| "min": -0.46516552567481995, |
| "max": 0.3203747570514679, |
| "mean": 1.989086922549177e-05, |
| "std": 0.04059458523988724, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_out.0.bias": { |
| "min": -0.06398216634988785, |
| "max": 0.11521662026643753, |
| "mean": 0.0011892176698893309, |
| "std": 0.02469474822282791, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.3.g": { |
| "min": 0.37489306926727295, |
| "max": 0.9301723837852478, |
| "mean": 0.7509260177612305, |
| "std": 0.04003360494971275, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.4.ff.0.0.weight": { |
| "min": -0.27877017855644226, |
| "max": 0.27262061834335327, |
| "mean": -0.00016865786164999008, |
| "std": 0.0410030372440815, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.4.ff.0.0.bias": { |
| "min": -0.19846785068511963, |
| "max": 0.05112157389521599, |
| "mean": -0.032006848603487015, |
| "std": 0.02506233938038349, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.20.4.ff.2.weight": { |
| "min": -0.6571894884109497, |
| "max": 0.5354637503623962, |
| "mean": -4.8520763812121004e-05, |
| "std": 0.05285634472966194, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.20.4.ff.2.bias": { |
| "min": -0.19253292679786682, |
| "max": 0.5813104510307312, |
| "mean": -0.0005173450335860252, |
| "std": 0.04104470834136009, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.0.weight": { |
| "min": -0.41767504811286926, |
| "max": 0.3719256818294525, |
| "mean": 6.585116807400482e-06, |
| "std": 0.02162640169262886, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.21.1.g": { |
| "min": 0.21444188058376312, |
| "max": 0.7454288601875305, |
| "mean": 0.6494399309158325, |
| "std": 0.054196760058403015, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_q.weight": { |
| "min": -0.20942556858062744, |
| "max": 0.19570672512054443, |
| "mean": 4.021516360808164e-05, |
| "std": 0.03946828842163086, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_q.bias": { |
| "min": -0.32898303866386414, |
| "max": 0.2592002749443054, |
| "mean": -0.0032279789447784424, |
| "std": 0.05622360482811928, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_k.weight": { |
| "min": -0.2054453343153, |
| "max": 0.2543545663356781, |
| "mean": 5.45132061233744e-05, |
| "std": 0.03857067599892616, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_k.bias": { |
| "min": -6.233641624450684, |
| "max": 6.921432971954346, |
| "mean": 0.04828529804944992, |
| "std": 1.3836402893066406, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_v.weight": { |
| "min": -0.20949925482273102, |
| "max": 0.2304454892873764, |
| "mean": -4.72849160360056e-06, |
| "std": 0.041318491101264954, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_v.bias": { |
| "min": -0.04375026375055313, |
| "max": 0.03585176169872284, |
| "mean": -5.88857801631093e-07, |
| "std": 0.012790623120963573, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_out.0.weight": { |
| "min": -0.39803647994995117, |
| "max": 0.34512725472450256, |
| "mean": -5.491710908245295e-05, |
| "std": 0.042394764721393585, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_out.0.bias": { |
| "min": -0.054978147149086, |
| "max": 0.06269973516464233, |
| "mean": 0.0003556903393473476, |
| "std": 0.018663441762328148, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.3.g": { |
| "min": 0.35058680176734924, |
| "max": 1.043295979499817, |
| "mean": 0.789494514465332, |
| "std": 0.04858649522066116, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.4.ff.0.0.weight": { |
| "min": -0.33317434787750244, |
| "max": 0.3864516317844391, |
| "mean": -0.00016881646297406405, |
| "std": 0.041488684713840485, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.4.ff.0.0.bias": { |
| "min": -0.15732650458812714, |
| "max": 0.058728814125061035, |
| "mean": -0.03181058540940285, |
| "std": 0.025098087266087532, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.21.4.ff.2.weight": { |
| "min": -0.6958801746368408, |
| "max": 0.46852678060531616, |
| "mean": -8.982194412965328e-05, |
| "std": 0.05180330574512482, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.21.4.ff.2.bias": { |
| "min": -0.24772712588310242, |
| "max": 0.32808512449264526, |
| "mean": -0.0002515119267627597, |
| "std": 0.04140802100300789, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.0.weight": { |
| "min": -0.28731903433799744, |
| "max": 0.3503708243370056, |
| "mean": -2.625113665999379e-06, |
| "std": 0.024243580177426338, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.22.1.g": { |
| "min": 0.19668713212013245, |
| "max": 0.7778334617614746, |
| "mean": 0.670162558555603, |
| "std": 0.05853449925780296, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_q.weight": { |
| "min": -0.2283114343881607, |
| "max": 0.23055444657802582, |
| "mean": -2.0571733330143616e-05, |
| "std": 0.04044181853532791, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_q.bias": { |
| "min": -0.2195570170879364, |
| "max": 0.24048519134521484, |
| "mean": 0.000782210670877248, |
| "std": 0.055770643055438995, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_k.weight": { |
| "min": -0.21605147421360016, |
| "max": 0.22674262523651123, |
| "mean": -7.179281237768009e-05, |
| "std": 0.03937681019306183, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_k.bias": { |
| "min": -8.892273902893066, |
| "max": 9.054671287536621, |
| "mean": -0.0012077325955033302, |
| "std": 1.846124529838562, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_v.weight": { |
| "min": -0.2689066231250763, |
| "max": 0.2583616375923157, |
| "mean": 4.3370266212150455e-05, |
| "std": 0.03841203823685646, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_v.bias": { |
| "min": -0.05771247297525406, |
| "max": 0.05783558264374733, |
| "mean": 0.00035597707028500736, |
| "std": 0.014716549776494503, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_out.0.weight": { |
| "min": -0.2647928297519684, |
| "max": 0.28871840238571167, |
| "mean": -6.220719660632312e-05, |
| "std": 0.0390787236392498, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_out.0.bias": { |
| "min": -0.04365166649222374, |
| "max": 0.037368953227996826, |
| "mean": -8.94215190783143e-05, |
| "std": 0.013351045548915863, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.3.g": { |
| "min": 0.33930352330207825, |
| "max": 1.090523362159729, |
| "mean": 0.8638416528701782, |
| "std": 0.06374476104974747, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.4.ff.0.0.weight": { |
| "min": -0.4229956567287445, |
| "max": 0.41935035586357117, |
| "mean": 0.00031358242267742753, |
| "std": 0.04351169988512993, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.4.ff.0.0.bias": { |
| "min": -0.2143605649471283, |
| "max": 0.17033977806568146, |
| "mean": -0.029430482536554337, |
| "std": 0.031879011541604996, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.22.4.ff.2.weight": { |
| "min": -0.5980925559997559, |
| "max": 0.5593904852867126, |
| "mean": -0.0001523983955848962, |
| "std": 0.05345866456627846, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.22.4.ff.2.bias": { |
| "min": -0.17843037843704224, |
| "max": 0.3764672875404358, |
| "mean": 0.0013608136214315891, |
| "std": 0.037283699959516525, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.0.weight": { |
| "min": -0.3941720128059387, |
| "max": 0.3687548339366913, |
| "mean": 3.7372221413534135e-05, |
| "std": 0.02862183377146721, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.23.1.g": { |
| "min": 0.2906048893928528, |
| "max": 0.825853168964386, |
| "mean": 0.7055732607841492, |
| "std": 0.0677838996052742, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_q.weight": { |
| "min": -0.9263502359390259, |
| "max": 1.027148962020874, |
| "mean": -2.6785823138197884e-05, |
| "std": 0.04763893038034439, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_q.bias": { |
| "min": -0.8774253129959106, |
| "max": 0.8142860531806946, |
| "mean": -0.0003061135357711464, |
| "std": 0.09545911848545074, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_k.weight": { |
| "min": -0.2697736918926239, |
| "max": 0.24071107804775238, |
| "mean": -2.2601629098062404e-05, |
| "std": 0.038958579301834106, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_k.bias": { |
| "min": -23.70609474182129, |
| "max": 22.81615447998047, |
| "mean": -0.09178254753351212, |
| "std": 4.064568042755127, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_v.weight": { |
| "min": -0.22739385068416595, |
| "max": 0.24493008852005005, |
| "mean": -2.535741987230722e-05, |
| "std": 0.03864453360438347, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_v.bias": { |
| "min": -0.06026393920183182, |
| "max": 0.045535702258348465, |
| "mean": -0.00013921607751399279, |
| "std": 0.014681815169751644, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_out.0.weight": { |
| "min": -0.3383011817932129, |
| "max": 0.3741171360015869, |
| "mean": 6.997803211561404e-06, |
| "std": 0.040823448449373245, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_out.0.bias": { |
| "min": -0.046280112117528915, |
| "max": 0.19523115456104279, |
| "mean": 0.00027006896561942995, |
| "std": 0.01355893723666668, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.3.g": { |
| "min": 0.3735462725162506, |
| "max": 1.1277151107788086, |
| "mean": 0.8900589942932129, |
| "std": 0.06382670253515244, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.4.ff.0.0.weight": { |
| "min": -0.4478131830692291, |
| "max": 0.5424441695213318, |
| "mean": 2.4745060727582313e-05, |
| "std": 0.04557563737034798, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.4.ff.0.0.bias": { |
| "min": -0.22360379993915558, |
| "max": 0.08794356882572174, |
| "mean": -0.03199389576911926, |
| "std": 0.03773387894034386, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.23.4.ff.2.weight": { |
| "min": -0.7249262928962708, |
| "max": 0.6877928376197815, |
| "mean": 3.6950204957975075e-05, |
| "std": 0.051789939403533936, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.23.4.ff.2.bias": { |
| "min": -0.17425872385501862, |
| "max": 0.21810372173786163, |
| "mean": 3.0209601391106844e-05, |
| "std": 0.03174462914466858, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.0.weight": { |
| "min": -0.3392157554626465, |
| "max": 0.3738991320133209, |
| "mean": 4.299447755329311e-05, |
| "std": 0.03414613753557205, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.24.1.g": { |
| "min": 0.3178655207157135, |
| "max": 1.2844390869140625, |
| "mean": 0.6014401912689209, |
| "std": 0.08323848247528076, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_q.weight": { |
| "min": -0.2828904390335083, |
| "max": 0.260010302066803, |
| "mean": -3.007857230841182e-06, |
| "std": 0.03598371520638466, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_q.bias": { |
| "min": -0.2351931631565094, |
| "max": 0.20519772171974182, |
| "mean": 0.00022795653785578907, |
| "std": 0.055979955941438675, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_k.weight": { |
| "min": -0.43529582023620605, |
| "max": 0.32459068298339844, |
| "mean": 2.450653482810594e-05, |
| "std": 0.03413282707333565, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_k.bias": { |
| "min": -5.542441368103027, |
| "max": 7.307634353637695, |
| "mean": -0.007349876686930656, |
| "std": 0.6985355019569397, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_v.weight": { |
| "min": -0.3433660864830017, |
| "max": 0.3625560700893402, |
| "mean": 0.00010314527025911957, |
| "std": 0.04783623665571213, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_v.bias": { |
| "min": -0.07354722917079926, |
| "max": 0.060343291610479355, |
| "mean": 0.0009371445048600435, |
| "std": 0.014936422929167747, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_out.0.weight": { |
| "min": -0.25582820177078247, |
| "max": 0.286111980676651, |
| "mean": 4.655210432247259e-06, |
| "std": 0.04156283661723137, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_out.0.bias": { |
| "min": -0.05514800176024437, |
| "max": 0.06263813376426697, |
| "mean": 0.0001386886287946254, |
| "std": 0.007160879671573639, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.3.g": { |
| "min": 0.4938517212867737, |
| "max": 1.2188584804534912, |
| "mean": 1.0133963823318481, |
| "std": 0.11724550276994705, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.4.ff.0.0.weight": { |
| "min": -1.093719720840454, |
| "max": 1.0471616983413696, |
| "mean": -4.925714529235847e-05, |
| "std": 0.05241731181740761, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.4.ff.0.0.bias": { |
| "min": -0.2243891805410385, |
| "max": 0.172992542386055, |
| "mean": -0.027224872261285782, |
| "std": 0.03628592565655708, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.24.4.ff.2.weight": { |
| "min": -0.8836102485656738, |
| "max": 0.9222370386123657, |
| "mean": -0.0001438588951714337, |
| "std": 0.053294114768505096, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.24.4.ff.2.bias": { |
| "min": -0.17069175839424133, |
| "max": 0.37931114435195923, |
| "mean": 0.003359442111104727, |
| "std": 0.03984633460640907, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.0.weight": { |
| "min": -0.777143120765686, |
| "max": 0.7232267260551453, |
| "mean": 1.830433029681444e-05, |
| "std": 0.0461735762655735, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.25.1.g": { |
| "min": 0.3386678695678711, |
| "max": 1.4252641201019287, |
| "mean": 0.9481973648071289, |
| "std": 0.20639142394065857, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_q.weight": { |
| "min": -1.746235728263855, |
| "max": 1.7046191692352295, |
| "mean": 0.00022743589943274856, |
| "std": 0.1587381213903427, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_q.bias": { |
| "min": -1.1972129344940186, |
| "max": 1.0979515314102173, |
| "mean": -0.00952577032148838, |
| "std": 0.2035541981458664, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_k.weight": { |
| "min": -0.4209991693496704, |
| "max": 0.42664653062820435, |
| "mean": 6.461775046773255e-05, |
| "std": 0.04803095757961273, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_k.bias": { |
| "min": -19.71938133239746, |
| "max": 19.514814376831055, |
| "mean": -0.24804288148880005, |
| "std": 4.770266532897949, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_v.weight": { |
| "min": -0.32366812229156494, |
| "max": 0.43827319145202637, |
| "mean": -1.2008969861199148e-05, |
| "std": 0.04616396129131317, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_v.bias": { |
| "min": -0.03389401733875275, |
| "max": 0.03695628046989441, |
| "mean": 0.0006402541184797883, |
| "std": 0.012914549559354782, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_out.0.weight": { |
| "min": -0.7030304074287415, |
| "max": 0.6659538745880127, |
| "mean": 4.320529478718527e-05, |
| "std": 0.05788206309080124, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_out.0.bias": { |
| "min": -0.07218055427074432, |
| "max": 0.0675114244222641, |
| "mean": -0.0001346912613371387, |
| "std": 0.012894386425614357, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.3.g": { |
| "min": 0.3805386424064636, |
| "max": 1.3893085718154907, |
| "mean": 1.0665242671966553, |
| "std": 0.21952925622463226, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.4.ff.0.0.weight": { |
| "min": -0.6161316633224487, |
| "max": 0.717426061630249, |
| "mean": 0.00011223374167457223, |
| "std": 0.0580313578248024, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.4.ff.0.0.bias": { |
| "min": -0.21904653310775757, |
| "max": 0.22452397644519806, |
| "mean": 0.006222008261829615, |
| "std": 0.049658045172691345, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.25.4.ff.2.weight": { |
| "min": -0.6296318769454956, |
| "max": 0.8893842101097107, |
| "mean": 1.2104990673833527e-05, |
| "std": 0.02354114130139351, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.25.4.ff.2.bias": { |
| "min": -0.5061390995979309, |
| "max": 0.473175585269928, |
| "mean": -0.003011696506291628, |
| "std": 0.06919368356466293, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.norm_out.g": { |
| "min": 0.5380294322967529, |
| "max": 1.1777888536453247, |
| "mean": 0.7825304865837097, |
| "std": 0.09833591431379318, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.proj_out.weight": { |
| "min": -0.26662442088127136, |
| "max": 0.21249151229858398, |
| "mean": -0.00022446915681939572, |
| "std": 0.054007817059755325, |
| "sparsity": 0.0, |
| "shape": [ |
| 100, |
| 1024 |
| ] |
| }, |
| "transformer.proj_out.bias": { |
| "min": -0.23786024749279022, |
| "max": 0.014854340814054012, |
| "mean": -0.04389730468392372, |
| "std": 0.03425038233399391, |
| "sparsity": 0.0, |
| "shape": [ |
| 100 |
| ] |
| } |
| } |
| } |