| { |
| "layer_types": { |
| "transformer": 391 |
| }, |
| "parameter_counts": { |
| "transformer.time_embed.time_mlp.0.weight": 262144, |
| "transformer.time_embed.time_mlp.0.bias": 1024, |
| "transformer.time_embed.time_mlp.2.weight": 1048576, |
| "transformer.time_embed.time_mlp.2.bias": 1024, |
| "transformer.text_embed.text_embed.weight": 254600, |
| "transformer.input_embed.proj.weight": 307200, |
| "transformer.input_embed.proj.bias": 1024, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, |
| "transformer.layers.0.1.g": 1024, |
| "transformer.layers.0.2.to_q.weight": 1048576, |
| "transformer.layers.0.2.to_q.bias": 1024, |
| "transformer.layers.0.2.to_k.weight": 1048576, |
| "transformer.layers.0.2.to_k.bias": 1024, |
| "transformer.layers.0.2.to_v.weight": 1048576, |
| "transformer.layers.0.2.to_v.bias": 1024, |
| "transformer.layers.0.2.to_out.0.weight": 1048576, |
| "transformer.layers.0.2.to_out.0.bias": 1024, |
| "transformer.layers.0.3.g": 1024, |
| "transformer.layers.0.4.ff.0.0.weight": 4194304, |
| "transformer.layers.0.4.ff.0.0.bias": 4096, |
| "transformer.layers.0.4.ff.2.weight": 4194304, |
| "transformer.layers.0.4.ff.2.bias": 1024, |
| "transformer.layers.1.1.g": 1024, |
| "transformer.layers.1.2.to_q.weight": 1048576, |
| "transformer.layers.1.2.to_q.bias": 1024, |
| "transformer.layers.1.2.to_k.weight": 1048576, |
| "transformer.layers.1.2.to_k.bias": 1024, |
| "transformer.layers.1.2.to_v.weight": 1048576, |
| "transformer.layers.1.2.to_v.bias": 1024, |
| "transformer.layers.1.2.to_out.0.weight": 1048576, |
| "transformer.layers.1.2.to_out.0.bias": 1024, |
| "transformer.layers.1.3.g": 1024, |
| "transformer.layers.1.4.ff.0.0.weight": 4194304, |
| "transformer.layers.1.4.ff.0.0.bias": 4096, |
| "transformer.layers.1.4.ff.2.weight": 4194304, |
| "transformer.layers.1.4.ff.2.bias": 1024, |
| "transformer.layers.2.1.g": 1024, |
| "transformer.layers.2.2.to_q.weight": 1048576, |
| "transformer.layers.2.2.to_q.bias": 1024, |
| "transformer.layers.2.2.to_k.weight": 1048576, |
| "transformer.layers.2.2.to_k.bias": 1024, |
| "transformer.layers.2.2.to_v.weight": 1048576, |
| "transformer.layers.2.2.to_v.bias": 1024, |
| "transformer.layers.2.2.to_out.0.weight": 1048576, |
| "transformer.layers.2.2.to_out.0.bias": 1024, |
| "transformer.layers.2.3.g": 1024, |
| "transformer.layers.2.4.ff.0.0.weight": 4194304, |
| "transformer.layers.2.4.ff.0.0.bias": 4096, |
| "transformer.layers.2.4.ff.2.weight": 4194304, |
| "transformer.layers.2.4.ff.2.bias": 1024, |
| "transformer.layers.3.1.g": 1024, |
| "transformer.layers.3.2.to_q.weight": 1048576, |
| "transformer.layers.3.2.to_q.bias": 1024, |
| "transformer.layers.3.2.to_k.weight": 1048576, |
| "transformer.layers.3.2.to_k.bias": 1024, |
| "transformer.layers.3.2.to_v.weight": 1048576, |
| "transformer.layers.3.2.to_v.bias": 1024, |
| "transformer.layers.3.2.to_out.0.weight": 1048576, |
| "transformer.layers.3.2.to_out.0.bias": 1024, |
| "transformer.layers.3.3.g": 1024, |
| "transformer.layers.3.4.ff.0.0.weight": 4194304, |
| "transformer.layers.3.4.ff.0.0.bias": 4096, |
| "transformer.layers.3.4.ff.2.weight": 4194304, |
| "transformer.layers.3.4.ff.2.bias": 1024, |
| "transformer.layers.4.1.g": 1024, |
| "transformer.layers.4.2.to_q.weight": 1048576, |
| "transformer.layers.4.2.to_q.bias": 1024, |
| "transformer.layers.4.2.to_k.weight": 1048576, |
| "transformer.layers.4.2.to_k.bias": 1024, |
| "transformer.layers.4.2.to_v.weight": 1048576, |
| "transformer.layers.4.2.to_v.bias": 1024, |
| "transformer.layers.4.2.to_out.0.weight": 1048576, |
| "transformer.layers.4.2.to_out.0.bias": 1024, |
| "transformer.layers.4.3.g": 1024, |
| "transformer.layers.4.4.ff.0.0.weight": 4194304, |
| "transformer.layers.4.4.ff.0.0.bias": 4096, |
| "transformer.layers.4.4.ff.2.weight": 4194304, |
| "transformer.layers.4.4.ff.2.bias": 1024, |
| "transformer.layers.5.1.g": 1024, |
| "transformer.layers.5.2.to_q.weight": 1048576, |
| "transformer.layers.5.2.to_q.bias": 1024, |
| "transformer.layers.5.2.to_k.weight": 1048576, |
| "transformer.layers.5.2.to_k.bias": 1024, |
| "transformer.layers.5.2.to_v.weight": 1048576, |
| "transformer.layers.5.2.to_v.bias": 1024, |
| "transformer.layers.5.2.to_out.0.weight": 1048576, |
| "transformer.layers.5.2.to_out.0.bias": 1024, |
| "transformer.layers.5.3.g": 1024, |
| "transformer.layers.5.4.ff.0.0.weight": 4194304, |
| "transformer.layers.5.4.ff.0.0.bias": 4096, |
| "transformer.layers.5.4.ff.2.weight": 4194304, |
| "transformer.layers.5.4.ff.2.bias": 1024, |
| "transformer.layers.6.1.g": 1024, |
| "transformer.layers.6.2.to_q.weight": 1048576, |
| "transformer.layers.6.2.to_q.bias": 1024, |
| "transformer.layers.6.2.to_k.weight": 1048576, |
| "transformer.layers.6.2.to_k.bias": 1024, |
| "transformer.layers.6.2.to_v.weight": 1048576, |
| "transformer.layers.6.2.to_v.bias": 1024, |
| "transformer.layers.6.2.to_out.0.weight": 1048576, |
| "transformer.layers.6.2.to_out.0.bias": 1024, |
| "transformer.layers.6.3.g": 1024, |
| "transformer.layers.6.4.ff.0.0.weight": 4194304, |
| "transformer.layers.6.4.ff.0.0.bias": 4096, |
| "transformer.layers.6.4.ff.2.weight": 4194304, |
| "transformer.layers.6.4.ff.2.bias": 1024, |
| "transformer.layers.7.1.g": 1024, |
| "transformer.layers.7.2.to_q.weight": 1048576, |
| "transformer.layers.7.2.to_q.bias": 1024, |
| "transformer.layers.7.2.to_k.weight": 1048576, |
| "transformer.layers.7.2.to_k.bias": 1024, |
| "transformer.layers.7.2.to_v.weight": 1048576, |
| "transformer.layers.7.2.to_v.bias": 1024, |
| "transformer.layers.7.2.to_out.0.weight": 1048576, |
| "transformer.layers.7.2.to_out.0.bias": 1024, |
| "transformer.layers.7.3.g": 1024, |
| "transformer.layers.7.4.ff.0.0.weight": 4194304, |
| "transformer.layers.7.4.ff.0.0.bias": 4096, |
| "transformer.layers.7.4.ff.2.weight": 4194304, |
| "transformer.layers.7.4.ff.2.bias": 1024, |
| "transformer.layers.8.1.g": 1024, |
| "transformer.layers.8.2.to_q.weight": 1048576, |
| "transformer.layers.8.2.to_q.bias": 1024, |
| "transformer.layers.8.2.to_k.weight": 1048576, |
| "transformer.layers.8.2.to_k.bias": 1024, |
| "transformer.layers.8.2.to_v.weight": 1048576, |
| "transformer.layers.8.2.to_v.bias": 1024, |
| "transformer.layers.8.2.to_out.0.weight": 1048576, |
| "transformer.layers.8.2.to_out.0.bias": 1024, |
| "transformer.layers.8.3.g": 1024, |
| "transformer.layers.8.4.ff.0.0.weight": 4194304, |
| "transformer.layers.8.4.ff.0.0.bias": 4096, |
| "transformer.layers.8.4.ff.2.weight": 4194304, |
| "transformer.layers.8.4.ff.2.bias": 1024, |
| "transformer.layers.9.1.g": 1024, |
| "transformer.layers.9.2.to_q.weight": 1048576, |
| "transformer.layers.9.2.to_q.bias": 1024, |
| "transformer.layers.9.2.to_k.weight": 1048576, |
| "transformer.layers.9.2.to_k.bias": 1024, |
| "transformer.layers.9.2.to_v.weight": 1048576, |
| "transformer.layers.9.2.to_v.bias": 1024, |
| "transformer.layers.9.2.to_out.0.weight": 1048576, |
| "transformer.layers.9.2.to_out.0.bias": 1024, |
| "transformer.layers.9.3.g": 1024, |
| "transformer.layers.9.4.ff.0.0.weight": 4194304, |
| "transformer.layers.9.4.ff.0.0.bias": 4096, |
| "transformer.layers.9.4.ff.2.weight": 4194304, |
| "transformer.layers.9.4.ff.2.bias": 1024, |
| "transformer.layers.10.1.g": 1024, |
| "transformer.layers.10.2.to_q.weight": 1048576, |
| "transformer.layers.10.2.to_q.bias": 1024, |
| "transformer.layers.10.2.to_k.weight": 1048576, |
| "transformer.layers.10.2.to_k.bias": 1024, |
| "transformer.layers.10.2.to_v.weight": 1048576, |
| "transformer.layers.10.2.to_v.bias": 1024, |
| "transformer.layers.10.2.to_out.0.weight": 1048576, |
| "transformer.layers.10.2.to_out.0.bias": 1024, |
| "transformer.layers.10.3.g": 1024, |
| "transformer.layers.10.4.ff.0.0.weight": 4194304, |
| "transformer.layers.10.4.ff.0.0.bias": 4096, |
| "transformer.layers.10.4.ff.2.weight": 4194304, |
| "transformer.layers.10.4.ff.2.bias": 1024, |
| "transformer.layers.11.1.g": 1024, |
| "transformer.layers.11.2.to_q.weight": 1048576, |
| "transformer.layers.11.2.to_q.bias": 1024, |
| "transformer.layers.11.2.to_k.weight": 1048576, |
| "transformer.layers.11.2.to_k.bias": 1024, |
| "transformer.layers.11.2.to_v.weight": 1048576, |
| "transformer.layers.11.2.to_v.bias": 1024, |
| "transformer.layers.11.2.to_out.0.weight": 1048576, |
| "transformer.layers.11.2.to_out.0.bias": 1024, |
| "transformer.layers.11.3.g": 1024, |
| "transformer.layers.11.4.ff.0.0.weight": 4194304, |
| "transformer.layers.11.4.ff.0.0.bias": 4096, |
| "transformer.layers.11.4.ff.2.weight": 4194304, |
| "transformer.layers.11.4.ff.2.bias": 1024, |
| "transformer.layers.12.1.g": 1024, |
| "transformer.layers.12.2.to_q.weight": 1048576, |
| "transformer.layers.12.2.to_q.bias": 1024, |
| "transformer.layers.12.2.to_k.weight": 1048576, |
| "transformer.layers.12.2.to_k.bias": 1024, |
| "transformer.layers.12.2.to_v.weight": 1048576, |
| "transformer.layers.12.2.to_v.bias": 1024, |
| "transformer.layers.12.2.to_out.0.weight": 1048576, |
| "transformer.layers.12.2.to_out.0.bias": 1024, |
| "transformer.layers.12.3.g": 1024, |
| "transformer.layers.12.4.ff.0.0.weight": 4194304, |
| "transformer.layers.12.4.ff.0.0.bias": 4096, |
| "transformer.layers.12.4.ff.2.weight": 4194304, |
| "transformer.layers.12.4.ff.2.bias": 1024, |
| "transformer.layers.13.0.weight": 2097152, |
| "transformer.layers.13.1.g": 1024, |
| "transformer.layers.13.2.to_q.weight": 1048576, |
| "transformer.layers.13.2.to_q.bias": 1024, |
| "transformer.layers.13.2.to_k.weight": 1048576, |
| "transformer.layers.13.2.to_k.bias": 1024, |
| "transformer.layers.13.2.to_v.weight": 1048576, |
| "transformer.layers.13.2.to_v.bias": 1024, |
| "transformer.layers.13.2.to_out.0.weight": 1048576, |
| "transformer.layers.13.2.to_out.0.bias": 1024, |
| "transformer.layers.13.3.g": 1024, |
| "transformer.layers.13.4.ff.0.0.weight": 4194304, |
| "transformer.layers.13.4.ff.0.0.bias": 4096, |
| "transformer.layers.13.4.ff.2.weight": 4194304, |
| "transformer.layers.13.4.ff.2.bias": 1024, |
| "transformer.layers.14.0.weight": 2097152, |
| "transformer.layers.14.1.g": 1024, |
| "transformer.layers.14.2.to_q.weight": 1048576, |
| "transformer.layers.14.2.to_q.bias": 1024, |
| "transformer.layers.14.2.to_k.weight": 1048576, |
| "transformer.layers.14.2.to_k.bias": 1024, |
| "transformer.layers.14.2.to_v.weight": 1048576, |
| "transformer.layers.14.2.to_v.bias": 1024, |
| "transformer.layers.14.2.to_out.0.weight": 1048576, |
| "transformer.layers.14.2.to_out.0.bias": 1024, |
| "transformer.layers.14.3.g": 1024, |
| "transformer.layers.14.4.ff.0.0.weight": 4194304, |
| "transformer.layers.14.4.ff.0.0.bias": 4096, |
| "transformer.layers.14.4.ff.2.weight": 4194304, |
| "transformer.layers.14.4.ff.2.bias": 1024, |
| "transformer.layers.15.0.weight": 2097152, |
| "transformer.layers.15.1.g": 1024, |
| "transformer.layers.15.2.to_q.weight": 1048576, |
| "transformer.layers.15.2.to_q.bias": 1024, |
| "transformer.layers.15.2.to_k.weight": 1048576, |
| "transformer.layers.15.2.to_k.bias": 1024, |
| "transformer.layers.15.2.to_v.weight": 1048576, |
| "transformer.layers.15.2.to_v.bias": 1024, |
| "transformer.layers.15.2.to_out.0.weight": 1048576, |
| "transformer.layers.15.2.to_out.0.bias": 1024, |
| "transformer.layers.15.3.g": 1024, |
| "transformer.layers.15.4.ff.0.0.weight": 4194304, |
| "transformer.layers.15.4.ff.0.0.bias": 4096, |
| "transformer.layers.15.4.ff.2.weight": 4194304, |
| "transformer.layers.15.4.ff.2.bias": 1024, |
| "transformer.layers.16.0.weight": 2097152, |
| "transformer.layers.16.1.g": 1024, |
| "transformer.layers.16.2.to_q.weight": 1048576, |
| "transformer.layers.16.2.to_q.bias": 1024, |
| "transformer.layers.16.2.to_k.weight": 1048576, |
| "transformer.layers.16.2.to_k.bias": 1024, |
| "transformer.layers.16.2.to_v.weight": 1048576, |
| "transformer.layers.16.2.to_v.bias": 1024, |
| "transformer.layers.16.2.to_out.0.weight": 1048576, |
| "transformer.layers.16.2.to_out.0.bias": 1024, |
| "transformer.layers.16.3.g": 1024, |
| "transformer.layers.16.4.ff.0.0.weight": 4194304, |
| "transformer.layers.16.4.ff.0.0.bias": 4096, |
| "transformer.layers.16.4.ff.2.weight": 4194304, |
| "transformer.layers.16.4.ff.2.bias": 1024, |
| "transformer.layers.17.0.weight": 2097152, |
| "transformer.layers.17.1.g": 1024, |
| "transformer.layers.17.2.to_q.weight": 1048576, |
| "transformer.layers.17.2.to_q.bias": 1024, |
| "transformer.layers.17.2.to_k.weight": 1048576, |
| "transformer.layers.17.2.to_k.bias": 1024, |
| "transformer.layers.17.2.to_v.weight": 1048576, |
| "transformer.layers.17.2.to_v.bias": 1024, |
| "transformer.layers.17.2.to_out.0.weight": 1048576, |
| "transformer.layers.17.2.to_out.0.bias": 1024, |
| "transformer.layers.17.3.g": 1024, |
| "transformer.layers.17.4.ff.0.0.weight": 4194304, |
| "transformer.layers.17.4.ff.0.0.bias": 4096, |
| "transformer.layers.17.4.ff.2.weight": 4194304, |
| "transformer.layers.17.4.ff.2.bias": 1024, |
| "transformer.layers.18.0.weight": 2097152, |
| "transformer.layers.18.1.g": 1024, |
| "transformer.layers.18.2.to_q.weight": 1048576, |
| "transformer.layers.18.2.to_q.bias": 1024, |
| "transformer.layers.18.2.to_k.weight": 1048576, |
| "transformer.layers.18.2.to_k.bias": 1024, |
| "transformer.layers.18.2.to_v.weight": 1048576, |
| "transformer.layers.18.2.to_v.bias": 1024, |
| "transformer.layers.18.2.to_out.0.weight": 1048576, |
| "transformer.layers.18.2.to_out.0.bias": 1024, |
| "transformer.layers.18.3.g": 1024, |
| "transformer.layers.18.4.ff.0.0.weight": 4194304, |
| "transformer.layers.18.4.ff.0.0.bias": 4096, |
| "transformer.layers.18.4.ff.2.weight": 4194304, |
| "transformer.layers.18.4.ff.2.bias": 1024, |
| "transformer.layers.19.0.weight": 2097152, |
| "transformer.layers.19.1.g": 1024, |
| "transformer.layers.19.2.to_q.weight": 1048576, |
| "transformer.layers.19.2.to_q.bias": 1024, |
| "transformer.layers.19.2.to_k.weight": 1048576, |
| "transformer.layers.19.2.to_k.bias": 1024, |
| "transformer.layers.19.2.to_v.weight": 1048576, |
| "transformer.layers.19.2.to_v.bias": 1024, |
| "transformer.layers.19.2.to_out.0.weight": 1048576, |
| "transformer.layers.19.2.to_out.0.bias": 1024, |
| "transformer.layers.19.3.g": 1024, |
| "transformer.layers.19.4.ff.0.0.weight": 4194304, |
| "transformer.layers.19.4.ff.0.0.bias": 4096, |
| "transformer.layers.19.4.ff.2.weight": 4194304, |
| "transformer.layers.19.4.ff.2.bias": 1024, |
| "transformer.layers.20.0.weight": 2097152, |
| "transformer.layers.20.1.g": 1024, |
| "transformer.layers.20.2.to_q.weight": 1048576, |
| "transformer.layers.20.2.to_q.bias": 1024, |
| "transformer.layers.20.2.to_k.weight": 1048576, |
| "transformer.layers.20.2.to_k.bias": 1024, |
| "transformer.layers.20.2.to_v.weight": 1048576, |
| "transformer.layers.20.2.to_v.bias": 1024, |
| "transformer.layers.20.2.to_out.0.weight": 1048576, |
| "transformer.layers.20.2.to_out.0.bias": 1024, |
| "transformer.layers.20.3.g": 1024, |
| "transformer.layers.20.4.ff.0.0.weight": 4194304, |
| "transformer.layers.20.4.ff.0.0.bias": 4096, |
| "transformer.layers.20.4.ff.2.weight": 4194304, |
| "transformer.layers.20.4.ff.2.bias": 1024, |
| "transformer.layers.21.0.weight": 2097152, |
| "transformer.layers.21.1.g": 1024, |
| "transformer.layers.21.2.to_q.weight": 1048576, |
| "transformer.layers.21.2.to_q.bias": 1024, |
| "transformer.layers.21.2.to_k.weight": 1048576, |
| "transformer.layers.21.2.to_k.bias": 1024, |
| "transformer.layers.21.2.to_v.weight": 1048576, |
| "transformer.layers.21.2.to_v.bias": 1024, |
| "transformer.layers.21.2.to_out.0.weight": 1048576, |
| "transformer.layers.21.2.to_out.0.bias": 1024, |
| "transformer.layers.21.3.g": 1024, |
| "transformer.layers.21.4.ff.0.0.weight": 4194304, |
| "transformer.layers.21.4.ff.0.0.bias": 4096, |
| "transformer.layers.21.4.ff.2.weight": 4194304, |
| "transformer.layers.21.4.ff.2.bias": 1024, |
| "transformer.layers.22.0.weight": 2097152, |
| "transformer.layers.22.1.g": 1024, |
| "transformer.layers.22.2.to_q.weight": 1048576, |
| "transformer.layers.22.2.to_q.bias": 1024, |
| "transformer.layers.22.2.to_k.weight": 1048576, |
| "transformer.layers.22.2.to_k.bias": 1024, |
| "transformer.layers.22.2.to_v.weight": 1048576, |
| "transformer.layers.22.2.to_v.bias": 1024, |
| "transformer.layers.22.2.to_out.0.weight": 1048576, |
| "transformer.layers.22.2.to_out.0.bias": 1024, |
| "transformer.layers.22.3.g": 1024, |
| "transformer.layers.22.4.ff.0.0.weight": 4194304, |
| "transformer.layers.22.4.ff.0.0.bias": 4096, |
| "transformer.layers.22.4.ff.2.weight": 4194304, |
| "transformer.layers.22.4.ff.2.bias": 1024, |
| "transformer.layers.23.0.weight": 2097152, |
| "transformer.layers.23.1.g": 1024, |
| "transformer.layers.23.2.to_q.weight": 1048576, |
| "transformer.layers.23.2.to_q.bias": 1024, |
| "transformer.layers.23.2.to_k.weight": 1048576, |
| "transformer.layers.23.2.to_k.bias": 1024, |
| "transformer.layers.23.2.to_v.weight": 1048576, |
| "transformer.layers.23.2.to_v.bias": 1024, |
| "transformer.layers.23.2.to_out.0.weight": 1048576, |
| "transformer.layers.23.2.to_out.0.bias": 1024, |
| "transformer.layers.23.3.g": 1024, |
| "transformer.layers.23.4.ff.0.0.weight": 4194304, |
| "transformer.layers.23.4.ff.0.0.bias": 4096, |
| "transformer.layers.23.4.ff.2.weight": 4194304, |
| "transformer.layers.23.4.ff.2.bias": 1024, |
| "transformer.layers.24.0.weight": 2097152, |
| "transformer.layers.24.1.g": 1024, |
| "transformer.layers.24.2.to_q.weight": 1048576, |
| "transformer.layers.24.2.to_q.bias": 1024, |
| "transformer.layers.24.2.to_k.weight": 1048576, |
| "transformer.layers.24.2.to_k.bias": 1024, |
| "transformer.layers.24.2.to_v.weight": 1048576, |
| "transformer.layers.24.2.to_v.bias": 1024, |
| "transformer.layers.24.2.to_out.0.weight": 1048576, |
| "transformer.layers.24.2.to_out.0.bias": 1024, |
| "transformer.layers.24.3.g": 1024, |
| "transformer.layers.24.4.ff.0.0.weight": 4194304, |
| "transformer.layers.24.4.ff.0.0.bias": 4096, |
| "transformer.layers.24.4.ff.2.weight": 4194304, |
| "transformer.layers.24.4.ff.2.bias": 1024, |
| "transformer.layers.25.0.weight": 2097152, |
| "transformer.layers.25.1.g": 1024, |
| "transformer.layers.25.2.to_q.weight": 1048576, |
| "transformer.layers.25.2.to_q.bias": 1024, |
| "transformer.layers.25.2.to_k.weight": 1048576, |
| "transformer.layers.25.2.to_k.bias": 1024, |
| "transformer.layers.25.2.to_v.weight": 1048576, |
| "transformer.layers.25.2.to_v.bias": 1024, |
| "transformer.layers.25.2.to_out.0.weight": 1048576, |
| "transformer.layers.25.2.to_out.0.bias": 1024, |
| "transformer.layers.25.3.g": 1024, |
| "transformer.layers.25.4.ff.0.0.weight": 4194304, |
| "transformer.layers.25.4.ff.0.0.bias": 4096, |
| "transformer.layers.25.4.ff.2.weight": 4194304, |
| "transformer.layers.25.4.ff.2.bias": 1024, |
| "transformer.norm_out.g": 1024, |
| "transformer.proj_out.weight": 102400, |
| "transformer.proj_out.bias": 100 |
| }, |
| "important_layers": [ |
| "transformer.time_embed.time_mlp.0.weight", |
| "transformer.time_embed.time_mlp.2.weight", |
| "transformer.text_embed.text_embed.weight", |
| "transformer.input_embed.proj.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight", |
| "transformer.layers.0.2.to_q.weight", |
| "transformer.layers.0.2.to_k.weight", |
| "transformer.layers.0.2.to_v.weight", |
| "transformer.layers.0.2.to_out.0.weight", |
| "transformer.layers.0.4.ff.0.0.weight", |
| "transformer.layers.0.4.ff.2.weight", |
| "transformer.layers.1.2.to_q.weight", |
| "transformer.layers.1.2.to_k.weight", |
| "transformer.layers.1.2.to_v.weight", |
| "transformer.layers.1.2.to_out.0.weight", |
| "transformer.layers.1.4.ff.0.0.weight", |
| "transformer.layers.1.4.ff.2.weight", |
| "transformer.layers.2.2.to_q.weight", |
| "transformer.layers.2.2.to_k.weight", |
| "transformer.layers.2.2.to_v.weight", |
| "transformer.layers.2.2.to_out.0.weight", |
| "transformer.layers.2.4.ff.0.0.weight", |
| "transformer.layers.2.4.ff.2.weight", |
| "transformer.layers.3.2.to_q.weight", |
| "transformer.layers.3.2.to_k.weight", |
| "transformer.layers.3.2.to_v.weight", |
| "transformer.layers.3.2.to_out.0.weight", |
| "transformer.layers.3.4.ff.0.0.weight", |
| "transformer.layers.3.4.ff.2.weight", |
| "transformer.layers.4.2.to_q.weight", |
| "transformer.layers.4.2.to_k.weight", |
| "transformer.layers.4.2.to_v.weight", |
| "transformer.layers.4.2.to_out.0.weight", |
| "transformer.layers.4.4.ff.0.0.weight", |
| "transformer.layers.4.4.ff.2.weight", |
| "transformer.layers.5.2.to_q.weight", |
| "transformer.layers.5.2.to_k.weight", |
| "transformer.layers.5.2.to_v.weight", |
| "transformer.layers.5.2.to_out.0.weight", |
| "transformer.layers.5.4.ff.0.0.weight", |
| "transformer.layers.5.4.ff.2.weight", |
| "transformer.layers.6.2.to_q.weight", |
| "transformer.layers.6.2.to_k.weight", |
| "transformer.layers.6.2.to_v.weight", |
| "transformer.layers.6.2.to_out.0.weight", |
| "transformer.layers.6.4.ff.0.0.weight", |
| "transformer.layers.6.4.ff.2.weight", |
| "transformer.layers.7.2.to_q.weight", |
| "transformer.layers.7.2.to_k.weight", |
| "transformer.layers.7.2.to_v.weight", |
| "transformer.layers.7.2.to_out.0.weight", |
| "transformer.layers.7.4.ff.0.0.weight", |
| "transformer.layers.7.4.ff.2.weight", |
| "transformer.layers.8.4.ff.0.0.weight", |
| "transformer.layers.8.4.ff.2.weight", |
| "transformer.layers.9.4.ff.0.0.weight", |
| "transformer.layers.9.4.ff.2.weight", |
| "transformer.layers.10.4.ff.0.0.weight", |
| "transformer.layers.10.4.ff.2.weight", |
| "transformer.layers.11.4.ff.0.0.weight", |
| "transformer.layers.11.4.ff.2.weight", |
| "transformer.layers.12.4.ff.0.0.weight", |
| "transformer.layers.12.4.ff.2.weight", |
| "transformer.layers.13.0.weight", |
| "transformer.layers.13.4.ff.0.0.weight", |
| "transformer.layers.13.4.ff.2.weight", |
| "transformer.layers.14.0.weight", |
| "transformer.layers.14.4.ff.0.0.weight", |
| "transformer.layers.14.4.ff.2.weight", |
| "transformer.layers.15.0.weight", |
| "transformer.layers.15.4.ff.0.0.weight", |
| "transformer.layers.15.4.ff.2.weight", |
| "transformer.layers.16.4.ff.0.0.weight", |
| "transformer.layers.16.4.ff.2.weight", |
| "transformer.layers.17.4.ff.0.0.weight", |
| "transformer.layers.17.4.ff.2.weight", |
| "transformer.layers.18.4.ff.0.0.weight", |
| "transformer.layers.18.4.ff.2.weight", |
| "transformer.layers.19.4.ff.0.0.weight", |
| "transformer.layers.19.4.ff.2.weight", |
| "transformer.layers.20.4.ff.0.0.weight", |
| "transformer.layers.20.4.ff.2.weight", |
| "transformer.layers.21.4.ff.0.0.weight", |
| "transformer.layers.21.4.ff.2.weight", |
| "transformer.layers.22.4.ff.0.0.weight", |
| "transformer.layers.22.4.ff.2.weight", |
| "transformer.layers.23.4.ff.0.0.weight", |
| "transformer.layers.23.4.ff.2.weight", |
| "transformer.layers.24.4.ff.0.0.weight", |
| "transformer.layers.24.4.ff.2.weight", |
| "transformer.layers.25.4.ff.0.0.weight", |
| "transformer.layers.25.4.ff.2.weight" |
| ], |
| "bottleneck_layers": [], |
| "recommendations": { |
| "focus_layers": [ |
| "transformer.time_embed.time_mlp.0.weight", |
| "transformer.time_embed.time_mlp.2.weight", |
| "transformer.text_embed.text_embed.weight", |
| "transformer.input_embed.proj.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight", |
| "transformer.layers.0.2.to_q.weight", |
| "transformer.layers.0.2.to_k.weight", |
| "transformer.layers.0.2.to_v.weight", |
| "transformer.layers.0.2.to_out.0.weight", |
| "transformer.layers.0.4.ff.0.0.weight", |
| "transformer.layers.0.4.ff.2.weight", |
| "transformer.layers.1.2.to_q.weight", |
| "transformer.layers.1.2.to_k.weight", |
| "transformer.layers.1.2.to_v.weight", |
| "transformer.layers.1.2.to_out.0.weight", |
| "transformer.layers.1.4.ff.0.0.weight", |
| "transformer.layers.1.4.ff.2.weight", |
| "transformer.layers.2.2.to_q.weight", |
| "transformer.layers.2.2.to_k.weight", |
| "transformer.layers.2.2.to_v.weight", |
| "transformer.layers.2.2.to_out.0.weight", |
| "transformer.layers.2.4.ff.0.0.weight", |
| "transformer.layers.2.4.ff.2.weight", |
| "transformer.layers.3.2.to_q.weight", |
| "transformer.layers.3.2.to_k.weight", |
| "transformer.layers.3.2.to_v.weight", |
| "transformer.layers.3.2.to_out.0.weight", |
| "transformer.layers.3.4.ff.0.0.weight", |
| "transformer.layers.3.4.ff.2.weight", |
| "transformer.layers.4.2.to_q.weight", |
| "transformer.layers.4.2.to_k.weight", |
| "transformer.layers.4.2.to_v.weight", |
| "transformer.layers.4.2.to_out.0.weight", |
| "transformer.layers.4.4.ff.0.0.weight", |
| "transformer.layers.4.4.ff.2.weight", |
| "transformer.layers.5.2.to_q.weight", |
| "transformer.layers.5.2.to_k.weight", |
| "transformer.layers.5.2.to_v.weight", |
| "transformer.layers.5.2.to_out.0.weight", |
| "transformer.layers.5.4.ff.0.0.weight", |
| "transformer.layers.5.4.ff.2.weight", |
| "transformer.layers.6.2.to_q.weight", |
| "transformer.layers.6.2.to_k.weight", |
| "transformer.layers.6.2.to_v.weight", |
| "transformer.layers.6.2.to_out.0.weight", |
| "transformer.layers.6.4.ff.0.0.weight", |
| "transformer.layers.6.4.ff.2.weight", |
| "transformer.layers.7.2.to_q.weight", |
| "transformer.layers.7.2.to_k.weight", |
| "transformer.layers.7.2.to_v.weight", |
| "transformer.layers.7.2.to_out.0.weight", |
| "transformer.layers.7.4.ff.0.0.weight", |
| "transformer.layers.7.4.ff.2.weight", |
| "transformer.layers.8.4.ff.0.0.weight", |
| "transformer.layers.8.4.ff.2.weight", |
| "transformer.layers.9.4.ff.0.0.weight", |
| "transformer.layers.9.4.ff.2.weight", |
| "transformer.layers.10.4.ff.0.0.weight", |
| "transformer.layers.10.4.ff.2.weight", |
| "transformer.layers.11.4.ff.0.0.weight", |
| "transformer.layers.11.4.ff.2.weight", |
| "transformer.layers.12.4.ff.0.0.weight", |
| "transformer.layers.12.4.ff.2.weight", |
| "transformer.layers.13.0.weight", |
| "transformer.layers.13.4.ff.0.0.weight", |
| "transformer.layers.13.4.ff.2.weight", |
| "transformer.layers.14.0.weight", |
| "transformer.layers.14.4.ff.0.0.weight", |
| "transformer.layers.14.4.ff.2.weight", |
| "transformer.layers.15.0.weight", |
| "transformer.layers.15.4.ff.0.0.weight", |
| "transformer.layers.15.4.ff.2.weight", |
| "transformer.layers.16.4.ff.0.0.weight", |
| "transformer.layers.16.4.ff.2.weight", |
| "transformer.layers.17.4.ff.0.0.weight", |
| "transformer.layers.17.4.ff.2.weight", |
| "transformer.layers.18.4.ff.0.0.weight", |
| "transformer.layers.18.4.ff.2.weight", |
| "transformer.layers.19.4.ff.0.0.weight", |
| "transformer.layers.19.4.ff.2.weight", |
| "transformer.layers.20.4.ff.0.0.weight", |
| "transformer.layers.20.4.ff.2.weight", |
| "transformer.layers.21.4.ff.0.0.weight", |
| "transformer.layers.21.4.ff.2.weight", |
| "transformer.layers.22.4.ff.0.0.weight", |
| "transformer.layers.22.4.ff.2.weight", |
| "transformer.layers.23.4.ff.0.0.weight", |
| "transformer.layers.23.4.ff.2.weight", |
| "transformer.layers.24.4.ff.0.0.weight", |
| "transformer.layers.24.4.ff.2.weight", |
| "transformer.layers.25.4.ff.0.0.weight", |
| "transformer.layers.25.4.ff.2.weight" |
| ] |
| }, |
| "total_parameters": 391, |
| "total_elements": 360755948, |
| "param_ranges": { |
| "transformer.time_embed.time_mlp.0.weight": { |
| "min": -0.43053385615348816, |
| "max": 0.2987181544303894, |
| "mean": -0.0025508857797831297, |
| "std": 0.04255492985248566, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 256 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.0.bias": { |
| "min": -0.06311193853616714, |
| "max": 0.10768741369247437, |
| "mean": 0.0006200151983648539, |
| "std": 0.03410356491804123, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.2.weight": { |
| "min": -0.41268399357795715, |
| "max": 0.8365581035614014, |
| "mean": -0.00020668540673796088, |
| "std": 0.02410803735256195, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.2.bias": { |
| "min": -0.11543754488229752, |
| "max": 0.3218643069267273, |
| "mean": -0.0009378742543049157, |
| "std": 0.019571715965867043, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.text_embed.text_embed.weight": { |
| "min": -2.7987403869628906, |
| "max": 2.8775689601898193, |
| "mean": -0.0003620539791882038, |
| "std": 0.6153795123100281, |
| "sparsity": 0.0, |
| "shape": [ |
| 2546, |
| 100 |
| ] |
| }, |
| "transformer.input_embed.proj.weight": { |
| "min": -0.2798178493976593, |
| "max": 0.38195931911468506, |
| "mean": 0.0004235386732034385, |
| "std": 0.04274815320968628, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 300 |
| ] |
| }, |
| "transformer.input_embed.proj.bias": { |
| "min": -0.22243480384349823, |
| "max": 0.20970797538757324, |
| "mean": -0.004494894295930862, |
| "std": 0.04093479365110397, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { |
| "min": -0.42797601222991943, |
| "max": 0.47545987367630005, |
| "mean": 3.68623682334146e-06, |
| "std": 0.024507373571395874, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 64, |
| 31 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { |
| "min": -0.32538774609565735, |
| "max": 0.15757951140403748, |
| "mean": -0.046732865273952484, |
| "std": 0.05161404609680176, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { |
| "min": -0.4106280207633972, |
| "max": 0.35474810004234314, |
| "mean": -0.000128601081087254, |
| "std": 0.02359883114695549, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 64, |
| 31 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { |
| "min": -0.22982768714427948, |
| "max": 0.2626851797103882, |
| "mean": -0.029157839715480804, |
| "std": 0.04937523230910301, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.1.g": { |
| "min": 0.2546607255935669, |
| "max": 0.8210369348526001, |
| "mean": 0.5255380868911743, |
| "std": 0.08102277666330338, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_q.weight": { |
| "min": -0.29706501960754395, |
| "max": 0.26598596572875977, |
| "mean": -0.0004244564042892307, |
| "std": 0.03210071846842766, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_q.bias": { |
| "min": -0.09280094504356384, |
| "max": 0.12531320750713348, |
| "mean": 0.0006500966264866292, |
| "std": 0.025744492188096046, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_k.weight": { |
| "min": -0.2907920181751251, |
| "max": 0.2819848656654358, |
| "mean": -7.519756036344916e-05, |
| "std": 0.030932072550058365, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_k.bias": { |
| "min": -5.9063615798950195, |
| "max": 5.821039199829102, |
| "mean": -0.009349350817501545, |
| "std": 1.2963582277297974, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_v.weight": { |
| "min": -0.4252597391605377, |
| "max": 0.3442302644252777, |
| "mean": 9.807890455704182e-05, |
| "std": 0.029951980337500572, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_v.bias": { |
| "min": -0.02886926755309105, |
| "max": 0.027612265199422836, |
| "mean": -0.0003159886400680989, |
| "std": 0.012566552497446537, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_out.0.weight": { |
| "min": -0.4545641541481018, |
| "max": 0.4486750364303589, |
| "mean": 2.288275572936982e-05, |
| "std": 0.023853391408920288, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_out.0.bias": { |
| "min": -0.08885892480611801, |
| "max": 0.09123405814170837, |
| "mean": 0.002273206366226077, |
| "std": 0.019519906491041183, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.3.g": { |
| "min": 0.26680853962898254, |
| "max": 1.0574053525924683, |
| "mean": 0.5312761068344116, |
| "std": 0.10467371344566345, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.4.ff.0.0.weight": { |
| "min": -0.575035810470581, |
| "max": 0.6089199781417847, |
| "mean": -0.00043114880099892616, |
| "std": 0.03859530761837959, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.4.ff.0.0.bias": { |
| "min": -0.18226587772369385, |
| "max": 0.04570382833480835, |
| "mean": -0.029475372284650803, |
| "std": 0.04265210032463074, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.0.4.ff.2.weight": { |
| "min": -1.1675195693969727, |
| "max": 1.6349984407424927, |
| "mean": 0.00032014260068535805, |
| "std": 0.02769290842115879, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.0.4.ff.2.bias": { |
| "min": -0.16255705058574677, |
| "max": 0.20596350729465485, |
| "mean": -0.021122729405760765, |
| "std": 0.0279533751308918, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.1.g": { |
| "min": 0.2242354154586792, |
| "max": 0.8446622490882874, |
| "mean": 0.4876382350921631, |
| "std": 0.07536358386278152, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_q.weight": { |
| "min": -0.2560102641582489, |
| "max": 0.3063015341758728, |
| "mean": -8.342660294147208e-06, |
| "std": 0.03346908837556839, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_q.bias": { |
| "min": -0.09542153775691986, |
| "max": 0.11059843748807907, |
| "mean": 6.575271254405379e-05, |
| "std": 0.026967303827404976, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_k.weight": { |
| "min": -0.29774004220962524, |
| "max": 0.296736478805542, |
| "mean": 5.098901965538971e-05, |
| "std": 0.03253892436623573, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_k.bias": { |
| "min": -5.170334339141846, |
| "max": 5.090466022491455, |
| "mean": -0.014626836404204369, |
| "std": 1.1584166288375854, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_v.weight": { |
| "min": -0.34492507576942444, |
| "max": 0.3436436355113983, |
| "mean": 7.888609980000183e-05, |
| "std": 0.030058223754167557, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_v.bias": { |
| "min": -0.03619777783751488, |
| "max": 0.033210255205631256, |
| "mean": -0.00014313205610960722, |
| "std": 0.013021216727793217, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_out.0.weight": { |
| "min": -0.31573694944381714, |
| "max": 0.37568625807762146, |
| "mean": -2.092823342536576e-05, |
| "std": 0.024055050686001778, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_out.0.bias": { |
| "min": -0.10554195195436478, |
| "max": 0.12217912822961807, |
| "mean": -0.001965724630281329, |
| "std": 0.02885899320244789, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.3.g": { |
| "min": 0.31185638904571533, |
| "max": 1.1226844787597656, |
| "mean": 0.6664173007011414, |
| "std": 0.09809636324644089, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.4.ff.0.0.weight": { |
| "min": -0.8725683689117432, |
| "max": 0.6277270317077637, |
| "mean": 0.001675453968346119, |
| "std": 0.04743659123778343, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.4.ff.0.0.bias": { |
| "min": -0.27133694291114807, |
| "max": 0.034276124089956284, |
| "mean": -0.04661266878247261, |
| "std": 0.04062533751130104, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.1.4.ff.2.weight": { |
| "min": -0.9219098687171936, |
| "max": 0.9648231863975525, |
| "mean": 0.0010219970718026161, |
| "std": 0.04070163145661354, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.1.4.ff.2.bias": { |
| "min": -0.14473342895507812, |
| "max": 0.07504827529191971, |
| "mean": -0.009093794040381908, |
| "std": 0.025712795555591583, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.1.g": { |
| "min": 0.23969869315624237, |
| "max": 0.7134895920753479, |
| "mean": 0.4472740888595581, |
| "std": 0.05947508662939072, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_q.weight": { |
| "min": -0.273428350687027, |
| "max": 0.2982955574989319, |
| "mean": 8.738919859752059e-06, |
| "std": 0.03547058627009392, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_q.bias": { |
| "min": -0.11919642984867096, |
| "max": 0.11864279955625534, |
| "mean": 0.0007499873172491789, |
| "std": 0.027633123099803925, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_k.weight": { |
| "min": -0.2816983759403229, |
| "max": 0.2803042232990265, |
| "mean": -7.669557089684531e-05, |
| "std": 0.03509991616010666, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_k.bias": { |
| "min": -2.514967679977417, |
| "max": 2.5269885063171387, |
| "mean": 0.026808204129338264, |
| "std": 0.587342381477356, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_v.weight": { |
| "min": -0.22164961695671082, |
| "max": 0.27225953340530396, |
| "mean": 2.8316171665210277e-06, |
| "std": 0.03073127381503582, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_v.bias": { |
| "min": -0.033373475074768066, |
| "max": 0.031244782730937004, |
| "mean": 0.00011742905917344615, |
| "std": 0.012399322353303432, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_out.0.weight": { |
| "min": -0.23557811975479126, |
| "max": 0.23209546506404877, |
| "mean": 5.68200193811208e-05, |
| "std": 0.025696886703372, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_out.0.bias": { |
| "min": -0.13604120910167694, |
| "max": 0.1282019019126892, |
| "mean": -0.005500663537532091, |
| "std": 0.0399833545088768, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.3.g": { |
| "min": 0.3545033931732178, |
| "max": 1.174311876296997, |
| "mean": 0.7105965614318848, |
| "std": 0.10393685102462769, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.4.ff.0.0.weight": { |
| "min": -0.6180550456047058, |
| "max": 0.555590033531189, |
| "mean": 0.0011606597108766437, |
| "std": 0.046113595366477966, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.4.ff.0.0.bias": { |
| "min": -0.1892954707145691, |
| "max": 0.024854592978954315, |
| "mean": -0.034856364130973816, |
| "std": 0.028640495613217354, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.2.4.ff.2.weight": { |
| "min": -1.1327383518218994, |
| "max": 0.972023606300354, |
| "mean": 0.00035934254992753267, |
| "std": 0.04234174266457558, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.2.4.ff.2.bias": { |
| "min": -0.5991520285606384, |
| "max": 0.06305119395256042, |
| "mean": -0.004881403874605894, |
| "std": 0.02864677459001541, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.1.g": { |
| "min": 0.3754131495952606, |
| "max": 0.9439838528633118, |
| "mean": 0.5927106142044067, |
| "std": 0.0675281211733818, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_q.weight": { |
| "min": -0.3920239806175232, |
| "max": 0.36984747648239136, |
| "mean": 7.029663538560271e-05, |
| "std": 0.03718528896570206, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_q.bias": { |
| "min": -0.11922823637723923, |
| "max": 0.13680268824100494, |
| "mean": 0.0009289362351410091, |
| "std": 0.029231850057840347, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_k.weight": { |
| "min": -0.6204254031181335, |
| "max": 0.5099692940711975, |
| "mean": 1.5338478988269344e-05, |
| "std": 0.03643814101815224, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_k.bias": { |
| "min": -8.20434856414795, |
| "max": 8.80774211883545, |
| "mean": -0.10939832031726837, |
| "std": 1.7015736103057861, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_v.weight": { |
| "min": -0.2770227789878845, |
| "max": 0.2399866282939911, |
| "mean": 5.2325925935292616e-05, |
| "std": 0.032612308859825134, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_v.bias": { |
| "min": -0.05187734216451645, |
| "max": 0.039576977491378784, |
| "mean": 9.007145126815885e-05, |
| "std": 0.01296569500118494, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_out.0.weight": { |
| "min": -0.23111766576766968, |
| "max": 0.23510430753231049, |
| "mean": -2.2175441699801013e-05, |
| "std": 0.029389016330242157, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_out.0.bias": { |
| "min": -0.20455272495746613, |
| "max": 0.10541031509637833, |
| "mean": -0.0040219868533313274, |
| "std": 0.03264109417796135, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.3.g": { |
| "min": 0.3396739959716797, |
| "max": 1.0156350135803223, |
| "mean": 0.7007465362548828, |
| "std": 0.09685582667589188, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.4.ff.0.0.weight": { |
| "min": -0.5659961700439453, |
| "max": 0.8350182771682739, |
| "mean": 0.0004152171895839274, |
| "std": 0.042294152081012726, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.4.ff.0.0.bias": { |
| "min": -0.2122603803873062, |
| "max": 0.03037133999168873, |
| "mean": -0.03219597041606903, |
| "std": 0.026528161019086838, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.3.4.ff.2.weight": { |
| "min": -0.7565364837646484, |
| "max": 0.7206384539604187, |
| "mean": -1.6425212379544973e-05, |
| "std": 0.03683505579829216, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.3.4.ff.2.bias": { |
| "min": -0.2637326717376709, |
| "max": 0.10635162889957428, |
| "mean": -0.003013473004102707, |
| "std": 0.028875315561890602, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.1.g": { |
| "min": 0.2841384708881378, |
| "max": 0.6960581541061401, |
| "mean": 0.4994935393333435, |
| "std": 0.046687543392181396, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_q.weight": { |
| "min": -0.27911630272865295, |
| "max": 0.23450130224227905, |
| "mean": -0.00011085053847637028, |
| "std": 0.038756489753723145, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_q.bias": { |
| "min": -0.15422014892101288, |
| "max": 0.1267157793045044, |
| "mean": -0.0022325206082314253, |
| "std": 0.03337828069925308, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_k.weight": { |
| "min": -0.4152379035949707, |
| "max": 0.6604457497596741, |
| "mean": -1.880790659924969e-05, |
| "std": 0.0390951968729496, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_k.bias": { |
| "min": -4.242863178253174, |
| "max": 4.727988243103027, |
| "mean": -0.020436234772205353, |
| "std": 1.0083643198013306, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_v.weight": { |
| "min": -0.24566201865673065, |
| "max": 0.2078404426574707, |
| "mean": 4.393987182993442e-05, |
| "std": 0.03396216034889221, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_v.bias": { |
| "min": -0.03461671993136406, |
| "max": 0.04490647837519646, |
| "mean": -1.8480626749806106e-05, |
| "std": 0.012636142782866955, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_out.0.weight": { |
| "min": -0.201488196849823, |
| "max": 0.20679476857185364, |
| "mean": -2.9119719329173677e-05, |
| "std": 0.03102005459368229, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_out.0.bias": { |
| "min": -0.2001321166753769, |
| "max": 0.11347545683383942, |
| "mean": -0.0028973689768463373, |
| "std": 0.03452814370393753, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.3.g": { |
| "min": 0.3668859004974365, |
| "max": 1.0606576204299927, |
| "mean": 0.6705638766288757, |
| "std": 0.06651072949171066, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.4.ff.0.0.weight": { |
| "min": -0.3992147445678711, |
| "max": 0.5030191540718079, |
| "mean": -3.829112756648101e-05, |
| "std": 0.04113021492958069, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.4.ff.0.0.bias": { |
| "min": -0.12906894087791443, |
| "max": 0.02686660923063755, |
| "mean": -0.030545957386493683, |
| "std": 0.02190822921693325, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.4.4.ff.2.weight": { |
| "min": -0.45001378655433655, |
| "max": 0.43416494131088257, |
| "mean": 7.559473306173459e-05, |
| "std": 0.03489038348197937, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.4.4.ff.2.bias": { |
| "min": -0.26795703172683716, |
| "max": 0.07305809110403061, |
| "mean": -0.0010922406800091267, |
| "std": 0.023138197138905525, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.1.g": { |
| "min": 0.28738605976104736, |
| "max": 0.6873639225959778, |
| "mean": 0.5246094465255737, |
| "std": 0.04773576930165291, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_q.weight": { |
| "min": -0.2230275720357895, |
| "max": 0.22428689897060394, |
| "mean": 1.5606414308422245e-05, |
| "std": 0.03894846886396408, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_q.bias": { |
| "min": -0.1365431845188141, |
| "max": 0.1094546914100647, |
| "mean": 0.0002404236583970487, |
| "std": 0.02924003079533577, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_k.weight": { |
| "min": -0.375844806432724, |
| "max": 0.4382041096687317, |
| "mean": -9.796498488867655e-06, |
| "std": 0.039285723119974136, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_k.bias": { |
| "min": -3.8503658771514893, |
| "max": 5.0051727294921875, |
| "mean": 0.009742870926856995, |
| "std": 0.8458123803138733, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_v.weight": { |
| "min": -0.22342294454574585, |
| "max": 0.22070662677288055, |
| "mean": -2.869974196073599e-07, |
| "std": 0.03440912440419197, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_v.bias": { |
| "min": -0.04364994913339615, |
| "max": 0.03587768226861954, |
| "mean": -0.00025836972054094076, |
| "std": 0.012079192325472832, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_out.0.weight": { |
| "min": -0.21351207792758942, |
| "max": 0.18924757838249207, |
| "mean": -1.7089078028220683e-05, |
| "std": 0.03153553605079651, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_out.0.bias": { |
| "min": -0.18106123805046082, |
| "max": 0.12093079835176468, |
| "mean": -0.0023932361509650946, |
| "std": 0.04127350077033043, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.3.g": { |
| "min": 0.4227260649204254, |
| "max": 0.9448354244232178, |
| "mean": 0.662743330001831, |
| "std": 0.05696980655193329, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.4.ff.0.0.weight": { |
| "min": -0.37132784724235535, |
| "max": 0.4766311049461365, |
| "mean": -8.210691157728434e-05, |
| "std": 0.0408891923725605, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.4.ff.0.0.bias": { |
| "min": -0.20899881422519684, |
| "max": 0.027237456291913986, |
| "mean": -0.03024902194738388, |
| "std": 0.02138604037463665, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.5.4.ff.2.weight": { |
| "min": -0.3416992723941803, |
| "max": 0.735672652721405, |
| "mean": 8.195172995328903e-05, |
| "std": 0.03476583957672119, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.5.4.ff.2.bias": { |
| "min": -0.2404329776763916, |
| "max": 0.05046902596950531, |
| "mean": -0.001188310096040368, |
| "std": 0.020469345152378082, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.1.g": { |
| "min": 0.3061925768852234, |
| "max": 0.654449999332428, |
| "mean": 0.5251765251159668, |
| "std": 0.04624079912900925, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_q.weight": { |
| "min": -0.3049762547016144, |
| "max": 0.21794484555721283, |
| "mean": 7.015730807324871e-05, |
| "std": 0.03949474170804024, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_q.bias": { |
| "min": -0.14950910210609436, |
| "max": 0.13137659430503845, |
| "mean": 0.000338978337822482, |
| "std": 0.030483614653348923, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_k.weight": { |
| "min": -0.2578408122062683, |
| "max": 0.20263631641864777, |
| "mean": 3.113361162832007e-05, |
| "std": 0.03948460891842842, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_k.bias": { |
| "min": -2.339005708694458, |
| "max": 2.378676176071167, |
| "mean": -0.026260126382112503, |
| "std": 0.45006638765335083, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_v.weight": { |
| "min": -0.18907499313354492, |
| "max": 0.21106310188770294, |
| "mean": 3.715493221534416e-05, |
| "std": 0.03479326516389847, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_v.bias": { |
| "min": -0.031842123717069626, |
| "max": 0.03563522920012474, |
| "mean": -0.00019889514078386128, |
| "std": 0.012288383208215237, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_out.0.weight": { |
| "min": -0.18906620144844055, |
| "max": 0.17065204679965973, |
| "mean": -6.830548954894766e-05, |
| "std": 0.032169949263334274, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_out.0.bias": { |
| "min": -0.13967929780483246, |
| "max": 0.13765227794647217, |
| "mean": -0.0025106696411967278, |
| "std": 0.051296915858983994, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.3.g": { |
| "min": 0.4670410752296448, |
| "max": 0.9571460485458374, |
| "mean": 0.668942928314209, |
| "std": 0.052938032895326614, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.4.ff.0.0.weight": { |
| "min": -0.32453829050064087, |
| "max": 0.3093876242637634, |
| "mean": -9.305285857408307e-07, |
| "std": 0.04094512388110161, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.4.ff.0.0.bias": { |
| "min": -0.12521511316299438, |
| "max": 0.025563344359397888, |
| "mean": -0.030704183503985405, |
| "std": 0.01984286867082119, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.6.4.ff.2.weight": { |
| "min": -0.4407917857170105, |
| "max": 0.4464106857776642, |
| "mean": 9.500519081484526e-05, |
| "std": 0.03511863574385643, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.6.4.ff.2.bias": { |
| "min": -0.22505341470241547, |
| "max": 0.051904987543821335, |
| "mean": -0.0011818333296105266, |
| "std": 0.018484966829419136, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.1.g": { |
| "min": 0.33911338448524475, |
| "max": 0.7404670715332031, |
| "mean": 0.5587128400802612, |
| "std": 0.04148301109671593, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_q.weight": { |
| "min": -0.27348393201828003, |
| "max": 0.2790311872959137, |
| "mean": 2.0330318875494413e-05, |
| "std": 0.041056688874959946, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_q.bias": { |
| "min": -0.13706564903259277, |
| "max": 0.14011380076408386, |
| "mean": 0.0004902533255517483, |
| "std": 0.026642272248864174, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_k.weight": { |
| "min": -0.4915930926799774, |
| "max": 0.35670116543769836, |
| "mean": 8.893256745068356e-05, |
| "std": 0.04069444537162781, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_k.bias": { |
| "min": -2.300570249557495, |
| "max": 1.7478224039077759, |
| "mean": -0.021113090217113495, |
| "std": 0.5004414319992065, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_v.weight": { |
| "min": -0.21829766035079956, |
| "max": 0.19811730086803436, |
| "mean": -4.052483200212009e-05, |
| "std": 0.034232448786497116, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_v.bias": { |
| "min": -0.041401587426662445, |
| "max": 0.038982585072517395, |
| "mean": -0.00013965339167043567, |
| "std": 0.012888636440038681, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_out.0.weight": { |
| "min": -0.17814268171787262, |
| "max": 0.1835789680480957, |
| "mean": 4.7900641220621765e-05, |
| "std": 0.031555790454149246, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_out.0.bias": { |
| "min": -0.1802123337984085, |
| "max": 0.1839253157377243, |
| "mean": -0.0022146895062178373, |
| "std": 0.05485367402434349, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.3.g": { |
| "min": 0.47431865334510803, |
| "max": 1.0268715620040894, |
| "mean": 0.6453023552894592, |
| "std": 0.05052410438656807, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.4.ff.0.0.weight": { |
| "min": -0.27204346656799316, |
| "max": 0.30987846851348877, |
| "mean": 0.00011226898641325533, |
| "std": 0.04068146273493767, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.4.ff.0.0.bias": { |
| "min": -0.10556552559137344, |
| "max": 0.02664870023727417, |
| "mean": -0.02952779270708561, |
| "std": 0.017950357869267464, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.7.4.ff.2.weight": { |
| "min": -0.3395896553993225, |
| "max": 0.3302164077758789, |
| "mean": 5.2438736020121723e-05, |
| "std": 0.03441261127591133, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.7.4.ff.2.bias": { |
| "min": -0.18205386400222778, |
| "max": 0.04234303906559944, |
| "mean": -0.0010605738498270512, |
| "std": 0.01722128316760063, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.1.g": { |
| "min": 0.3254714906215668, |
| "max": 0.6875306367874146, |
| "mean": 0.5112907886505127, |
| "std": 0.03710601106286049, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_q.weight": { |
| "min": -0.23404580354690552, |
| "max": 0.22564062476158142, |
| "mean": -3.628679769462906e-05, |
| "std": 0.03917597234249115, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_q.bias": { |
| "min": -0.1157650500535965, |
| "max": 0.13217955827713013, |
| "mean": 0.00015458319103345275, |
| "std": 0.02921123616397381, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_k.weight": { |
| "min": -0.3531610369682312, |
| "max": 0.28566646575927734, |
| "mean": 7.01215958542889e-06, |
| "std": 0.03924458101391792, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_k.bias": { |
| "min": -4.1371684074401855, |
| "max": 3.5479142665863037, |
| "mean": -0.011608399450778961, |
| "std": 0.6831862926483154, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_v.weight": { |
| "min": -0.2113451212644577, |
| "max": 0.20978450775146484, |
| "mean": 3.466297494014725e-05, |
| "std": 0.03448467701673508, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_v.bias": { |
| "min": -0.0358961820602417, |
| "max": 0.04827914386987686, |
| "mean": 0.000792390201240778, |
| "std": 0.012867480516433716, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_out.0.weight": { |
| "min": -0.21074581146240234, |
| "max": 0.19335627555847168, |
| "mean": -1.3081223642075201e-06, |
| "std": 0.031695783138275146, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_out.0.bias": { |
| "min": -0.18677425384521484, |
| "max": 0.17732204496860504, |
| "mean": -0.002835639752447605, |
| "std": 0.05864328145980835, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.3.g": { |
| "min": 0.47452113032341003, |
| "max": 1.0454236268997192, |
| "mean": 0.651544451713562, |
| "std": 0.05015648156404495, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.4.ff.0.0.weight": { |
| "min": -0.2484469711780548, |
| "max": 0.3293426036834717, |
| "mean": 0.00018075718253385276, |
| "std": 0.04056986793875694, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.4.ff.0.0.bias": { |
| "min": -0.12511543929576874, |
| "max": 0.024807237088680267, |
| "mean": -0.03050871379673481, |
| "std": 0.017624877393245697, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.8.4.ff.2.weight": { |
| "min": -0.42189329862594604, |
| "max": 0.4829460680484772, |
| "mean": -1.433467332390137e-06, |
| "std": 0.0353967621922493, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.8.4.ff.2.bias": { |
| "min": -0.1517147570848465, |
| "max": 0.043470486998558044, |
| "mean": 4.9440553993918e-05, |
| "std": 0.014891887083649635, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.1.g": { |
| "min": 0.31546592712402344, |
| "max": 0.6829473972320557, |
| "mean": 0.552940845489502, |
| "std": 0.0407881923019886, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_q.weight": { |
| "min": -0.20687490701675415, |
| "max": 0.22027458250522614, |
| "mean": 3.187588299624622e-05, |
| "std": 0.03829912096261978, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_q.bias": { |
| "min": -0.1380588412284851, |
| "max": 0.11287239193916321, |
| "mean": 2.8096917958464473e-05, |
| "std": 0.025843404233455658, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_k.weight": { |
| "min": -0.40360599756240845, |
| "max": 0.37176549434661865, |
| "mean": 2.5846293283393607e-05, |
| "std": 0.03817979246377945, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_k.bias": { |
| "min": -3.7753050327301025, |
| "max": 2.8720550537109375, |
| "mean": 0.001174271572381258, |
| "std": 0.5172262787818909, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_v.weight": { |
| "min": -0.203634575009346, |
| "max": 0.19783173501491547, |
| "mean": 2.9641731089213863e-05, |
| "std": 0.034296903759241104, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_v.bias": { |
| "min": -0.050782062113285065, |
| "max": 0.039943333715200424, |
| "mean": -0.00042034429498016834, |
| "std": 0.01341927982866764, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_out.0.weight": { |
| "min": -0.1968999058008194, |
| "max": 0.20258377492427826, |
| "mean": -1.2486772902775556e-05, |
| "std": 0.03180483356118202, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_out.0.bias": { |
| "min": -0.19323131442070007, |
| "max": 0.19526611268520355, |
| "mean": -0.002963971346616745, |
| "std": 0.06255338340997696, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.3.g": { |
| "min": 0.34893983602523804, |
| "max": 1.0871814489364624, |
| "mean": 0.6672742962837219, |
| "std": 0.05565904080867767, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.4.ff.0.0.weight": { |
| "min": -0.22620373964309692, |
| "max": 0.251870721578598, |
| "mean": 0.00035865549580194056, |
| "std": 0.040759552270174026, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.4.ff.0.0.bias": { |
| "min": -0.09141312539577484, |
| "max": 0.043738093227148056, |
| "mean": -0.03009146638214588, |
| "std": 0.017630403861403465, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.9.4.ff.2.weight": { |
| "min": -0.3538018465042114, |
| "max": 0.30474764108657837, |
| "mean": -4.393163908389397e-05, |
| "std": 0.03712210804224014, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.9.4.ff.2.bias": { |
| "min": -0.16189776360988617, |
| "max": 0.06336814165115356, |
| "mean": -8.093340147752315e-05, |
| "std": 0.019419532269239426, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.1.g": { |
| "min": 0.34863826632499695, |
| "max": 0.7244340777397156, |
| "mean": 0.5424437522888184, |
| "std": 0.039265505969524384, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_q.weight": { |
| "min": -0.21948550641536713, |
| "max": 0.22342580556869507, |
| "mean": -1.1189426913915668e-05, |
| "std": 0.039230361580848694, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_q.bias": { |
| "min": -0.11868271231651306, |
| "max": 0.17081572115421295, |
| "mean": 0.00028613023459911346, |
| "std": 0.025137728080153465, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_k.weight": { |
| "min": -0.24677123129367828, |
| "max": 0.30096495151519775, |
| "mean": -3.686630952870473e-05, |
| "std": 0.03892983868718147, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_k.bias": { |
| "min": -3.5091044902801514, |
| "max": 3.718792676925659, |
| "mean": 0.01584971882402897, |
| "std": 0.7831407189369202, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_v.weight": { |
| "min": -0.21897639334201813, |
| "max": 0.23756206035614014, |
| "mean": -1.3331029549590312e-05, |
| "std": 0.036302708089351654, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_v.bias": { |
| "min": -0.047262854874134064, |
| "max": 0.05141079053282738, |
| "mean": 0.00047719862777739763, |
| "std": 0.013516917824745178, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_out.0.weight": { |
| "min": -0.2142534703016281, |
| "max": 0.21756578981876373, |
| "mean": 5.647125362884253e-05, |
| "std": 0.03361497074365616, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_out.0.bias": { |
| "min": -0.21157103776931763, |
| "max": 0.23160234093666077, |
| "mean": -0.005100839305669069, |
| "std": 0.06188952922821045, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.3.g": { |
| "min": 0.3621518015861511, |
| "max": 1.1046018600463867, |
| "mean": 0.6994094252586365, |
| "std": 0.0540287047624588, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.4.ff.0.0.weight": { |
| "min": -0.23531383275985718, |
| "max": 0.24546286463737488, |
| "mean": 0.0004634457582142204, |
| "std": 0.0412684828042984, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.4.ff.0.0.bias": { |
| "min": -0.0981631875038147, |
| "max": 0.06831478327512741, |
| "mean": -0.031439878046512604, |
| "std": 0.01814098283648491, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.10.4.ff.2.weight": { |
| "min": -0.30264386534690857, |
| "max": 0.3523462414741516, |
| "mean": -8.214355329982936e-05, |
| "std": 0.0402742475271225, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.10.4.ff.2.bias": { |
| "min": -0.15258914232254028, |
| "max": 0.14998860657215118, |
| "mean": 0.0002567686606198549, |
| "std": 0.023048948496580124, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.1.g": { |
| "min": 0.9985297322273254, |
| "max": 1.007304310798645, |
| "mean": 0.9998952150344849, |
| "std": 0.0011818050406873226, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_q.weight": { |
| "min": -0.031265806406736374, |
| "max": 0.0312703475356102, |
| "mean": -1.928816709551029e-05, |
| "std": 0.018041111528873444, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_q.bias": { |
| "min": -0.031225843355059624, |
| "max": 0.030984507873654366, |
| "mean": -0.001084179850295186, |
| "std": 0.01795078068971634, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_k.weight": { |
| "min": -0.031264401972293854, |
| "max": 0.03126936033368111, |
| "mean": 3.5438486065686448e-06, |
| "std": 0.018041551113128662, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_k.bias": { |
| "min": -0.031160537153482437, |
| "max": 0.031171930953860283, |
| "mean": 0.00033398409141227603, |
| "std": 0.01806296594440937, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_v.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_v.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_out.0.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_out.0.bias": { |
| "min": -0.0006154034635983407, |
| "max": 0.00041452725417912006, |
| "mean": 1.3732544630329357e-06, |
| "std": 0.00013773542013950646, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.3.g": { |
| "min": 0.9981350898742676, |
| "max": 1.0061345100402832, |
| "mean": 1.0003111362457275, |
| "std": 0.0018558463780209422, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.4.ff.0.0.weight": { |
| "min": -0.032749444246292114, |
| "max": 0.03284144029021263, |
| "mean": -6.684205800411291e-06, |
| "std": 0.01804272271692753, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.4.ff.0.0.bias": { |
| "min": -0.03275851905345917, |
| "max": 0.03259003907442093, |
| "mean": -0.00013117710477672517, |
| "std": 0.017956379801034927, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.11.4.ff.2.weight": { |
| "min": -0.0011779898777604103, |
| "max": 0.001155506120994687, |
| "mean": 3.63817605375516e-07, |
| "std": 0.00021426456805784255, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.11.4.ff.2.bias": { |
| "min": -0.0005257476586848497, |
| "max": 0.0003992951533291489, |
| "mean": 2.2647066089120926e-06, |
| "std": 0.00012679416977334768, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.1.g": { |
| "min": 0.3831771910190582, |
| "max": 0.7203002572059631, |
| "mean": 0.5807632207870483, |
| "std": 0.039030127227306366, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_q.weight": { |
| "min": -0.238657608628273, |
| "max": 0.1965981125831604, |
| "mean": 2.6105446522706188e-05, |
| "std": 0.03746547922492027, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_q.bias": { |
| "min": -0.11904074251651764, |
| "max": 0.16665399074554443, |
| "mean": 0.0009819172555580735, |
| "std": 0.027577750384807587, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_k.weight": { |
| "min": -0.2464642971754074, |
| "max": 0.5006471276283264, |
| "mean": -5.0186910812044516e-05, |
| "std": 0.03762289881706238, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_k.bias": { |
| "min": -3.946474552154541, |
| "max": 3.7734150886535645, |
| "mean": -0.0035824859514832497, |
| "std": 0.681806743144989, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_v.weight": { |
| "min": -0.22754359245300293, |
| "max": 0.25217491388320923, |
| "mean": -1.1530558367667254e-05, |
| "std": 0.03743445873260498, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_v.bias": { |
| "min": -0.07182253897190094, |
| "max": 0.0808083638548851, |
| "mean": -0.000513089878950268, |
| "std": 0.015668950974941254, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_out.0.weight": { |
| "min": -0.22810040414333344, |
| "max": 0.2579977512359619, |
| "mean": -2.8758044209098443e-05, |
| "std": 0.03542134538292885, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_out.0.bias": { |
| "min": -0.20080700516700745, |
| "max": 0.2153109759092331, |
| "mean": -0.005534037947654724, |
| "std": 0.0683637484908104, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.3.g": { |
| "min": 0.4053238332271576, |
| "max": 1.1908336877822876, |
| "mean": 0.7380030155181885, |
| "std": 0.05547412484884262, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.4.ff.0.0.weight": { |
| "min": -0.2215573787689209, |
| "max": 0.24592049419879913, |
| "mean": 0.000521159905474633, |
| "std": 0.041335850954055786, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.4.ff.0.0.bias": { |
| "min": -0.10345371812582016, |
| "max": 0.024234607815742493, |
| "mean": -0.032675523310899734, |
| "std": 0.018910475075244904, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.12.4.ff.2.weight": { |
| "min": -0.4504980742931366, |
| "max": 0.42334607243537903, |
| "mean": -0.0004341673047747463, |
| "std": 0.04689619690179825, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.12.4.ff.2.bias": { |
| "min": -0.2517021596431732, |
| "max": 0.4706237316131592, |
| "mean": 0.0032027317211031914, |
| "std": 0.04455312713980675, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.0.weight": { |
| "min": -0.3171294331550598, |
| "max": 0.33335307240486145, |
| "mean": -2.5211516913259402e-05, |
| "std": 0.021287426352500916, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.13.1.g": { |
| "min": 0.3245099186897278, |
| "max": 0.6862163543701172, |
| "mean": 0.5710394978523254, |
| "std": 0.04481911659240723, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_q.weight": { |
| "min": -0.1647811233997345, |
| "max": 0.1747460514307022, |
| "mean": -4.884982990915887e-05, |
| "std": 0.03318081423640251, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_q.bias": { |
| "min": -0.18714644014835358, |
| "max": 0.1431918442249298, |
| "mean": 4.32572269346565e-05, |
| "std": 0.029710030183196068, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_k.weight": { |
| "min": -0.3816376030445099, |
| "max": 0.24683159589767456, |
| "mean": -9.986059922084678e-06, |
| "std": 0.0327618382871151, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_k.bias": { |
| "min": -3.6597650051116943, |
| "max": 3.293627977371216, |
| "mean": -0.014285150915384293, |
| "std": 0.9855467677116394, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_v.weight": { |
| "min": -0.23503181338310242, |
| "max": 0.24772128462791443, |
| "mean": -1.80145725607872e-05, |
| "std": 0.04169723764061928, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_v.bias": { |
| "min": -0.07281922549009323, |
| "max": 0.1548185795545578, |
| "mean": 0.0006660926737822592, |
| "std": 0.025179805234074593, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_out.0.weight": { |
| "min": -0.2665092945098877, |
| "max": 0.2483654022216797, |
| "mean": -1.536182753625326e-05, |
| "std": 0.04013803228735924, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_out.0.bias": { |
| "min": -0.18998344242572784, |
| "max": 0.1951427161693573, |
| "mean": -0.0012352201156318188, |
| "std": 0.06669348478317261, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.3.g": { |
| "min": 0.32910633087158203, |
| "max": 1.0014653205871582, |
| "mean": 0.7192941308021545, |
| "std": 0.05263138189911842, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.4.ff.0.0.weight": { |
| "min": -0.23228409886360168, |
| "max": 0.24597151577472687, |
| "mean": 0.00018284631369169801, |
| "std": 0.040899865329265594, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.4.ff.0.0.bias": { |
| "min": -0.11449356377124786, |
| "max": 0.019026821479201317, |
| "mean": -0.042487140744924545, |
| "std": 0.018874552100896835, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.13.4.ff.2.weight": { |
| "min": -0.39081379771232605, |
| "max": 0.4084374010562897, |
| "mean": -2.154261528630741e-05, |
| "std": 0.04853346198797226, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.13.4.ff.2.bias": { |
| "min": -0.6939337849617004, |
| "max": 0.4130322337150574, |
| "mean": 0.0008477974915876985, |
| "std": 0.06032131612300873, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.0.weight": { |
| "min": -0.0010364858899265528, |
| "max": 1.000504493713379, |
| "mean": 0.00048820534721016884, |
| "std": 0.022089021280407906, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.14.1.g": { |
| "min": 0.997757077217102, |
| "max": 1.0054128170013428, |
| "mean": 0.9996482133865356, |
| "std": 0.0006391748902387917, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_q.weight": { |
| "min": -0.031263865530490875, |
| "max": 0.03126693516969681, |
| "mean": -2.1029807612649165e-05, |
| "std": 0.018032483756542206, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_q.bias": { |
| "min": -0.031225642189383507, |
| "max": 0.031231923028826714, |
| "mean": -0.000677043863106519, |
| "std": 0.017827108502388, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_k.weight": { |
| "min": -0.031264521181583405, |
| "max": 0.03126373142004013, |
| "mean": -8.835060725687072e-06, |
| "std": 0.018031509593129158, |
| "sparsity": 9.5367431640625e-07, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_k.bias": { |
| "min": -0.031228171661496162, |
| "max": 0.031247133389115334, |
| "mean": -0.0007299243006855249, |
| "std": 0.017942015081644058, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_v.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_v.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_out.0.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_out.0.bias": { |
| "min": -0.00041853971197269857, |
| "max": 0.0003325868456158787, |
| "mean": -3.1447550554730697e-06, |
| "std": 0.0001163617562269792, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.3.g": { |
| "min": 0.9978469014167786, |
| "max": 1.0084865093231201, |
| "mean": 1.0002028942108154, |
| "std": 0.002608145819976926, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.4.ff.0.0.weight": { |
| "min": -0.03244105726480484, |
| "max": 0.03237903118133545, |
| "mean": -1.7311865576630225e-06, |
| "std": 0.018027927726507187, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.4.ff.0.0.bias": { |
| "min": -0.032130636274814606, |
| "max": 0.03116563893854618, |
| "mean": -0.0003740063984878361, |
| "std": 0.01804370991885662, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.14.4.ff.2.weight": { |
| "min": -0.0012820950942113996, |
| "max": 0.0011165018659085035, |
| "mean": -8.955282169154088e-07, |
| "std": 0.00020968639000784606, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.14.4.ff.2.bias": { |
| "min": -0.0003423716698307544, |
| "max": 0.00029734382405877113, |
| "mean": -3.7682302718167193e-06, |
| "std": 0.00010476629540789872, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.0.weight": { |
| "min": -0.23448625206947327, |
| "max": 0.272605836391449, |
| "mean": 6.777544967917493e-06, |
| "std": 0.018809372559189796, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.15.1.g": { |
| "min": 0.32130253314971924, |
| "max": 0.6949947476387024, |
| "mean": 0.5816991329193115, |
| "std": 0.04608374834060669, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_q.weight": { |
| "min": -0.18193963170051575, |
| "max": 0.19776132702827454, |
| "mean": -1.1586925211304333e-05, |
| "std": 0.033183593302965164, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_q.bias": { |
| "min": -0.16079005599021912, |
| "max": 0.12958164513111115, |
| "mean": -0.0010761492885649204, |
| "std": 0.03415785729885101, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_k.weight": { |
| "min": -0.33248019218444824, |
| "max": 0.31138068437576294, |
| "mean": -1.0150852176593617e-05, |
| "std": 0.0322343148291111, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_k.bias": { |
| "min": -7.811703681945801, |
| "max": 8.77199935913086, |
| "mean": 0.09351971745491028, |
| "std": 1.6208088397979736, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_v.weight": { |
| "min": -0.23395448923110962, |
| "max": 0.24196705222129822, |
| "mean": 4.150588938500732e-05, |
| "std": 0.04085612669587135, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_v.bias": { |
| "min": -0.07607380300760269, |
| "max": 0.06586506962776184, |
| "mean": 0.0004828007658943534, |
| "std": 0.01941879838705063, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_out.0.weight": { |
| "min": -0.24638578295707703, |
| "max": 0.23463943600654602, |
| "mean": -3.1122344807954505e-06, |
| "std": 0.03943074867129326, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_out.0.bias": { |
| "min": -0.16335651278495789, |
| "max": 0.16123652458190918, |
| "mean": 0.001627025194466114, |
| "std": 0.0652812197804451, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.3.g": { |
| "min": 0.5570001006126404, |
| "max": 0.9467727541923523, |
| "mean": 0.7130157351493835, |
| "std": 0.04052889347076416, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.4.ff.0.0.weight": { |
| "min": -0.22856345772743225, |
| "max": 0.2556101679801941, |
| "mean": -4.5706547098234296e-05, |
| "std": 0.040574584156274796, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.4.ff.0.0.bias": { |
| "min": -0.1351189911365509, |
| "max": 0.02213732711970806, |
| "mean": -0.04135933890938759, |
| "std": 0.018408460542559624, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.15.4.ff.2.weight": { |
| "min": -0.4225083887577057, |
| "max": 0.3927571773529053, |
| "mean": -4.4740827433997765e-06, |
| "std": 0.04778379574418068, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.15.4.ff.2.bias": { |
| "min": -0.6081869602203369, |
| "max": 0.6523037552833557, |
| "mean": 0.0015862288419157267, |
| "std": 0.0568697564303875, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.0.weight": { |
| "min": -0.25164270401000977, |
| "max": 0.32068535685539246, |
| "mean": -6.094380296417512e-06, |
| "std": 0.019612763077020645, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.16.1.g": { |
| "min": 0.3596651554107666, |
| "max": 0.6836386322975159, |
| "mean": 0.5707623958587646, |
| "std": 0.04307318106293678, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_q.weight": { |
| "min": -0.2204994410276413, |
| "max": 0.17691564559936523, |
| "mean": -3.469674993539229e-05, |
| "std": 0.034298643469810486, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_q.bias": { |
| "min": -0.16342805325984955, |
| "max": 0.23329652845859528, |
| "mean": 0.0003627383557613939, |
| "std": 0.03284167870879173, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_k.weight": { |
| "min": -0.2643727660179138, |
| "max": 0.2404099404811859, |
| "mean": -5.280954064801335e-05, |
| "std": 0.03389745578169823, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_k.bias": { |
| "min": -4.859966278076172, |
| "max": 5.0964674949646, |
| "mean": 0.04393793269991875, |
| "std": 1.230094075202942, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_v.weight": { |
| "min": -0.2466304451227188, |
| "max": 0.25078442692756653, |
| "mean": 7.233464566525072e-05, |
| "std": 0.04398677870631218, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_v.bias": { |
| "min": -0.06267692148685455, |
| "max": 0.054532695561647415, |
| "mean": 0.000642440456431359, |
| "std": 0.017191536724567413, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_out.0.weight": { |
| "min": -0.28690966963768005, |
| "max": 0.27239924669265747, |
| "mean": -5.01475042256061e-05, |
| "std": 0.04298488423228264, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_out.0.bias": { |
| "min": -0.1612030565738678, |
| "max": 0.1705736219882965, |
| "mean": -0.0028862706385552883, |
| "std": 0.05929599329829216, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.3.g": { |
| "min": 0.519792377948761, |
| "max": 0.9359998106956482, |
| "mean": 0.7136070132255554, |
| "std": 0.03880562260746956, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.4.ff.0.0.weight": { |
| "min": -0.23828113079071045, |
| "max": 0.24893540143966675, |
| "mean": 0.0004648254835046828, |
| "std": 0.040453579276800156, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.4.ff.0.0.bias": { |
| "min": -0.14526499807834625, |
| "max": 0.041103385388851166, |
| "mean": -0.03970393165946007, |
| "std": 0.02056412398815155, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.16.4.ff.2.weight": { |
| "min": -0.5338290929794312, |
| "max": 0.5837586522102356, |
| "mean": 5.762096407124773e-06, |
| "std": 0.04885942488908768, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.16.4.ff.2.bias": { |
| "min": -0.5195844769477844, |
| "max": 0.4939325749874115, |
| "mean": 0.002366485306993127, |
| "std": 0.05347662419080734, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.0.weight": { |
| "min": -0.273802787065506, |
| "max": 0.3155968487262726, |
| "mean": 2.01077523342974e-06, |
| "std": 0.02004941552877426, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.17.1.g": { |
| "min": 0.36614885926246643, |
| "max": 0.7128685116767883, |
| "mean": 0.5932222604751587, |
| "std": 0.04609934985637665, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_q.weight": { |
| "min": -0.21129509806632996, |
| "max": 0.19956757128238678, |
| "mean": 3.06197653117124e-05, |
| "std": 0.034865960478782654, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_q.bias": { |
| "min": -0.18738499283790588, |
| "max": 0.20401518046855927, |
| "mean": 0.0009546762448735535, |
| "std": 0.031527843326330185, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_k.weight": { |
| "min": -0.2900747060775757, |
| "max": 0.3402419686317444, |
| "mean": -4.711254223366268e-05, |
| "std": 0.03458685800433159, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_k.bias": { |
| "min": -3.881408214569092, |
| "max": 3.3909339904785156, |
| "mean": 0.014485932886600494, |
| "std": 0.8588526248931885, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_v.weight": { |
| "min": -0.22496825456619263, |
| "max": 0.2504532039165497, |
| "mean": -3.7677732507290784e-06, |
| "std": 0.04222949594259262, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_v.bias": { |
| "min": -0.05539275333285332, |
| "max": 0.046729691326618195, |
| "mean": -1.6585952835157514e-05, |
| "std": 0.01585092395544052, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_out.0.weight": { |
| "min": -0.29304519295692444, |
| "max": 0.2904603183269501, |
| "mean": -7.356060450547375e-06, |
| "std": 0.04194435849785805, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_out.0.bias": { |
| "min": -0.12514667212963104, |
| "max": 0.25951117277145386, |
| "mean": -0.003241210710257292, |
| "std": 0.05318121612071991, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.3.g": { |
| "min": 0.4564977288246155, |
| "max": 0.847152590751648, |
| "mean": 0.7056270837783813, |
| "std": 0.03555477410554886, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.4.ff.0.0.weight": { |
| "min": -0.5119993090629578, |
| "max": 0.3481258749961853, |
| "mean": 0.0003428043273743242, |
| "std": 0.04019870236515999, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.4.ff.0.0.bias": { |
| "min": -0.18613915145397186, |
| "max": 0.03958306089043617, |
| "mean": -0.03939869999885559, |
| "std": 0.021371137350797653, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.17.4.ff.2.weight": { |
| "min": -0.5454614162445068, |
| "max": 0.5573456287384033, |
| "mean": -7.15605856385082e-05, |
| "std": 0.05073413625359535, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.17.4.ff.2.bias": { |
| "min": -0.5122924447059631, |
| "max": 0.6649084091186523, |
| "mean": 0.002443553414195776, |
| "std": 0.04954148083925247, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.0.weight": { |
| "min": -0.3326307237148285, |
| "max": 0.2655903100967407, |
| "mean": 3.417561856622342e-06, |
| "std": 0.01938662678003311, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.18.1.g": { |
| "min": 0.32189854979515076, |
| "max": 0.7676428556442261, |
| "mean": 0.6510834097862244, |
| "std": 0.045412834733724594, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_q.weight": { |
| "min": -0.24963903427124023, |
| "max": 0.21975325047969818, |
| "mean": -2.1360538084991276e-06, |
| "std": 0.03650053218007088, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_q.bias": { |
| "min": -0.3272336423397064, |
| "max": 0.2872598171234131, |
| "mean": -0.000690902175847441, |
| "std": 0.038575589656829834, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_k.weight": { |
| "min": -0.3108454644680023, |
| "max": 0.3709103763103485, |
| "mean": 6.501353345811367e-05, |
| "std": 0.036241017282009125, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_k.bias": { |
| "min": -4.7285308837890625, |
| "max": 5.821481227874756, |
| "mean": 0.03798262029886246, |
| "std": 1.4149147272109985, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_v.weight": { |
| "min": -0.22184839844703674, |
| "max": 0.20582044124603271, |
| "mean": -7.514897151850164e-05, |
| "std": 0.04248502478003502, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_v.bias": { |
| "min": -0.07779642939567566, |
| "max": 0.05152571201324463, |
| "mean": -0.0009286667918786407, |
| "std": 0.016416585072875023, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_out.0.weight": { |
| "min": -0.33085039258003235, |
| "max": 0.3292792737483978, |
| "mean": -4.624932898877887e-06, |
| "std": 0.04279141500592232, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_out.0.bias": { |
| "min": -0.2853098511695862, |
| "max": 0.11214260756969452, |
| "mean": -0.001206133747473359, |
| "std": 0.0470227487385273, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.3.g": { |
| "min": 0.48610714077949524, |
| "max": 0.8880516886711121, |
| "mean": 0.7374852299690247, |
| "std": 0.038454823195934296, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.4.ff.0.0.weight": { |
| "min": -0.3625198006629944, |
| "max": 0.274814248085022, |
| "mean": 5.1260511099826545e-05, |
| "std": 0.040644217282533646, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.4.ff.0.0.bias": { |
| "min": -0.24789389967918396, |
| "max": 0.046399183571338654, |
| "mean": -0.0392770953476429, |
| "std": 0.023303059861063957, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.18.4.ff.2.weight": { |
| "min": -0.6276291012763977, |
| "max": 0.5983994007110596, |
| "mean": -6.147650128696114e-05, |
| "std": 0.053116291761398315, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.18.4.ff.2.bias": { |
| "min": -0.7109575271606445, |
| "max": 0.2664211392402649, |
| "mean": 0.0009173410944640636, |
| "std": 0.05126515030860901, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.0.weight": { |
| "min": -0.3433755040168762, |
| "max": 0.30368152260780334, |
| "mean": 1.5963701116561424e-07, |
| "std": 0.01913503371179104, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.19.1.g": { |
| "min": 0.34989097714424133, |
| "max": 0.7839252948760986, |
| "mean": 0.6388714909553528, |
| "std": 0.04933994635939598, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_q.weight": { |
| "min": -0.20559623837471008, |
| "max": 0.20719166100025177, |
| "mean": -5.992387013975531e-05, |
| "std": 0.03769540786743164, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_q.bias": { |
| "min": -0.25907090306282043, |
| "max": 0.2685673236846924, |
| "mean": -0.00039763032691553235, |
| "std": 0.04464223235845566, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_k.weight": { |
| "min": -0.35467270016670227, |
| "max": 0.3229817748069763, |
| "mean": -6.9561183408950455e-06, |
| "std": 0.03720381483435631, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_k.bias": { |
| "min": -5.267129421234131, |
| "max": 4.20892858505249, |
| "mean": -0.02641383744776249, |
| "std": 1.0074299573898315, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_v.weight": { |
| "min": -0.23873740434646606, |
| "max": 0.24359266459941864, |
| "mean": -2.525941454223357e-05, |
| "std": 0.04320967569947243, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_v.bias": { |
| "min": -0.06238892674446106, |
| "max": 0.056785948574543, |
| "mean": 0.0003448878414928913, |
| "std": 0.014156854711472988, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_out.0.weight": { |
| "min": -0.43733710050582886, |
| "max": 0.3737178444862366, |
| "mean": 1.443843029846903e-05, |
| "std": 0.044121142476797104, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_out.0.bias": { |
| "min": -0.09657814353704453, |
| "max": 0.1761663407087326, |
| "mean": -0.0006602209759876132, |
| "std": 0.03516199812293053, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.3.g": { |
| "min": 0.4218268096446991, |
| "max": 1.070821762084961, |
| "mean": 0.7484229803085327, |
| "std": 0.042183347046375275, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.4.ff.0.0.weight": { |
| "min": -0.2668064832687378, |
| "max": 0.2973981201648712, |
| "mean": -7.947084668558091e-05, |
| "std": 0.0408041812479496, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.4.ff.0.0.bias": { |
| "min": -0.18554465472698212, |
| "max": 0.04366818815469742, |
| "mean": -0.03683188557624817, |
| "std": 0.025637373328208923, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.19.4.ff.2.weight": { |
| "min": -0.4576941728591919, |
| "max": 0.4877614378929138, |
| "mean": 4.342636384535581e-05, |
| "std": 0.05420947074890137, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.19.4.ff.2.bias": { |
| "min": -0.28702330589294434, |
| "max": 0.5525704622268677, |
| "mean": -0.0008802832453511655, |
| "std": 0.04786703363060951, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.0.weight": { |
| "min": -0.2927917540073395, |
| "max": 0.32283690571784973, |
| "mean": 6.15146973359515e-06, |
| "std": 0.019968591630458832, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.20.1.g": { |
| "min": 0.2908625304698944, |
| "max": 0.7625526785850525, |
| "mean": 0.650852382183075, |
| "std": 0.052188921719789505, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_q.weight": { |
| "min": -0.24394108355045319, |
| "max": 0.2618102431297302, |
| "mean": -5.981732101645321e-06, |
| "std": 0.0396115742623806, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_q.bias": { |
| "min": -0.267729252576828, |
| "max": 0.20025481283664703, |
| "mean": -0.0008811865700408816, |
| "std": 0.05178782343864441, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_k.weight": { |
| "min": -0.27242225408554077, |
| "max": 0.25395235419273376, |
| "mean": 4.551842721411958e-06, |
| "std": 0.03870858997106552, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_k.bias": { |
| "min": -12.978915214538574, |
| "max": 15.964410781860352, |
| "mean": 0.033282238990068436, |
| "std": 1.9907665252685547, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_v.weight": { |
| "min": -0.20757102966308594, |
| "max": 0.2263997346162796, |
| "mean": -7.214213110273704e-05, |
| "std": 0.04055347666144371, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_v.bias": { |
| "min": -0.06938357651233673, |
| "max": 0.06327643245458603, |
| "mean": 0.00015629694098606706, |
| "std": 0.014746708795428276, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_out.0.weight": { |
| "min": -0.46517401933670044, |
| "max": 0.320604145526886, |
| "mean": 1.968832475540694e-05, |
| "std": 0.040588606148958206, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_out.0.bias": { |
| "min": -0.06422771513462067, |
| "max": 0.11537671089172363, |
| "mean": 0.0011921785771846771, |
| "std": 0.024717185646295547, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.3.g": { |
| "min": 0.3747407793998718, |
| "max": 0.935266375541687, |
| "mean": 0.750953733921051, |
| "std": 0.040338218212127686, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.4.ff.0.0.weight": { |
| "min": -0.2802576720714569, |
| "max": 0.2736368775367737, |
| "mean": -0.00016840582247823477, |
| "std": 0.04099476709961891, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.4.ff.0.0.bias": { |
| "min": -0.19888785481452942, |
| "max": 0.05115103721618652, |
| "mean": -0.0320354662835598, |
| "std": 0.025122012943029404, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.20.4.ff.2.weight": { |
| "min": -0.6593934297561646, |
| "max": 0.5366666913032532, |
| "mean": -4.888622788712382e-05, |
| "std": 0.05284604802727699, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.20.4.ff.2.bias": { |
| "min": -0.19323143362998962, |
| "max": 0.5829473733901978, |
| "mean": -0.0005128738121129572, |
| "std": 0.041099581867456436, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.0.weight": { |
| "min": -0.41776803135871887, |
| "max": 0.3719577491283417, |
| "mean": 6.155986739031505e-06, |
| "std": 0.02162076160311699, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.21.1.g": { |
| "min": 0.21425622701644897, |
| "max": 0.7496172189712524, |
| "mean": 0.6495488882064819, |
| "std": 0.054406262934207916, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_q.weight": { |
| "min": -0.20992936193943024, |
| "max": 0.1961071640253067, |
| "mean": 4.025327507406473e-05, |
| "std": 0.039461661130189896, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_q.bias": { |
| "min": -0.3297625780105591, |
| "max": 0.25971850752830505, |
| "mean": -0.003232162445783615, |
| "std": 0.05629448592662811, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_k.weight": { |
| "min": -0.2059866487979889, |
| "max": 0.25485166907310486, |
| "mean": 5.424032860901207e-05, |
| "std": 0.0385642871260643, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_k.bias": { |
| "min": -6.252347469329834, |
| "max": 6.942240238189697, |
| "mean": 0.0483565516769886, |
| "std": 1.3863071203231812, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_v.weight": { |
| "min": -0.21015214920043945, |
| "max": 0.2306891679763794, |
| "mean": -5.141047040524427e-06, |
| "std": 0.041312482208013535, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_v.bias": { |
| "min": -0.04387219622731209, |
| "max": 0.036041487008333206, |
| "mean": 6.907794158905745e-07, |
| "std": 0.012801294215023518, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_out.0.weight": { |
| "min": -0.3976612091064453, |
| "max": 0.3448401689529419, |
| "mean": -5.557302574743517e-05, |
| "std": 0.04238886013627052, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_out.0.bias": { |
| "min": -0.055147796869277954, |
| "max": 0.06285040080547333, |
| "mean": 0.00036463249125517905, |
| "std": 0.018676765263080597, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.3.g": { |
| "min": 0.3504812717437744, |
| "max": 1.0465654134750366, |
| "mean": 0.7894250154495239, |
| "std": 0.048819400370121, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.4.ff.0.0.weight": { |
| "min": -0.3337032198905945, |
| "max": 0.3862806558609009, |
| "mean": -0.00016953393060248345, |
| "std": 0.04147983714938164, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.4.ff.0.0.bias": { |
| "min": -0.1576613336801529, |
| "max": 0.0590929239988327, |
| "mean": -0.03184548765420914, |
| "std": 0.02515709400177002, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.21.4.ff.2.weight": { |
| "min": -0.6968328952789307, |
| "max": 0.469901978969574, |
| "mean": -8.902316039893776e-05, |
| "std": 0.051792412996292114, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.21.4.ff.2.bias": { |
| "min": -0.24871186912059784, |
| "max": 0.32932594418525696, |
| "mean": -0.0002525809977669269, |
| "std": 0.04146667197346687, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.0.weight": { |
| "min": -0.2871420085430145, |
| "max": 0.35027819871902466, |
| "mean": -2.14410374610452e-06, |
| "std": 0.024236002936959267, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.22.1.g": { |
| "min": 0.19654829800128937, |
| "max": 0.7817674279212952, |
| "mean": 0.6702600121498108, |
| "std": 0.058710552752017975, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_q.weight": { |
| "min": -0.22942325472831726, |
| "max": 0.2315986454486847, |
| "mean": -1.993781370401848e-05, |
| "std": 0.04043741896748543, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_q.bias": { |
| "min": -0.22012899816036224, |
| "max": 0.24119356274604797, |
| "mean": 0.0007787380600348115, |
| "std": 0.0558554045855999, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_k.weight": { |
| "min": -0.21590574085712433, |
| "max": 0.22671166062355042, |
| "mean": -7.169770105974749e-05, |
| "std": 0.03937356546521187, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_k.bias": { |
| "min": -8.917876243591309, |
| "max": 9.080994606018066, |
| "mean": -0.001221940852701664, |
| "std": 1.850203514099121, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_v.weight": { |
| "min": -0.2697039842605591, |
| "max": 0.2592160999774933, |
| "mean": 4.3639320210786536e-05, |
| "std": 0.03840581700205803, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_v.bias": { |
| "min": -0.057751722633838654, |
| "max": 0.05785238742828369, |
| "mean": 0.0003506582579575479, |
| "std": 0.014723116531968117, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_out.0.weight": { |
| "min": -0.26493385434150696, |
| "max": 0.28856679797172546, |
| "mean": -6.166309321997687e-05, |
| "std": 0.0390719398856163, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_out.0.bias": { |
| "min": -0.04392173886299133, |
| "max": 0.037354789674282074, |
| "mean": -9.023403254104778e-05, |
| "std": 0.013362305238842964, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.3.g": { |
| "min": 0.339423805475235, |
| "max": 1.0940691232681274, |
| "mean": 0.8637771010398865, |
| "std": 0.06392761319875717, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.4.ff.0.0.weight": { |
| "min": -0.42344674468040466, |
| "max": 0.419131875038147, |
| "mean": 0.0003126289520878345, |
| "std": 0.04350034520030022, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.4.ff.0.0.bias": { |
| "min": -0.2149772197008133, |
| "max": 0.1709900051355362, |
| "mean": -0.02949333004653454, |
| "std": 0.03195162117481232, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.22.4.ff.2.weight": { |
| "min": -0.6006098985671997, |
| "max": 0.5608966946601868, |
| "mean": -0.00015077056013979018, |
| "std": 0.05344511568546295, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.22.4.ff.2.bias": { |
| "min": -0.17873013019561768, |
| "max": 0.3772476017475128, |
| "mean": 0.001360590336844325, |
| "std": 0.03732540085911751, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.0.weight": { |
| "min": -0.3945685923099518, |
| "max": 0.3692152500152588, |
| "mean": 3.696953717735596e-05, |
| "std": 0.02861735410988331, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.23.1.g": { |
| "min": 0.2903454899787903, |
| "max": 0.8293581604957581, |
| "mean": 0.7055460214614868, |
| "std": 0.0678996667265892, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_q.weight": { |
| "min": -0.9263197779655457, |
| "max": 1.0265021324157715, |
| "mean": -2.6120340407942422e-05, |
| "std": 0.04762475937604904, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_q.bias": { |
| "min": -0.8807425498962402, |
| "max": 0.8172140717506409, |
| "mean": -0.00030884621082805097, |
| "std": 0.09569496661424637, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_k.weight": { |
| "min": -0.2697499990463257, |
| "max": 0.24099533259868622, |
| "mean": -2.2782449377700686e-05, |
| "std": 0.03895165026187897, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_k.bias": { |
| "min": -23.798847198486328, |
| "max": 22.90509796142578, |
| "mean": -0.0919695645570755, |
| "std": 4.078832626342773, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_v.weight": { |
| "min": -0.22787398099899292, |
| "max": 0.24508967995643616, |
| "mean": -2.5707324311952107e-05, |
| "std": 0.038637157529592514, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_v.bias": { |
| "min": -0.06037602946162224, |
| "max": 0.04592515528202057, |
| "mean": -0.00014296159497462213, |
| "std": 0.01469582598656416, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_out.0.weight": { |
| "min": -0.33830153942108154, |
| "max": 0.3749238848686218, |
| "mean": 7.406164513668045e-06, |
| "std": 0.04081294313073158, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_out.0.bias": { |
| "min": -0.04650312289595604, |
| "max": 0.19583187997341156, |
| "mean": 0.00027365636196918786, |
| "std": 0.01356838084757328, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.3.g": { |
| "min": 0.37443819642066956, |
| "max": 1.133804440498352, |
| "mean": 0.8900732398033142, |
| "std": 0.06407663971185684, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.4.ff.0.0.weight": { |
| "min": -0.44803717732429504, |
| "max": 0.5431130528450012, |
| "mean": 2.468598904670216e-05, |
| "std": 0.045565586537122726, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.4.ff.0.0.bias": { |
| "min": -0.22437457740306854, |
| "max": 0.08822718262672424, |
| "mean": -0.03203187137842178, |
| "std": 0.037792954593896866, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.23.4.ff.2.weight": { |
| "min": -0.7267696261405945, |
| "max": 0.6905267834663391, |
| "mean": 3.431630102568306e-05, |
| "std": 0.05177779868245125, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.23.4.ff.2.bias": { |
| "min": -0.17477792501449585, |
| "max": 0.2187574803829193, |
| "mean": 4.145095590502024e-05, |
| "std": 0.03179146349430084, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.0.weight": { |
| "min": -0.34067779779434204, |
| "max": 0.37430673837661743, |
| "mean": 4.298752173781395e-05, |
| "std": 0.034139689058065414, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.24.1.g": { |
| "min": 0.3176548182964325, |
| "max": 1.2885946035385132, |
| "mean": 0.6015164256095886, |
| "std": 0.08361472934484482, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_q.weight": { |
| "min": -0.2833174467086792, |
| "max": 0.2604674696922302, |
| "mean": -2.836968405972584e-06, |
| "std": 0.0359807163476944, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_q.bias": { |
| "min": -0.23581622540950775, |
| "max": 0.20569506287574768, |
| "mean": 0.00023786764359101653, |
| "std": 0.05603973567485809, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_k.weight": { |
| "min": -0.4358341097831726, |
| "max": 0.3255886137485504, |
| "mean": 2.4293056412716396e-05, |
| "std": 0.03413134440779686, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_k.bias": { |
| "min": -5.55698823928833, |
| "max": 7.328329086303711, |
| "mean": -0.007412843406200409, |
| "std": 0.7006030082702637, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_v.weight": { |
| "min": -0.3444630801677704, |
| "max": 0.36411502957344055, |
| "mean": 0.00010332845704397187, |
| "std": 0.04782791808247566, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_v.bias": { |
| "min": -0.07393012195825577, |
| "max": 0.06051904335618019, |
| "mean": 0.0009339260286651552, |
| "std": 0.014950446784496307, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_out.0.weight": { |
| "min": -0.2559783458709717, |
| "max": 0.2868276536464691, |
| "mean": 4.447174433153123e-06, |
| "std": 0.041554734110832214, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_out.0.bias": { |
| "min": -0.05538920685648918, |
| "max": 0.06289947777986526, |
| "mean": 0.0001379675231873989, |
| "std": 0.007169328164309263, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.3.g": { |
| "min": 0.4936121106147766, |
| "max": 1.2250889539718628, |
| "mean": 1.0134532451629639, |
| "std": 0.11746872216463089, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.4.ff.0.0.weight": { |
| "min": -1.0937550067901611, |
| "max": 1.0471408367156982, |
| "mean": -4.919863567920402e-05, |
| "std": 0.05240846797823906, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.4.ff.0.0.bias": { |
| "min": -0.22377891838550568, |
| "max": 0.1730729043483734, |
| "mean": -0.0272611565887928, |
| "std": 0.036391731351614, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.24.4.ff.2.weight": { |
| "min": -0.8862237930297852, |
| "max": 0.9243613481521606, |
| "mean": -0.000145945290569216, |
| "std": 0.05328156799077988, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.24.4.ff.2.bias": { |
| "min": -0.1714460700750351, |
| "max": 0.38068291544914246, |
| "mean": 0.0033734007738530636, |
| "std": 0.03993367776274681, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.0.weight": { |
| "min": -0.7782041430473328, |
| "max": 0.7242955565452576, |
| "mean": 1.8867685867007822e-05, |
| "std": 0.046160779893398285, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.25.1.g": { |
| "min": 0.338652104139328, |
| "max": 1.4327832460403442, |
| "mean": 0.9483770728111267, |
| "std": 0.20681361854076385, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_q.weight": { |
| "min": -1.7457636594772339, |
| "max": 1.704433560371399, |
| "mean": 0.00022719459957443178, |
| "std": 0.15868502855300903, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_q.bias": { |
| "min": -1.2020785808563232, |
| "max": 1.102237343788147, |
| "mean": -0.009557764045894146, |
| "std": 0.20423445105552673, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_k.weight": { |
| "min": -0.4211972653865814, |
| "max": 0.42695388197898865, |
| "mean": 6.460870645241812e-05, |
| "std": 0.04801572859287262, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_k.bias": { |
| "min": -19.786317825317383, |
| "max": 19.58098602294922, |
| "mean": -0.24868716299533844, |
| "std": 4.785643100738525, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_v.weight": { |
| "min": -0.32400673627853394, |
| "max": 0.4385600686073303, |
| "mean": -1.1902460755663924e-05, |
| "std": 0.046161260455846786, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_v.bias": { |
| "min": -0.03411344811320305, |
| "max": 0.03715973347425461, |
| "mean": 0.000642350991256535, |
| "std": 0.012920677661895752, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_out.0.weight": { |
| "min": -0.7036018371582031, |
| "max": 0.6655198335647583, |
| "mean": 4.3310083128744736e-05, |
| "std": 0.057881489396095276, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_out.0.bias": { |
| "min": -0.07239808887243271, |
| "max": 0.06768179684877396, |
| "mean": -0.0001333777909167111, |
| "std": 0.012929531745612621, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.3.g": { |
| "min": 0.38025927543640137, |
| "max": 1.3922340869903564, |
| "mean": 1.0665740966796875, |
| "std": 0.21970504522323608, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.4.ff.0.0.weight": { |
| "min": -0.6167835593223572, |
| "max": 0.7178800106048584, |
| "mean": 0.00011188755161128938, |
| "std": 0.05802030861377716, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.4.ff.0.0.bias": { |
| "min": -0.21991026401519775, |
| "max": 0.22539444267749786, |
| "mean": 0.006232057698071003, |
| "std": 0.049761686474084854, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.25.4.ff.2.weight": { |
| "min": -0.6298967599868774, |
| "max": 0.8895401954650879, |
| "mean": 1.17591189336963e-05, |
| "std": 0.023527691140770912, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.25.4.ff.2.bias": { |
| "min": -0.5078860521316528, |
| "max": 0.47492364048957825, |
| "mean": -0.0030241229105740786, |
| "std": 0.0694146603345871, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.norm_out.g": { |
| "min": 0.5377801060676575, |
| "max": 1.1812876462936401, |
| "mean": 0.7827885746955872, |
| "std": 0.09896031767129898, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.proj_out.weight": { |
| "min": -0.2672213613986969, |
| "max": 0.21292650699615479, |
| "mean": -0.00022339042334351689, |
| "std": 0.05399598926305771, |
| "sparsity": 0.0, |
| "shape": [ |
| 100, |
| 1024 |
| ] |
| }, |
| "transformer.proj_out.bias": { |
| "min": -0.23856915533542633, |
| "max": 0.014836194925010204, |
| "mean": -0.043973349034786224, |
| "std": 0.03437991812825203, |
| "sparsity": 0.0, |
| "shape": [ |
| 100 |
| ] |
| } |
| } |
| } |