| { |
| "layer_types": { |
| "transformer": 391 |
| }, |
| "parameter_counts": { |
| "transformer.time_embed.time_mlp.0.weight": 262144, |
| "transformer.time_embed.time_mlp.0.bias": 1024, |
| "transformer.time_embed.time_mlp.2.weight": 1048576, |
| "transformer.time_embed.time_mlp.2.bias": 1024, |
| "transformer.text_embed.text_embed.weight": 254600, |
| "transformer.input_embed.proj.weight": 307200, |
| "transformer.input_embed.proj.bias": 1024, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, |
| "transformer.layers.0.1.g": 1024, |
| "transformer.layers.0.2.to_q.weight": 1048576, |
| "transformer.layers.0.2.to_q.bias": 1024, |
| "transformer.layers.0.2.to_k.weight": 1048576, |
| "transformer.layers.0.2.to_k.bias": 1024, |
| "transformer.layers.0.2.to_v.weight": 1048576, |
| "transformer.layers.0.2.to_v.bias": 1024, |
| "transformer.layers.0.2.to_out.0.weight": 1048576, |
| "transformer.layers.0.2.to_out.0.bias": 1024, |
| "transformer.layers.0.3.g": 1024, |
| "transformer.layers.0.4.ff.0.0.weight": 4194304, |
| "transformer.layers.0.4.ff.0.0.bias": 4096, |
| "transformer.layers.0.4.ff.2.weight": 4194304, |
| "transformer.layers.0.4.ff.2.bias": 1024, |
| "transformer.layers.1.1.g": 1024, |
| "transformer.layers.1.2.to_q.weight": 1048576, |
| "transformer.layers.1.2.to_q.bias": 1024, |
| "transformer.layers.1.2.to_k.weight": 1048576, |
| "transformer.layers.1.2.to_k.bias": 1024, |
| "transformer.layers.1.2.to_v.weight": 1048576, |
| "transformer.layers.1.2.to_v.bias": 1024, |
| "transformer.layers.1.2.to_out.0.weight": 1048576, |
| "transformer.layers.1.2.to_out.0.bias": 1024, |
| "transformer.layers.1.3.g": 1024, |
| "transformer.layers.1.4.ff.0.0.weight": 4194304, |
| "transformer.layers.1.4.ff.0.0.bias": 4096, |
| "transformer.layers.1.4.ff.2.weight": 4194304, |
| "transformer.layers.1.4.ff.2.bias": 1024, |
| "transformer.layers.2.1.g": 1024, |
| "transformer.layers.2.2.to_q.weight": 1048576, |
| "transformer.layers.2.2.to_q.bias": 1024, |
| "transformer.layers.2.2.to_k.weight": 1048576, |
| "transformer.layers.2.2.to_k.bias": 1024, |
| "transformer.layers.2.2.to_v.weight": 1048576, |
| "transformer.layers.2.2.to_v.bias": 1024, |
| "transformer.layers.2.2.to_out.0.weight": 1048576, |
| "transformer.layers.2.2.to_out.0.bias": 1024, |
| "transformer.layers.2.3.g": 1024, |
| "transformer.layers.2.4.ff.0.0.weight": 4194304, |
| "transformer.layers.2.4.ff.0.0.bias": 4096, |
| "transformer.layers.2.4.ff.2.weight": 4194304, |
| "transformer.layers.2.4.ff.2.bias": 1024, |
| "transformer.layers.3.1.g": 1024, |
| "transformer.layers.3.2.to_q.weight": 1048576, |
| "transformer.layers.3.2.to_q.bias": 1024, |
| "transformer.layers.3.2.to_k.weight": 1048576, |
| "transformer.layers.3.2.to_k.bias": 1024, |
| "transformer.layers.3.2.to_v.weight": 1048576, |
| "transformer.layers.3.2.to_v.bias": 1024, |
| "transformer.layers.3.2.to_out.0.weight": 1048576, |
| "transformer.layers.3.2.to_out.0.bias": 1024, |
| "transformer.layers.3.3.g": 1024, |
| "transformer.layers.3.4.ff.0.0.weight": 4194304, |
| "transformer.layers.3.4.ff.0.0.bias": 4096, |
| "transformer.layers.3.4.ff.2.weight": 4194304, |
| "transformer.layers.3.4.ff.2.bias": 1024, |
| "transformer.layers.4.1.g": 1024, |
| "transformer.layers.4.2.to_q.weight": 1048576, |
| "transformer.layers.4.2.to_q.bias": 1024, |
| "transformer.layers.4.2.to_k.weight": 1048576, |
| "transformer.layers.4.2.to_k.bias": 1024, |
| "transformer.layers.4.2.to_v.weight": 1048576, |
| "transformer.layers.4.2.to_v.bias": 1024, |
| "transformer.layers.4.2.to_out.0.weight": 1048576, |
| "transformer.layers.4.2.to_out.0.bias": 1024, |
| "transformer.layers.4.3.g": 1024, |
| "transformer.layers.4.4.ff.0.0.weight": 4194304, |
| "transformer.layers.4.4.ff.0.0.bias": 4096, |
| "transformer.layers.4.4.ff.2.weight": 4194304, |
| "transformer.layers.4.4.ff.2.bias": 1024, |
| "transformer.layers.5.1.g": 1024, |
| "transformer.layers.5.2.to_q.weight": 1048576, |
| "transformer.layers.5.2.to_q.bias": 1024, |
| "transformer.layers.5.2.to_k.weight": 1048576, |
| "transformer.layers.5.2.to_k.bias": 1024, |
| "transformer.layers.5.2.to_v.weight": 1048576, |
| "transformer.layers.5.2.to_v.bias": 1024, |
| "transformer.layers.5.2.to_out.0.weight": 1048576, |
| "transformer.layers.5.2.to_out.0.bias": 1024, |
| "transformer.layers.5.3.g": 1024, |
| "transformer.layers.5.4.ff.0.0.weight": 4194304, |
| "transformer.layers.5.4.ff.0.0.bias": 4096, |
| "transformer.layers.5.4.ff.2.weight": 4194304, |
| "transformer.layers.5.4.ff.2.bias": 1024, |
| "transformer.layers.6.1.g": 1024, |
| "transformer.layers.6.2.to_q.weight": 1048576, |
| "transformer.layers.6.2.to_q.bias": 1024, |
| "transformer.layers.6.2.to_k.weight": 1048576, |
| "transformer.layers.6.2.to_k.bias": 1024, |
| "transformer.layers.6.2.to_v.weight": 1048576, |
| "transformer.layers.6.2.to_v.bias": 1024, |
| "transformer.layers.6.2.to_out.0.weight": 1048576, |
| "transformer.layers.6.2.to_out.0.bias": 1024, |
| "transformer.layers.6.3.g": 1024, |
| "transformer.layers.6.4.ff.0.0.weight": 4194304, |
| "transformer.layers.6.4.ff.0.0.bias": 4096, |
| "transformer.layers.6.4.ff.2.weight": 4194304, |
| "transformer.layers.6.4.ff.2.bias": 1024, |
| "transformer.layers.7.1.g": 1024, |
| "transformer.layers.7.2.to_q.weight": 1048576, |
| "transformer.layers.7.2.to_q.bias": 1024, |
| "transformer.layers.7.2.to_k.weight": 1048576, |
| "transformer.layers.7.2.to_k.bias": 1024, |
| "transformer.layers.7.2.to_v.weight": 1048576, |
| "transformer.layers.7.2.to_v.bias": 1024, |
| "transformer.layers.7.2.to_out.0.weight": 1048576, |
| "transformer.layers.7.2.to_out.0.bias": 1024, |
| "transformer.layers.7.3.g": 1024, |
| "transformer.layers.7.4.ff.0.0.weight": 4194304, |
| "transformer.layers.7.4.ff.0.0.bias": 4096, |
| "transformer.layers.7.4.ff.2.weight": 4194304, |
| "transformer.layers.7.4.ff.2.bias": 1024, |
| "transformer.layers.8.1.g": 1024, |
| "transformer.layers.8.2.to_q.weight": 1048576, |
| "transformer.layers.8.2.to_q.bias": 1024, |
| "transformer.layers.8.2.to_k.weight": 1048576, |
| "transformer.layers.8.2.to_k.bias": 1024, |
| "transformer.layers.8.2.to_v.weight": 1048576, |
| "transformer.layers.8.2.to_v.bias": 1024, |
| "transformer.layers.8.2.to_out.0.weight": 1048576, |
| "transformer.layers.8.2.to_out.0.bias": 1024, |
| "transformer.layers.8.3.g": 1024, |
| "transformer.layers.8.4.ff.0.0.weight": 4194304, |
| "transformer.layers.8.4.ff.0.0.bias": 4096, |
| "transformer.layers.8.4.ff.2.weight": 4194304, |
| "transformer.layers.8.4.ff.2.bias": 1024, |
| "transformer.layers.9.1.g": 1024, |
| "transformer.layers.9.2.to_q.weight": 1048576, |
| "transformer.layers.9.2.to_q.bias": 1024, |
| "transformer.layers.9.2.to_k.weight": 1048576, |
| "transformer.layers.9.2.to_k.bias": 1024, |
| "transformer.layers.9.2.to_v.weight": 1048576, |
| "transformer.layers.9.2.to_v.bias": 1024, |
| "transformer.layers.9.2.to_out.0.weight": 1048576, |
| "transformer.layers.9.2.to_out.0.bias": 1024, |
| "transformer.layers.9.3.g": 1024, |
| "transformer.layers.9.4.ff.0.0.weight": 4194304, |
| "transformer.layers.9.4.ff.0.0.bias": 4096, |
| "transformer.layers.9.4.ff.2.weight": 4194304, |
| "transformer.layers.9.4.ff.2.bias": 1024, |
| "transformer.layers.10.1.g": 1024, |
| "transformer.layers.10.2.to_q.weight": 1048576, |
| "transformer.layers.10.2.to_q.bias": 1024, |
| "transformer.layers.10.2.to_k.weight": 1048576, |
| "transformer.layers.10.2.to_k.bias": 1024, |
| "transformer.layers.10.2.to_v.weight": 1048576, |
| "transformer.layers.10.2.to_v.bias": 1024, |
| "transformer.layers.10.2.to_out.0.weight": 1048576, |
| "transformer.layers.10.2.to_out.0.bias": 1024, |
| "transformer.layers.10.3.g": 1024, |
| "transformer.layers.10.4.ff.0.0.weight": 4194304, |
| "transformer.layers.10.4.ff.0.0.bias": 4096, |
| "transformer.layers.10.4.ff.2.weight": 4194304, |
| "transformer.layers.10.4.ff.2.bias": 1024, |
| "transformer.layers.11.1.g": 1024, |
| "transformer.layers.11.2.to_q.weight": 1048576, |
| "transformer.layers.11.2.to_q.bias": 1024, |
| "transformer.layers.11.2.to_k.weight": 1048576, |
| "transformer.layers.11.2.to_k.bias": 1024, |
| "transformer.layers.11.2.to_v.weight": 1048576, |
| "transformer.layers.11.2.to_v.bias": 1024, |
| "transformer.layers.11.2.to_out.0.weight": 1048576, |
| "transformer.layers.11.2.to_out.0.bias": 1024, |
| "transformer.layers.11.3.g": 1024, |
| "transformer.layers.11.4.ff.0.0.weight": 4194304, |
| "transformer.layers.11.4.ff.0.0.bias": 4096, |
| "transformer.layers.11.4.ff.2.weight": 4194304, |
| "transformer.layers.11.4.ff.2.bias": 1024, |
| "transformer.layers.12.1.g": 1024, |
| "transformer.layers.12.2.to_q.weight": 1048576, |
| "transformer.layers.12.2.to_q.bias": 1024, |
| "transformer.layers.12.2.to_k.weight": 1048576, |
| "transformer.layers.12.2.to_k.bias": 1024, |
| "transformer.layers.12.2.to_v.weight": 1048576, |
| "transformer.layers.12.2.to_v.bias": 1024, |
| "transformer.layers.12.2.to_out.0.weight": 1048576, |
| "transformer.layers.12.2.to_out.0.bias": 1024, |
| "transformer.layers.12.3.g": 1024, |
| "transformer.layers.12.4.ff.0.0.weight": 4194304, |
| "transformer.layers.12.4.ff.0.0.bias": 4096, |
| "transformer.layers.12.4.ff.2.weight": 4194304, |
| "transformer.layers.12.4.ff.2.bias": 1024, |
| "transformer.layers.13.0.weight": 2097152, |
| "transformer.layers.13.1.g": 1024, |
| "transformer.layers.13.2.to_q.weight": 1048576, |
| "transformer.layers.13.2.to_q.bias": 1024, |
| "transformer.layers.13.2.to_k.weight": 1048576, |
| "transformer.layers.13.2.to_k.bias": 1024, |
| "transformer.layers.13.2.to_v.weight": 1048576, |
| "transformer.layers.13.2.to_v.bias": 1024, |
| "transformer.layers.13.2.to_out.0.weight": 1048576, |
| "transformer.layers.13.2.to_out.0.bias": 1024, |
| "transformer.layers.13.3.g": 1024, |
| "transformer.layers.13.4.ff.0.0.weight": 4194304, |
| "transformer.layers.13.4.ff.0.0.bias": 4096, |
| "transformer.layers.13.4.ff.2.weight": 4194304, |
| "transformer.layers.13.4.ff.2.bias": 1024, |
| "transformer.layers.14.0.weight": 2097152, |
| "transformer.layers.14.1.g": 1024, |
| "transformer.layers.14.2.to_q.weight": 1048576, |
| "transformer.layers.14.2.to_q.bias": 1024, |
| "transformer.layers.14.2.to_k.weight": 1048576, |
| "transformer.layers.14.2.to_k.bias": 1024, |
| "transformer.layers.14.2.to_v.weight": 1048576, |
| "transformer.layers.14.2.to_v.bias": 1024, |
| "transformer.layers.14.2.to_out.0.weight": 1048576, |
| "transformer.layers.14.2.to_out.0.bias": 1024, |
| "transformer.layers.14.3.g": 1024, |
| "transformer.layers.14.4.ff.0.0.weight": 4194304, |
| "transformer.layers.14.4.ff.0.0.bias": 4096, |
| "transformer.layers.14.4.ff.2.weight": 4194304, |
| "transformer.layers.14.4.ff.2.bias": 1024, |
| "transformer.layers.15.0.weight": 2097152, |
| "transformer.layers.15.1.g": 1024, |
| "transformer.layers.15.2.to_q.weight": 1048576, |
| "transformer.layers.15.2.to_q.bias": 1024, |
| "transformer.layers.15.2.to_k.weight": 1048576, |
| "transformer.layers.15.2.to_k.bias": 1024, |
| "transformer.layers.15.2.to_v.weight": 1048576, |
| "transformer.layers.15.2.to_v.bias": 1024, |
| "transformer.layers.15.2.to_out.0.weight": 1048576, |
| "transformer.layers.15.2.to_out.0.bias": 1024, |
| "transformer.layers.15.3.g": 1024, |
| "transformer.layers.15.4.ff.0.0.weight": 4194304, |
| "transformer.layers.15.4.ff.0.0.bias": 4096, |
| "transformer.layers.15.4.ff.2.weight": 4194304, |
| "transformer.layers.15.4.ff.2.bias": 1024, |
| "transformer.layers.16.0.weight": 2097152, |
| "transformer.layers.16.1.g": 1024, |
| "transformer.layers.16.2.to_q.weight": 1048576, |
| "transformer.layers.16.2.to_q.bias": 1024, |
| "transformer.layers.16.2.to_k.weight": 1048576, |
| "transformer.layers.16.2.to_k.bias": 1024, |
| "transformer.layers.16.2.to_v.weight": 1048576, |
| "transformer.layers.16.2.to_v.bias": 1024, |
| "transformer.layers.16.2.to_out.0.weight": 1048576, |
| "transformer.layers.16.2.to_out.0.bias": 1024, |
| "transformer.layers.16.3.g": 1024, |
| "transformer.layers.16.4.ff.0.0.weight": 4194304, |
| "transformer.layers.16.4.ff.0.0.bias": 4096, |
| "transformer.layers.16.4.ff.2.weight": 4194304, |
| "transformer.layers.16.4.ff.2.bias": 1024, |
| "transformer.layers.17.0.weight": 2097152, |
| "transformer.layers.17.1.g": 1024, |
| "transformer.layers.17.2.to_q.weight": 1048576, |
| "transformer.layers.17.2.to_q.bias": 1024, |
| "transformer.layers.17.2.to_k.weight": 1048576, |
| "transformer.layers.17.2.to_k.bias": 1024, |
| "transformer.layers.17.2.to_v.weight": 1048576, |
| "transformer.layers.17.2.to_v.bias": 1024, |
| "transformer.layers.17.2.to_out.0.weight": 1048576, |
| "transformer.layers.17.2.to_out.0.bias": 1024, |
| "transformer.layers.17.3.g": 1024, |
| "transformer.layers.17.4.ff.0.0.weight": 4194304, |
| "transformer.layers.17.4.ff.0.0.bias": 4096, |
| "transformer.layers.17.4.ff.2.weight": 4194304, |
| "transformer.layers.17.4.ff.2.bias": 1024, |
| "transformer.layers.18.0.weight": 2097152, |
| "transformer.layers.18.1.g": 1024, |
| "transformer.layers.18.2.to_q.weight": 1048576, |
| "transformer.layers.18.2.to_q.bias": 1024, |
| "transformer.layers.18.2.to_k.weight": 1048576, |
| "transformer.layers.18.2.to_k.bias": 1024, |
| "transformer.layers.18.2.to_v.weight": 1048576, |
| "transformer.layers.18.2.to_v.bias": 1024, |
| "transformer.layers.18.2.to_out.0.weight": 1048576, |
| "transformer.layers.18.2.to_out.0.bias": 1024, |
| "transformer.layers.18.3.g": 1024, |
| "transformer.layers.18.4.ff.0.0.weight": 4194304, |
| "transformer.layers.18.4.ff.0.0.bias": 4096, |
| "transformer.layers.18.4.ff.2.weight": 4194304, |
| "transformer.layers.18.4.ff.2.bias": 1024, |
| "transformer.layers.19.0.weight": 2097152, |
| "transformer.layers.19.1.g": 1024, |
| "transformer.layers.19.2.to_q.weight": 1048576, |
| "transformer.layers.19.2.to_q.bias": 1024, |
| "transformer.layers.19.2.to_k.weight": 1048576, |
| "transformer.layers.19.2.to_k.bias": 1024, |
| "transformer.layers.19.2.to_v.weight": 1048576, |
| "transformer.layers.19.2.to_v.bias": 1024, |
| "transformer.layers.19.2.to_out.0.weight": 1048576, |
| "transformer.layers.19.2.to_out.0.bias": 1024, |
| "transformer.layers.19.3.g": 1024, |
| "transformer.layers.19.4.ff.0.0.weight": 4194304, |
| "transformer.layers.19.4.ff.0.0.bias": 4096, |
| "transformer.layers.19.4.ff.2.weight": 4194304, |
| "transformer.layers.19.4.ff.2.bias": 1024, |
| "transformer.layers.20.0.weight": 2097152, |
| "transformer.layers.20.1.g": 1024, |
| "transformer.layers.20.2.to_q.weight": 1048576, |
| "transformer.layers.20.2.to_q.bias": 1024, |
| "transformer.layers.20.2.to_k.weight": 1048576, |
| "transformer.layers.20.2.to_k.bias": 1024, |
| "transformer.layers.20.2.to_v.weight": 1048576, |
| "transformer.layers.20.2.to_v.bias": 1024, |
| "transformer.layers.20.2.to_out.0.weight": 1048576, |
| "transformer.layers.20.2.to_out.0.bias": 1024, |
| "transformer.layers.20.3.g": 1024, |
| "transformer.layers.20.4.ff.0.0.weight": 4194304, |
| "transformer.layers.20.4.ff.0.0.bias": 4096, |
| "transformer.layers.20.4.ff.2.weight": 4194304, |
| "transformer.layers.20.4.ff.2.bias": 1024, |
| "transformer.layers.21.0.weight": 2097152, |
| "transformer.layers.21.1.g": 1024, |
| "transformer.layers.21.2.to_q.weight": 1048576, |
| "transformer.layers.21.2.to_q.bias": 1024, |
| "transformer.layers.21.2.to_k.weight": 1048576, |
| "transformer.layers.21.2.to_k.bias": 1024, |
| "transformer.layers.21.2.to_v.weight": 1048576, |
| "transformer.layers.21.2.to_v.bias": 1024, |
| "transformer.layers.21.2.to_out.0.weight": 1048576, |
| "transformer.layers.21.2.to_out.0.bias": 1024, |
| "transformer.layers.21.3.g": 1024, |
| "transformer.layers.21.4.ff.0.0.weight": 4194304, |
| "transformer.layers.21.4.ff.0.0.bias": 4096, |
| "transformer.layers.21.4.ff.2.weight": 4194304, |
| "transformer.layers.21.4.ff.2.bias": 1024, |
| "transformer.layers.22.0.weight": 2097152, |
| "transformer.layers.22.1.g": 1024, |
| "transformer.layers.22.2.to_q.weight": 1048576, |
| "transformer.layers.22.2.to_q.bias": 1024, |
| "transformer.layers.22.2.to_k.weight": 1048576, |
| "transformer.layers.22.2.to_k.bias": 1024, |
| "transformer.layers.22.2.to_v.weight": 1048576, |
| "transformer.layers.22.2.to_v.bias": 1024, |
| "transformer.layers.22.2.to_out.0.weight": 1048576, |
| "transformer.layers.22.2.to_out.0.bias": 1024, |
| "transformer.layers.22.3.g": 1024, |
| "transformer.layers.22.4.ff.0.0.weight": 4194304, |
| "transformer.layers.22.4.ff.0.0.bias": 4096, |
| "transformer.layers.22.4.ff.2.weight": 4194304, |
| "transformer.layers.22.4.ff.2.bias": 1024, |
| "transformer.layers.23.0.weight": 2097152, |
| "transformer.layers.23.1.g": 1024, |
| "transformer.layers.23.2.to_q.weight": 1048576, |
| "transformer.layers.23.2.to_q.bias": 1024, |
| "transformer.layers.23.2.to_k.weight": 1048576, |
| "transformer.layers.23.2.to_k.bias": 1024, |
| "transformer.layers.23.2.to_v.weight": 1048576, |
| "transformer.layers.23.2.to_v.bias": 1024, |
| "transformer.layers.23.2.to_out.0.weight": 1048576, |
| "transformer.layers.23.2.to_out.0.bias": 1024, |
| "transformer.layers.23.3.g": 1024, |
| "transformer.layers.23.4.ff.0.0.weight": 4194304, |
| "transformer.layers.23.4.ff.0.0.bias": 4096, |
| "transformer.layers.23.4.ff.2.weight": 4194304, |
| "transformer.layers.23.4.ff.2.bias": 1024, |
| "transformer.layers.24.0.weight": 2097152, |
| "transformer.layers.24.1.g": 1024, |
| "transformer.layers.24.2.to_q.weight": 1048576, |
| "transformer.layers.24.2.to_q.bias": 1024, |
| "transformer.layers.24.2.to_k.weight": 1048576, |
| "transformer.layers.24.2.to_k.bias": 1024, |
| "transformer.layers.24.2.to_v.weight": 1048576, |
| "transformer.layers.24.2.to_v.bias": 1024, |
| "transformer.layers.24.2.to_out.0.weight": 1048576, |
| "transformer.layers.24.2.to_out.0.bias": 1024, |
| "transformer.layers.24.3.g": 1024, |
| "transformer.layers.24.4.ff.0.0.weight": 4194304, |
| "transformer.layers.24.4.ff.0.0.bias": 4096, |
| "transformer.layers.24.4.ff.2.weight": 4194304, |
| "transformer.layers.24.4.ff.2.bias": 1024, |
| "transformer.layers.25.0.weight": 2097152, |
| "transformer.layers.25.1.g": 1024, |
| "transformer.layers.25.2.to_q.weight": 1048576, |
| "transformer.layers.25.2.to_q.bias": 1024, |
| "transformer.layers.25.2.to_k.weight": 1048576, |
| "transformer.layers.25.2.to_k.bias": 1024, |
| "transformer.layers.25.2.to_v.weight": 1048576, |
| "transformer.layers.25.2.to_v.bias": 1024, |
| "transformer.layers.25.2.to_out.0.weight": 1048576, |
| "transformer.layers.25.2.to_out.0.bias": 1024, |
| "transformer.layers.25.3.g": 1024, |
| "transformer.layers.25.4.ff.0.0.weight": 4194304, |
| "transformer.layers.25.4.ff.0.0.bias": 4096, |
| "transformer.layers.25.4.ff.2.weight": 4194304, |
| "transformer.layers.25.4.ff.2.bias": 1024, |
| "transformer.norm_out.g": 1024, |
| "transformer.proj_out.weight": 102400, |
| "transformer.proj_out.bias": 100 |
| }, |
| "important_layers": [ |
| "transformer.time_embed.time_mlp.0.weight", |
| "transformer.time_embed.time_mlp.2.weight", |
| "transformer.text_embed.text_embed.weight", |
| "transformer.input_embed.proj.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight", |
| "transformer.layers.0.2.to_q.weight", |
| "transformer.layers.0.2.to_k.weight", |
| "transformer.layers.0.2.to_v.weight", |
| "transformer.layers.0.2.to_out.0.weight", |
| "transformer.layers.0.4.ff.0.0.weight", |
| "transformer.layers.0.4.ff.2.weight", |
| "transformer.layers.1.2.to_q.weight", |
| "transformer.layers.1.2.to_k.weight", |
| "transformer.layers.1.2.to_v.weight", |
| "transformer.layers.1.2.to_out.0.weight", |
| "transformer.layers.1.4.ff.0.0.weight", |
| "transformer.layers.1.4.ff.2.weight", |
| "transformer.layers.2.2.to_q.weight", |
| "transformer.layers.2.2.to_k.weight", |
| "transformer.layers.2.2.to_v.weight", |
| "transformer.layers.2.2.to_out.0.weight", |
| "transformer.layers.2.4.ff.0.0.weight", |
| "transformer.layers.2.4.ff.2.weight", |
| "transformer.layers.3.2.to_q.weight", |
| "transformer.layers.3.2.to_k.weight", |
| "transformer.layers.3.2.to_v.weight", |
| "transformer.layers.3.2.to_out.0.weight", |
| "transformer.layers.3.4.ff.0.0.weight", |
| "transformer.layers.3.4.ff.2.weight", |
| "transformer.layers.4.2.to_q.weight", |
| "transformer.layers.4.2.to_k.weight", |
| "transformer.layers.4.2.to_v.weight", |
| "transformer.layers.4.2.to_out.0.weight", |
| "transformer.layers.4.4.ff.0.0.weight", |
| "transformer.layers.4.4.ff.2.weight", |
| "transformer.layers.5.2.to_q.weight", |
| "transformer.layers.5.2.to_k.weight", |
| "transformer.layers.5.2.to_v.weight", |
| "transformer.layers.5.2.to_out.0.weight", |
| "transformer.layers.5.4.ff.0.0.weight", |
| "transformer.layers.5.4.ff.2.weight", |
| "transformer.layers.6.2.to_q.weight", |
| "transformer.layers.6.2.to_k.weight", |
| "transformer.layers.6.2.to_v.weight", |
| "transformer.layers.6.2.to_out.0.weight", |
| "transformer.layers.6.4.ff.0.0.weight", |
| "transformer.layers.6.4.ff.2.weight", |
| "transformer.layers.7.2.to_q.weight", |
| "transformer.layers.7.2.to_k.weight", |
| "transformer.layers.7.2.to_v.weight", |
| "transformer.layers.7.2.to_out.0.weight", |
| "transformer.layers.7.4.ff.0.0.weight", |
| "transformer.layers.7.4.ff.2.weight", |
| "transformer.layers.8.4.ff.0.0.weight", |
| "transformer.layers.8.4.ff.2.weight", |
| "transformer.layers.9.4.ff.0.0.weight", |
| "transformer.layers.9.4.ff.2.weight", |
| "transformer.layers.10.4.ff.0.0.weight", |
| "transformer.layers.10.4.ff.2.weight", |
| "transformer.layers.11.4.ff.0.0.weight", |
| "transformer.layers.11.4.ff.2.weight", |
| "transformer.layers.12.4.ff.0.0.weight", |
| "transformer.layers.12.4.ff.2.weight", |
| "transformer.layers.13.0.weight", |
| "transformer.layers.13.4.ff.0.0.weight", |
| "transformer.layers.13.4.ff.2.weight", |
| "transformer.layers.14.0.weight", |
| "transformer.layers.14.4.ff.0.0.weight", |
| "transformer.layers.14.4.ff.2.weight", |
| "transformer.layers.15.0.weight", |
| "transformer.layers.15.4.ff.0.0.weight", |
| "transformer.layers.15.4.ff.2.weight", |
| "transformer.layers.16.4.ff.0.0.weight", |
| "transformer.layers.16.4.ff.2.weight", |
| "transformer.layers.17.4.ff.0.0.weight", |
| "transformer.layers.17.4.ff.2.weight", |
| "transformer.layers.18.4.ff.0.0.weight", |
| "transformer.layers.18.4.ff.2.weight", |
| "transformer.layers.19.4.ff.0.0.weight", |
| "transformer.layers.19.4.ff.2.weight", |
| "transformer.layers.20.4.ff.0.0.weight", |
| "transformer.layers.20.4.ff.2.weight", |
| "transformer.layers.21.4.ff.0.0.weight", |
| "transformer.layers.21.4.ff.2.weight", |
| "transformer.layers.22.4.ff.0.0.weight", |
| "transformer.layers.22.4.ff.2.weight", |
| "transformer.layers.23.4.ff.0.0.weight", |
| "transformer.layers.23.4.ff.2.weight", |
| "transformer.layers.24.4.ff.0.0.weight", |
| "transformer.layers.24.4.ff.2.weight", |
| "transformer.layers.25.4.ff.0.0.weight", |
| "transformer.layers.25.4.ff.2.weight" |
| ], |
| "bottleneck_layers": [], |
| "recommendations": { |
| "focus_layers": [ |
| "transformer.time_embed.time_mlp.0.weight", |
| "transformer.time_embed.time_mlp.2.weight", |
| "transformer.text_embed.text_embed.weight", |
| "transformer.input_embed.proj.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight", |
| "transformer.layers.0.2.to_q.weight", |
| "transformer.layers.0.2.to_k.weight", |
| "transformer.layers.0.2.to_v.weight", |
| "transformer.layers.0.2.to_out.0.weight", |
| "transformer.layers.0.4.ff.0.0.weight", |
| "transformer.layers.0.4.ff.2.weight", |
| "transformer.layers.1.2.to_q.weight", |
| "transformer.layers.1.2.to_k.weight", |
| "transformer.layers.1.2.to_v.weight", |
| "transformer.layers.1.2.to_out.0.weight", |
| "transformer.layers.1.4.ff.0.0.weight", |
| "transformer.layers.1.4.ff.2.weight", |
| "transformer.layers.2.2.to_q.weight", |
| "transformer.layers.2.2.to_k.weight", |
| "transformer.layers.2.2.to_v.weight", |
| "transformer.layers.2.2.to_out.0.weight", |
| "transformer.layers.2.4.ff.0.0.weight", |
| "transformer.layers.2.4.ff.2.weight", |
| "transformer.layers.3.2.to_q.weight", |
| "transformer.layers.3.2.to_k.weight", |
| "transformer.layers.3.2.to_v.weight", |
| "transformer.layers.3.2.to_out.0.weight", |
| "transformer.layers.3.4.ff.0.0.weight", |
| "transformer.layers.3.4.ff.2.weight", |
| "transformer.layers.4.2.to_q.weight", |
| "transformer.layers.4.2.to_k.weight", |
| "transformer.layers.4.2.to_v.weight", |
| "transformer.layers.4.2.to_out.0.weight", |
| "transformer.layers.4.4.ff.0.0.weight", |
| "transformer.layers.4.4.ff.2.weight", |
| "transformer.layers.5.2.to_q.weight", |
| "transformer.layers.5.2.to_k.weight", |
| "transformer.layers.5.2.to_v.weight", |
| "transformer.layers.5.2.to_out.0.weight", |
| "transformer.layers.5.4.ff.0.0.weight", |
| "transformer.layers.5.4.ff.2.weight", |
| "transformer.layers.6.2.to_q.weight", |
| "transformer.layers.6.2.to_k.weight", |
| "transformer.layers.6.2.to_v.weight", |
| "transformer.layers.6.2.to_out.0.weight", |
| "transformer.layers.6.4.ff.0.0.weight", |
| "transformer.layers.6.4.ff.2.weight", |
| "transformer.layers.7.2.to_q.weight", |
| "transformer.layers.7.2.to_k.weight", |
| "transformer.layers.7.2.to_v.weight", |
| "transformer.layers.7.2.to_out.0.weight", |
| "transformer.layers.7.4.ff.0.0.weight", |
| "transformer.layers.7.4.ff.2.weight", |
| "transformer.layers.8.4.ff.0.0.weight", |
| "transformer.layers.8.4.ff.2.weight", |
| "transformer.layers.9.4.ff.0.0.weight", |
| "transformer.layers.9.4.ff.2.weight", |
| "transformer.layers.10.4.ff.0.0.weight", |
| "transformer.layers.10.4.ff.2.weight", |
| "transformer.layers.11.4.ff.0.0.weight", |
| "transformer.layers.11.4.ff.2.weight", |
| "transformer.layers.12.4.ff.0.0.weight", |
| "transformer.layers.12.4.ff.2.weight", |
| "transformer.layers.13.0.weight", |
| "transformer.layers.13.4.ff.0.0.weight", |
| "transformer.layers.13.4.ff.2.weight", |
| "transformer.layers.14.0.weight", |
| "transformer.layers.14.4.ff.0.0.weight", |
| "transformer.layers.14.4.ff.2.weight", |
| "transformer.layers.15.0.weight", |
| "transformer.layers.15.4.ff.0.0.weight", |
| "transformer.layers.15.4.ff.2.weight", |
| "transformer.layers.16.4.ff.0.0.weight", |
| "transformer.layers.16.4.ff.2.weight", |
| "transformer.layers.17.4.ff.0.0.weight", |
| "transformer.layers.17.4.ff.2.weight", |
| "transformer.layers.18.4.ff.0.0.weight", |
| "transformer.layers.18.4.ff.2.weight", |
| "transformer.layers.19.4.ff.0.0.weight", |
| "transformer.layers.19.4.ff.2.weight", |
| "transformer.layers.20.4.ff.0.0.weight", |
| "transformer.layers.20.4.ff.2.weight", |
| "transformer.layers.21.4.ff.0.0.weight", |
| "transformer.layers.21.4.ff.2.weight", |
| "transformer.layers.22.4.ff.0.0.weight", |
| "transformer.layers.22.4.ff.2.weight", |
| "transformer.layers.23.4.ff.0.0.weight", |
| "transformer.layers.23.4.ff.2.weight", |
| "transformer.layers.24.4.ff.0.0.weight", |
| "transformer.layers.24.4.ff.2.weight", |
| "transformer.layers.25.4.ff.0.0.weight", |
| "transformer.layers.25.4.ff.2.weight" |
| ] |
| }, |
| "total_parameters": 391, |
| "total_elements": 360755948, |
| "param_ranges": { |
| "transformer.time_embed.time_mlp.0.weight": { |
| "min": -0.430247962474823, |
| "max": 0.29814788699150085, |
| "mean": -0.0025456156581640244, |
| "std": 0.042562179267406464, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 256 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.0.bias": { |
| "min": -0.06305033713579178, |
| "max": 0.10756707191467285, |
| "mean": 0.0006329622119665146, |
| "std": 0.03406817466020584, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.2.weight": { |
| "min": -0.4126828908920288, |
| "max": 0.8368642926216125, |
| "mean": -0.00020196933473926038, |
| "std": 0.024113450199365616, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.2.bias": { |
| "min": -0.11526867002248764, |
| "max": 0.3216077983379364, |
| "mean": -0.0009404964512214065, |
| "std": 0.019565371796488762, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.text_embed.text_embed.weight": { |
| "min": -2.7922351360321045, |
| "max": 2.8709537982940674, |
| "mean": -0.0003647372650448233, |
| "std": 0.6154845356941223, |
| "sparsity": 0.0, |
| "shape": [ |
| 2546, |
| 100 |
| ] |
| }, |
| "transformer.input_embed.proj.weight": { |
| "min": -0.27921348810195923, |
| "max": 0.38164129853248596, |
| "mean": 0.0004232236242387444, |
| "std": 0.04274886101484299, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 300 |
| ] |
| }, |
| "transformer.input_embed.proj.bias": { |
| "min": -0.2224942147731781, |
| "max": 0.20972047746181488, |
| "mean": -0.004487486090511084, |
| "std": 0.040916070342063904, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { |
| "min": -0.4284340739250183, |
| "max": 0.47617435455322266, |
| "mean": 3.322187239973573e-06, |
| "std": 0.024511422961950302, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 64, |
| 31 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { |
| "min": -0.32528114318847656, |
| "max": 0.15677402913570404, |
| "mean": -0.04670446366071701, |
| "std": 0.051589105278253555, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { |
| "min": -0.41054657101631165, |
| "max": 0.3546879291534424, |
| "mean": -0.00012705953849945217, |
| "std": 0.023604456335306168, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 64, |
| 31 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { |
| "min": -0.22982755303382874, |
| "max": 0.26271378993988037, |
| "mean": -0.029137738049030304, |
| "std": 0.049353621900081635, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.1.g": { |
| "min": 0.25457319617271423, |
| "max": 0.8201438188552856, |
| "mean": 0.5254908800125122, |
| "std": 0.08082503080368042, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_q.weight": { |
| "min": -0.29710477590560913, |
| "max": 0.26579147577285767, |
| "mean": -0.0004257034743204713, |
| "std": 0.03210267424583435, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_q.bias": { |
| "min": -0.09286229312419891, |
| "max": 0.12479868531227112, |
| "mean": 0.0006487525533884764, |
| "std": 0.025735046714544296, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_k.weight": { |
| "min": -0.290811687707901, |
| "max": 0.2813718020915985, |
| "mean": -7.56493245717138e-05, |
| "std": 0.030931707471609116, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_k.bias": { |
| "min": -5.900395393371582, |
| "max": 5.815171718597412, |
| "mean": -0.009333105757832527, |
| "std": 1.295695185661316, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_v.weight": { |
| "min": -0.4251435399055481, |
| "max": 0.3437366187572479, |
| "mean": 9.79713149718009e-05, |
| "std": 0.02995358221232891, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_v.bias": { |
| "min": -0.028972996398806572, |
| "max": 0.027724435552954674, |
| "mean": -0.00031865754863247275, |
| "std": 0.012574296444654465, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_out.0.weight": { |
| "min": -0.45405057072639465, |
| "max": 0.44834038615226746, |
| "mean": 2.372298331465572e-05, |
| "std": 0.02385387383401394, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_out.0.bias": { |
| "min": -0.08870794624090195, |
| "max": 0.09110292047262192, |
| "mean": 0.0022859524469822645, |
| "std": 0.01951485686004162, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.3.g": { |
| "min": 0.26681551337242126, |
| "max": 1.056317687034607, |
| "mean": 0.5312033891677856, |
| "std": 0.10443911701440811, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.4.ff.0.0.weight": { |
| "min": -0.5745526552200317, |
| "max": 0.6082873940467834, |
| "mean": -0.00043126955279149115, |
| "std": 0.03860025480389595, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.4.ff.0.0.bias": { |
| "min": -0.18273141980171204, |
| "max": 0.04556818678975105, |
| "mean": -0.029461650177836418, |
| "std": 0.042611170560121536, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.0.4.ff.2.weight": { |
| "min": -1.1671894788742065, |
| "max": 1.6339271068572998, |
| "mean": 0.0003239789803046733, |
| "std": 0.027696946635842323, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.0.4.ff.2.bias": { |
| "min": -0.16238771378993988, |
| "max": 0.20571960508823395, |
| "mean": -0.021131085231900215, |
| "std": 0.02794588916003704, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.1.g": { |
| "min": 0.22399598360061646, |
| "max": 0.8438678979873657, |
| "mean": 0.48765647411346436, |
| "std": 0.07522650808095932, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_q.weight": { |
| "min": -0.2555526793003082, |
| "max": 0.305812269449234, |
| "mean": -6.7934306571260095e-06, |
| "std": 0.03347478806972504, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_q.bias": { |
| "min": -0.09538023918867111, |
| "max": 0.11050069332122803, |
| "mean": 6.53832103125751e-05, |
| "std": 0.02696637623012066, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_k.weight": { |
| "min": -0.297147661447525, |
| "max": 0.2961280345916748, |
| "mean": 5.286935265758075e-05, |
| "std": 0.032545968890190125, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_k.bias": { |
| "min": -5.165225028991699, |
| "max": 5.085448741912842, |
| "mean": -0.014597500674426556, |
| "std": 1.1575955152511597, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_v.weight": { |
| "min": -0.3449052572250366, |
| "max": 0.34331217408180237, |
| "mean": 7.911311695352197e-05, |
| "std": 0.03006201609969139, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_v.bias": { |
| "min": -0.03610210865736008, |
| "max": 0.03328812122344971, |
| "mean": -0.0001417656458215788, |
| "std": 0.01303204894065857, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_out.0.weight": { |
| "min": -0.3154510259628296, |
| "max": 0.37501609325408936, |
| "mean": -2.077353019558359e-05, |
| "std": 0.024059347808361053, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_out.0.bias": { |
| "min": -0.10547598451375961, |
| "max": 0.1221047043800354, |
| "mean": -0.0019677607342600822, |
| "std": 0.028854791074991226, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.3.g": { |
| "min": 0.31151488423347473, |
| "max": 1.1208997964859009, |
| "mean": 0.6663015484809875, |
| "std": 0.09774678200483322, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.4.ff.0.0.weight": { |
| "min": -0.8727833032608032, |
| "max": 0.6275414824485779, |
| "mean": 0.001675266888923943, |
| "std": 0.04743880406022072, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.4.ff.0.0.bias": { |
| "min": -0.2714674770832062, |
| "max": 0.03427550569176674, |
| "mean": -0.04661353677511215, |
| "std": 0.040598493069410324, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.1.4.ff.2.weight": { |
| "min": -0.9226045608520508, |
| "max": 0.9647504687309265, |
| "mean": 0.0010200842516496778, |
| "std": 0.040706485509872437, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.1.4.ff.2.bias": { |
| "min": -0.1445719450712204, |
| "max": 0.07502147555351257, |
| "mean": -0.009089105762541294, |
| "std": 0.025694996118545532, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.1.g": { |
| "min": 0.24015086889266968, |
| "max": 0.7130303978919983, |
| "mean": 0.4472612142562866, |
| "std": 0.05932846665382385, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_q.weight": { |
| "min": -0.27250105142593384, |
| "max": 0.29779112339019775, |
| "mean": 9.235942343366332e-06, |
| "std": 0.03546915203332901, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_q.bias": { |
| "min": -0.1193777546286583, |
| "max": 0.11857955157756805, |
| "mean": 0.0007589810993522406, |
| "std": 0.02763049118220806, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_k.weight": { |
| "min": -0.28105634450912476, |
| "max": 0.2798849046230316, |
| "mean": -7.697378896409646e-05, |
| "std": 0.0350995697081089, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_k.bias": { |
| "min": -2.5100622177124023, |
| "max": 2.5220582485198975, |
| "mean": 0.02675231173634529, |
| "std": 0.5868890285491943, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_v.weight": { |
| "min": -0.2211104929447174, |
| "max": 0.27162447571754456, |
| "mean": 2.60172691923799e-06, |
| "std": 0.030733274295926094, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_v.bias": { |
| "min": -0.033548399806022644, |
| "max": 0.03133385255932808, |
| "mean": 0.00011904191342182457, |
| "std": 0.012407796457409859, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_out.0.weight": { |
| "min": -0.23527584969997406, |
| "max": 0.23167696595191956, |
| "mean": 5.708727621822618e-05, |
| "std": 0.025696981698274612, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_out.0.bias": { |
| "min": -0.13586905598640442, |
| "max": 0.12758414447307587, |
| "mean": -0.0054936036467552185, |
| "std": 0.039962876588106155, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.3.g": { |
| "min": 0.35451188683509827, |
| "max": 1.1720999479293823, |
| "mean": 0.710637629032135, |
| "std": 0.10376914590597153, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.4.ff.0.0.weight": { |
| "min": -0.6174948811531067, |
| "max": 0.5544577240943909, |
| "mean": 0.0011600415455177426, |
| "std": 0.04611966758966446, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.4.ff.0.0.bias": { |
| "min": -0.1883939653635025, |
| "max": 0.02492486871778965, |
| "mean": -0.03484141081571579, |
| "std": 0.028610829263925552, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.2.4.ff.2.weight": { |
| "min": -1.131612777709961, |
| "max": 0.9714275002479553, |
| "mean": 0.00035819801269099116, |
| "std": 0.04234758019447327, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.2.4.ff.2.bias": { |
| "min": -0.5980822443962097, |
| "max": 0.06284141540527344, |
| "mean": -0.004877430386841297, |
| "std": 0.028617603704333305, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.1.g": { |
| "min": 0.37526264786720276, |
| "max": 0.9405426383018494, |
| "mean": 0.5925549268722534, |
| "std": 0.0669507160782814, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_q.weight": { |
| "min": -0.39145711064338684, |
| "max": 0.3691279888153076, |
| "mean": 7.120549707906321e-05, |
| "std": 0.03718876466155052, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_q.bias": { |
| "min": -0.11895960569381714, |
| "max": 0.13652607798576355, |
| "mean": 0.0009289687732234597, |
| "std": 0.029236802831292152, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_k.weight": { |
| "min": -0.619219183921814, |
| "max": 0.5088949203491211, |
| "mean": 1.4944693248253316e-05, |
| "std": 0.036442093551158905, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_k.bias": { |
| "min": -8.188663482666016, |
| "max": 8.790773391723633, |
| "mean": -0.10929473489522934, |
| "std": 1.6991605758666992, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_v.weight": { |
| "min": -0.2766683101654053, |
| "max": 0.23983481526374817, |
| "mean": 5.299611802911386e-05, |
| "std": 0.032615721225738525, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_v.bias": { |
| "min": -0.052095651626586914, |
| "max": 0.039515361189842224, |
| "mean": 9.424134623259306e-05, |
| "std": 0.012960628606379032, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_out.0.weight": { |
| "min": -0.23076868057250977, |
| "max": 0.234751895070076, |
| "mean": -2.1736430426244624e-05, |
| "std": 0.029392007738351822, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_out.0.bias": { |
| "min": -0.20435833930969238, |
| "max": 0.10555171221494675, |
| "mean": -0.004022371023893356, |
| "std": 0.03262435272336006, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.3.g": { |
| "min": 0.33977094292640686, |
| "max": 1.0126755237579346, |
| "mean": 0.7008676528930664, |
| "std": 0.0967569425702095, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.4.ff.0.0.weight": { |
| "min": -0.5649488568305969, |
| "max": 0.8331477046012878, |
| "mean": 0.00041524306288920343, |
| "std": 0.04230210557579994, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.4.ff.0.0.bias": { |
| "min": -0.21171551942825317, |
| "max": 0.030433084815740585, |
| "mean": -0.03218771517276764, |
| "std": 0.026509009301662445, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.3.4.ff.2.weight": { |
| "min": -0.7544965744018555, |
| "max": 0.7186921834945679, |
| "mean": -1.2556927686091512e-05, |
| "std": 0.036842044442892075, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.3.4.ff.2.bias": { |
| "min": -0.26356518268585205, |
| "max": 0.10585562884807587, |
| "mean": -0.003026221413165331, |
| "std": 0.028868772089481354, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.1.g": { |
| "min": 0.28427132964134216, |
| "max": 0.6951562762260437, |
| "mean": 0.4995492994785309, |
| "std": 0.046537742018699646, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_q.weight": { |
| "min": -0.27920955419540405, |
| "max": 0.23424308001995087, |
| "mean": -0.00011120487761218101, |
| "std": 0.038762450218200684, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_q.bias": { |
| "min": -0.15435229241847992, |
| "max": 0.126743882894516, |
| "mean": -0.002232551807537675, |
| "std": 0.03338867425918579, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_k.weight": { |
| "min": -0.41404443979263306, |
| "max": 0.6600516438484192, |
| "mean": -1.9756593246711418e-05, |
| "std": 0.03909948095679283, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_k.bias": { |
| "min": -4.238841533660889, |
| "max": 4.723404884338379, |
| "mean": -0.02046278491616249, |
| "std": 1.0078744888305664, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_v.weight": { |
| "min": -0.24500444531440735, |
| "max": 0.20759114623069763, |
| "mean": 4.401802652864717e-05, |
| "std": 0.03396647423505783, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_v.bias": { |
| "min": -0.03457580879330635, |
| "max": 0.04486193135380745, |
| "mean": -1.914246240630746e-05, |
| "std": 0.012628658674657345, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_out.0.weight": { |
| "min": -0.20080946385860443, |
| "max": 0.20593363046646118, |
| "mean": -2.9703282052651048e-05, |
| "std": 0.03102399967610836, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_out.0.bias": { |
| "min": -0.20000168681144714, |
| "max": 0.11336001008749008, |
| "mean": -0.002912652213126421, |
| "std": 0.03451835736632347, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.3.g": { |
| "min": 0.3670476973056793, |
| "max": 1.0570876598358154, |
| "mean": 0.6706215143203735, |
| "std": 0.06639451533555984, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.4.ff.0.0.weight": { |
| "min": -0.39835721254348755, |
| "max": 0.5023353695869446, |
| "mean": -3.849938002531417e-05, |
| "std": 0.0411369614303112, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.4.ff.0.0.bias": { |
| "min": -0.12806333601474762, |
| "max": 0.026793837547302246, |
| "mean": -0.030542662367224693, |
| "std": 0.021876059472560883, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.4.4.ff.2.weight": { |
| "min": -0.4490928053855896, |
| "max": 0.4329548478126526, |
| "mean": 7.997997454367578e-05, |
| "std": 0.03489622473716736, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.4.4.ff.2.bias": { |
| "min": -0.2676912248134613, |
| "max": 0.07277432084083557, |
| "mean": -0.0011054163333028555, |
| "std": 0.023129144683480263, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.1.g": { |
| "min": 0.28743863105773926, |
| "max": 0.6852545738220215, |
| "mean": 0.5245908498764038, |
| "std": 0.047539178282022476, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_q.weight": { |
| "min": -0.22235621511936188, |
| "max": 0.2234710454940796, |
| "mean": 1.5755222193547525e-05, |
| "std": 0.03895283117890358, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_q.bias": { |
| "min": -0.13644249737262726, |
| "max": 0.10925862938165665, |
| "mean": 0.00023633803357370198, |
| "std": 0.029229167848825455, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_k.weight": { |
| "min": -0.3750911056995392, |
| "max": 0.4374293088912964, |
| "mean": -9.469786164117977e-06, |
| "std": 0.03928925842046738, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_k.bias": { |
| "min": -3.8464367389678955, |
| "max": 5.000250816345215, |
| "mean": 0.009745623916387558, |
| "std": 0.8453732132911682, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_v.weight": { |
| "min": -0.22324559092521667, |
| "max": 0.22006931900978088, |
| "mean": -2.64663412963273e-07, |
| "std": 0.03441375494003296, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_v.bias": { |
| "min": -0.04371564835309982, |
| "max": 0.03597109019756317, |
| "mean": -0.0002580236759968102, |
| "std": 0.012081029824912548, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_out.0.weight": { |
| "min": -0.21329627931118011, |
| "max": 0.1888744831085205, |
| "mean": -1.6700443666195497e-05, |
| "std": 0.03154045715928078, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_out.0.bias": { |
| "min": -0.1808258593082428, |
| "max": 0.12078980356454849, |
| "mean": -0.002406290266662836, |
| "std": 0.04127614200115204, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.3.g": { |
| "min": 0.42247915267944336, |
| "max": 0.9420861601829529, |
| "mean": 0.6627910733222961, |
| "std": 0.0568135567009449, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.4.ff.0.0.weight": { |
| "min": -0.3714267611503601, |
| "max": 0.47587329149246216, |
| "mean": -8.246101788245142e-05, |
| "std": 0.04089611768722534, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.4.ff.0.0.bias": { |
| "min": -0.208319753408432, |
| "max": 0.02722310833632946, |
| "mean": -0.03024582751095295, |
| "std": 0.021349623799324036, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.5.4.ff.2.weight": { |
| "min": -0.34010598063468933, |
| "max": 0.7335456013679504, |
| "mean": 8.291324775200337e-05, |
| "std": 0.03477157652378082, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.5.4.ff.2.bias": { |
| "min": -0.2402523010969162, |
| "max": 0.050502024590969086, |
| "mean": -0.0011936500668525696, |
| "std": 0.020464643836021423, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.1.g": { |
| "min": 0.3060189485549927, |
| "max": 0.6537417769432068, |
| "mean": 0.5251810550689697, |
| "std": 0.046129435300827026, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_q.weight": { |
| "min": -0.3043527901172638, |
| "max": 0.2173452079296112, |
| "mean": 6.987799861235544e-05, |
| "std": 0.03949924185872078, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_q.bias": { |
| "min": -0.1495305597782135, |
| "max": 0.13139042258262634, |
| "mean": 0.0003452928503975272, |
| "std": 0.03046758659183979, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_k.weight": { |
| "min": -0.25741448998451233, |
| "max": 0.2021329253911972, |
| "mean": 3.105932046310045e-05, |
| "std": 0.039488501846790314, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_k.bias": { |
| "min": -2.336733102798462, |
| "max": 2.376356840133667, |
| "mean": -0.026247980073094368, |
| "std": 0.44985267519950867, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_v.weight": { |
| "min": -0.18904413282871246, |
| "max": 0.2104651778936386, |
| "mean": 3.720704626175575e-05, |
| "std": 0.03479856252670288, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_v.bias": { |
| "min": -0.03166992589831352, |
| "max": 0.035564228892326355, |
| "mean": -0.00020107123418711126, |
| "std": 0.012294227257370949, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_out.0.weight": { |
| "min": -0.18845464289188385, |
| "max": 0.17046742141246796, |
| "mean": -6.800049595767632e-05, |
| "std": 0.03217524290084839, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_out.0.bias": { |
| "min": -0.13940171897411346, |
| "max": 0.13724905252456665, |
| "mean": -0.002515769563615322, |
| "std": 0.05131084844470024, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.3.g": { |
| "min": 0.4671289920806885, |
| "max": 0.9564934968948364, |
| "mean": 0.6689913272857666, |
| "std": 0.05279172211885452, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.4.ff.0.0.weight": { |
| "min": -0.3243524730205536, |
| "max": 0.30971962213516235, |
| "mean": -1.389088538417127e-06, |
| "std": 0.04095206782221794, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.4.ff.0.0.bias": { |
| "min": -0.12475074827671051, |
| "max": 0.02534548193216324, |
| "mean": -0.03070956841111183, |
| "std": 0.019817529246211052, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.6.4.ff.2.weight": { |
| "min": -0.44013386964797974, |
| "max": 0.44524946808815, |
| "mean": 9.531535761198029e-05, |
| "std": 0.03512435778975487, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.6.4.ff.2.bias": { |
| "min": -0.22465433180332184, |
| "max": 0.05168891325592995, |
| "mean": -0.0011842836393043399, |
| "std": 0.018476232886314392, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.1.g": { |
| "min": 0.3392145037651062, |
| "max": 0.739431619644165, |
| "mean": 0.5587528944015503, |
| "std": 0.04140577092766762, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_q.weight": { |
| "min": -0.2725517153739929, |
| "max": 0.2784435749053955, |
| "mean": 1.987360155908391e-05, |
| "std": 0.04106256738305092, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_q.bias": { |
| "min": -0.13695892691612244, |
| "max": 0.13984902203083038, |
| "mean": 0.00048777679330669343, |
| "std": 0.026632118970155716, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_k.weight": { |
| "min": -0.4907291829586029, |
| "max": 0.35599952936172485, |
| "mean": 8.879909000825137e-05, |
| "std": 0.0407005213201046, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_k.bias": { |
| "min": -2.2975404262542725, |
| "max": 1.7454535961151123, |
| "mean": -0.02108157053589821, |
| "std": 0.5002167820930481, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_v.weight": { |
| "min": -0.2176651507616043, |
| "max": 0.19791799783706665, |
| "mean": -4.056983016198501e-05, |
| "std": 0.03423743695020676, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_v.bias": { |
| "min": -0.04131868854165077, |
| "max": 0.038581475615501404, |
| "mean": -0.00014208082575351, |
| "std": 0.012879491783678532, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_out.0.weight": { |
| "min": -0.17750245332717896, |
| "max": 0.18368542194366455, |
| "mean": 4.755006739287637e-05, |
| "std": 0.031560346484184265, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_out.0.bias": { |
| "min": -0.17995940148830414, |
| "max": 0.18388336896896362, |
| "mean": -0.0022164953406900167, |
| "std": 0.05484570935368538, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.3.g": { |
| "min": 0.4742797613143921, |
| "max": 1.0257062911987305, |
| "mean": 0.6453534960746765, |
| "std": 0.05035950988531113, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.4.ff.0.0.weight": { |
| "min": -0.27185168862342834, |
| "max": 0.3093569278717041, |
| "mean": 0.00011239617015235126, |
| "std": 0.04068810120224953, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.4.ff.0.0.bias": { |
| "min": -0.10582997649908066, |
| "max": 0.02683391235768795, |
| "mean": -0.029520545154809952, |
| "std": 0.01793094538152218, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.7.4.ff.2.weight": { |
| "min": -0.3390536606311798, |
| "max": 0.32923397421836853, |
| "mean": 5.560236604651436e-05, |
| "std": 0.03441813215613365, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.7.4.ff.2.bias": { |
| "min": -0.181716188788414, |
| "max": 0.04217486456036568, |
| "mean": -0.0010700200218707323, |
| "std": 0.017213836312294006, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.1.g": { |
| "min": 0.32544824481010437, |
| "max": 0.6866950988769531, |
| "mean": 0.511271595954895, |
| "std": 0.036954350769519806, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_q.weight": { |
| "min": -0.23384520411491394, |
| "max": 0.22571122646331787, |
| "mean": -3.601049320423044e-05, |
| "std": 0.0391816720366478, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_q.bias": { |
| "min": -0.1153523325920105, |
| "max": 0.1316574662923813, |
| "mean": 0.000150712497998029, |
| "std": 0.029186168685555458, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_k.weight": { |
| "min": -0.35289716720581055, |
| "max": 0.285473108291626, |
| "mean": 7.233719770738389e-06, |
| "std": 0.03925013542175293, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_k.bias": { |
| "min": -4.133274078369141, |
| "max": 3.544353723526001, |
| "mean": -0.011593173258006573, |
| "std": 0.6827409267425537, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_v.weight": { |
| "min": -0.21133771538734436, |
| "max": 0.20911119878292084, |
| "mean": 3.477419522823766e-05, |
| "std": 0.034489333629608154, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_v.bias": { |
| "min": -0.03563081845641136, |
| "max": 0.04807223752140999, |
| "mean": 0.0007964536780491471, |
| "std": 0.012856329791247845, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_out.0.weight": { |
| "min": -0.21064519882202148, |
| "max": 0.19317731261253357, |
| "mean": -1.2986236015422037e-06, |
| "std": 0.03169986233115196, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_out.0.bias": { |
| "min": -0.1866597682237625, |
| "max": 0.17717307806015015, |
| "mean": -0.002846275921911001, |
| "std": 0.05864023044705391, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.3.g": { |
| "min": 0.47464174032211304, |
| "max": 1.0418421030044556, |
| "mean": 0.6514742970466614, |
| "std": 0.049661051481962204, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.4.ff.0.0.weight": { |
| "min": -0.2484884411096573, |
| "max": 0.3291080594062805, |
| "mean": 0.00018062048184219748, |
| "std": 0.040576666593551636, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.4.ff.0.0.bias": { |
| "min": -0.12466001510620117, |
| "max": 0.024652821943163872, |
| "mean": -0.030505184084177017, |
| "std": 0.01760147698223591, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.8.4.ff.2.weight": { |
| "min": -0.42117249965667725, |
| "max": 0.48183169960975647, |
| "mean": 4.90086677018553e-07, |
| "std": 0.03540300950407982, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.8.4.ff.2.bias": { |
| "min": -0.15187376737594604, |
| "max": 0.04340476170182228, |
| "mean": 4.305229231249541e-05, |
| "std": 0.014882412739098072, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.1.g": { |
| "min": 0.31561803817749023, |
| "max": 0.6820628046989441, |
| "mean": 0.5529670715332031, |
| "std": 0.04071620851755142, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_q.weight": { |
| "min": -0.20640292763710022, |
| "max": 0.2199181616306305, |
| "mean": 3.100156754953787e-05, |
| "std": 0.03830336779356003, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_q.bias": { |
| "min": -0.13785934448242188, |
| "max": 0.11272227019071579, |
| "mean": 2.0263127225916833e-05, |
| "std": 0.02582014910876751, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_k.weight": { |
| "min": -0.4027767777442932, |
| "max": 0.37112095952033997, |
| "mean": 2.6220748623018153e-05, |
| "std": 0.038185179233551025, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_k.bias": { |
| "min": -3.7714638710021973, |
| "max": 2.8691656589508057, |
| "mean": 0.0011573480442166328, |
| "std": 0.5169197916984558, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_v.weight": { |
| "min": -0.20294718444347382, |
| "max": 0.1975032389163971, |
| "mean": 2.9508448278647847e-05, |
| "std": 0.03430049493908882, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_v.bias": { |
| "min": -0.050956204533576965, |
| "max": 0.04001324996352196, |
| "mean": -0.0004197848029434681, |
| "std": 0.013423827476799488, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_out.0.weight": { |
| "min": -0.1965385526418686, |
| "max": 0.20179617404937744, |
| "mean": -1.230049292644253e-05, |
| "std": 0.03180824965238571, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_out.0.bias": { |
| "min": -0.1932075023651123, |
| "max": 0.19514988362789154, |
| "mean": -0.002968719694763422, |
| "std": 0.06257235258817673, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.3.g": { |
| "min": 0.3494449555873871, |
| "max": 1.084139108657837, |
| "mean": 0.6672452688217163, |
| "std": 0.055235255509614944, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.4.ff.0.0.weight": { |
| "min": -0.22517867386341095, |
| "max": 0.2515127956867218, |
| "mean": 0.0003590761625673622, |
| "std": 0.04076584428548813, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.4.ff.0.0.bias": { |
| "min": -0.09105702489614487, |
| "max": 0.043770160526037216, |
| "mean": -0.030091021209955215, |
| "std": 0.0176088884472847, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.9.4.ff.2.weight": { |
| "min": -0.3535248339176178, |
| "max": 0.30410754680633545, |
| "mean": -4.392282062326558e-05, |
| "std": 0.03712813928723335, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.9.4.ff.2.bias": { |
| "min": -0.16202455759048462, |
| "max": 0.06354078650474548, |
| "mean": -8.128902118187398e-05, |
| "std": 0.01940615102648735, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.1.g": { |
| "min": 0.34876754879951477, |
| "max": 0.7220309376716614, |
| "mean": 0.5424379706382751, |
| "std": 0.039069268852472305, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_q.weight": { |
| "min": -0.2193686068058014, |
| "max": 0.22314214706420898, |
| "mean": -1.1116904715890996e-05, |
| "std": 0.03923606500029564, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_q.bias": { |
| "min": -0.11840695887804031, |
| "max": 0.1707676649093628, |
| "mean": 0.00028346438193693757, |
| "std": 0.025122247636318207, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_k.weight": { |
| "min": -0.24684838950634003, |
| "max": 0.3010847866535187, |
| "mean": -3.651722363429144e-05, |
| "std": 0.038935575634241104, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_k.bias": { |
| "min": -3.5055902004241943, |
| "max": 3.715036153793335, |
| "mean": 0.01585192233324051, |
| "std": 0.7825286984443665, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_v.weight": { |
| "min": -0.21871182322502136, |
| "max": 0.2376304566860199, |
| "mean": -1.361081376671791e-05, |
| "std": 0.03630790859460831, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_v.bias": { |
| "min": -0.04719124361872673, |
| "max": 0.05140624940395355, |
| "mean": 0.00048010991304181516, |
| "std": 0.013516944833099842, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_out.0.weight": { |
| "min": -0.21404245495796204, |
| "max": 0.21762129664421082, |
| "mean": 5.64762121939566e-05, |
| "std": 0.03361983224749565, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_out.0.bias": { |
| "min": -0.2114625871181488, |
| "max": 0.231521874666214, |
| "mean": -0.005106819327920675, |
| "std": 0.06188430264592171, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.3.g": { |
| "min": 0.36219048500061035, |
| "max": 1.1013058423995972, |
| "mean": 0.6993670463562012, |
| "std": 0.053603965789079666, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.4.ff.0.0.weight": { |
| "min": -0.23459365963935852, |
| "max": 0.2449057400226593, |
| "mean": 0.00046347593888640404, |
| "std": 0.04127476364374161, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.4.ff.0.0.bias": { |
| "min": -0.09808015823364258, |
| "max": 0.06838114559650421, |
| "mean": -0.03143930807709694, |
| "std": 0.01812371425330639, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.10.4.ff.2.weight": { |
| "min": -0.30170318484306335, |
| "max": 0.3515554368495941, |
| "mean": -8.153638191288337e-05, |
| "std": 0.040280573070049286, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.10.4.ff.2.bias": { |
| "min": -0.15233194828033447, |
| "max": 0.14967864751815796, |
| "mean": 0.00025540069327689707, |
| "std": 0.023036718368530273, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.1.g": { |
| "min": 0.99940425157547, |
| "max": 1.0017729997634888, |
| "mean": 1.0002546310424805, |
| "std": 0.0006659556529484689, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_q.weight": { |
| "min": -0.03126639127731323, |
| "max": 0.03126263990998268, |
| "mean": -1.9294351659482345e-05, |
| "std": 0.018044061958789825, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_q.bias": { |
| "min": -0.031232889741659164, |
| "max": 0.03099249303340912, |
| "mean": -0.001084338640794158, |
| "std": 0.017953665927052498, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_k.weight": { |
| "min": -0.031263306736946106, |
| "max": 0.031267084181308746, |
| "mean": 3.548895620042458e-06, |
| "std": 0.018044468015432358, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_k.bias": { |
| "min": -0.03115880861878395, |
| "max": 0.031179169192910194, |
| "mean": 0.0003339822869747877, |
| "std": 0.018065886572003365, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_v.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_v.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_out.0.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_out.0.bias": { |
| "min": -0.00013742789451498538, |
| "max": 0.00015863632143009454, |
| "mean": 2.736554449711548e-07, |
| "std": 4.781073585036211e-05, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.3.g": { |
| "min": 0.9996252655982971, |
| "max": 1.0021158456802368, |
| "mean": 1.0004429817199707, |
| "std": 0.0006555348518304527, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.4.ff.0.0.weight": { |
| "min": -0.03161333501338959, |
| "max": 0.031580716371536255, |
| "mean": -9.014614079205785e-06, |
| "std": 0.018046868965029716, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.4.ff.0.0.bias": { |
| "min": -0.031167982146143913, |
| "max": 0.03145414963364601, |
| "mean": 0.0002899511018767953, |
| "std": 0.01800374686717987, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.11.4.ff.2.weight": { |
| "min": -0.00018904745229519904, |
| "max": 0.00019723534933291376, |
| "mean": 1.0521711502065045e-08, |
| "std": 3.849043423542753e-05, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.11.4.ff.2.bias": { |
| "min": -0.00014144052693154663, |
| "max": 0.00015886471373960376, |
| "mean": 2.7657870305120014e-07, |
| "std": 4.894055746262893e-05, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.1.g": { |
| "min": 0.38299599289894104, |
| "max": 0.7195751070976257, |
| "mean": 0.5807684659957886, |
| "std": 0.03886786475777626, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_q.weight": { |
| "min": -0.23805734515190125, |
| "max": 0.19658388197422028, |
| "mean": 2.6588520995574072e-05, |
| "std": 0.037470221519470215, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_q.bias": { |
| "min": -0.11865263432264328, |
| "max": 0.16607660055160522, |
| "mean": 0.0009905615588650107, |
| "std": 0.027556024491786957, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_k.weight": { |
| "min": -0.24617764353752136, |
| "max": 0.5007338523864746, |
| "mean": -5.0468875997466967e-05, |
| "std": 0.03762808069586754, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_k.bias": { |
| "min": -3.9424328804016113, |
| "max": 3.7695746421813965, |
| "mean": -0.003572134766727686, |
| "std": 0.681464433670044, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_v.weight": { |
| "min": -0.22736115753650665, |
| "max": 0.2514519989490509, |
| "mean": -1.1535179510246962e-05, |
| "std": 0.037439387291669846, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_v.bias": { |
| "min": -0.07172132283449173, |
| "max": 0.08075973391532898, |
| "mean": -0.0005193240358494222, |
| "std": 0.0156661756336689, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_out.0.weight": { |
| "min": -0.2282123565673828, |
| "max": 0.25804591178894043, |
| "mean": -2.8565638785948977e-05, |
| "std": 0.03542618080973625, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_out.0.bias": { |
| "min": -0.20044255256652832, |
| "max": 0.21519678831100464, |
| "mean": -0.005535616539418697, |
| "std": 0.06834741681814194, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.3.g": { |
| "min": 0.40515244007110596, |
| "max": 1.1894633769989014, |
| "mean": 0.7380411624908447, |
| "std": 0.055237166583538055, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.4.ff.0.0.weight": { |
| "min": -0.221146821975708, |
| "max": 0.24604949355125427, |
| "mean": 0.0005211484967730939, |
| "std": 0.041342463344335556, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.4.ff.0.0.bias": { |
| "min": -0.10338832437992096, |
| "max": 0.02417122572660446, |
| "mean": -0.03267121687531471, |
| "std": 0.018886109814047813, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.12.4.ff.2.weight": { |
| "min": -0.4494054913520813, |
| "max": 0.4224247634410858, |
| "mean": -0.0004330066149123013, |
| "std": 0.046903740614652634, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.12.4.ff.2.bias": { |
| "min": -0.2513982057571411, |
| "max": 0.47010472416877747, |
| "mean": 0.003200565231963992, |
| "std": 0.04454652965068817, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.0.weight": { |
| "min": -0.3171570301055908, |
| "max": 0.33336329460144043, |
| "mean": -2.526402022340335e-05, |
| "std": 0.021290859207510948, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.13.1.g": { |
| "min": 0.3245790898799896, |
| "max": 0.6854778528213501, |
| "mean": 0.5710608959197998, |
| "std": 0.04472013935446739, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_q.weight": { |
| "min": -0.16466441750526428, |
| "max": 0.1739748865365982, |
| "mean": -4.8596641136100516e-05, |
| "std": 0.03318468853831291, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_q.bias": { |
| "min": -0.18683482706546783, |
| "max": 0.14287494122982025, |
| "mean": 3.6249548429623246e-05, |
| "std": 0.029692435637116432, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_k.weight": { |
| "min": -0.38059577345848083, |
| "max": 0.24607740342617035, |
| "mean": -9.968647646019235e-06, |
| "std": 0.03276587277650833, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_k.bias": { |
| "min": -3.65606689453125, |
| "max": 3.290353775024414, |
| "mean": -0.01425391435623169, |
| "std": 0.9852582812309265, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_v.weight": { |
| "min": -0.23509447276592255, |
| "max": 0.24749873578548431, |
| "mean": -1.7839809515862726e-05, |
| "std": 0.04170282557606697, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_v.bias": { |
| "min": -0.07275734841823578, |
| "max": 0.15453355014324188, |
| "mean": 0.0006638452177867293, |
| "std": 0.025170044973492622, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_out.0.weight": { |
| "min": -0.26656097173690796, |
| "max": 0.24857115745544434, |
| "mean": -1.5359542885562405e-05, |
| "std": 0.040143173187971115, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_out.0.bias": { |
| "min": -0.18948662281036377, |
| "max": 0.19466565549373627, |
| "mean": -0.0012274996843189, |
| "std": 0.06669430434703827, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.3.g": { |
| "min": 0.3292614817619324, |
| "max": 0.9995094537734985, |
| "mean": 0.7192604541778564, |
| "std": 0.05234057828783989, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.4.ff.0.0.weight": { |
| "min": -0.2315857857465744, |
| "max": 0.24574460089206696, |
| "mean": 0.00018271194130647928, |
| "std": 0.04090625420212746, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.4.ff.0.0.bias": { |
| "min": -0.11421883851289749, |
| "max": 0.018689358606934547, |
| "mean": -0.04248232766985893, |
| "std": 0.018854642286896706, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.13.4.ff.2.weight": { |
| "min": -0.38993996381759644, |
| "max": 0.4073200523853302, |
| "mean": -2.1967953216517344e-05, |
| "std": 0.04854067787528038, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.13.4.ff.2.bias": { |
| "min": -0.6932199001312256, |
| "max": 0.4125868082046509, |
| "mean": 0.0008555519161745906, |
| "std": 0.06029324233531952, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.0.weight": { |
| "min": -0.0002173546963604167, |
| "max": 1.0001165866851807, |
| "mean": 0.0004882887005805969, |
| "std": 0.0220916960388422, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.14.1.g": { |
| "min": 0.9994292855262756, |
| "max": 1.0017839670181274, |
| "mean": 1.000253677368164, |
| "std": 0.000652652932330966, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_q.weight": { |
| "min": -0.03126111254096031, |
| "max": 0.0312650129199028, |
| "mean": -2.1023370209150016e-05, |
| "std": 0.0180354006588459, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_q.bias": { |
| "min": -0.031219881027936935, |
| "max": 0.031236713752150536, |
| "mean": -0.0006771213375031948, |
| "std": 0.017829909920692444, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_k.weight": { |
| "min": -0.03126417100429535, |
| "max": 0.03126959502696991, |
| "mean": -8.83279244590085e-06, |
| "std": 0.018034426495432854, |
| "sparsity": 9.5367431640625e-07, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_k.bias": { |
| "min": -0.03123662993311882, |
| "max": 0.03124932385981083, |
| "mean": -0.0007298794225789607, |
| "std": 0.01794484816491604, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_v.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_v.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_out.0.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_out.0.bias": { |
| "min": -0.00017386232502758503, |
| "max": 0.00014760847261641175, |
| "mean": 3.442557272137492e-06, |
| "std": 5.325600432115607e-05, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.3.g": { |
| "min": 0.9995221495628357, |
| "max": 1.0020443201065063, |
| "mean": 1.0004539489746094, |
| "std": 0.000669351196847856, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.4.ff.0.0.weight": { |
| "min": -0.03147042542695999, |
| "max": 0.03158598765730858, |
| "mean": 5.1154065658920445e-06, |
| "std": 0.018045036122202873, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.4.ff.0.0.bias": { |
| "min": -0.03117763064801693, |
| "max": 0.031405530869960785, |
| "mean": 0.00032266404014080763, |
| "std": 0.0180798526853323, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.14.4.ff.2.weight": { |
| "min": -0.00019398781296331435, |
| "max": 0.0002045449218712747, |
| "mean": 1.7092556845454965e-06, |
| "std": 3.9782767998985946e-05, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.14.4.ff.2.bias": { |
| "min": -0.00017830374417826533, |
| "max": 0.0001471550203859806, |
| "mean": 3.7268218875396997e-06, |
| "std": 5.360128852771595e-05, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.0.weight": { |
| "min": -0.23455342650413513, |
| "max": 0.27251818776130676, |
| "mean": 7.011342859186698e-06, |
| "std": 0.018812235444784164, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.15.1.g": { |
| "min": 0.3213299512863159, |
| "max": 0.6936513781547546, |
| "mean": 0.5816924571990967, |
| "std": 0.045936986804008484, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_q.weight": { |
| "min": -0.18183718621730804, |
| "max": 0.19770397245883942, |
| "mean": -1.1711626939359121e-05, |
| "std": 0.033187560737133026, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_q.bias": { |
| "min": -0.16069863736629486, |
| "max": 0.12950360774993896, |
| "mean": -0.001068056095391512, |
| "std": 0.03414401412010193, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_k.weight": { |
| "min": -0.33220145106315613, |
| "max": 0.31142792105674744, |
| "mean": -1.0354739060858265e-05, |
| "std": 0.03223816305398941, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_k.bias": { |
| "min": -7.803721904754639, |
| "max": 8.76359748840332, |
| "mean": 0.09347197413444519, |
| "std": 1.6197658777236938, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_v.weight": { |
| "min": -0.23378030955791473, |
| "max": 0.24203070998191833, |
| "mean": 4.133610491408035e-05, |
| "std": 0.0408620685338974, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_v.bias": { |
| "min": -0.07593037933111191, |
| "max": 0.06580135226249695, |
| "mean": 0.0004787116195075214, |
| "std": 0.019414879381656647, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_out.0.weight": { |
| "min": -0.24592415988445282, |
| "max": 0.2340637594461441, |
| "mean": -2.9871353035559878e-06, |
| "std": 0.03943677991628647, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_out.0.bias": { |
| "min": -0.1628992110490799, |
| "max": 0.16083794832229614, |
| "mean": 0.001633270876482129, |
| "std": 0.06527844816446304, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.3.g": { |
| "min": 0.5569714307785034, |
| "max": 0.9439458250999451, |
| "mean": 0.7129694819450378, |
| "std": 0.04013355076313019, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.4.ff.0.0.weight": { |
| "min": -0.2286839783191681, |
| "max": 0.2551024854183197, |
| "mean": -4.545085539575666e-05, |
| "std": 0.04058132320642471, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.4.ff.0.0.bias": { |
| "min": -0.13476935029029846, |
| "max": 0.02225329726934433, |
| "mean": -0.04135678708553314, |
| "std": 0.018384402617812157, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.15.4.ff.2.weight": { |
| "min": -0.42168760299682617, |
| "max": 0.39237409830093384, |
| "mean": -4.401172191137448e-06, |
| "std": 0.04779110476374626, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.15.4.ff.2.bias": { |
| "min": -0.6073517799377441, |
| "max": 0.6513891220092773, |
| "mean": 0.0015880158171057701, |
| "std": 0.05683854594826698, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.0.weight": { |
| "min": -0.2518226206302643, |
| "max": 0.3207785189151764, |
| "mean": -6.094680884416448e-06, |
| "std": 0.019615668803453445, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.16.1.g": { |
| "min": 0.3598737120628357, |
| "max": 0.6824128031730652, |
| "mean": 0.5707628726959229, |
| "std": 0.0429723858833313, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_q.weight": { |
| "min": -0.22058245539665222, |
| "max": 0.1771002560853958, |
| "mean": -3.480628220131621e-05, |
| "std": 0.0343024767935276, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_q.bias": { |
| "min": -0.16346584260463715, |
| "max": 0.23297329246997833, |
| "mean": 0.000366326654329896, |
| "std": 0.03285832703113556, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_k.weight": { |
| "min": -0.2638060748577118, |
| "max": 0.23985332250595093, |
| "mean": -5.253252311376855e-05, |
| "std": 0.033901575952768326, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_k.bias": { |
| "min": -4.8552327156066895, |
| "max": 5.091460227966309, |
| "mean": 0.04388260096311569, |
| "std": 1.2293205261230469, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_v.weight": { |
| "min": -0.24656128883361816, |
| "max": 0.2505475580692291, |
| "mean": 7.217615348054096e-05, |
| "std": 0.043992768973112106, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_v.bias": { |
| "min": -0.0626230239868164, |
| "max": 0.054548561573028564, |
| "mean": 0.0006508217193186283, |
| "std": 0.017192188650369644, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_out.0.weight": { |
| "min": -0.2865524888038635, |
| "max": 0.2719300389289856, |
| "mean": -4.991707464796491e-05, |
| "std": 0.04299106448888779, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_out.0.bias": { |
| "min": -0.1607704609632492, |
| "max": 0.17038598656654358, |
| "mean": -0.0028860813472419977, |
| "std": 0.05928485840559006, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.3.g": { |
| "min": 0.5196844339370728, |
| "max": 0.9328820705413818, |
| "mean": 0.7135865688323975, |
| "std": 0.03841733559966087, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.4.ff.0.0.weight": { |
| "min": -0.23817408084869385, |
| "max": 0.2493610382080078, |
| "mean": 0.00046480720629915595, |
| "std": 0.04046126455068588, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.4.ff.0.0.bias": { |
| "min": -0.14443805813789368, |
| "max": 0.04147465527057648, |
| "mean": -0.03969287499785423, |
| "std": 0.020544789731502533, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.16.4.ff.2.weight": { |
| "min": -0.5328277945518494, |
| "max": 0.5829682350158691, |
| "mean": 6.036185368429869e-06, |
| "std": 0.048868391662836075, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.16.4.ff.2.bias": { |
| "min": -0.5192180871963501, |
| "max": 0.49342840909957886, |
| "mean": 0.0023608917836099863, |
| "std": 0.05344958230853081, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.0.weight": { |
| "min": -0.2736181318759918, |
| "max": 0.31526556611061096, |
| "mean": 1.8652735889190808e-06, |
| "std": 0.020052799955010414, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.17.1.g": { |
| "min": 0.36623507738113403, |
| "max": 0.7115861177444458, |
| "mean": 0.5932326316833496, |
| "std": 0.045942164957523346, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_q.weight": { |
| "min": -0.21099260449409485, |
| "max": 0.19959695637226105, |
| "mean": 3.07829977828078e-05, |
| "std": 0.034868910908699036, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_q.bias": { |
| "min": -0.18723583221435547, |
| "max": 0.20388372242450714, |
| "mean": 0.000956192088779062, |
| "std": 0.031518690288066864, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_k.weight": { |
| "min": -0.28975075483322144, |
| "max": 0.3398789167404175, |
| "mean": -4.732892557512969e-05, |
| "std": 0.034589968621730804, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_k.bias": { |
| "min": -3.877439260482788, |
| "max": 3.3875346183776855, |
| "mean": 0.014458952471613884, |
| "std": 0.858471155166626, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_v.weight": { |
| "min": -0.22435642778873444, |
| "max": 0.249828040599823, |
| "mean": -4.0124336919689085e-06, |
| "std": 0.04223557561635971, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_v.bias": { |
| "min": -0.05512487143278122, |
| "max": 0.046701643615961075, |
| "mean": -1.9162820535711944e-05, |
| "std": 0.015846921131014824, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_out.0.weight": { |
| "min": -0.29301708936691284, |
| "max": 0.29095572233200073, |
| "mean": -7.334054771490628e-06, |
| "std": 0.04195055365562439, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_out.0.bias": { |
| "min": -0.12482845038175583, |
| "max": 0.25941941142082214, |
| "mean": -0.003237831173464656, |
| "std": 0.05315971001982689, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.3.g": { |
| "min": 0.4561736285686493, |
| "max": 0.8445789813995361, |
| "mean": 0.7056531310081482, |
| "std": 0.035228051245212555, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.4.ff.0.0.weight": { |
| "min": -0.5114014148712158, |
| "max": 0.348456472158432, |
| "mean": 0.00034256701474078, |
| "std": 0.04020610451698303, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.4.ff.0.0.bias": { |
| "min": -0.18698255717754364, |
| "max": 0.03949001431465149, |
| "mean": -0.03939007595181465, |
| "std": 0.0213507991284132, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.17.4.ff.2.weight": { |
| "min": -0.544358491897583, |
| "max": 0.5564395785331726, |
| "mean": -7.145745621528476e-05, |
| "std": 0.05074309930205345, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.17.4.ff.2.bias": { |
| "min": -0.5117879509925842, |
| "max": 0.6644083857536316, |
| "mean": 0.002445152960717678, |
| "std": 0.04953145608305931, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.0.weight": { |
| "min": -0.33249062299728394, |
| "max": 0.2656247019767761, |
| "mean": 3.6327573980088346e-06, |
| "std": 0.019390461966395378, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.18.1.g": { |
| "min": 0.3221387565135956, |
| "max": 0.7663495540618896, |
| "mean": 0.651084840297699, |
| "std": 0.04530828446149826, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_q.weight": { |
| "min": -0.24955259263515472, |
| "max": 0.21952223777770996, |
| "mean": -2.4627406673971564e-06, |
| "std": 0.0365021638572216, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_q.bias": { |
| "min": -0.32713782787323, |
| "max": 0.2872367203235626, |
| "mean": -0.0006778471870347857, |
| "std": 0.03855384141206741, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_k.weight": { |
| "min": -0.31010347604751587, |
| "max": 0.36993831396102905, |
| "mean": 6.482718890765682e-05, |
| "std": 0.036242760717868805, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_k.bias": { |
| "min": -4.71769905090332, |
| "max": 5.807940483093262, |
| "mean": 0.03795948997139931, |
| "std": 1.4132622480392456, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_v.weight": { |
| "min": -0.2217160314321518, |
| "max": 0.20588469505310059, |
| "mean": -7.503203232772648e-05, |
| "std": 0.04249139502644539, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_v.bias": { |
| "min": -0.07754088938236237, |
| "max": 0.051487792283296585, |
| "mean": -0.0009253682801499963, |
| "std": 0.016408486291766167, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_out.0.weight": { |
| "min": -0.3308248519897461, |
| "max": 0.32916712760925293, |
| "mean": -4.993749826098792e-06, |
| "std": 0.042798057198524475, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_out.0.bias": { |
| "min": -0.2850324213504791, |
| "max": 0.1117776408791542, |
| "mean": -0.0012074043042957783, |
| "std": 0.047010280191898346, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.3.g": { |
| "min": 0.4863123297691345, |
| "max": 0.8869433403015137, |
| "mean": 0.7375507354736328, |
| "std": 0.03823651745915413, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.4.ff.0.0.weight": { |
| "min": -0.36125612258911133, |
| "max": 0.27433156967163086, |
| "mean": 5.119972047396004e-05, |
| "std": 0.04065272584557533, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.4.ff.0.0.bias": { |
| "min": -0.2477303296327591, |
| "max": 0.04647788032889366, |
| "mean": -0.03926857188344002, |
| "std": 0.023257533088326454, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.18.4.ff.2.weight": { |
| "min": -0.6263415217399597, |
| "max": 0.5970607399940491, |
| "mean": -6.0351769207045436e-05, |
| "std": 0.05312627553939819, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.18.4.ff.2.bias": { |
| "min": -0.709812343120575, |
| "max": 0.2658604085445404, |
| "mean": 0.0009171634446829557, |
| "std": 0.051236364990472794, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.0.weight": { |
| "min": -0.3433721363544464, |
| "max": 0.30349576473236084, |
| "mean": 1.867878154371283e-07, |
| "std": 0.019139809533953667, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.19.1.g": { |
| "min": 0.34990525245666504, |
| "max": 0.7829033136367798, |
| "mean": 0.6388983726501465, |
| "std": 0.04923005402088165, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_q.weight": { |
| "min": -0.20573130249977112, |
| "max": 0.2069031298160553, |
| "mean": -5.999910717946477e-05, |
| "std": 0.037698354572057724, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_q.bias": { |
| "min": -0.25860944390296936, |
| "max": 0.2683144211769104, |
| "mean": -0.00040654174517840147, |
| "std": 0.04462500661611557, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_k.weight": { |
| "min": -0.3541562557220459, |
| "max": 0.3225262761116028, |
| "mean": -7.357165486610029e-06, |
| "std": 0.03720669820904732, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_k.bias": { |
| "min": -5.261901378631592, |
| "max": 4.204929351806641, |
| "mean": -0.026422729715704918, |
| "std": 1.0068349838256836, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_v.weight": { |
| "min": -0.23875762522220612, |
| "max": 0.24374397099018097, |
| "mean": -2.557489278842695e-05, |
| "std": 0.04321581870317459, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_v.bias": { |
| "min": -0.06234561279416084, |
| "max": 0.05673680081963539, |
| "mean": 0.00034723637509159744, |
| "std": 0.01415068656206131, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_out.0.weight": { |
| "min": -0.4374503195285797, |
| "max": 0.37361523509025574, |
| "mean": 1.4507659216178581e-05, |
| "std": 0.044127773493528366, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_out.0.bias": { |
| "min": -0.09634225070476532, |
| "max": 0.17621064186096191, |
| "mean": -0.0006586947711184621, |
| "std": 0.035146258771419525, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.3.g": { |
| "min": 0.421725332736969, |
| "max": 1.0694254636764526, |
| "mean": 0.7485451698303223, |
| "std": 0.04206714406609535, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.4.ff.0.0.weight": { |
| "min": -0.2659734785556793, |
| "max": 0.2969002425670624, |
| "mean": -7.885815284680575e-05, |
| "std": 0.04081321880221367, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.4.ff.0.0.bias": { |
| "min": -0.18494504690170288, |
| "max": 0.043268244713544846, |
| "mean": -0.03681334853172302, |
| "std": 0.025581398978829384, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.19.4.ff.2.weight": { |
| "min": -0.4577294886112213, |
| "max": 0.4868638217449188, |
| "mean": 4.411918780533597e-05, |
| "std": 0.054221056401729584, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.19.4.ff.2.bias": { |
| "min": -0.286346971988678, |
| "max": 0.5518361330032349, |
| "mean": -0.0008815097389742732, |
| "std": 0.04783621430397034, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.0.weight": { |
| "min": -0.29267972707748413, |
| "max": 0.3227570652961731, |
| "mean": 6.020641194481868e-06, |
| "std": 0.019972950220108032, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.20.1.g": { |
| "min": 0.2912514805793762, |
| "max": 0.7601991891860962, |
| "mean": 0.6508588194847107, |
| "std": 0.05212089791893959, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_q.weight": { |
| "min": -0.2437000423669815, |
| "max": 0.26162612438201904, |
| "mean": -5.554972631216515e-06, |
| "std": 0.039614368230104446, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_q.bias": { |
| "min": -0.2675025463104248, |
| "max": 0.20013028383255005, |
| "mean": -0.0008774266461841762, |
| "std": 0.05176888778805733, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_k.weight": { |
| "min": -0.27221566438674927, |
| "max": 0.25374382734298706, |
| "mean": 5.006398168916348e-06, |
| "std": 0.03871097415685654, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_k.bias": { |
| "min": -12.966026306152344, |
| "max": 15.947824478149414, |
| "mean": 0.03323008120059967, |
| "std": 1.989342451095581, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_v.weight": { |
| "min": -0.20656642317771912, |
| "max": 0.22588562965393066, |
| "mean": -7.24760175216943e-05, |
| "std": 0.040559086948633194, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_v.bias": { |
| "min": -0.06937043368816376, |
| "max": 0.06317680329084396, |
| "mean": 0.000156470196088776, |
| "std": 0.014745255932211876, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_out.0.weight": { |
| "min": -0.46550098061561584, |
| "max": 0.32025203108787537, |
| "mean": 1.966371200978756e-05, |
| "std": 0.04059458151459694, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_out.0.bias": { |
| "min": -0.06405901163816452, |
| "max": 0.11548515409231186, |
| "mean": 0.0011954698711633682, |
| "std": 0.024709828197956085, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.3.g": { |
| "min": 0.37493425607681274, |
| "max": 0.9319035410881042, |
| "mean": 0.7510924339294434, |
| "std": 0.0401909314095974, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.4.ff.0.0.weight": { |
| "min": -0.27919864654541016, |
| "max": 0.273176908493042, |
| "mean": -0.0001684028684394434, |
| "std": 0.041004277765750885, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.4.ff.0.0.bias": { |
| "min": -0.19848693907260895, |
| "max": 0.05126062035560608, |
| "mean": -0.032024383544921875, |
| "std": 0.025078732520341873, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.20.4.ff.2.weight": { |
| "min": -0.6584433317184448, |
| "max": 0.5357221961021423, |
| "mean": -4.880438791587949e-05, |
| "std": 0.05285734310746193, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.20.4.ff.2.bias": { |
| "min": -0.19274669885635376, |
| "max": 0.5823217630386353, |
| "mean": -0.0005133696831762791, |
| "std": 0.041087545454502106, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.0.weight": { |
| "min": -0.4175601005554199, |
| "max": 0.37188875675201416, |
| "mean": 6.479064722952899e-06, |
| "std": 0.021628154441714287, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.21.1.g": { |
| "min": 0.2145100235939026, |
| "max": 0.7467755675315857, |
| "mean": 0.6495225429534912, |
| "std": 0.054342612624168396, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_q.weight": { |
| "min": -0.20954373478889465, |
| "max": 0.19555190205574036, |
| "mean": 4.0139111661119387e-05, |
| "std": 0.03946155682206154, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_q.bias": { |
| "min": -0.32948848605155945, |
| "max": 0.2595402002334595, |
| "mean": -0.0032335962168872356, |
| "std": 0.05627242103219032, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_k.weight": { |
| "min": -0.2058991640806198, |
| "max": 0.2547155022621155, |
| "mean": 5.40805995115079e-05, |
| "std": 0.03856402263045311, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_k.bias": { |
| "min": -6.243993759155273, |
| "max": 6.932845115661621, |
| "mean": 0.048340216279029846, |
| "std": 1.385199785232544, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_v.weight": { |
| "min": -0.20978908240795135, |
| "max": 0.23056426644325256, |
| "mean": -4.742521468870109e-06, |
| "std": 0.04131828248500824, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_v.bias": { |
| "min": -0.04378769174218178, |
| "max": 0.0359850712120533, |
| "mean": -6.261238013394177e-06, |
| "std": 0.012797025963664055, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_out.0.weight": { |
| "min": -0.39764100313186646, |
| "max": 0.34504374861717224, |
| "mean": -5.53192148800008e-05, |
| "std": 0.0423952080309391, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_out.0.bias": { |
| "min": -0.05508939549326897, |
| "max": 0.06280933320522308, |
| "mean": 0.0003585501981433481, |
| "std": 0.018675601109862328, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.3.g": { |
| "min": 0.3507746756076813, |
| "max": 1.0452601909637451, |
| "mean": 0.7896535992622375, |
| "std": 0.04874108359217644, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.4.ff.0.0.weight": { |
| "min": -0.3336845338344574, |
| "max": 0.38642778992652893, |
| "mean": -0.00016908602265175432, |
| "std": 0.041490186005830765, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.4.ff.0.0.bias": { |
| "min": -0.1574612259864807, |
| "max": 0.05922037363052368, |
| "mean": -0.03182276338338852, |
| "std": 0.025103161111474037, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.21.4.ff.2.weight": { |
| "min": -0.6963140964508057, |
| "max": 0.46921107172966003, |
| "mean": -8.656673162477091e-05, |
| "std": 0.05180606618523598, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.21.4.ff.2.bias": { |
| "min": -0.24794527888298035, |
| "max": 0.3287939429283142, |
| "mean": -0.00025959889171645045, |
| "std": 0.04145469143986702, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.0.weight": { |
| "min": -0.28705933690071106, |
| "max": 0.3503926694393158, |
| "mean": -2.8700230814138195e-06, |
| "std": 0.024241898208856583, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.22.1.g": { |
| "min": 0.19675415754318237, |
| "max": 0.7791337370872498, |
| "mean": 0.6702517867088318, |
| "std": 0.05866968631744385, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_q.weight": { |
| "min": -0.22908443212509155, |
| "max": 0.2313445806503296, |
| "mean": -2.062591738649644e-05, |
| "std": 0.040440406650304794, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_q.bias": { |
| "min": -0.22002485394477844, |
| "max": 0.24098847806453705, |
| "mean": 0.00078444869723171, |
| "std": 0.0558483712375164, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_k.weight": { |
| "min": -0.21667493879795074, |
| "max": 0.22645404934883118, |
| "mean": -7.211311458377168e-05, |
| "std": 0.03937484323978424, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_k.bias": { |
| "min": -8.906242370605469, |
| "max": 9.069114685058594, |
| "mean": -0.0012534279376268387, |
| "std": 1.8484383821487427, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_v.weight": { |
| "min": -0.2695206105709076, |
| "max": 0.2589607834815979, |
| "mean": 4.368612644611858e-05, |
| "std": 0.03841120004653931, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_v.bias": { |
| "min": -0.05792244151234627, |
| "max": 0.05800376832485199, |
| "mean": 0.0003531992551870644, |
| "std": 0.014716269448399544, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_out.0.weight": { |
| "min": -0.2641335129737854, |
| "max": 0.2883334755897522, |
| "mean": -6.170988490339369e-05, |
| "std": 0.03907797113060951, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_out.0.bias": { |
| "min": -0.043938618153333664, |
| "max": 0.037385016679763794, |
| "mean": -9.84332655207254e-05, |
| "std": 0.013347743079066277, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.3.g": { |
| "min": 0.3393842577934265, |
| "max": 1.0925544500350952, |
| "mean": 0.8639589548110962, |
| "std": 0.0638754740357399, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.4.ff.0.0.weight": { |
| "min": -0.4231780469417572, |
| "max": 0.41907352209091187, |
| "mean": 0.0003135594888590276, |
| "std": 0.04351302981376648, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.4.ff.0.0.bias": { |
| "min": -0.21478679776191711, |
| "max": 0.1706700474023819, |
| "mean": -0.02944377437233925, |
| "std": 0.03187936916947365, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.22.4.ff.2.weight": { |
| "min": -0.5987504720687866, |
| "max": 0.5598719120025635, |
| "mean": -0.00014867217396385968, |
| "std": 0.05346066504716873, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.22.4.ff.2.bias": { |
| "min": -0.17880699038505554, |
| "max": 0.37724727392196655, |
| "mean": 0.0013524596579372883, |
| "std": 0.037310197949409485, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.0.weight": { |
| "min": -0.39442750811576843, |
| "max": 0.3689110279083252, |
| "mean": 3.764010398299433e-05, |
| "std": 0.028617940843105316, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.23.1.g": { |
| "min": 0.29055094718933105, |
| "max": 0.8275657296180725, |
| "mean": 0.7055599689483643, |
| "std": 0.06785259395837784, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_q.weight": { |
| "min": -0.9265406131744385, |
| "max": 1.0269172191619873, |
| "mean": -2.7786163627752103e-05, |
| "std": 0.04764207825064659, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_q.bias": { |
| "min": -0.8793070316314697, |
| "max": 0.8158283829689026, |
| "mean": -0.0003010375367011875, |
| "std": 0.09555298835039139, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_k.weight": { |
| "min": -0.26992541551589966, |
| "max": 0.24092742800712585, |
| "mean": -2.246434632979799e-05, |
| "std": 0.03895093873143196, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_k.bias": { |
| "min": -23.743555068969727, |
| "max": 22.852014541625977, |
| "mean": -0.09188304841518402, |
| "std": 4.070625305175781, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_v.weight": { |
| "min": -0.22777004539966583, |
| "max": 0.2455480843782425, |
| "mean": -2.5490313419140875e-05, |
| "std": 0.03864210844039917, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_v.bias": { |
| "min": -0.060185808688402176, |
| "max": 0.04548603296279907, |
| "mean": -0.00013778329594060779, |
| "std": 0.014688468538224697, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_out.0.weight": { |
| "min": -0.33804869651794434, |
| "max": 0.3748103082180023, |
| "mean": 7.576927600894123e-06, |
| "std": 0.04082098975777626, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_out.0.bias": { |
| "min": -0.046251166611909866, |
| "max": 0.19543442130088806, |
| "mean": 0.00027753060567192733, |
| "std": 0.013553835451602936, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.3.g": { |
| "min": 0.37363529205322266, |
| "max": 1.1304537057876587, |
| "mean": 0.8902342319488525, |
| "std": 0.06401188671588898, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.4.ff.0.0.weight": { |
| "min": -0.44750913977622986, |
| "max": 0.5426135659217834, |
| "mean": 2.5048013412742876e-05, |
| "std": 0.0455806739628315, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.4.ff.0.0.bias": { |
| "min": -0.22384138405323029, |
| "max": 0.08764129132032394, |
| "mean": -0.03201291710138321, |
| "std": 0.03774724155664444, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.23.4.ff.2.weight": { |
| "min": -0.7260749936103821, |
| "max": 0.688654899597168, |
| "mean": 3.5635155654745176e-05, |
| "std": 0.051793280988931656, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.23.4.ff.2.bias": { |
| "min": -0.17447420954704285, |
| "max": 0.21816052496433258, |
| "mean": 3.443963942117989e-05, |
| "std": 0.03176717460155487, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.0.weight": { |
| "min": -0.33968257904052734, |
| "max": 0.3729552924633026, |
| "mean": 4.328345676185563e-05, |
| "std": 0.034136127680540085, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.24.1.g": { |
| "min": 0.3178211450576782, |
| "max": 1.2872322797775269, |
| "mean": 0.6015591025352478, |
| "std": 0.08348726481199265, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_q.weight": { |
| "min": -0.28302425146102905, |
| "max": 0.26023271679878235, |
| "mean": -2.7253747703070985e-06, |
| "std": 0.0359804667532444, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_q.bias": { |
| "min": -0.23563744127750397, |
| "max": 0.20571035146713257, |
| "mean": 0.00023820970091037452, |
| "std": 0.056028686463832855, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_k.weight": { |
| "min": -0.43542271852493286, |
| "max": 0.3249562382698059, |
| "mean": 2.4268334527732804e-05, |
| "std": 0.034124359488487244, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_k.bias": { |
| "min": -5.546493053436279, |
| "max": 7.314059257507324, |
| "mean": -0.007369840517640114, |
| "std": 0.6993855834007263, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_v.weight": { |
| "min": -0.34410950541496277, |
| "max": 0.36279547214508057, |
| "mean": 0.0001030894200084731, |
| "std": 0.04783707857131958, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_v.bias": { |
| "min": -0.07371430099010468, |
| "max": 0.060424793511629105, |
| "mean": 0.0009352926863357425, |
| "std": 0.01493847742676735, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_out.0.weight": { |
| "min": -0.2562869191169739, |
| "max": 0.2867131233215332, |
| "mean": 4.736550181405619e-06, |
| "std": 0.04156505689024925, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_out.0.bias": { |
| "min": -0.0553305447101593, |
| "max": 0.06281695514917374, |
| "mean": 0.00012849000631831586, |
| "std": 0.007162065710872412, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.3.g": { |
| "min": 0.49391981959342957, |
| "max": 1.220736026763916, |
| "mean": 1.0135732889175415, |
| "std": 0.11749263107776642, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.4.ff.0.0.weight": { |
| "min": -1.0939218997955322, |
| "max": 1.0474658012390137, |
| "mean": -4.883138171862811e-05, |
| "std": 0.05241798609495163, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.4.ff.0.0.bias": { |
| "min": -0.223901629447937, |
| "max": 0.17314252257347107, |
| "mean": -0.027228916063904762, |
| "std": 0.03630804270505905, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.24.4.ff.2.weight": { |
| "min": -0.8840344548225403, |
| "max": 0.9224310517311096, |
| "mean": -0.00014670705422759056, |
| "std": 0.053297851234674454, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.24.4.ff.2.bias": { |
| "min": -0.17102152109146118, |
| "max": 0.3797409236431122, |
| "mean": 0.003368864767253399, |
| "std": 0.0398765504360199, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.0.weight": { |
| "min": -0.7776780724525452, |
| "max": 0.7227001190185547, |
| "mean": 1.787853761925362e-05, |
| "std": 0.04615465924143791, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.25.1.g": { |
| "min": 0.3386647403240204, |
| "max": 1.4281901121139526, |
| "mean": 0.9484964609146118, |
| "std": 0.20680245757102966, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_q.weight": { |
| "min": -1.745869517326355, |
| "max": 1.7045400142669678, |
| "mean": 0.00022709640325047076, |
| "std": 0.15870508551597595, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_q.bias": { |
| "min": -1.1994972229003906, |
| "max": 1.1010137796401978, |
| "mean": -0.009549295529723167, |
| "std": 0.20389875769615173, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_k.weight": { |
| "min": -0.4210166335105896, |
| "max": 0.4279645085334778, |
| "mean": 6.39720747130923e-05, |
| "std": 0.04802015796303749, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_k.bias": { |
| "min": -19.747936248779297, |
| "max": 19.543052673339844, |
| "mean": -0.24834343791007996, |
| "std": 4.777070999145508, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_v.weight": { |
| "min": -0.3238843083381653, |
| "max": 0.4385298192501068, |
| "mean": -1.1759563676605467e-05, |
| "std": 0.04616716504096985, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_v.bias": { |
| "min": -0.03387872874736786, |
| "max": 0.036932073533535004, |
| "mean": 0.0006410478381440043, |
| "std": 0.01291597355157137, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_out.0.weight": { |
| "min": -0.7035592198371887, |
| "max": 0.6685189604759216, |
| "mean": 4.281650763005018e-05, |
| "std": 0.05789238214492798, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_out.0.bias": { |
| "min": -0.07232622057199478, |
| "max": 0.06769084185361862, |
| "mean": -0.00013414367276709527, |
| "std": 0.012906934134662151, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.3.g": { |
| "min": 0.3805098831653595, |
| "max": 1.3928314447402954, |
| "mean": 1.0667389631271362, |
| "std": 0.21977593004703522, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.4.ff.0.0.weight": { |
| "min": -0.6165266633033752, |
| "max": 0.7183749079704285, |
| "mean": 0.00011245780478930101, |
| "std": 0.05802787095308304, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.4.ff.0.0.bias": { |
| "min": -0.21882832050323486, |
| "max": 0.2250150591135025, |
| "mean": 0.006199384108185768, |
| "std": 0.049713458865880966, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.25.4.ff.2.weight": { |
| "min": -0.6297744512557983, |
| "max": 0.8895941972732544, |
| "mean": 1.2031738151563331e-05, |
| "std": 0.023544643074274063, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.25.4.ff.2.bias": { |
| "min": -0.506857693195343, |
| "max": 0.47375017404556274, |
| "mean": -0.003018573159351945, |
| "std": 0.06925369799137115, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.norm_out.g": { |
| "min": 0.5381409525871277, |
| "max": 1.1801701784133911, |
| "mean": 0.7828266620635986, |
| "std": 0.09875727444887161, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.proj_out.weight": { |
| "min": -0.2670648992061615, |
| "max": 0.21295404434204102, |
| "mean": -0.0002240903995698318, |
| "std": 0.054007235914468765, |
| "sparsity": 0.0, |
| "shape": [ |
| 100, |
| 1024 |
| ] |
| }, |
| "transformer.proj_out.bias": { |
| "min": -0.23832593858242035, |
| "max": 0.014832383021712303, |
| "mean": -0.043932899832725525, |
| "std": 0.03429204970598221, |
| "sparsity": 0.0, |
| "shape": [ |
| 100 |
| ] |
| } |
| } |
| } |