| { |
| "layer_types": { |
| "transformer": 391 |
| }, |
| "parameter_counts": { |
| "transformer.time_embed.time_mlp.0.weight": 262144, |
| "transformer.time_embed.time_mlp.0.bias": 1024, |
| "transformer.time_embed.time_mlp.2.weight": 1048576, |
| "transformer.time_embed.time_mlp.2.bias": 1024, |
| "transformer.text_embed.text_embed.weight": 254600, |
| "transformer.input_embed.proj.weight": 307200, |
| "transformer.input_embed.proj.bias": 1024, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, |
| "transformer.layers.0.1.g": 1024, |
| "transformer.layers.0.2.to_q.weight": 1048576, |
| "transformer.layers.0.2.to_q.bias": 1024, |
| "transformer.layers.0.2.to_k.weight": 1048576, |
| "transformer.layers.0.2.to_k.bias": 1024, |
| "transformer.layers.0.2.to_v.weight": 1048576, |
| "transformer.layers.0.2.to_v.bias": 1024, |
| "transformer.layers.0.2.to_out.0.weight": 1048576, |
| "transformer.layers.0.2.to_out.0.bias": 1024, |
| "transformer.layers.0.3.g": 1024, |
| "transformer.layers.0.4.ff.0.0.weight": 4194304, |
| "transformer.layers.0.4.ff.0.0.bias": 4096, |
| "transformer.layers.0.4.ff.2.weight": 4194304, |
| "transformer.layers.0.4.ff.2.bias": 1024, |
| "transformer.layers.1.1.g": 1024, |
| "transformer.layers.1.2.to_q.weight": 1048576, |
| "transformer.layers.1.2.to_q.bias": 1024, |
| "transformer.layers.1.2.to_k.weight": 1048576, |
| "transformer.layers.1.2.to_k.bias": 1024, |
| "transformer.layers.1.2.to_v.weight": 1048576, |
| "transformer.layers.1.2.to_v.bias": 1024, |
| "transformer.layers.1.2.to_out.0.weight": 1048576, |
| "transformer.layers.1.2.to_out.0.bias": 1024, |
| "transformer.layers.1.3.g": 1024, |
| "transformer.layers.1.4.ff.0.0.weight": 4194304, |
| "transformer.layers.1.4.ff.0.0.bias": 4096, |
| "transformer.layers.1.4.ff.2.weight": 4194304, |
| "transformer.layers.1.4.ff.2.bias": 1024, |
| "transformer.layers.2.1.g": 1024, |
| "transformer.layers.2.2.to_q.weight": 1048576, |
| "transformer.layers.2.2.to_q.bias": 1024, |
| "transformer.layers.2.2.to_k.weight": 1048576, |
| "transformer.layers.2.2.to_k.bias": 1024, |
| "transformer.layers.2.2.to_v.weight": 1048576, |
| "transformer.layers.2.2.to_v.bias": 1024, |
| "transformer.layers.2.2.to_out.0.weight": 1048576, |
| "transformer.layers.2.2.to_out.0.bias": 1024, |
| "transformer.layers.2.3.g": 1024, |
| "transformer.layers.2.4.ff.0.0.weight": 4194304, |
| "transformer.layers.2.4.ff.0.0.bias": 4096, |
| "transformer.layers.2.4.ff.2.weight": 4194304, |
| "transformer.layers.2.4.ff.2.bias": 1024, |
| "transformer.layers.3.1.g": 1024, |
| "transformer.layers.3.2.to_q.weight": 1048576, |
| "transformer.layers.3.2.to_q.bias": 1024, |
| "transformer.layers.3.2.to_k.weight": 1048576, |
| "transformer.layers.3.2.to_k.bias": 1024, |
| "transformer.layers.3.2.to_v.weight": 1048576, |
| "transformer.layers.3.2.to_v.bias": 1024, |
| "transformer.layers.3.2.to_out.0.weight": 1048576, |
| "transformer.layers.3.2.to_out.0.bias": 1024, |
| "transformer.layers.3.3.g": 1024, |
| "transformer.layers.3.4.ff.0.0.weight": 4194304, |
| "transformer.layers.3.4.ff.0.0.bias": 4096, |
| "transformer.layers.3.4.ff.2.weight": 4194304, |
| "transformer.layers.3.4.ff.2.bias": 1024, |
| "transformer.layers.4.1.g": 1024, |
| "transformer.layers.4.2.to_q.weight": 1048576, |
| "transformer.layers.4.2.to_q.bias": 1024, |
| "transformer.layers.4.2.to_k.weight": 1048576, |
| "transformer.layers.4.2.to_k.bias": 1024, |
| "transformer.layers.4.2.to_v.weight": 1048576, |
| "transformer.layers.4.2.to_v.bias": 1024, |
| "transformer.layers.4.2.to_out.0.weight": 1048576, |
| "transformer.layers.4.2.to_out.0.bias": 1024, |
| "transformer.layers.4.3.g": 1024, |
| "transformer.layers.4.4.ff.0.0.weight": 4194304, |
| "transformer.layers.4.4.ff.0.0.bias": 4096, |
| "transformer.layers.4.4.ff.2.weight": 4194304, |
| "transformer.layers.4.4.ff.2.bias": 1024, |
| "transformer.layers.5.1.g": 1024, |
| "transformer.layers.5.2.to_q.weight": 1048576, |
| "transformer.layers.5.2.to_q.bias": 1024, |
| "transformer.layers.5.2.to_k.weight": 1048576, |
| "transformer.layers.5.2.to_k.bias": 1024, |
| "transformer.layers.5.2.to_v.weight": 1048576, |
| "transformer.layers.5.2.to_v.bias": 1024, |
| "transformer.layers.5.2.to_out.0.weight": 1048576, |
| "transformer.layers.5.2.to_out.0.bias": 1024, |
| "transformer.layers.5.3.g": 1024, |
| "transformer.layers.5.4.ff.0.0.weight": 4194304, |
| "transformer.layers.5.4.ff.0.0.bias": 4096, |
| "transformer.layers.5.4.ff.2.weight": 4194304, |
| "transformer.layers.5.4.ff.2.bias": 1024, |
| "transformer.layers.6.1.g": 1024, |
| "transformer.layers.6.2.to_q.weight": 1048576, |
| "transformer.layers.6.2.to_q.bias": 1024, |
| "transformer.layers.6.2.to_k.weight": 1048576, |
| "transformer.layers.6.2.to_k.bias": 1024, |
| "transformer.layers.6.2.to_v.weight": 1048576, |
| "transformer.layers.6.2.to_v.bias": 1024, |
| "transformer.layers.6.2.to_out.0.weight": 1048576, |
| "transformer.layers.6.2.to_out.0.bias": 1024, |
| "transformer.layers.6.3.g": 1024, |
| "transformer.layers.6.4.ff.0.0.weight": 4194304, |
| "transformer.layers.6.4.ff.0.0.bias": 4096, |
| "transformer.layers.6.4.ff.2.weight": 4194304, |
| "transformer.layers.6.4.ff.2.bias": 1024, |
| "transformer.layers.7.1.g": 1024, |
| "transformer.layers.7.2.to_q.weight": 1048576, |
| "transformer.layers.7.2.to_q.bias": 1024, |
| "transformer.layers.7.2.to_k.weight": 1048576, |
| "transformer.layers.7.2.to_k.bias": 1024, |
| "transformer.layers.7.2.to_v.weight": 1048576, |
| "transformer.layers.7.2.to_v.bias": 1024, |
| "transformer.layers.7.2.to_out.0.weight": 1048576, |
| "transformer.layers.7.2.to_out.0.bias": 1024, |
| "transformer.layers.7.3.g": 1024, |
| "transformer.layers.7.4.ff.0.0.weight": 4194304, |
| "transformer.layers.7.4.ff.0.0.bias": 4096, |
| "transformer.layers.7.4.ff.2.weight": 4194304, |
| "transformer.layers.7.4.ff.2.bias": 1024, |
| "transformer.layers.8.1.g": 1024, |
| "transformer.layers.8.2.to_q.weight": 1048576, |
| "transformer.layers.8.2.to_q.bias": 1024, |
| "transformer.layers.8.2.to_k.weight": 1048576, |
| "transformer.layers.8.2.to_k.bias": 1024, |
| "transformer.layers.8.2.to_v.weight": 1048576, |
| "transformer.layers.8.2.to_v.bias": 1024, |
| "transformer.layers.8.2.to_out.0.weight": 1048576, |
| "transformer.layers.8.2.to_out.0.bias": 1024, |
| "transformer.layers.8.3.g": 1024, |
| "transformer.layers.8.4.ff.0.0.weight": 4194304, |
| "transformer.layers.8.4.ff.0.0.bias": 4096, |
| "transformer.layers.8.4.ff.2.weight": 4194304, |
| "transformer.layers.8.4.ff.2.bias": 1024, |
| "transformer.layers.9.1.g": 1024, |
| "transformer.layers.9.2.to_q.weight": 1048576, |
| "transformer.layers.9.2.to_q.bias": 1024, |
| "transformer.layers.9.2.to_k.weight": 1048576, |
| "transformer.layers.9.2.to_k.bias": 1024, |
| "transformer.layers.9.2.to_v.weight": 1048576, |
| "transformer.layers.9.2.to_v.bias": 1024, |
| "transformer.layers.9.2.to_out.0.weight": 1048576, |
| "transformer.layers.9.2.to_out.0.bias": 1024, |
| "transformer.layers.9.3.g": 1024, |
| "transformer.layers.9.4.ff.0.0.weight": 4194304, |
| "transformer.layers.9.4.ff.0.0.bias": 4096, |
| "transformer.layers.9.4.ff.2.weight": 4194304, |
| "transformer.layers.9.4.ff.2.bias": 1024, |
| "transformer.layers.10.1.g": 1024, |
| "transformer.layers.10.2.to_q.weight": 1048576, |
| "transformer.layers.10.2.to_q.bias": 1024, |
| "transformer.layers.10.2.to_k.weight": 1048576, |
| "transformer.layers.10.2.to_k.bias": 1024, |
| "transformer.layers.10.2.to_v.weight": 1048576, |
| "transformer.layers.10.2.to_v.bias": 1024, |
| "transformer.layers.10.2.to_out.0.weight": 1048576, |
| "transformer.layers.10.2.to_out.0.bias": 1024, |
| "transformer.layers.10.3.g": 1024, |
| "transformer.layers.10.4.ff.0.0.weight": 4194304, |
| "transformer.layers.10.4.ff.0.0.bias": 4096, |
| "transformer.layers.10.4.ff.2.weight": 4194304, |
| "transformer.layers.10.4.ff.2.bias": 1024, |
| "transformer.layers.11.1.g": 1024, |
| "transformer.layers.11.2.to_q.weight": 1048576, |
| "transformer.layers.11.2.to_q.bias": 1024, |
| "transformer.layers.11.2.to_k.weight": 1048576, |
| "transformer.layers.11.2.to_k.bias": 1024, |
| "transformer.layers.11.2.to_v.weight": 1048576, |
| "transformer.layers.11.2.to_v.bias": 1024, |
| "transformer.layers.11.2.to_out.0.weight": 1048576, |
| "transformer.layers.11.2.to_out.0.bias": 1024, |
| "transformer.layers.11.3.g": 1024, |
| "transformer.layers.11.4.ff.0.0.weight": 4194304, |
| "transformer.layers.11.4.ff.0.0.bias": 4096, |
| "transformer.layers.11.4.ff.2.weight": 4194304, |
| "transformer.layers.11.4.ff.2.bias": 1024, |
| "transformer.layers.12.1.g": 1024, |
| "transformer.layers.12.2.to_q.weight": 1048576, |
| "transformer.layers.12.2.to_q.bias": 1024, |
| "transformer.layers.12.2.to_k.weight": 1048576, |
| "transformer.layers.12.2.to_k.bias": 1024, |
| "transformer.layers.12.2.to_v.weight": 1048576, |
| "transformer.layers.12.2.to_v.bias": 1024, |
| "transformer.layers.12.2.to_out.0.weight": 1048576, |
| "transformer.layers.12.2.to_out.0.bias": 1024, |
| "transformer.layers.12.3.g": 1024, |
| "transformer.layers.12.4.ff.0.0.weight": 4194304, |
| "transformer.layers.12.4.ff.0.0.bias": 4096, |
| "transformer.layers.12.4.ff.2.weight": 4194304, |
| "transformer.layers.12.4.ff.2.bias": 1024, |
| "transformer.layers.13.0.weight": 2097152, |
| "transformer.layers.13.1.g": 1024, |
| "transformer.layers.13.2.to_q.weight": 1048576, |
| "transformer.layers.13.2.to_q.bias": 1024, |
| "transformer.layers.13.2.to_k.weight": 1048576, |
| "transformer.layers.13.2.to_k.bias": 1024, |
| "transformer.layers.13.2.to_v.weight": 1048576, |
| "transformer.layers.13.2.to_v.bias": 1024, |
| "transformer.layers.13.2.to_out.0.weight": 1048576, |
| "transformer.layers.13.2.to_out.0.bias": 1024, |
| "transformer.layers.13.3.g": 1024, |
| "transformer.layers.13.4.ff.0.0.weight": 4194304, |
| "transformer.layers.13.4.ff.0.0.bias": 4096, |
| "transformer.layers.13.4.ff.2.weight": 4194304, |
| "transformer.layers.13.4.ff.2.bias": 1024, |
| "transformer.layers.14.0.weight": 2097152, |
| "transformer.layers.14.1.g": 1024, |
| "transformer.layers.14.2.to_q.weight": 1048576, |
| "transformer.layers.14.2.to_q.bias": 1024, |
| "transformer.layers.14.2.to_k.weight": 1048576, |
| "transformer.layers.14.2.to_k.bias": 1024, |
| "transformer.layers.14.2.to_v.weight": 1048576, |
| "transformer.layers.14.2.to_v.bias": 1024, |
| "transformer.layers.14.2.to_out.0.weight": 1048576, |
| "transformer.layers.14.2.to_out.0.bias": 1024, |
| "transformer.layers.14.3.g": 1024, |
| "transformer.layers.14.4.ff.0.0.weight": 4194304, |
| "transformer.layers.14.4.ff.0.0.bias": 4096, |
| "transformer.layers.14.4.ff.2.weight": 4194304, |
| "transformer.layers.14.4.ff.2.bias": 1024, |
| "transformer.layers.15.0.weight": 2097152, |
| "transformer.layers.15.1.g": 1024, |
| "transformer.layers.15.2.to_q.weight": 1048576, |
| "transformer.layers.15.2.to_q.bias": 1024, |
| "transformer.layers.15.2.to_k.weight": 1048576, |
| "transformer.layers.15.2.to_k.bias": 1024, |
| "transformer.layers.15.2.to_v.weight": 1048576, |
| "transformer.layers.15.2.to_v.bias": 1024, |
| "transformer.layers.15.2.to_out.0.weight": 1048576, |
| "transformer.layers.15.2.to_out.0.bias": 1024, |
| "transformer.layers.15.3.g": 1024, |
| "transformer.layers.15.4.ff.0.0.weight": 4194304, |
| "transformer.layers.15.4.ff.0.0.bias": 4096, |
| "transformer.layers.15.4.ff.2.weight": 4194304, |
| "transformer.layers.15.4.ff.2.bias": 1024, |
| "transformer.layers.16.0.weight": 2097152, |
| "transformer.layers.16.1.g": 1024, |
| "transformer.layers.16.2.to_q.weight": 1048576, |
| "transformer.layers.16.2.to_q.bias": 1024, |
| "transformer.layers.16.2.to_k.weight": 1048576, |
| "transformer.layers.16.2.to_k.bias": 1024, |
| "transformer.layers.16.2.to_v.weight": 1048576, |
| "transformer.layers.16.2.to_v.bias": 1024, |
| "transformer.layers.16.2.to_out.0.weight": 1048576, |
| "transformer.layers.16.2.to_out.0.bias": 1024, |
| "transformer.layers.16.3.g": 1024, |
| "transformer.layers.16.4.ff.0.0.weight": 4194304, |
| "transformer.layers.16.4.ff.0.0.bias": 4096, |
| "transformer.layers.16.4.ff.2.weight": 4194304, |
| "transformer.layers.16.4.ff.2.bias": 1024, |
| "transformer.layers.17.0.weight": 2097152, |
| "transformer.layers.17.1.g": 1024, |
| "transformer.layers.17.2.to_q.weight": 1048576, |
| "transformer.layers.17.2.to_q.bias": 1024, |
| "transformer.layers.17.2.to_k.weight": 1048576, |
| "transformer.layers.17.2.to_k.bias": 1024, |
| "transformer.layers.17.2.to_v.weight": 1048576, |
| "transformer.layers.17.2.to_v.bias": 1024, |
| "transformer.layers.17.2.to_out.0.weight": 1048576, |
| "transformer.layers.17.2.to_out.0.bias": 1024, |
| "transformer.layers.17.3.g": 1024, |
| "transformer.layers.17.4.ff.0.0.weight": 4194304, |
| "transformer.layers.17.4.ff.0.0.bias": 4096, |
| "transformer.layers.17.4.ff.2.weight": 4194304, |
| "transformer.layers.17.4.ff.2.bias": 1024, |
| "transformer.layers.18.0.weight": 2097152, |
| "transformer.layers.18.1.g": 1024, |
| "transformer.layers.18.2.to_q.weight": 1048576, |
| "transformer.layers.18.2.to_q.bias": 1024, |
| "transformer.layers.18.2.to_k.weight": 1048576, |
| "transformer.layers.18.2.to_k.bias": 1024, |
| "transformer.layers.18.2.to_v.weight": 1048576, |
| "transformer.layers.18.2.to_v.bias": 1024, |
| "transformer.layers.18.2.to_out.0.weight": 1048576, |
| "transformer.layers.18.2.to_out.0.bias": 1024, |
| "transformer.layers.18.3.g": 1024, |
| "transformer.layers.18.4.ff.0.0.weight": 4194304, |
| "transformer.layers.18.4.ff.0.0.bias": 4096, |
| "transformer.layers.18.4.ff.2.weight": 4194304, |
| "transformer.layers.18.4.ff.2.bias": 1024, |
| "transformer.layers.19.0.weight": 2097152, |
| "transformer.layers.19.1.g": 1024, |
| "transformer.layers.19.2.to_q.weight": 1048576, |
| "transformer.layers.19.2.to_q.bias": 1024, |
| "transformer.layers.19.2.to_k.weight": 1048576, |
| "transformer.layers.19.2.to_k.bias": 1024, |
| "transformer.layers.19.2.to_v.weight": 1048576, |
| "transformer.layers.19.2.to_v.bias": 1024, |
| "transformer.layers.19.2.to_out.0.weight": 1048576, |
| "transformer.layers.19.2.to_out.0.bias": 1024, |
| "transformer.layers.19.3.g": 1024, |
| "transformer.layers.19.4.ff.0.0.weight": 4194304, |
| "transformer.layers.19.4.ff.0.0.bias": 4096, |
| "transformer.layers.19.4.ff.2.weight": 4194304, |
| "transformer.layers.19.4.ff.2.bias": 1024, |
| "transformer.layers.20.0.weight": 2097152, |
| "transformer.layers.20.1.g": 1024, |
| "transformer.layers.20.2.to_q.weight": 1048576, |
| "transformer.layers.20.2.to_q.bias": 1024, |
| "transformer.layers.20.2.to_k.weight": 1048576, |
| "transformer.layers.20.2.to_k.bias": 1024, |
| "transformer.layers.20.2.to_v.weight": 1048576, |
| "transformer.layers.20.2.to_v.bias": 1024, |
| "transformer.layers.20.2.to_out.0.weight": 1048576, |
| "transformer.layers.20.2.to_out.0.bias": 1024, |
| "transformer.layers.20.3.g": 1024, |
| "transformer.layers.20.4.ff.0.0.weight": 4194304, |
| "transformer.layers.20.4.ff.0.0.bias": 4096, |
| "transformer.layers.20.4.ff.2.weight": 4194304, |
| "transformer.layers.20.4.ff.2.bias": 1024, |
| "transformer.layers.21.0.weight": 2097152, |
| "transformer.layers.21.1.g": 1024, |
| "transformer.layers.21.2.to_q.weight": 1048576, |
| "transformer.layers.21.2.to_q.bias": 1024, |
| "transformer.layers.21.2.to_k.weight": 1048576, |
| "transformer.layers.21.2.to_k.bias": 1024, |
| "transformer.layers.21.2.to_v.weight": 1048576, |
| "transformer.layers.21.2.to_v.bias": 1024, |
| "transformer.layers.21.2.to_out.0.weight": 1048576, |
| "transformer.layers.21.2.to_out.0.bias": 1024, |
| "transformer.layers.21.3.g": 1024, |
| "transformer.layers.21.4.ff.0.0.weight": 4194304, |
| "transformer.layers.21.4.ff.0.0.bias": 4096, |
| "transformer.layers.21.4.ff.2.weight": 4194304, |
| "transformer.layers.21.4.ff.2.bias": 1024, |
| "transformer.layers.22.0.weight": 2097152, |
| "transformer.layers.22.1.g": 1024, |
| "transformer.layers.22.2.to_q.weight": 1048576, |
| "transformer.layers.22.2.to_q.bias": 1024, |
| "transformer.layers.22.2.to_k.weight": 1048576, |
| "transformer.layers.22.2.to_k.bias": 1024, |
| "transformer.layers.22.2.to_v.weight": 1048576, |
| "transformer.layers.22.2.to_v.bias": 1024, |
| "transformer.layers.22.2.to_out.0.weight": 1048576, |
| "transformer.layers.22.2.to_out.0.bias": 1024, |
| "transformer.layers.22.3.g": 1024, |
| "transformer.layers.22.4.ff.0.0.weight": 4194304, |
| "transformer.layers.22.4.ff.0.0.bias": 4096, |
| "transformer.layers.22.4.ff.2.weight": 4194304, |
| "transformer.layers.22.4.ff.2.bias": 1024, |
| "transformer.layers.23.0.weight": 2097152, |
| "transformer.layers.23.1.g": 1024, |
| "transformer.layers.23.2.to_q.weight": 1048576, |
| "transformer.layers.23.2.to_q.bias": 1024, |
| "transformer.layers.23.2.to_k.weight": 1048576, |
| "transformer.layers.23.2.to_k.bias": 1024, |
| "transformer.layers.23.2.to_v.weight": 1048576, |
| "transformer.layers.23.2.to_v.bias": 1024, |
| "transformer.layers.23.2.to_out.0.weight": 1048576, |
| "transformer.layers.23.2.to_out.0.bias": 1024, |
| "transformer.layers.23.3.g": 1024, |
| "transformer.layers.23.4.ff.0.0.weight": 4194304, |
| "transformer.layers.23.4.ff.0.0.bias": 4096, |
| "transformer.layers.23.4.ff.2.weight": 4194304, |
| "transformer.layers.23.4.ff.2.bias": 1024, |
| "transformer.layers.24.0.weight": 2097152, |
| "transformer.layers.24.1.g": 1024, |
| "transformer.layers.24.2.to_q.weight": 1048576, |
| "transformer.layers.24.2.to_q.bias": 1024, |
| "transformer.layers.24.2.to_k.weight": 1048576, |
| "transformer.layers.24.2.to_k.bias": 1024, |
| "transformer.layers.24.2.to_v.weight": 1048576, |
| "transformer.layers.24.2.to_v.bias": 1024, |
| "transformer.layers.24.2.to_out.0.weight": 1048576, |
| "transformer.layers.24.2.to_out.0.bias": 1024, |
| "transformer.layers.24.3.g": 1024, |
| "transformer.layers.24.4.ff.0.0.weight": 4194304, |
| "transformer.layers.24.4.ff.0.0.bias": 4096, |
| "transformer.layers.24.4.ff.2.weight": 4194304, |
| "transformer.layers.24.4.ff.2.bias": 1024, |
| "transformer.layers.25.0.weight": 2097152, |
| "transformer.layers.25.1.g": 1024, |
| "transformer.layers.25.2.to_q.weight": 1048576, |
| "transformer.layers.25.2.to_q.bias": 1024, |
| "transformer.layers.25.2.to_k.weight": 1048576, |
| "transformer.layers.25.2.to_k.bias": 1024, |
| "transformer.layers.25.2.to_v.weight": 1048576, |
| "transformer.layers.25.2.to_v.bias": 1024, |
| "transformer.layers.25.2.to_out.0.weight": 1048576, |
| "transformer.layers.25.2.to_out.0.bias": 1024, |
| "transformer.layers.25.3.g": 1024, |
| "transformer.layers.25.4.ff.0.0.weight": 4194304, |
| "transformer.layers.25.4.ff.0.0.bias": 4096, |
| "transformer.layers.25.4.ff.2.weight": 4194304, |
| "transformer.layers.25.4.ff.2.bias": 1024, |
| "transformer.norm_out.g": 1024, |
| "transformer.proj_out.weight": 102400, |
| "transformer.proj_out.bias": 100 |
| }, |
| "important_layers": [ |
| "transformer.time_embed.time_mlp.0.weight", |
| "transformer.time_embed.time_mlp.2.weight", |
| "transformer.text_embed.text_embed.weight", |
| "transformer.input_embed.proj.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight", |
| "transformer.layers.0.2.to_q.weight", |
| "transformer.layers.0.2.to_k.weight", |
| "transformer.layers.0.2.to_v.weight", |
| "transformer.layers.0.2.to_out.0.weight", |
| "transformer.layers.0.4.ff.0.0.weight", |
| "transformer.layers.0.4.ff.2.weight", |
| "transformer.layers.1.2.to_q.weight", |
| "transformer.layers.1.2.to_k.weight", |
| "transformer.layers.1.2.to_v.weight", |
| "transformer.layers.1.2.to_out.0.weight", |
| "transformer.layers.1.4.ff.0.0.weight", |
| "transformer.layers.1.4.ff.2.weight", |
| "transformer.layers.2.2.to_q.weight", |
| "transformer.layers.2.2.to_k.weight", |
| "transformer.layers.2.2.to_v.weight", |
| "transformer.layers.2.2.to_out.0.weight", |
| "transformer.layers.2.4.ff.0.0.weight", |
| "transformer.layers.2.4.ff.2.weight", |
| "transformer.layers.3.2.to_q.weight", |
| "transformer.layers.3.2.to_k.weight", |
| "transformer.layers.3.2.to_v.weight", |
| "transformer.layers.3.2.to_out.0.weight", |
| "transformer.layers.3.4.ff.0.0.weight", |
| "transformer.layers.3.4.ff.2.weight", |
| "transformer.layers.4.2.to_q.weight", |
| "transformer.layers.4.2.to_k.weight", |
| "transformer.layers.4.2.to_v.weight", |
| "transformer.layers.4.2.to_out.0.weight", |
| "transformer.layers.4.4.ff.0.0.weight", |
| "transformer.layers.4.4.ff.2.weight", |
| "transformer.layers.5.2.to_q.weight", |
| "transformer.layers.5.2.to_k.weight", |
| "transformer.layers.5.2.to_v.weight", |
| "transformer.layers.5.2.to_out.0.weight", |
| "transformer.layers.5.4.ff.0.0.weight", |
| "transformer.layers.5.4.ff.2.weight", |
| "transformer.layers.6.2.to_q.weight", |
| "transformer.layers.6.2.to_k.weight", |
| "transformer.layers.6.2.to_v.weight", |
| "transformer.layers.6.2.to_out.0.weight", |
| "transformer.layers.6.4.ff.0.0.weight", |
| "transformer.layers.6.4.ff.2.weight", |
| "transformer.layers.7.2.to_q.weight", |
| "transformer.layers.7.2.to_k.weight", |
| "transformer.layers.7.2.to_v.weight", |
| "transformer.layers.7.2.to_out.0.weight", |
| "transformer.layers.7.4.ff.0.0.weight", |
| "transformer.layers.7.4.ff.2.weight", |
| "transformer.layers.8.4.ff.0.0.weight", |
| "transformer.layers.8.4.ff.2.weight", |
| "transformer.layers.9.4.ff.0.0.weight", |
| "transformer.layers.9.4.ff.2.weight", |
| "transformer.layers.10.4.ff.0.0.weight", |
| "transformer.layers.10.4.ff.2.weight", |
| "transformer.layers.11.4.ff.0.0.weight", |
| "transformer.layers.11.4.ff.2.weight", |
| "transformer.layers.12.4.ff.0.0.weight", |
| "transformer.layers.12.4.ff.2.weight", |
| "transformer.layers.13.0.weight", |
| "transformer.layers.13.4.ff.0.0.weight", |
| "transformer.layers.13.4.ff.2.weight", |
| "transformer.layers.14.0.weight", |
| "transformer.layers.14.4.ff.0.0.weight", |
| "transformer.layers.14.4.ff.2.weight", |
| "transformer.layers.15.0.weight", |
| "transformer.layers.15.4.ff.0.0.weight", |
| "transformer.layers.15.4.ff.2.weight", |
| "transformer.layers.16.4.ff.0.0.weight", |
| "transformer.layers.16.4.ff.2.weight", |
| "transformer.layers.17.4.ff.0.0.weight", |
| "transformer.layers.17.4.ff.2.weight", |
| "transformer.layers.18.4.ff.0.0.weight", |
| "transformer.layers.18.4.ff.2.weight", |
| "transformer.layers.19.4.ff.0.0.weight", |
| "transformer.layers.19.4.ff.2.weight", |
| "transformer.layers.20.4.ff.0.0.weight", |
| "transformer.layers.20.4.ff.2.weight", |
| "transformer.layers.21.4.ff.0.0.weight", |
| "transformer.layers.21.4.ff.2.weight", |
| "transformer.layers.22.4.ff.0.0.weight", |
| "transformer.layers.22.4.ff.2.weight", |
| "transformer.layers.23.4.ff.0.0.weight", |
| "transformer.layers.23.4.ff.2.weight", |
| "transformer.layers.24.4.ff.0.0.weight", |
| "transformer.layers.24.4.ff.2.weight", |
| "transformer.layers.25.4.ff.0.0.weight", |
| "transformer.layers.25.4.ff.2.weight" |
| ], |
| "bottleneck_layers": [], |
| "recommendations": { |
| "focus_layers": [ |
| "transformer.time_embed.time_mlp.0.weight", |
| "transformer.time_embed.time_mlp.2.weight", |
| "transformer.text_embed.text_embed.weight", |
| "transformer.input_embed.proj.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight", |
| "transformer.layers.0.2.to_q.weight", |
| "transformer.layers.0.2.to_k.weight", |
| "transformer.layers.0.2.to_v.weight", |
| "transformer.layers.0.2.to_out.0.weight", |
| "transformer.layers.0.4.ff.0.0.weight", |
| "transformer.layers.0.4.ff.2.weight", |
| "transformer.layers.1.2.to_q.weight", |
| "transformer.layers.1.2.to_k.weight", |
| "transformer.layers.1.2.to_v.weight", |
| "transformer.layers.1.2.to_out.0.weight", |
| "transformer.layers.1.4.ff.0.0.weight", |
| "transformer.layers.1.4.ff.2.weight", |
| "transformer.layers.2.2.to_q.weight", |
| "transformer.layers.2.2.to_k.weight", |
| "transformer.layers.2.2.to_v.weight", |
| "transformer.layers.2.2.to_out.0.weight", |
| "transformer.layers.2.4.ff.0.0.weight", |
| "transformer.layers.2.4.ff.2.weight", |
| "transformer.layers.3.2.to_q.weight", |
| "transformer.layers.3.2.to_k.weight", |
| "transformer.layers.3.2.to_v.weight", |
| "transformer.layers.3.2.to_out.0.weight", |
| "transformer.layers.3.4.ff.0.0.weight", |
| "transformer.layers.3.4.ff.2.weight", |
| "transformer.layers.4.2.to_q.weight", |
| "transformer.layers.4.2.to_k.weight", |
| "transformer.layers.4.2.to_v.weight", |
| "transformer.layers.4.2.to_out.0.weight", |
| "transformer.layers.4.4.ff.0.0.weight", |
| "transformer.layers.4.4.ff.2.weight", |
| "transformer.layers.5.2.to_q.weight", |
| "transformer.layers.5.2.to_k.weight", |
| "transformer.layers.5.2.to_v.weight", |
| "transformer.layers.5.2.to_out.0.weight", |
| "transformer.layers.5.4.ff.0.0.weight", |
| "transformer.layers.5.4.ff.2.weight", |
| "transformer.layers.6.2.to_q.weight", |
| "transformer.layers.6.2.to_k.weight", |
| "transformer.layers.6.2.to_v.weight", |
| "transformer.layers.6.2.to_out.0.weight", |
| "transformer.layers.6.4.ff.0.0.weight", |
| "transformer.layers.6.4.ff.2.weight", |
| "transformer.layers.7.2.to_q.weight", |
| "transformer.layers.7.2.to_k.weight", |
| "transformer.layers.7.2.to_v.weight", |
| "transformer.layers.7.2.to_out.0.weight", |
| "transformer.layers.7.4.ff.0.0.weight", |
| "transformer.layers.7.4.ff.2.weight", |
| "transformer.layers.8.4.ff.0.0.weight", |
| "transformer.layers.8.4.ff.2.weight", |
| "transformer.layers.9.4.ff.0.0.weight", |
| "transformer.layers.9.4.ff.2.weight", |
| "transformer.layers.10.4.ff.0.0.weight", |
| "transformer.layers.10.4.ff.2.weight", |
| "transformer.layers.11.4.ff.0.0.weight", |
| "transformer.layers.11.4.ff.2.weight", |
| "transformer.layers.12.4.ff.0.0.weight", |
| "transformer.layers.12.4.ff.2.weight", |
| "transformer.layers.13.0.weight", |
| "transformer.layers.13.4.ff.0.0.weight", |
| "transformer.layers.13.4.ff.2.weight", |
| "transformer.layers.14.0.weight", |
| "transformer.layers.14.4.ff.0.0.weight", |
| "transformer.layers.14.4.ff.2.weight", |
| "transformer.layers.15.0.weight", |
| "transformer.layers.15.4.ff.0.0.weight", |
| "transformer.layers.15.4.ff.2.weight", |
| "transformer.layers.16.4.ff.0.0.weight", |
| "transformer.layers.16.4.ff.2.weight", |
| "transformer.layers.17.4.ff.0.0.weight", |
| "transformer.layers.17.4.ff.2.weight", |
| "transformer.layers.18.4.ff.0.0.weight", |
| "transformer.layers.18.4.ff.2.weight", |
| "transformer.layers.19.4.ff.0.0.weight", |
| "transformer.layers.19.4.ff.2.weight", |
| "transformer.layers.20.4.ff.0.0.weight", |
| "transformer.layers.20.4.ff.2.weight", |
| "transformer.layers.21.4.ff.0.0.weight", |
| "transformer.layers.21.4.ff.2.weight", |
| "transformer.layers.22.4.ff.0.0.weight", |
| "transformer.layers.22.4.ff.2.weight", |
| "transformer.layers.23.4.ff.0.0.weight", |
| "transformer.layers.23.4.ff.2.weight", |
| "transformer.layers.24.4.ff.0.0.weight", |
| "transformer.layers.24.4.ff.2.weight", |
| "transformer.layers.25.4.ff.0.0.weight", |
| "transformer.layers.25.4.ff.2.weight" |
| ] |
| }, |
| "total_parameters": 391, |
| "total_elements": 360755948, |
| "param_ranges": { |
| "transformer.time_embed.time_mlp.0.weight": { |
| "min": -0.43014463782310486, |
| "max": 0.2980782687664032, |
| "mean": -0.002543725073337555, |
| "std": 0.04256265610456467, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 256 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.0.bias": { |
| "min": -0.0628998726606369, |
| "max": 0.1072736531496048, |
| "mean": 0.0006290247547440231, |
| "std": 0.034041259437799454, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.2.weight": { |
| "min": -0.41270536184310913, |
| "max": 0.8369129300117493, |
| "mean": -0.00020170127390883863, |
| "std": 0.024111710488796234, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.2.bias": { |
| "min": -0.11501855403184891, |
| "max": 0.3208469748497009, |
| "mean": -0.0009418133413419127, |
| "std": 0.019536493346095085, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.text_embed.text_embed.weight": { |
| "min": -2.7886247634887695, |
| "max": 2.8676700592041016, |
| "mean": -0.0003673351602628827, |
| "std": 0.6154847145080566, |
| "sparsity": 0.0, |
| "shape": [ |
| 2546, |
| 100 |
| ] |
| }, |
| "transformer.input_embed.proj.weight": { |
| "min": -0.27889013290405273, |
| "max": 0.38151732087135315, |
| "mean": 0.0004236791573930532, |
| "std": 0.04274853691458702, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 300 |
| ] |
| }, |
| "transformer.input_embed.proj.bias": { |
| "min": -0.2219879925251007, |
| "max": 0.2091645449399948, |
| "mean": -0.004480332136154175, |
| "std": 0.040872007608413696, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { |
| "min": -0.42831405997276306, |
| "max": 0.47610175609588623, |
| "mean": 3.7659003737644525e-06, |
| "std": 0.024510981515049934, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 64, |
| 31 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { |
| "min": -0.3244315981864929, |
| "max": 0.15647757053375244, |
| "mean": -0.046661682426929474, |
| "std": 0.05150889977812767, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { |
| "min": -0.4104415476322174, |
| "max": 0.3546721041202545, |
| "mean": -0.00013054230657871813, |
| "std": 0.02360478602349758, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 64, |
| 31 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { |
| "min": -0.22924789786338806, |
| "max": 0.2620227038860321, |
| "mean": -0.029105938971042633, |
| "std": 0.04928705468773842, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.1.g": { |
| "min": 0.2546031177043915, |
| "max": 0.8185229301452637, |
| "mean": 0.5252923965454102, |
| "std": 0.08049347996711731, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_q.weight": { |
| "min": -0.296941339969635, |
| "max": 0.2655627429485321, |
| "mean": -0.0004258690751157701, |
| "std": 0.03210259974002838, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_q.bias": { |
| "min": -0.09266690164804459, |
| "max": 0.12469176203012466, |
| "mean": 0.0006477286806330085, |
| "std": 0.025720255449414253, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_k.weight": { |
| "min": -0.2905982434749603, |
| "max": 0.28104421496391296, |
| "mean": -7.510318391723558e-05, |
| "std": 0.03093179315328598, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_k.bias": { |
| "min": -5.890929698944092, |
| "max": 5.805842876434326, |
| "mean": -0.009318170137703419, |
| "std": 1.2943130731582642, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_v.weight": { |
| "min": -0.42498156428337097, |
| "max": 0.3436700105667114, |
| "mean": 9.804974979488179e-05, |
| "std": 0.029953550547361374, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_v.bias": { |
| "min": -0.029002565890550613, |
| "max": 0.027599314227700233, |
| "mean": -0.0003237572673242539, |
| "std": 0.01257046777755022, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_out.0.weight": { |
| "min": -0.45393431186676025, |
| "max": 0.44807320833206177, |
| "mean": 2.389570181549061e-05, |
| "std": 0.023853935301303864, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_out.0.bias": { |
| "min": -0.0885927751660347, |
| "max": 0.09089276939630508, |
| "mean": 0.0022863608319312334, |
| "std": 0.019503755494952202, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.3.g": { |
| "min": 0.2667909264564514, |
| "max": 1.0541586875915527, |
| "mean": 0.5309650301933289, |
| "std": 0.10402658581733704, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.4.ff.0.0.weight": { |
| "min": -0.5743634104728699, |
| "max": 0.6081749796867371, |
| "mean": -0.0004296167753636837, |
| "std": 0.03860084339976311, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.4.ff.0.0.bias": { |
| "min": -0.18247899413108826, |
| "max": 0.04562002047896385, |
| "mean": -0.029428046196699142, |
| "std": 0.04256246238946915, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.0.4.ff.2.weight": { |
| "min": -1.1666945219039917, |
| "max": 1.633580207824707, |
| "mean": 0.00032344614737667143, |
| "std": 0.027696726843714714, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.0.4.ff.2.bias": { |
| "min": -0.16206279397010803, |
| "max": 0.20534056425094604, |
| "mean": -0.02111881598830223, |
| "std": 0.027917111292481422, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.1.g": { |
| "min": 0.22404542565345764, |
| "max": 0.8422443866729736, |
| "mean": 0.4874877631664276, |
| "std": 0.07493799924850464, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_q.weight": { |
| "min": -0.255166620016098, |
| "max": 0.305690199136734, |
| "mean": -6.7684013629332185e-06, |
| "std": 0.03347513824701309, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_q.bias": { |
| "min": -0.09524397552013397, |
| "max": 0.11034096777439117, |
| "mean": 6.5918720792979e-05, |
| "std": 0.026950189843773842, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_k.weight": { |
| "min": -0.29684391617774963, |
| "max": 0.295682817697525, |
| "mean": 5.335842797649093e-05, |
| "std": 0.03254625201225281, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_k.bias": { |
| "min": -5.156938552856445, |
| "max": 5.0772905349731445, |
| "mean": -0.014555896632373333, |
| "std": 1.1561553478240967, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_v.weight": { |
| "min": -0.3448536694049835, |
| "max": 0.34325698018074036, |
| "mean": 7.860038749640808e-05, |
| "std": 0.0300619974732399, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_v.bias": { |
| "min": -0.03601115196943283, |
| "max": 0.03331650421023369, |
| "mean": -0.0001408920797985047, |
| "std": 0.013034623116254807, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_out.0.weight": { |
| "min": -0.31532466411590576, |
| "max": 0.3747538924217224, |
| "mean": -2.0682646209024824e-05, |
| "std": 0.024059493094682693, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_out.0.bias": { |
| "min": -0.10526668280363083, |
| "max": 0.12198653072118759, |
| "mean": -0.001968209631741047, |
| "std": 0.0288400761783123, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.3.g": { |
| "min": 0.3114672601222992, |
| "max": 1.1185976266860962, |
| "mean": 0.6660763025283813, |
| "std": 0.09736555069684982, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.4.ff.0.0.weight": { |
| "min": -0.872668981552124, |
| "max": 0.6275054216384888, |
| "mean": 0.0016755885444581509, |
| "std": 0.04743882641196251, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.4.ff.0.0.bias": { |
| "min": -0.2710971236228943, |
| "max": 0.03426326811313629, |
| "mean": -0.0465819425880909, |
| "std": 0.04054969921708107, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.1.4.ff.2.weight": { |
| "min": -0.922234833240509, |
| "max": 0.9643772840499878, |
| "mean": 0.0010214494541287422, |
| "std": 0.04070669412612915, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.1.4.ff.2.bias": { |
| "min": -0.14429129660129547, |
| "max": 0.07484762370586395, |
| "mean": -0.00908473040908575, |
| "std": 0.025672495365142822, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.1.g": { |
| "min": 0.2402428686618805, |
| "max": 0.711609423160553, |
| "mean": 0.44710344076156616, |
| "std": 0.05906940996646881, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_q.weight": { |
| "min": -0.27207210659980774, |
| "max": 0.29753801226615906, |
| "mean": 9.350538675789721e-06, |
| "std": 0.035469669848680496, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_q.bias": { |
| "min": -0.11918215453624725, |
| "max": 0.1183757483959198, |
| "mean": 0.0007599537493661046, |
| "std": 0.027609599754214287, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_k.weight": { |
| "min": -0.2805421054363251, |
| "max": 0.2793859839439392, |
| "mean": -7.715764513704926e-05, |
| "std": 0.035099178552627563, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_k.bias": { |
| "min": -2.506035566329956, |
| "max": 2.518012046813965, |
| "mean": 0.026713747531175613, |
| "std": 0.5862806439399719, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_v.weight": { |
| "min": -0.22091814875602722, |
| "max": 0.27132153511047363, |
| "mean": 2.8913418645970523e-06, |
| "std": 0.0307327788323164, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_v.bias": { |
| "min": -0.03352135419845581, |
| "max": 0.03120853193104267, |
| "mean": 0.00011218251165701076, |
| "std": 0.012406233698129654, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_out.0.weight": { |
| "min": -0.2351619005203247, |
| "max": 0.23147742450237274, |
| "mean": 5.6937635235954076e-05, |
| "std": 0.0256962887942791, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_out.0.bias": { |
| "min": -0.1356453150510788, |
| "max": 0.1271977722644806, |
| "mean": -0.005494291428476572, |
| "std": 0.0399438738822937, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.3.g": { |
| "min": 0.3544028699398041, |
| "max": 1.1697261333465576, |
| "mean": 0.7103750109672546, |
| "std": 0.10338432341814041, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.4.ff.0.0.weight": { |
| "min": -0.6172477006912231, |
| "max": 0.5542004108428955, |
| "mean": 0.001160221640020609, |
| "std": 0.046119727194309235, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.4.ff.0.0.bias": { |
| "min": -0.18825410306453705, |
| "max": 0.024966172873973846, |
| "mean": -0.03482227772474289, |
| "std": 0.02857418917119503, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.2.4.ff.2.weight": { |
| "min": -1.130850911140442, |
| "max": 0.9707417488098145, |
| "mean": 0.0003595067828428, |
| "std": 0.042347487062215805, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.2.4.ff.2.bias": { |
| "min": -0.5971466898918152, |
| "max": 0.06270916759967804, |
| "mean": -0.004877141211181879, |
| "std": 0.02859053947031498, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.1.g": { |
| "min": 0.3752330243587494, |
| "max": 0.9386839866638184, |
| "mean": 0.5923458337783813, |
| "std": 0.06656130403280258, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_q.weight": { |
| "min": -0.3911682367324829, |
| "max": 0.3688437342643738, |
| "mean": 7.119165093172342e-05, |
| "std": 0.037188753485679626, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_q.bias": { |
| "min": -0.11875540018081665, |
| "max": 0.13628698885440826, |
| "mean": 0.0009287752327509224, |
| "std": 0.029227793216705322, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_k.weight": { |
| "min": -0.6185974478721619, |
| "max": 0.5083587169647217, |
| "mean": 1.5249222997226752e-05, |
| "std": 0.036442261189222336, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_k.bias": { |
| "min": -8.17552661895752, |
| "max": 8.776671409606934, |
| "mean": -0.1091664582490921, |
| "std": 1.6969325542449951, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_v.weight": { |
| "min": -0.27638494968414307, |
| "max": 0.23973813652992249, |
| "mean": 5.3197330998955294e-05, |
| "std": 0.03261549770832062, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_v.bias": { |
| "min": -0.051992662250995636, |
| "max": 0.03946495056152344, |
| "mean": 9.150505502475426e-05, |
| "std": 0.012954742647707462, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_out.0.weight": { |
| "min": -0.23067787289619446, |
| "max": 0.23443163931369781, |
| "mean": -2.1657757315551862e-05, |
| "std": 0.029391853138804436, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_out.0.bias": { |
| "min": -0.20401200652122498, |
| "max": 0.10544212907552719, |
| "mean": -0.004023304674774408, |
| "std": 0.0326065756380558, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.3.g": { |
| "min": 0.33983615040779114, |
| "max": 1.0106816291809082, |
| "mean": 0.7006407380104065, |
| "std": 0.09645594656467438, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.4.ff.0.0.weight": { |
| "min": -0.5642791390419006, |
| "max": 0.832179069519043, |
| "mean": 0.00041513508767820895, |
| "std": 0.042302437126636505, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.4.ff.0.0.bias": { |
| "min": -0.21134838461875916, |
| "max": 0.030589817091822624, |
| "mean": -0.032172758132219315, |
| "std": 0.026476319879293442, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.3.4.ff.2.weight": { |
| "min": -0.7536408305168152, |
| "max": 0.717832088470459, |
| "mean": -9.409409358340781e-06, |
| "std": 0.03684220835566521, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.3.4.ff.2.bias": { |
| "min": -0.2631220519542694, |
| "max": 0.10570736974477768, |
| "mean": -0.003029324347153306, |
| "std": 0.028848078101873398, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.1.g": { |
| "min": 0.28446710109710693, |
| "max": 0.6937389373779297, |
| "mean": 0.49939653277397156, |
| "std": 0.04629269987344742, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_q.weight": { |
| "min": -0.27887189388275146, |
| "max": 0.23408503830432892, |
| "mean": -0.00011133109364891425, |
| "std": 0.03876320272684097, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_q.bias": { |
| "min": -0.15426576137542725, |
| "max": 0.1266399770975113, |
| "mean": -0.0022300498094409704, |
| "std": 0.0333842970430851, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_k.weight": { |
| "min": -0.41348376870155334, |
| "max": 0.6593844294548035, |
| "mean": -1.978595719265286e-05, |
| "std": 0.039100244641304016, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_k.bias": { |
| "min": -4.232041358947754, |
| "max": 4.715827465057373, |
| "mean": -0.020488303154706955, |
| "std": 1.0068391561508179, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_v.weight": { |
| "min": -0.24481239914894104, |
| "max": 0.2074868232011795, |
| "mean": 4.380439349915832e-05, |
| "std": 0.03396626561880112, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_v.bias": { |
| "min": -0.03449943661689758, |
| "max": 0.044728994369506836, |
| "mean": -1.8020247807726264e-05, |
| "std": 0.012624197639524937, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_out.0.weight": { |
| "min": -0.20050014555454254, |
| "max": 0.20566238462924957, |
| "mean": -2.9678063583560288e-05, |
| "std": 0.03102380409836769, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_out.0.bias": { |
| "min": -0.19964830577373505, |
| "max": 0.11326169967651367, |
| "mean": -0.00291792256757617, |
| "std": 0.03448895364999771, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.3.g": { |
| "min": 0.36708179116249084, |
| "max": 1.0548574924468994, |
| "mean": 0.6704699397087097, |
| "std": 0.06616173684597015, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.4.ff.0.0.weight": { |
| "min": -0.397816002368927, |
| "max": 0.5021188855171204, |
| "mean": -3.856579860439524e-05, |
| "std": 0.041137274354696274, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.4.ff.0.0.bias": { |
| "min": -0.12784262001514435, |
| "max": 0.02675941213965416, |
| "mean": -0.030531462281942368, |
| "std": 0.02184327319264412, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.4.4.ff.2.weight": { |
| "min": -0.4485797882080078, |
| "max": 0.43235480785369873, |
| "mean": 8.378911297768354e-05, |
| "std": 0.034896139055490494, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.4.4.ff.2.bias": { |
| "min": -0.26721277832984924, |
| "max": 0.07248232513666153, |
| "mean": -0.0011095060035586357, |
| "std": 0.023109637200832367, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.1.g": { |
| "min": 0.287344753742218, |
| "max": 0.6839542388916016, |
| "mean": 0.5244242548942566, |
| "std": 0.047291453927755356, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_q.weight": { |
| "min": -0.22201856970787048, |
| "max": 0.22311273217201233, |
| "mean": 1.577789407747332e-05, |
| "std": 0.038952890783548355, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_q.bias": { |
| "min": -0.13627174496650696, |
| "max": 0.1090594157576561, |
| "mean": 0.00023713918926659971, |
| "std": 0.029215561226010323, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_k.weight": { |
| "min": -0.3747805953025818, |
| "max": 0.43678468465805054, |
| "mean": -9.573410352459177e-06, |
| "std": 0.03928905352950096, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_k.bias": { |
| "min": -3.840266227722168, |
| "max": 4.992228984832764, |
| "mean": 0.009751387871801853, |
| "std": 0.8444771766662598, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_v.weight": { |
| "min": -0.22314536571502686, |
| "max": 0.21986283361911774, |
| "mean": -2.0974857761757448e-07, |
| "std": 0.034413520246744156, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_v.bias": { |
| "min": -0.043581560254096985, |
| "max": 0.03578736633062363, |
| "mean": -0.00025875651044771075, |
| "std": 0.012076529674232006, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_out.0.weight": { |
| "min": -0.21286383271217346, |
| "max": 0.18843913078308105, |
| "mean": -1.6783855244284496e-05, |
| "std": 0.03154028207063675, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_out.0.bias": { |
| "min": -0.18049854040145874, |
| "max": 0.12063688784837723, |
| "mean": -0.0024107899516820908, |
| "std": 0.04124762490391731, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.3.g": { |
| "min": 0.4223836064338684, |
| "max": 0.9401367902755737, |
| "mean": 0.6626168489456177, |
| "std": 0.05654710531234741, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.4.ff.0.0.weight": { |
| "min": -0.3711914122104645, |
| "max": 0.4754900634288788, |
| "mean": -8.231064566643909e-05, |
| "std": 0.04089626669883728, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.4.ff.0.0.bias": { |
| "min": -0.2078404426574707, |
| "max": 0.02713177166879177, |
| "mean": -0.030231105163693428, |
| "std": 0.021318932995200157, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.5.4.ff.2.weight": { |
| "min": -0.3397354185581207, |
| "max": 0.7327741384506226, |
| "mean": 8.48791969474405e-05, |
| "std": 0.03477150574326515, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.5.4.ff.2.bias": { |
| "min": -0.23985552787780762, |
| "max": 0.050368692725896835, |
| "mean": -0.0011948456522077322, |
| "std": 0.02045026607811451, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.1.g": { |
| "min": 0.3060871660709381, |
| "max": 0.6523372530937195, |
| "mean": 0.5249941945075989, |
| "std": 0.04590437561273575, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_q.weight": { |
| "min": -0.30396750569343567, |
| "max": 0.2171545922756195, |
| "mean": 7.000747427809983e-05, |
| "std": 0.03949857875704765, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_q.bias": { |
| "min": -0.14921154081821442, |
| "max": 0.1312280148267746, |
| "mean": 0.00034826344926841557, |
| "std": 0.030445020645856857, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_k.weight": { |
| "min": -0.2569451630115509, |
| "max": 0.20191657543182373, |
| "mean": 3.105865835095756e-05, |
| "std": 0.03948771581053734, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_k.bias": { |
| "min": -2.332984685897827, |
| "max": 2.372544527053833, |
| "mean": -0.026222502812743187, |
| "std": 0.44942858815193176, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_v.weight": { |
| "min": -0.1888340413570404, |
| "max": 0.21024198830127716, |
| "mean": 3.7197845813352615e-05, |
| "std": 0.03479824960231781, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_v.bias": { |
| "min": -0.031675707548856735, |
| "max": 0.035443130880594254, |
| "mean": -0.00020022659737151116, |
| "std": 0.012285580858588219, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_out.0.weight": { |
| "min": -0.18818390369415283, |
| "max": 0.17026524245738983, |
| "mean": -6.799850234529004e-05, |
| "std": 0.032174814492464066, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_out.0.bias": { |
| "min": -0.13918116688728333, |
| "max": 0.13709498941898346, |
| "mean": -0.0025172303430736065, |
| "std": 0.05128452926874161, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.3.g": { |
| "min": 0.4672186076641083, |
| "max": 0.9546743631362915, |
| "mean": 0.6688124537467957, |
| "std": 0.05250026285648346, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.4.ff.0.0.weight": { |
| "min": -0.32424914836883545, |
| "max": 0.3096342980861664, |
| "mean": -1.5644945960957557e-06, |
| "std": 0.04095214605331421, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.4.ff.0.0.bias": { |
| "min": -0.12461961060762405, |
| "max": 0.02530832216143608, |
| "mean": -0.03069971315562725, |
| "std": 0.019789544865489006, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.6.4.ff.2.weight": { |
| "min": -0.43944308161735535, |
| "max": 0.4446093440055847, |
| "mean": 9.534660784993321e-05, |
| "std": 0.035124197602272034, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.6.4.ff.2.bias": { |
| "min": -0.22425536811351776, |
| "max": 0.051573775708675385, |
| "mean": -0.001182063017040491, |
| "std": 0.018455415964126587, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.1.g": { |
| "min": 0.3393731713294983, |
| "max": 0.737841010093689, |
| "mean": 0.5586089491844177, |
| "std": 0.04119626432657242, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_q.weight": { |
| "min": -0.2723452150821686, |
| "max": 0.2782283425331116, |
| "mean": 1.9915583834517747e-05, |
| "std": 0.04106247052550316, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_q.bias": { |
| "min": -0.13683027029037476, |
| "max": 0.1396752893924713, |
| "mean": 0.0004885591333732009, |
| "std": 0.026614630594849586, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_k.weight": { |
| "min": -0.49012690782546997, |
| "max": 0.35547417402267456, |
| "mean": 8.882825932232663e-05, |
| "std": 0.04070047289133072, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_k.bias": { |
| "min": -2.2938547134399414, |
| "max": 1.7426533699035645, |
| "mean": -0.021057037636637688, |
| "std": 0.49975258111953735, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_v.weight": { |
| "min": -0.21735826134681702, |
| "max": 0.19773884117603302, |
| "mean": -4.063967935508117e-05, |
| "std": 0.03423747047781944, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_v.bias": { |
| "min": -0.041265569627285004, |
| "max": 0.03861430287361145, |
| "mean": -0.00014519633259624243, |
| "std": 0.012876993976533413, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_out.0.weight": { |
| "min": -0.17728237807750702, |
| "max": 0.18350861966609955, |
| "mean": 4.7603076382074505e-05, |
| "std": 0.031560394912958145, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_out.0.bias": { |
| "min": -0.1796274185180664, |
| "max": 0.18359197676181793, |
| "mean": -0.0022178757935762405, |
| "std": 0.05480958893895149, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.3.g": { |
| "min": 0.47430306673049927, |
| "max": 1.0235347747802734, |
| "mean": 0.645234227180481, |
| "std": 0.05006485432386398, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.4.ff.0.0.weight": { |
| "min": -0.2717384696006775, |
| "max": 0.3092706799507141, |
| "mean": 0.0001124507180065848, |
| "std": 0.04068849980831146, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.4.ff.0.0.bias": { |
| "min": -0.10565188527107239, |
| "max": 0.026852920651435852, |
| "mean": -0.029502389952540398, |
| "std": 0.017905903980135918, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.7.4.ff.2.weight": { |
| "min": -0.33881059288978577, |
| "max": 0.3287763297557831, |
| "mean": 5.716992018278688e-05, |
| "std": 0.03441813588142395, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.7.4.ff.2.bias": { |
| "min": -0.1814029961824417, |
| "max": 0.04198184236884117, |
| "mean": -0.0010715797543525696, |
| "std": 0.017202889546751976, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.1.g": { |
| "min": 0.32546839118003845, |
| "max": 0.6852879524230957, |
| "mean": 0.5111152529716492, |
| "std": 0.036710962653160095, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_q.weight": { |
| "min": -0.23360855877399445, |
| "max": 0.22551532089710236, |
| "mean": -3.5930093872593716e-05, |
| "std": 0.039181701838970184, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_q.bias": { |
| "min": -0.11516069620847702, |
| "max": 0.13141536712646484, |
| "mean": 0.00015141721814870834, |
| "std": 0.02916705049574375, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_k.weight": { |
| "min": -0.3523465394973755, |
| "max": 0.2849816083908081, |
| "mean": 7.249596819747239e-06, |
| "std": 0.039250195026397705, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_k.bias": { |
| "min": -4.126643180847168, |
| "max": 3.538667678833008, |
| "mean": -0.011556778103113174, |
| "std": 0.681910514831543, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_v.weight": { |
| "min": -0.2112656831741333, |
| "max": 0.20894697308540344, |
| "mean": 3.47470777342096e-05, |
| "std": 0.03448949381709099, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_v.bias": { |
| "min": -0.03565378487110138, |
| "max": 0.0480014868080616, |
| "mean": 0.0007942374795675278, |
| "std": 0.012850471772253513, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_out.0.weight": { |
| "min": -0.21031072735786438, |
| "max": 0.19297289848327637, |
| "mean": -1.2874927506345557e-06, |
| "std": 0.03169998526573181, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_out.0.bias": { |
| "min": -0.18637274205684662, |
| "max": 0.17692941427230835, |
| "mean": -0.0028488910757005215, |
| "std": 0.05860321223735809, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.3.g": { |
| "min": 0.47467249631881714, |
| "max": 1.0397725105285645, |
| "mean": 0.6513394117355347, |
| "std": 0.049329087138175964, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.4.ff.0.0.weight": { |
| "min": -0.248422771692276, |
| "max": 0.32902756333351135, |
| "mean": 0.00018066739721689373, |
| "std": 0.04057690501213074, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.4.ff.0.0.bias": { |
| "min": -0.12427264451980591, |
| "max": 0.024594629183411598, |
| "mean": -0.030488643795251846, |
| "std": 0.017578164115548134, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.8.4.ff.2.weight": { |
| "min": -0.4205840826034546, |
| "max": 0.4813268184661865, |
| "mean": 2.129650965798646e-06, |
| "std": 0.035403117537498474, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.8.4.ff.2.bias": { |
| "min": -0.15161579847335815, |
| "max": 0.043303120881319046, |
| "mean": 3.9640130125917494e-05, |
| "std": 0.014866231009364128, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.1.g": { |
| "min": 0.3155551552772522, |
| "max": 0.6806549429893494, |
| "mean": 0.5528165102005005, |
| "std": 0.04051704332232475, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_q.weight": { |
| "min": -0.2062118798494339, |
| "max": 0.21964126825332642, |
| "mean": 3.0860355764161795e-05, |
| "std": 0.038303423672914505, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_q.bias": { |
| "min": -0.1376407891511917, |
| "max": 0.11259414255619049, |
| "mean": 2.069001493509859e-05, |
| "std": 0.02579990215599537, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_k.weight": { |
| "min": -0.40213435888290405, |
| "max": 0.3705216944217682, |
| "mean": 2.6252395400661044e-05, |
| "std": 0.03818526491522789, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_k.bias": { |
| "min": -3.765413761138916, |
| "max": 2.86456298828125, |
| "mean": 0.0011342763900756836, |
| "std": 0.5163310766220093, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_v.weight": { |
| "min": -0.20278441905975342, |
| "max": 0.1972842514514923, |
| "mean": 2.9531782274716534e-05, |
| "std": 0.034300558269023895, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_v.bias": { |
| "min": -0.05089922249317169, |
| "max": 0.03997639939188957, |
| "mean": -0.00041936602792702615, |
| "std": 0.013420597650110722, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_out.0.weight": { |
| "min": -0.19621425867080688, |
| "max": 0.20147208869457245, |
| "mean": -1.2328569937380962e-05, |
| "std": 0.0318082757294178, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_out.0.bias": { |
| "min": -0.19283677637577057, |
| "max": 0.1948237270116806, |
| "mean": -0.002969849156215787, |
| "std": 0.06253352016210556, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.3.g": { |
| "min": 0.34950727224349976, |
| "max": 1.081899642944336, |
| "mean": 0.6671000123023987, |
| "std": 0.05490493029356003, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.4.ff.0.0.weight": { |
| "min": -0.22493921220302582, |
| "max": 0.2511034309864044, |
| "mean": 0.0003591308486647904, |
| "std": 0.04076593369245529, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.4.ff.0.0.bias": { |
| "min": -0.09088904410600662, |
| "max": 0.04371574521064758, |
| "mean": -0.030075963586568832, |
| "std": 0.01758558303117752, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.9.4.ff.2.weight": { |
| "min": -0.35314324498176575, |
| "max": 0.303651362657547, |
| "mean": -4.348178117652424e-05, |
| "std": 0.03712818771600723, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.9.4.ff.2.bias": { |
| "min": -0.16180230677127838, |
| "max": 0.0634349063038826, |
| "mean": -8.249300299212337e-05, |
| "std": 0.019394585862755775, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.1.g": { |
| "min": 0.34883353114128113, |
| "max": 0.7206243872642517, |
| "mean": 0.5422865748405457, |
| "std": 0.03884800896048546, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_q.weight": { |
| "min": -0.21920670568943024, |
| "max": 0.22291362285614014, |
| "mean": -1.1165878277097363e-05, |
| "std": 0.039236169308423996, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_q.bias": { |
| "min": -0.11826413869857788, |
| "max": 0.17058128118515015, |
| "mean": 0.0002835137420333922, |
| "std": 0.02510087564587593, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_k.weight": { |
| "min": -0.2464587390422821, |
| "max": 0.3006129264831543, |
| "mean": -3.662023664219305e-05, |
| "std": 0.03893572464585304, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_k.bias": { |
| "min": -3.4999661445617676, |
| "max": 3.709076166152954, |
| "mean": 0.015840880572795868, |
| "std": 0.7814859747886658, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_v.weight": { |
| "min": -0.2185182124376297, |
| "max": 0.23746132850646973, |
| "mean": -1.3619632227346301e-05, |
| "std": 0.03630794584751129, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_v.bias": { |
| "min": -0.04712348431348801, |
| "max": 0.05133059248328209, |
| "mean": 0.00048102246364578605, |
| "std": 0.01351132895797491, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_out.0.weight": { |
| "min": -0.21373434364795685, |
| "max": 0.2173190861940384, |
| "mean": 5.650868115480989e-05, |
| "std": 0.033619917929172516, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_out.0.bias": { |
| "min": -0.21108141541481018, |
| "max": 0.23115544021129608, |
| "mean": -0.005106039810925722, |
| "std": 0.06184696406126022, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.3.g": { |
| "min": 0.36205485463142395, |
| "max": 1.099104642868042, |
| "mean": 0.6992122530937195, |
| "std": 0.05326760187745094, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.4.ff.0.0.weight": { |
| "min": -0.23436696827411652, |
| "max": 0.24465103447437286, |
| "mean": 0.00046349139302037656, |
| "std": 0.04127480834722519, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.4.ff.0.0.bias": { |
| "min": -0.09793505817651749, |
| "max": 0.0681939497590065, |
| "mean": -0.03142588585615158, |
| "std": 0.0180974081158638, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.10.4.ff.2.weight": { |
| "min": -0.3012528717517853, |
| "max": 0.3511028289794922, |
| "mean": -8.16234532976523e-05, |
| "std": 0.04028059542179108, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.10.4.ff.2.bias": { |
| "min": -0.15210135281085968, |
| "max": 0.14944450557231903, |
| "mean": 0.00025588623248040676, |
| "std": 0.023021480068564415, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.1.g": { |
| "min": 1.0, |
| "max": 1.0, |
| "mean": 1.0, |
| "std": 0.0, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_q.weight": { |
| "min": -0.031249936670064926, |
| "max": 0.031249839812517166, |
| "mean": -1.9292721844976768e-05, |
| "std": 0.01804409734904766, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_q.bias": { |
| "min": -0.031226642429828644, |
| "max": 0.03100142627954483, |
| "mean": -0.0010842883493751287, |
| "std": 0.01795371063053608, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_k.weight": { |
| "min": -0.031249966472387314, |
| "max": 0.031249895691871643, |
| "mean": 3.5441100862954045e-06, |
| "std": 0.018044503405690193, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_k.bias": { |
| "min": -0.031156372278928757, |
| "max": 0.031184475868940353, |
| "mean": 0.0003338930255267769, |
| "std": 0.018065759912133217, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_v.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_v.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_out.0.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_out.0.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.3.g": { |
| "min": 1.0, |
| "max": 1.0, |
| "mean": 1.0, |
| "std": 0.0, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.4.ff.0.0.weight": { |
| "min": -0.031249985098838806, |
| "max": 0.031249992549419403, |
| "mean": -8.39352924231207e-06, |
| "std": 0.018043218180537224, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.4.ff.0.0.bias": { |
| "min": -0.03124961629509926, |
| "max": 0.031239181756973267, |
| "mean": 0.00015365774743258953, |
| "std": 0.017994258552789688, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.11.4.ff.2.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.11.4.ff.2.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.1.g": { |
| "min": 0.3829966187477112, |
| "max": 0.718121349811554, |
| "mean": 0.5806018114089966, |
| "std": 0.03862323611974716, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_q.weight": { |
| "min": -0.23782959580421448, |
| "max": 0.1963561624288559, |
| "mean": 2.6626767066773027e-05, |
| "std": 0.03746971860527992, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_q.bias": { |
| "min": -0.11848776042461395, |
| "max": 0.1658152937889099, |
| "mean": 0.0009899433935061097, |
| "std": 0.027532605454325676, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_k.weight": { |
| "min": -0.2458610236644745, |
| "max": 0.5000857710838318, |
| "mean": -5.0437982281437144e-05, |
| "std": 0.037627607583999634, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_k.bias": { |
| "min": -3.936108350753784, |
| "max": 3.7635273933410645, |
| "mean": -0.003571532666683197, |
| "std": 0.6807447671890259, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_v.weight": { |
| "min": -0.2272127866744995, |
| "max": 0.25125452876091003, |
| "mean": -1.1669091691146605e-05, |
| "std": 0.03743912652134895, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_v.bias": { |
| "min": -0.07160257548093796, |
| "max": 0.08056868612766266, |
| "mean": -0.0005193912656977773, |
| "std": 0.015654100105166435, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_out.0.weight": { |
| "min": -0.22808189690113068, |
| "max": 0.25764524936676025, |
| "mean": -2.8624439437408e-05, |
| "std": 0.03542578965425491, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_out.0.bias": { |
| "min": -0.2000962197780609, |
| "max": 0.21490387618541718, |
| "mean": -0.0055319443345069885, |
| "std": 0.0682973712682724, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.3.g": { |
| "min": 0.4052681028842926, |
| "max": 1.1870543956756592, |
| "mean": 0.7378469705581665, |
| "std": 0.05485502630472183, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.4.ff.0.0.weight": { |
| "min": -0.22090063989162445, |
| "max": 0.24591459333896637, |
| "mean": 0.0005211709067225456, |
| "std": 0.041342560201883316, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.4.ff.0.0.bias": { |
| "min": -0.10329551994800568, |
| "max": 0.02418467588722706, |
| "mean": -0.03265417367219925, |
| "std": 0.0188569538295269, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.12.4.ff.2.weight": { |
| "min": -0.44879788160324097, |
| "max": 0.421781986951828, |
| "mean": -0.00043243536492809653, |
| "std": 0.046903904527425766, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.12.4.ff.2.bias": { |
| "min": -0.25108596682548523, |
| "max": 0.46939900517463684, |
| "mean": 0.003194585908204317, |
| "std": 0.04450792446732521, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.0.weight": { |
| "min": -0.3169757127761841, |
| "max": 0.33316904306411743, |
| "mean": -2.5288825781899504e-05, |
| "std": 0.021290883421897888, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.13.1.g": { |
| "min": 0.3246179223060608, |
| "max": 0.6840593218803406, |
| "mean": 0.5709414482116699, |
| "std": 0.04453985393047333, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_q.weight": { |
| "min": -0.16449199616909027, |
| "max": 0.17385058104991913, |
| "mean": -4.8540678108111024e-05, |
| "std": 0.033184703439474106, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_q.bias": { |
| "min": -0.18657186627388, |
| "max": 0.14269262552261353, |
| "mean": 3.6818586522713304e-05, |
| "std": 0.029670175164937973, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_k.weight": { |
| "min": -0.3801823556423187, |
| "max": 0.24568894505500793, |
| "mean": -1.0017960448749363e-05, |
| "std": 0.0327659472823143, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_k.bias": { |
| "min": -3.6502017974853516, |
| "max": 3.2850754261016846, |
| "mean": -0.014260413125157356, |
| "std": 0.9845133423805237, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_v.weight": { |
| "min": -0.2349099963903427, |
| "max": 0.2473423033952713, |
| "mean": -1.7784623196348548e-05, |
| "std": 0.04170290008187294, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_v.bias": { |
| "min": -0.07268015295267105, |
| "max": 0.1542970985174179, |
| "mean": 0.000663664482999593, |
| "std": 0.02515619620680809, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_out.0.weight": { |
| "min": -0.2664458751678467, |
| "max": 0.2483866959810257, |
| "mean": -1.5342577171395533e-05, |
| "std": 0.040143273770809174, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_out.0.bias": { |
| "min": -0.18931904435157776, |
| "max": 0.19443899393081665, |
| "mean": -0.0012288358993828297, |
| "std": 0.06666287034749985, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.3.g": { |
| "min": 0.32919859886169434, |
| "max": 0.997564435005188, |
| "mean": 0.7190552949905396, |
| "std": 0.051983967423439026, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.4.ff.0.0.weight": { |
| "min": -0.2313733994960785, |
| "max": 0.24550800025463104, |
| "mean": 0.00018263014499098063, |
| "std": 0.04090628772974014, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.4.ff.0.0.bias": { |
| "min": -0.11402574181556702, |
| "max": 0.018650896847248077, |
| "mean": -0.0424647182226181, |
| "std": 0.0188254714012146, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.13.4.ff.2.weight": { |
| "min": -0.3894314467906952, |
| "max": 0.4067791998386383, |
| "mean": -2.1846279196324758e-05, |
| "std": 0.048540692776441574, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.13.4.ff.2.bias": { |
| "min": -0.692162811756134, |
| "max": 0.4120035469532013, |
| "mean": 0.000852768833283335, |
| "std": 0.060242246836423874, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.0.weight": { |
| "min": 0.0, |
| "max": 1.0, |
| "mean": 0.00048828125, |
| "std": 0.0220916960388422, |
| "sparsity": 0.99951171875, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.14.1.g": { |
| "min": 1.0, |
| "max": 1.0, |
| "mean": 1.0, |
| "std": 0.0, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_q.weight": { |
| "min": -0.031249970197677612, |
| "max": 0.031249817460775375, |
| "mean": -2.1022657165303826e-05, |
| "std": 0.018035436049103737, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_q.bias": { |
| "min": -0.03122086077928543, |
| "max": 0.031233571469783783, |
| "mean": -0.0006771883927285671, |
| "std": 0.01782997138798237, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_k.weight": { |
| "min": -0.03124987706542015, |
| "max": 0.031249921768903732, |
| "mean": -8.839062502374873e-06, |
| "std": 0.01803446188569069, |
| "sparsity": 9.5367431640625e-07, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_k.bias": { |
| "min": -0.031232360750436783, |
| "max": 0.031245984137058258, |
| "mean": -0.0007298353011719882, |
| "std": 0.017944591119885445, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_v.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_v.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_out.0.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_out.0.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.3.g": { |
| "min": 1.0, |
| "max": 1.0, |
| "mean": 1.0, |
| "std": 0.0, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.4.ff.0.0.weight": { |
| "min": -0.03125, |
| "max": 0.031249988824129105, |
| "mean": 3.591749646147946e-06, |
| "std": 0.018040824681520462, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.4.ff.0.0.bias": { |
| "min": -0.031234480440616608, |
| "max": 0.031246982514858246, |
| "mean": 0.0001957040512934327, |
| "std": 0.018076537176966667, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.14.4.ff.2.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.14.4.ff.2.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.0.weight": { |
| "min": -0.23450319468975067, |
| "max": 0.2724616229534149, |
| "mean": 6.948144346097251e-06, |
| "std": 0.01881224475800991, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.15.1.g": { |
| "min": 0.32128843665122986, |
| "max": 0.6922435760498047, |
| "mean": 0.5815606117248535, |
| "std": 0.045744746923446655, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_q.weight": { |
| "min": -0.18168264627456665, |
| "max": 0.1974717229604721, |
| "mean": -1.171275016531581e-05, |
| "std": 0.03318728506565094, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_q.bias": { |
| "min": -0.16043128073215485, |
| "max": 0.1292782723903656, |
| "mean": -0.0010662535205483437, |
| "std": 0.034117527306079865, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_k.weight": { |
| "min": -0.3318951725959778, |
| "max": 0.31116846203804016, |
| "mean": -1.0326401024940424e-05, |
| "std": 0.03223801404237747, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_k.bias": { |
| "min": -7.791203022003174, |
| "max": 8.74953842163086, |
| "mean": 0.09337067604064941, |
| "std": 1.61784029006958, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_v.weight": { |
| "min": -0.23363685607910156, |
| "max": 0.24183623492717743, |
| "mean": 4.133234324399382e-05, |
| "std": 0.0408620610833168, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_v.bias": { |
| "min": -0.07588791847229004, |
| "max": 0.0656837597489357, |
| "mean": 0.00047856790479272604, |
| "std": 0.01940334029495716, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_out.0.weight": { |
| "min": -0.2455652505159378, |
| "max": 0.2337566763162613, |
| "mean": -2.8880367608508095e-06, |
| "std": 0.03943672403693199, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_out.0.bias": { |
| "min": -0.16261433064937592, |
| "max": 0.1605682373046875, |
| "mean": 0.0016338212881237268, |
| "std": 0.06525633484125137, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.3.g": { |
| "min": 0.5568146705627441, |
| "max": 0.9421050548553467, |
| "mean": 0.7127699851989746, |
| "std": 0.03979077190160751, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.4.ff.0.0.weight": { |
| "min": -0.22831875085830688, |
| "max": 0.2548784911632538, |
| "mean": -4.536488631856628e-05, |
| "std": 0.040581412613391876, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.4.ff.0.0.bias": { |
| "min": -0.13459284603595734, |
| "max": 0.02228192612528801, |
| "mean": -0.04134010896086693, |
| "std": 0.018355557695031166, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.15.4.ff.2.weight": { |
| "min": -0.4211236536502838, |
| "max": 0.3922184407711029, |
| "mean": -4.3558138713706285e-06, |
| "std": 0.04779110848903656, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.15.4.ff.2.bias": { |
| "min": -0.6065256595611572, |
| "max": 0.6503778696060181, |
| "mean": 0.0015810506884008646, |
| "std": 0.05679204687476158, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.0.weight": { |
| "min": -0.2516687214374542, |
| "max": 0.3206498920917511, |
| "mean": -6.057634891476482e-06, |
| "std": 0.0196156594902277, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.16.1.g": { |
| "min": 0.35995498299598694, |
| "max": 0.6810278296470642, |
| "mean": 0.5706292986869812, |
| "std": 0.042767371982336044, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_q.weight": { |
| "min": -0.22037938237190247, |
| "max": 0.1769036501646042, |
| "mean": -3.467117130639963e-05, |
| "std": 0.03430242836475372, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_q.bias": { |
| "min": -0.16339237987995148, |
| "max": 0.23269455134868622, |
| "mean": 0.00036311167059466243, |
| "std": 0.03283863142132759, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_k.weight": { |
| "min": -0.2634328007698059, |
| "max": 0.23954781889915466, |
| "mean": -5.2383133152034134e-05, |
| "std": 0.03390158340334892, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_k.bias": { |
| "min": -4.847443580627441, |
| "max": 5.083292484283447, |
| "mean": 0.043835077434778214, |
| "std": 1.227935552597046, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_v.weight": { |
| "min": -0.24653136730194092, |
| "max": 0.25027644634246826, |
| "mean": 7.213905337266624e-05, |
| "std": 0.04399324953556061, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_v.bias": { |
| "min": -0.06254159659147263, |
| "max": 0.054444003850221634, |
| "mean": 0.000650427769869566, |
| "std": 0.017183585092425346, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_out.0.weight": { |
| "min": -0.28619009256362915, |
| "max": 0.2717132866382599, |
| "mean": -4.993668699171394e-05, |
| "std": 0.04299163073301315, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_out.0.bias": { |
| "min": -0.16040603816509247, |
| "max": 0.17025713622570038, |
| "mean": -0.0028844610787928104, |
| "std": 0.05926158279180527, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.3.g": { |
| "min": 0.5196964740753174, |
| "max": 0.9310137629508972, |
| "mean": 0.7133955955505371, |
| "std": 0.03807961940765381, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.4.ff.0.0.weight": { |
| "min": -0.23809659481048584, |
| "max": 0.24939550459384918, |
| "mean": 0.00046480150194838643, |
| "std": 0.04046152904629707, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.4.ff.0.0.bias": { |
| "min": -0.14403879642486572, |
| "max": 0.041449662297964096, |
| "mean": -0.03967723995447159, |
| "std": 0.02051496133208275, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.16.4.ff.2.weight": { |
| "min": -0.5321223735809326, |
| "max": 0.582199215888977, |
| "mean": 5.9441426856210455e-06, |
| "std": 0.04886837303638458, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.16.4.ff.2.bias": { |
| "min": -0.5183588862419128, |
| "max": 0.49274152517318726, |
| "mean": 0.0023598431143909693, |
| "std": 0.053401440382003784, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.0.weight": { |
| "min": -0.27355626225471497, |
| "max": 0.31514689326286316, |
| "mean": 1.8169534996559378e-06, |
| "std": 0.020052826032042503, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.17.1.g": { |
| "min": 0.36634165048599243, |
| "max": 0.7102516293525696, |
| "mean": 0.5930806994438171, |
| "std": 0.04571138322353363, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_q.weight": { |
| "min": -0.21087931096553802, |
| "max": 0.1994456797838211, |
| "mean": 3.07354457618203e-05, |
| "std": 0.034868594259023666, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_q.bias": { |
| "min": -0.1869715005159378, |
| "max": 0.20369935035705566, |
| "mean": 0.0009553421987220645, |
| "std": 0.0314984992146492, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_k.weight": { |
| "min": -0.28932973742485046, |
| "max": 0.33943668007850647, |
| "mean": -4.7415778681170195e-05, |
| "std": 0.034589733928442, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_k.bias": { |
| "min": -3.8712191581726074, |
| "max": 3.3820998668670654, |
| "mean": 0.014444351196289062, |
| "std": 0.8576834797859192, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_v.weight": { |
| "min": -0.2242382913827896, |
| "max": 0.24965918064117432, |
| "mean": -4.0143440855899826e-06, |
| "std": 0.04223589971661568, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_v.bias": { |
| "min": -0.05498581379652023, |
| "max": 0.046769097447395325, |
| "mean": -1.842428173404187e-05, |
| "std": 0.015840334817767143, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_out.0.weight": { |
| "min": -0.2928566634654999, |
| "max": 0.29091376066207886, |
| "mean": -7.36157790015568e-06, |
| "std": 0.04195090010762215, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_out.0.bias": { |
| "min": -0.12467863410711288, |
| "max": 0.25901108980178833, |
| "mean": -0.003233879804611206, |
| "std": 0.05313729867339134, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.3.g": { |
| "min": 0.4561373293399811, |
| "max": 0.8428487777709961, |
| "mean": 0.7054461240768433, |
| "std": 0.03489769622683525, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.4.ff.0.0.weight": { |
| "min": -0.5113534331321716, |
| "max": 0.3484715223312378, |
| "mean": 0.0003426253970246762, |
| "std": 0.04020649194717407, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.4.ff.0.0.bias": { |
| "min": -0.18678922951221466, |
| "max": 0.03952203318476677, |
| "mean": -0.03937358409166336, |
| "std": 0.02131999284029007, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.17.4.ff.2.weight": { |
| "min": -0.5436691045761108, |
| "max": 0.5556817054748535, |
| "mean": -7.17876828275621e-05, |
| "std": 0.05074293538928032, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.17.4.ff.2.bias": { |
| "min": -0.5110356211662292, |
| "max": 0.6633175015449524, |
| "mean": 0.002444919664412737, |
| "std": 0.04948664829134941, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.0.weight": { |
| "min": -0.3323739171028137, |
| "max": 0.2654549777507782, |
| "mean": 3.673961600725306e-06, |
| "std": 0.019390413537621498, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.18.1.g": { |
| "min": 0.32227811217308044, |
| "max": 0.7648001313209534, |
| "mean": 0.6509190201759338, |
| "std": 0.04508262872695923, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_q.weight": { |
| "min": -0.24930793046951294, |
| "max": 0.21936655044555664, |
| "mean": -2.44708098762203e-06, |
| "std": 0.036502547562122345, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_q.bias": { |
| "min": -0.32666686177253723, |
| "max": 0.2868551015853882, |
| "mean": -0.0006774846115149558, |
| "std": 0.03851696848869324, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_k.weight": { |
| "min": -0.3097042739391327, |
| "max": 0.3694048821926117, |
| "mean": 6.485832273028791e-05, |
| "std": 0.03624315932393074, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_k.bias": { |
| "min": -4.71013069152832, |
| "max": 5.798623085021973, |
| "mean": 0.03792855516076088, |
| "std": 1.41161048412323, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_v.weight": { |
| "min": -0.22137394547462463, |
| "max": 0.20554855465888977, |
| "mean": -7.500727951992303e-05, |
| "std": 0.042491503059864044, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_v.bias": { |
| "min": -0.07746972888708115, |
| "max": 0.05126894265413284, |
| "mean": -0.0009250898147001863, |
| "std": 0.016401393339037895, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_out.0.weight": { |
| "min": -0.33084556460380554, |
| "max": 0.32904890179634094, |
| "mean": -4.916631951346062e-06, |
| "std": 0.042798250913619995, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_out.0.bias": { |
| "min": -0.2845572233200073, |
| "max": 0.11143017560243607, |
| "mean": -0.0012043914757668972, |
| "std": 0.04699280112981796, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.3.g": { |
| "min": 0.48666608333587646, |
| "max": 0.885034441947937, |
| "mean": 0.7373895049095154, |
| "std": 0.03794779255986214, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.4.ff.0.0.weight": { |
| "min": -0.3611343502998352, |
| "max": 0.27392831444740295, |
| "mean": 5.1206770876888186e-05, |
| "std": 0.04065323248505592, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.4.ff.0.0.bias": { |
| "min": -0.2472306787967682, |
| "max": 0.046531591564416885, |
| "mean": -0.03925502672791481, |
| "std": 0.023223698139190674, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.18.4.ff.2.weight": { |
| "min": -0.62546706199646, |
| "max": 0.596234142780304, |
| "mean": -6.186795508256182e-05, |
| "std": 0.0531260222196579, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.18.4.ff.2.bias": { |
| "min": -0.7086492776870728, |
| "max": 0.2654070556163788, |
| "mean": 0.0009191531571559608, |
| "std": 0.05119417607784271, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.0.weight": { |
| "min": -0.34331265091896057, |
| "max": 0.30340248346328735, |
| "mean": 2.3374013835564256e-07, |
| "std": 0.019139692187309265, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.19.1.g": { |
| "min": 0.3500247001647949, |
| "max": 0.7813002467155457, |
| "mean": 0.6387312412261963, |
| "std": 0.048984214663505554, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_q.weight": { |
| "min": -0.20559599995613098, |
| "max": 0.20657846331596375, |
| "mean": -5.995870742481202e-05, |
| "std": 0.03769858554005623, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_q.bias": { |
| "min": -0.25827330350875854, |
| "max": 0.26797717809677124, |
| "mean": -0.00040583324152976274, |
| "std": 0.04458905756473541, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_k.weight": { |
| "min": -0.35375165939331055, |
| "max": 0.32213273644447327, |
| "mean": -7.335219379456248e-06, |
| "std": 0.03720685839653015, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_k.bias": { |
| "min": -5.253459930419922, |
| "max": 4.198183536529541, |
| "mean": -0.0263908039778471, |
| "std": 1.0056793689727783, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_v.weight": { |
| "min": -0.23853513598442078, |
| "max": 0.24350698292255402, |
| "mean": -2.5575776817277074e-05, |
| "std": 0.04321583732962608, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_v.bias": { |
| "min": -0.06232254579663277, |
| "max": 0.05653427913784981, |
| "mean": 0.0003516775614116341, |
| "std": 0.014141896739602089, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_out.0.weight": { |
| "min": -0.437425822019577, |
| "max": 0.3736904561519623, |
| "mean": 1.4616346561524551e-05, |
| "std": 0.044127896428108215, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_out.0.bias": { |
| "min": -0.09596914798021317, |
| "max": 0.17601557075977325, |
| "mean": -0.0006586366798728704, |
| "std": 0.03512872755527496, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.3.g": { |
| "min": 0.42178472876548767, |
| "max": 1.06712007522583, |
| "mean": 0.7484290599822998, |
| "std": 0.04182668402791023, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.4.ff.0.0.weight": { |
| "min": -0.26583534479141235, |
| "max": 0.29665902256965637, |
| "mean": -7.891673885751516e-05, |
| "std": 0.04081389307975769, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.4.ff.0.0.bias": { |
| "min": -0.18455219268798828, |
| "max": 0.043140046298503876, |
| "mean": -0.03679502755403519, |
| "std": 0.0255513247102499, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.19.4.ff.2.weight": { |
| "min": -0.45756417512893677, |
| "max": 0.4861648976802826, |
| "mean": 4.3982381612295285e-05, |
| "std": 0.05422103777527809, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.19.4.ff.2.bias": { |
| "min": -0.2858409285545349, |
| "max": 0.5508930087089539, |
| "mean": -0.0008807203266769648, |
| "std": 0.047792647033929825, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.0.weight": { |
| "min": -0.2925868332386017, |
| "max": 0.32265621423721313, |
| "mean": 6.008186119288439e-06, |
| "std": 0.0199727825820446, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.20.1.g": { |
| "min": 0.2913132309913635, |
| "max": 0.7585903406143188, |
| "mean": 0.6507112979888916, |
| "std": 0.05193017050623894, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_q.weight": { |
| "min": -0.24352194368839264, |
| "max": 0.26151588559150696, |
| "mean": -5.6967542150232475e-06, |
| "std": 0.03961416333913803, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_q.bias": { |
| "min": -0.26712363958358765, |
| "max": 0.19983239471912384, |
| "mean": -0.0008771903812885284, |
| "std": 0.0517287477850914, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_k.weight": { |
| "min": -0.2718246877193451, |
| "max": 0.25335949659347534, |
| "mean": 5.239124220679514e-06, |
| "std": 0.03871086984872818, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_k.bias": { |
| "min": -12.94522476196289, |
| "max": 15.922240257263184, |
| "mean": 0.03318937495350838, |
| "std": 1.9867888689041138, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_v.weight": { |
| "min": -0.20649555325508118, |
| "max": 0.22559243440628052, |
| "mean": -7.256461685756221e-05, |
| "std": 0.040558841079473495, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_v.bias": { |
| "min": -0.06932304799556732, |
| "max": 0.06304260343313217, |
| "mean": 0.0001579949603183195, |
| "std": 0.014740646816790104, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_out.0.weight": { |
| "min": -0.4653640687465668, |
| "max": 0.3200652003288269, |
| "mean": 1.952598540810868e-05, |
| "std": 0.04059439152479172, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_out.0.bias": { |
| "min": -0.06398282200098038, |
| "max": 0.11537733674049377, |
| "mean": 0.0011978133115917444, |
| "std": 0.02469516545534134, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.3.g": { |
| "min": 0.3749999403953552, |
| "max": 0.9300609230995178, |
| "mean": 0.7510109543800354, |
| "std": 0.040018972009420395, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.4.ff.0.0.weight": { |
| "min": -0.27868181467056274, |
| "max": 0.27277180552482605, |
| "mean": -0.00016834630514495075, |
| "std": 0.041004978120326996, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.4.ff.0.0.bias": { |
| "min": -0.19812321662902832, |
| "max": 0.05135354399681091, |
| "mean": -0.032012395560741425, |
| "std": 0.025048717856407166, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.20.4.ff.2.weight": { |
| "min": -0.65754234790802, |
| "max": 0.5349372029304504, |
| "mean": -5.049940591561608e-05, |
| "std": 0.052857208997011185, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.20.4.ff.2.bias": { |
| "min": -0.1923648864030838, |
| "max": 0.5813060998916626, |
| "mean": -0.0005128913326188922, |
| "std": 0.041049525141716, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.0.weight": { |
| "min": -0.417529821395874, |
| "max": 0.3719121813774109, |
| "mean": 6.524643140437547e-06, |
| "std": 0.021627992391586304, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.21.1.g": { |
| "min": 0.21460720896720886, |
| "max": 0.7452309131622314, |
| "mean": 0.6493626832962036, |
| "std": 0.054172683507204056, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_q.weight": { |
| "min": -0.20914840698242188, |
| "max": 0.19524669647216797, |
| "mean": 4.0109844121616334e-05, |
| "std": 0.03945964202284813, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_q.bias": { |
| "min": -0.32907912135124207, |
| "max": 0.25925326347351074, |
| "mean": -0.003227418288588524, |
| "std": 0.05623279884457588, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_k.weight": { |
| "min": -0.20563212037086487, |
| "max": 0.25434860587120056, |
| "mean": 5.404070907388814e-05, |
| "std": 0.038562316447496414, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_k.bias": { |
| "min": -6.2339768409729, |
| "max": 6.921723365783691, |
| "mean": 0.04828859120607376, |
| "std": 1.383695363998413, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_v.weight": { |
| "min": -0.20957675576210022, |
| "max": 0.23022468388080597, |
| "mean": -4.7416378947673365e-06, |
| "std": 0.04131784662604332, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_v.bias": { |
| "min": -0.043760623782873154, |
| "max": 0.03593071922659874, |
| "mean": -6.6086213337257504e-06, |
| "std": 0.012794941663742065, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_out.0.weight": { |
| "min": -0.3974460959434509, |
| "max": 0.3449029326438904, |
| "mean": -5.5259803048102185e-05, |
| "std": 0.0423947237432003, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_out.0.bias": { |
| "min": -0.055080167949199677, |
| "max": 0.06271716207265854, |
| "mean": 0.0003585012163966894, |
| "std": 0.018664730712771416, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.3.g": { |
| "min": 0.3508152663707733, |
| "max": 1.0430189371109009, |
| "mean": 0.789574146270752, |
| "std": 0.048565711826086044, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.4.ff.0.0.weight": { |
| "min": -0.3336288034915924, |
| "max": 0.38612979650497437, |
| "mean": -0.00016904372023418546, |
| "std": 0.041490498930215836, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.4.ff.0.0.bias": { |
| "min": -0.15726615488529205, |
| "max": 0.05897233635187149, |
| "mean": -0.031808022409677505, |
| "std": 0.02507229521870613, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.21.4.ff.2.weight": { |
| "min": -0.6961155533790588, |
| "max": 0.4685930609703064, |
| "mean": -8.521115523763001e-05, |
| "std": 0.05180642008781433, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.21.4.ff.2.bias": { |
| "min": -0.24746476113796234, |
| "max": 0.32834842801094055, |
| "mean": -0.00026278701261617243, |
| "std": 0.041423212736845016, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.0.weight": { |
| "min": -0.2869253158569336, |
| "max": 0.35028234124183655, |
| "mean": -2.780619524855865e-06, |
| "std": 0.02424117736518383, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.22.1.g": { |
| "min": 0.1968069076538086, |
| "max": 0.7775169014930725, |
| "mean": 0.6701230406761169, |
| "std": 0.058515764772892, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_q.weight": { |
| "min": -0.2286878526210785, |
| "max": 0.23117558658123016, |
| "mean": -2.085552659991663e-05, |
| "std": 0.04044000059366226, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_q.bias": { |
| "min": -0.2196890264749527, |
| "max": 0.24058501422405243, |
| "mean": 0.0007775035337544978, |
| "std": 0.05580567941069603, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_k.weight": { |
| "min": -0.21652470529079437, |
| "max": 0.2261732518672943, |
| "mean": -7.23175035091117e-05, |
| "std": 0.03937419131398201, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_k.bias": { |
| "min": -8.891955375671387, |
| "max": 9.054566383361816, |
| "mean": -0.0012135691940784454, |
| "std": 1.846129059791565, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_v.weight": { |
| "min": -0.2690034806728363, |
| "max": 0.25858405232429504, |
| "mean": 4.355451528681442e-05, |
| "std": 0.03841076418757439, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_v.bias": { |
| "min": -0.057884324342012405, |
| "max": 0.05789237469434738, |
| "mean": 0.0003543176280800253, |
| "std": 0.014708762988448143, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_out.0.weight": { |
| "min": -0.263511061668396, |
| "max": 0.288027822971344, |
| "mean": -6.177674367791042e-05, |
| "std": 0.03907754644751549, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_out.0.bias": { |
| "min": -0.044037725776433945, |
| "max": 0.037295691668987274, |
| "mean": -9.799870167626068e-05, |
| "std": 0.013339235447347164, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.3.g": { |
| "min": 0.339274525642395, |
| "max": 1.0903433561325073, |
| "mean": 0.8638954162597656, |
| "std": 0.06374805420637131, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.4.ff.0.0.weight": { |
| "min": -0.4230613112449646, |
| "max": 0.41900894045829773, |
| "mean": 0.00031366912298835814, |
| "std": 0.043512988835573196, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.4.ff.0.0.bias": { |
| "min": -0.21445079147815704, |
| "max": 0.17045123875141144, |
| "mean": -0.029427748173475266, |
| "std": 0.03184095025062561, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.22.4.ff.2.weight": { |
| "min": -0.5979012846946716, |
| "max": 0.559224545955658, |
| "mean": -0.00014804149395786226, |
| "std": 0.053461432456970215, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.22.4.ff.2.bias": { |
| "min": -0.17863567173480988, |
| "max": 0.3767751455307007, |
| "mean": 0.0013495876919478178, |
| "std": 0.037288032472133636, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.0.weight": { |
| "min": -0.3942491412162781, |
| "max": 0.3687455952167511, |
| "mean": 3.7661615351680666e-05, |
| "std": 0.028617454692721367, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.23.1.g": { |
| "min": 0.2907008230686188, |
| "max": 0.8258129358291626, |
| "mean": 0.7054593563079834, |
| "std": 0.06773429363965988, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_q.weight": { |
| "min": -0.9265665411949158, |
| "max": 1.0269814729690552, |
| "mean": -2.791242877719924e-05, |
| "std": 0.04764382541179657, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_q.bias": { |
| "min": -0.8779393434524536, |
| "max": 0.8145599365234375, |
| "mean": -0.0002924790605902672, |
| "std": 0.09544122219085693, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_k.weight": { |
| "min": -0.27007606625556946, |
| "max": 0.24068056046962738, |
| "mean": -2.2448431991506368e-05, |
| "std": 0.038949914276599884, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_k.bias": { |
| "min": -23.705463409423828, |
| "max": 22.81535530090332, |
| "mean": -0.09178592264652252, |
| "std": 4.064526081085205, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_v.weight": { |
| "min": -0.2275296449661255, |
| "max": 0.2455320507287979, |
| "mean": -2.5536401153658517e-05, |
| "std": 0.03864150494337082, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_v.bias": { |
| "min": -0.06007588282227516, |
| "max": 0.045354753732681274, |
| "mean": -0.00013596308417618275, |
| "std": 0.014683394692838192, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_out.0.weight": { |
| "min": -0.33782336115837097, |
| "max": 0.3746013939380646, |
| "mean": 7.420163456117734e-06, |
| "std": 0.04082043468952179, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_out.0.bias": { |
| "min": -0.046125710010528564, |
| "max": 0.19506430625915527, |
| "mean": 0.0002738517359830439, |
| "std": 0.013541821390390396, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.3.g": { |
| "min": 0.373764306306839, |
| "max": 1.1280238628387451, |
| "mean": 0.8901123404502869, |
| "std": 0.06384868174791336, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.4.ff.0.0.weight": { |
| "min": -0.44741326570510864, |
| "max": 0.5422499775886536, |
| "mean": 2.5218110749847256e-05, |
| "std": 0.045580700039863586, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.4.ff.0.0.bias": { |
| "min": -0.22342835366725922, |
| "max": 0.08723597973585129, |
| "mean": -0.03199537843465805, |
| "std": 0.03770318627357483, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.23.4.ff.2.weight": { |
| "min": -0.7254156470298767, |
| "max": 0.6879446506500244, |
| "mean": 3.628328340710141e-05, |
| "std": 0.05179440602660179, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.23.4.ff.2.bias": { |
| "min": -0.174102783203125, |
| "max": 0.2178839147090912, |
| "mean": 3.535003634169698e-05, |
| "std": 0.03175075352191925, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.0.weight": { |
| "min": -0.33916032314300537, |
| "max": 0.37271323800086975, |
| "mean": 4.308380448492244e-05, |
| "std": 0.034135378897190094, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.24.1.g": { |
| "min": 0.3176645338535309, |
| "max": 1.2846463918685913, |
| "mean": 0.6014195084571838, |
| "std": 0.08323279023170471, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_q.weight": { |
| "min": -0.2829808294773102, |
| "max": 0.26017650961875916, |
| "mean": -3.0644375783595024e-06, |
| "std": 0.035980723798274994, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_q.bias": { |
| "min": -0.23540745675563812, |
| "max": 0.20547473430633545, |
| "mean": 0.0002399118966422975, |
| "std": 0.056001532822847366, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_k.weight": { |
| "min": -0.43518391251564026, |
| "max": 0.32444700598716736, |
| "mean": 2.422756006126292e-05, |
| "std": 0.03412417694926262, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_k.bias": { |
| "min": -5.537700176239014, |
| "max": 7.30228853225708, |
| "mean": -0.007349951192736626, |
| "std": 0.6983441114425659, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_v.weight": { |
| "min": -0.34386035799980164, |
| "max": 0.3621582090854645, |
| "mean": 0.00010323335300199687, |
| "std": 0.04783642664551735, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_v.bias": { |
| "min": -0.07365774363279343, |
| "max": 0.060269735753536224, |
| "mean": 0.0009362755226902664, |
| "std": 0.014931198209524155, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_out.0.weight": { |
| "min": -0.2561565041542053, |
| "max": 0.2865042984485626, |
| "mean": 4.9739428504835814e-06, |
| "std": 0.04156460985541344, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_out.0.bias": { |
| "min": -0.055231235921382904, |
| "max": 0.06271004676818848, |
| "mean": 0.00012724015687126666, |
| "std": 0.0071450709365308285, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.3.g": { |
| "min": 0.49412763118743896, |
| "max": 1.2182179689407349, |
| "mean": 1.0133787393569946, |
| "std": 0.11725164949893951, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.4.ff.0.0.weight": { |
| "min": -1.0939558744430542, |
| "max": 1.0474863052368164, |
| "mean": -4.8846173740457743e-05, |
| "std": 0.052417904138565063, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.4.ff.0.0.bias": { |
| "min": -0.22328178584575653, |
| "max": 0.172784686088562, |
| "mean": -0.02721056528389454, |
| "std": 0.0362662672996521, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.24.4.ff.2.weight": { |
| "min": -0.8832080960273743, |
| "max": 0.9217195510864258, |
| "mean": -0.00014604278840124607, |
| "std": 0.05329865962266922, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.24.4.ff.2.bias": { |
| "min": -0.1707809567451477, |
| "max": 0.3790228068828583, |
| "mean": 0.003364440519362688, |
| "std": 0.03984135016798973, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.0.weight": { |
| "min": -0.7773804068565369, |
| "max": 0.7221406698226929, |
| "mean": 1.8065227777697146e-05, |
| "std": 0.04615423083305359, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.25.1.g": { |
| "min": 0.33866187930107117, |
| "max": 1.425328254699707, |
| "mean": 0.9481796622276306, |
| "std": 0.20640140771865845, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_q.weight": { |
| "min": -1.7458044290542603, |
| "max": 1.704500436782837, |
| "mean": 0.00022708994220010936, |
| "std": 0.15870554745197296, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_q.bias": { |
| "min": -1.19757080078125, |
| "max": 1.0991984605789185, |
| "mean": -0.009535851888358593, |
| "std": 0.2035919725894928, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_k.weight": { |
| "min": -0.4207988381385803, |
| "max": 0.4279989004135132, |
| "mean": 6.386132736224681e-05, |
| "std": 0.04802023991942406, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_k.bias": { |
| "min": -19.71625328063965, |
| "max": 19.51169776916504, |
| "mean": -0.24800625443458557, |
| "std": 4.769559860229492, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_v.weight": { |
| "min": -0.3236338496208191, |
| "max": 0.438272625207901, |
| "mean": -1.1853735486511141e-05, |
| "std": 0.04616710543632507, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_v.bias": { |
| "min": -0.03371698036789894, |
| "max": 0.03678824380040169, |
| "mean": 0.0006397695397026837, |
| "std": 0.0129077835008502, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_out.0.weight": { |
| "min": -0.7031863331794739, |
| "max": 0.6687424182891846, |
| "mean": 4.257483305991627e-05, |
| "std": 0.057892125099897385, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_out.0.bias": { |
| "min": -0.0722241997718811, |
| "max": 0.0676589161157608, |
| "mean": -0.0001341316383332014, |
| "std": 0.012878631241619587, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.3.g": { |
| "min": 0.38035547733306885, |
| "max": 1.3902052640914917, |
| "mean": 1.066498041152954, |
| "std": 0.21949008107185364, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.4.ff.0.0.weight": { |
| "min": -0.6164002418518066, |
| "max": 0.7182905673980713, |
| "mean": 0.00011321296915411949, |
| "std": 0.05802781134843826, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.4.ff.0.0.bias": { |
| "min": -0.2184617668390274, |
| "max": 0.22462666034698486, |
| "mean": 0.006169781554490328, |
| "std": 0.04965030029416084, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.25.4.ff.2.weight": { |
| "min": -0.6297575831413269, |
| "max": 0.8895801901817322, |
| "mean": 1.2445923857740127e-05, |
| "std": 0.023545311763882637, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.25.4.ff.2.bias": { |
| "min": -0.506031334400177, |
| "max": 0.47297078371047974, |
| "mean": -0.0030135007109493017, |
| "std": 0.0691458210349083, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.norm_out.g": { |
| "min": 0.5383259057998657, |
| "max": 1.1772801876068115, |
| "mean": 0.7824772596359253, |
| "std": 0.09824033081531525, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.proj_out.weight": { |
| "min": -0.26664498448371887, |
| "max": 0.2126948982477188, |
| "mean": -0.00022273289505392313, |
| "std": 0.05400582030415535, |
| "sparsity": 0.0, |
| "shape": [ |
| 100, |
| 1024 |
| ] |
| }, |
| "transformer.proj_out.bias": { |
| "min": -0.23798410594463348, |
| "max": 0.014864158816635609, |
| "mean": -0.04389958456158638, |
| "std": 0.03423725813627243, |
| "sparsity": 0.0, |
| "shape": [ |
| 100 |
| ] |
| } |
| } |
| } |