| { |
| "layer_types": { |
| "transformer": 391 |
| }, |
| "parameter_counts": { |
| "transformer.time_embed.time_mlp.0.weight": 262144, |
| "transformer.time_embed.time_mlp.0.bias": 1024, |
| "transformer.time_embed.time_mlp.2.weight": 1048576, |
| "transformer.time_embed.time_mlp.2.bias": 1024, |
| "transformer.text_embed.text_embed.weight": 254600, |
| "transformer.input_embed.proj.weight": 307200, |
| "transformer.input_embed.proj.bias": 1024, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, |
| "transformer.layers.0.1.g": 1024, |
| "transformer.layers.0.2.to_q.weight": 1048576, |
| "transformer.layers.0.2.to_q.bias": 1024, |
| "transformer.layers.0.2.to_k.weight": 1048576, |
| "transformer.layers.0.2.to_k.bias": 1024, |
| "transformer.layers.0.2.to_v.weight": 1048576, |
| "transformer.layers.0.2.to_v.bias": 1024, |
| "transformer.layers.0.2.to_out.0.weight": 1048576, |
| "transformer.layers.0.2.to_out.0.bias": 1024, |
| "transformer.layers.0.3.g": 1024, |
| "transformer.layers.0.4.ff.0.0.weight": 4194304, |
| "transformer.layers.0.4.ff.0.0.bias": 4096, |
| "transformer.layers.0.4.ff.2.weight": 4194304, |
| "transformer.layers.0.4.ff.2.bias": 1024, |
| "transformer.layers.1.1.g": 1024, |
| "transformer.layers.1.2.to_q.weight": 1048576, |
| "transformer.layers.1.2.to_q.bias": 1024, |
| "transformer.layers.1.2.to_k.weight": 1048576, |
| "transformer.layers.1.2.to_k.bias": 1024, |
| "transformer.layers.1.2.to_v.weight": 1048576, |
| "transformer.layers.1.2.to_v.bias": 1024, |
| "transformer.layers.1.2.to_out.0.weight": 1048576, |
| "transformer.layers.1.2.to_out.0.bias": 1024, |
| "transformer.layers.1.3.g": 1024, |
| "transformer.layers.1.4.ff.0.0.weight": 4194304, |
| "transformer.layers.1.4.ff.0.0.bias": 4096, |
| "transformer.layers.1.4.ff.2.weight": 4194304, |
| "transformer.layers.1.4.ff.2.bias": 1024, |
| "transformer.layers.2.1.g": 1024, |
| "transformer.layers.2.2.to_q.weight": 1048576, |
| "transformer.layers.2.2.to_q.bias": 1024, |
| "transformer.layers.2.2.to_k.weight": 1048576, |
| "transformer.layers.2.2.to_k.bias": 1024, |
| "transformer.layers.2.2.to_v.weight": 1048576, |
| "transformer.layers.2.2.to_v.bias": 1024, |
| "transformer.layers.2.2.to_out.0.weight": 1048576, |
| "transformer.layers.2.2.to_out.0.bias": 1024, |
| "transformer.layers.2.3.g": 1024, |
| "transformer.layers.2.4.ff.0.0.weight": 4194304, |
| "transformer.layers.2.4.ff.0.0.bias": 4096, |
| "transformer.layers.2.4.ff.2.weight": 4194304, |
| "transformer.layers.2.4.ff.2.bias": 1024, |
| "transformer.layers.3.1.g": 1024, |
| "transformer.layers.3.2.to_q.weight": 1048576, |
| "transformer.layers.3.2.to_q.bias": 1024, |
| "transformer.layers.3.2.to_k.weight": 1048576, |
| "transformer.layers.3.2.to_k.bias": 1024, |
| "transformer.layers.3.2.to_v.weight": 1048576, |
| "transformer.layers.3.2.to_v.bias": 1024, |
| "transformer.layers.3.2.to_out.0.weight": 1048576, |
| "transformer.layers.3.2.to_out.0.bias": 1024, |
| "transformer.layers.3.3.g": 1024, |
| "transformer.layers.3.4.ff.0.0.weight": 4194304, |
| "transformer.layers.3.4.ff.0.0.bias": 4096, |
| "transformer.layers.3.4.ff.2.weight": 4194304, |
| "transformer.layers.3.4.ff.2.bias": 1024, |
| "transformer.layers.4.1.g": 1024, |
| "transformer.layers.4.2.to_q.weight": 1048576, |
| "transformer.layers.4.2.to_q.bias": 1024, |
| "transformer.layers.4.2.to_k.weight": 1048576, |
| "transformer.layers.4.2.to_k.bias": 1024, |
| "transformer.layers.4.2.to_v.weight": 1048576, |
| "transformer.layers.4.2.to_v.bias": 1024, |
| "transformer.layers.4.2.to_out.0.weight": 1048576, |
| "transformer.layers.4.2.to_out.0.bias": 1024, |
| "transformer.layers.4.3.g": 1024, |
| "transformer.layers.4.4.ff.0.0.weight": 4194304, |
| "transformer.layers.4.4.ff.0.0.bias": 4096, |
| "transformer.layers.4.4.ff.2.weight": 4194304, |
| "transformer.layers.4.4.ff.2.bias": 1024, |
| "transformer.layers.5.1.g": 1024, |
| "transformer.layers.5.2.to_q.weight": 1048576, |
| "transformer.layers.5.2.to_q.bias": 1024, |
| "transformer.layers.5.2.to_k.weight": 1048576, |
| "transformer.layers.5.2.to_k.bias": 1024, |
| "transformer.layers.5.2.to_v.weight": 1048576, |
| "transformer.layers.5.2.to_v.bias": 1024, |
| "transformer.layers.5.2.to_out.0.weight": 1048576, |
| "transformer.layers.5.2.to_out.0.bias": 1024, |
| "transformer.layers.5.3.g": 1024, |
| "transformer.layers.5.4.ff.0.0.weight": 4194304, |
| "transformer.layers.5.4.ff.0.0.bias": 4096, |
| "transformer.layers.5.4.ff.2.weight": 4194304, |
| "transformer.layers.5.4.ff.2.bias": 1024, |
| "transformer.layers.6.1.g": 1024, |
| "transformer.layers.6.2.to_q.weight": 1048576, |
| "transformer.layers.6.2.to_q.bias": 1024, |
| "transformer.layers.6.2.to_k.weight": 1048576, |
| "transformer.layers.6.2.to_k.bias": 1024, |
| "transformer.layers.6.2.to_v.weight": 1048576, |
| "transformer.layers.6.2.to_v.bias": 1024, |
| "transformer.layers.6.2.to_out.0.weight": 1048576, |
| "transformer.layers.6.2.to_out.0.bias": 1024, |
| "transformer.layers.6.3.g": 1024, |
| "transformer.layers.6.4.ff.0.0.weight": 4194304, |
| "transformer.layers.6.4.ff.0.0.bias": 4096, |
| "transformer.layers.6.4.ff.2.weight": 4194304, |
| "transformer.layers.6.4.ff.2.bias": 1024, |
| "transformer.layers.7.1.g": 1024, |
| "transformer.layers.7.2.to_q.weight": 1048576, |
| "transformer.layers.7.2.to_q.bias": 1024, |
| "transformer.layers.7.2.to_k.weight": 1048576, |
| "transformer.layers.7.2.to_k.bias": 1024, |
| "transformer.layers.7.2.to_v.weight": 1048576, |
| "transformer.layers.7.2.to_v.bias": 1024, |
| "transformer.layers.7.2.to_out.0.weight": 1048576, |
| "transformer.layers.7.2.to_out.0.bias": 1024, |
| "transformer.layers.7.3.g": 1024, |
| "transformer.layers.7.4.ff.0.0.weight": 4194304, |
| "transformer.layers.7.4.ff.0.0.bias": 4096, |
| "transformer.layers.7.4.ff.2.weight": 4194304, |
| "transformer.layers.7.4.ff.2.bias": 1024, |
| "transformer.layers.8.1.g": 1024, |
| "transformer.layers.8.2.to_q.weight": 1048576, |
| "transformer.layers.8.2.to_q.bias": 1024, |
| "transformer.layers.8.2.to_k.weight": 1048576, |
| "transformer.layers.8.2.to_k.bias": 1024, |
| "transformer.layers.8.2.to_v.weight": 1048576, |
| "transformer.layers.8.2.to_v.bias": 1024, |
| "transformer.layers.8.2.to_out.0.weight": 1048576, |
| "transformer.layers.8.2.to_out.0.bias": 1024, |
| "transformer.layers.8.3.g": 1024, |
| "transformer.layers.8.4.ff.0.0.weight": 4194304, |
| "transformer.layers.8.4.ff.0.0.bias": 4096, |
| "transformer.layers.8.4.ff.2.weight": 4194304, |
| "transformer.layers.8.4.ff.2.bias": 1024, |
| "transformer.layers.9.1.g": 1024, |
| "transformer.layers.9.2.to_q.weight": 1048576, |
| "transformer.layers.9.2.to_q.bias": 1024, |
| "transformer.layers.9.2.to_k.weight": 1048576, |
| "transformer.layers.9.2.to_k.bias": 1024, |
| "transformer.layers.9.2.to_v.weight": 1048576, |
| "transformer.layers.9.2.to_v.bias": 1024, |
| "transformer.layers.9.2.to_out.0.weight": 1048576, |
| "transformer.layers.9.2.to_out.0.bias": 1024, |
| "transformer.layers.9.3.g": 1024, |
| "transformer.layers.9.4.ff.0.0.weight": 4194304, |
| "transformer.layers.9.4.ff.0.0.bias": 4096, |
| "transformer.layers.9.4.ff.2.weight": 4194304, |
| "transformer.layers.9.4.ff.2.bias": 1024, |
| "transformer.layers.10.1.g": 1024, |
| "transformer.layers.10.2.to_q.weight": 1048576, |
| "transformer.layers.10.2.to_q.bias": 1024, |
| "transformer.layers.10.2.to_k.weight": 1048576, |
| "transformer.layers.10.2.to_k.bias": 1024, |
| "transformer.layers.10.2.to_v.weight": 1048576, |
| "transformer.layers.10.2.to_v.bias": 1024, |
| "transformer.layers.10.2.to_out.0.weight": 1048576, |
| "transformer.layers.10.2.to_out.0.bias": 1024, |
| "transformer.layers.10.3.g": 1024, |
| "transformer.layers.10.4.ff.0.0.weight": 4194304, |
| "transformer.layers.10.4.ff.0.0.bias": 4096, |
| "transformer.layers.10.4.ff.2.weight": 4194304, |
| "transformer.layers.10.4.ff.2.bias": 1024, |
| "transformer.layers.11.1.g": 1024, |
| "transformer.layers.11.2.to_q.weight": 1048576, |
| "transformer.layers.11.2.to_q.bias": 1024, |
| "transformer.layers.11.2.to_k.weight": 1048576, |
| "transformer.layers.11.2.to_k.bias": 1024, |
| "transformer.layers.11.2.to_v.weight": 1048576, |
| "transformer.layers.11.2.to_v.bias": 1024, |
| "transformer.layers.11.2.to_out.0.weight": 1048576, |
| "transformer.layers.11.2.to_out.0.bias": 1024, |
| "transformer.layers.11.3.g": 1024, |
| "transformer.layers.11.4.ff.0.0.weight": 4194304, |
| "transformer.layers.11.4.ff.0.0.bias": 4096, |
| "transformer.layers.11.4.ff.2.weight": 4194304, |
| "transformer.layers.11.4.ff.2.bias": 1024, |
| "transformer.layers.12.1.g": 1024, |
| "transformer.layers.12.2.to_q.weight": 1048576, |
| "transformer.layers.12.2.to_q.bias": 1024, |
| "transformer.layers.12.2.to_k.weight": 1048576, |
| "transformer.layers.12.2.to_k.bias": 1024, |
| "transformer.layers.12.2.to_v.weight": 1048576, |
| "transformer.layers.12.2.to_v.bias": 1024, |
| "transformer.layers.12.2.to_out.0.weight": 1048576, |
| "transformer.layers.12.2.to_out.0.bias": 1024, |
| "transformer.layers.12.3.g": 1024, |
| "transformer.layers.12.4.ff.0.0.weight": 4194304, |
| "transformer.layers.12.4.ff.0.0.bias": 4096, |
| "transformer.layers.12.4.ff.2.weight": 4194304, |
| "transformer.layers.12.4.ff.2.bias": 1024, |
| "transformer.layers.13.0.weight": 2097152, |
| "transformer.layers.13.1.g": 1024, |
| "transformer.layers.13.2.to_q.weight": 1048576, |
| "transformer.layers.13.2.to_q.bias": 1024, |
| "transformer.layers.13.2.to_k.weight": 1048576, |
| "transformer.layers.13.2.to_k.bias": 1024, |
| "transformer.layers.13.2.to_v.weight": 1048576, |
| "transformer.layers.13.2.to_v.bias": 1024, |
| "transformer.layers.13.2.to_out.0.weight": 1048576, |
| "transformer.layers.13.2.to_out.0.bias": 1024, |
| "transformer.layers.13.3.g": 1024, |
| "transformer.layers.13.4.ff.0.0.weight": 4194304, |
| "transformer.layers.13.4.ff.0.0.bias": 4096, |
| "transformer.layers.13.4.ff.2.weight": 4194304, |
| "transformer.layers.13.4.ff.2.bias": 1024, |
| "transformer.layers.14.0.weight": 2097152, |
| "transformer.layers.14.1.g": 1024, |
| "transformer.layers.14.2.to_q.weight": 1048576, |
| "transformer.layers.14.2.to_q.bias": 1024, |
| "transformer.layers.14.2.to_k.weight": 1048576, |
| "transformer.layers.14.2.to_k.bias": 1024, |
| "transformer.layers.14.2.to_v.weight": 1048576, |
| "transformer.layers.14.2.to_v.bias": 1024, |
| "transformer.layers.14.2.to_out.0.weight": 1048576, |
| "transformer.layers.14.2.to_out.0.bias": 1024, |
| "transformer.layers.14.3.g": 1024, |
| "transformer.layers.14.4.ff.0.0.weight": 4194304, |
| "transformer.layers.14.4.ff.0.0.bias": 4096, |
| "transformer.layers.14.4.ff.2.weight": 4194304, |
| "transformer.layers.14.4.ff.2.bias": 1024, |
| "transformer.layers.15.0.weight": 2097152, |
| "transformer.layers.15.1.g": 1024, |
| "transformer.layers.15.2.to_q.weight": 1048576, |
| "transformer.layers.15.2.to_q.bias": 1024, |
| "transformer.layers.15.2.to_k.weight": 1048576, |
| "transformer.layers.15.2.to_k.bias": 1024, |
| "transformer.layers.15.2.to_v.weight": 1048576, |
| "transformer.layers.15.2.to_v.bias": 1024, |
| "transformer.layers.15.2.to_out.0.weight": 1048576, |
| "transformer.layers.15.2.to_out.0.bias": 1024, |
| "transformer.layers.15.3.g": 1024, |
| "transformer.layers.15.4.ff.0.0.weight": 4194304, |
| "transformer.layers.15.4.ff.0.0.bias": 4096, |
| "transformer.layers.15.4.ff.2.weight": 4194304, |
| "transformer.layers.15.4.ff.2.bias": 1024, |
| "transformer.layers.16.0.weight": 2097152, |
| "transformer.layers.16.1.g": 1024, |
| "transformer.layers.16.2.to_q.weight": 1048576, |
| "transformer.layers.16.2.to_q.bias": 1024, |
| "transformer.layers.16.2.to_k.weight": 1048576, |
| "transformer.layers.16.2.to_k.bias": 1024, |
| "transformer.layers.16.2.to_v.weight": 1048576, |
| "transformer.layers.16.2.to_v.bias": 1024, |
| "transformer.layers.16.2.to_out.0.weight": 1048576, |
| "transformer.layers.16.2.to_out.0.bias": 1024, |
| "transformer.layers.16.3.g": 1024, |
| "transformer.layers.16.4.ff.0.0.weight": 4194304, |
| "transformer.layers.16.4.ff.0.0.bias": 4096, |
| "transformer.layers.16.4.ff.2.weight": 4194304, |
| "transformer.layers.16.4.ff.2.bias": 1024, |
| "transformer.layers.17.0.weight": 2097152, |
| "transformer.layers.17.1.g": 1024, |
| "transformer.layers.17.2.to_q.weight": 1048576, |
| "transformer.layers.17.2.to_q.bias": 1024, |
| "transformer.layers.17.2.to_k.weight": 1048576, |
| "transformer.layers.17.2.to_k.bias": 1024, |
| "transformer.layers.17.2.to_v.weight": 1048576, |
| "transformer.layers.17.2.to_v.bias": 1024, |
| "transformer.layers.17.2.to_out.0.weight": 1048576, |
| "transformer.layers.17.2.to_out.0.bias": 1024, |
| "transformer.layers.17.3.g": 1024, |
| "transformer.layers.17.4.ff.0.0.weight": 4194304, |
| "transformer.layers.17.4.ff.0.0.bias": 4096, |
| "transformer.layers.17.4.ff.2.weight": 4194304, |
| "transformer.layers.17.4.ff.2.bias": 1024, |
| "transformer.layers.18.0.weight": 2097152, |
| "transformer.layers.18.1.g": 1024, |
| "transformer.layers.18.2.to_q.weight": 1048576, |
| "transformer.layers.18.2.to_q.bias": 1024, |
| "transformer.layers.18.2.to_k.weight": 1048576, |
| "transformer.layers.18.2.to_k.bias": 1024, |
| "transformer.layers.18.2.to_v.weight": 1048576, |
| "transformer.layers.18.2.to_v.bias": 1024, |
| "transformer.layers.18.2.to_out.0.weight": 1048576, |
| "transformer.layers.18.2.to_out.0.bias": 1024, |
| "transformer.layers.18.3.g": 1024, |
| "transformer.layers.18.4.ff.0.0.weight": 4194304, |
| "transformer.layers.18.4.ff.0.0.bias": 4096, |
| "transformer.layers.18.4.ff.2.weight": 4194304, |
| "transformer.layers.18.4.ff.2.bias": 1024, |
| "transformer.layers.19.0.weight": 2097152, |
| "transformer.layers.19.1.g": 1024, |
| "transformer.layers.19.2.to_q.weight": 1048576, |
| "transformer.layers.19.2.to_q.bias": 1024, |
| "transformer.layers.19.2.to_k.weight": 1048576, |
| "transformer.layers.19.2.to_k.bias": 1024, |
| "transformer.layers.19.2.to_v.weight": 1048576, |
| "transformer.layers.19.2.to_v.bias": 1024, |
| "transformer.layers.19.2.to_out.0.weight": 1048576, |
| "transformer.layers.19.2.to_out.0.bias": 1024, |
| "transformer.layers.19.3.g": 1024, |
| "transformer.layers.19.4.ff.0.0.weight": 4194304, |
| "transformer.layers.19.4.ff.0.0.bias": 4096, |
| "transformer.layers.19.4.ff.2.weight": 4194304, |
| "transformer.layers.19.4.ff.2.bias": 1024, |
| "transformer.layers.20.0.weight": 2097152, |
| "transformer.layers.20.1.g": 1024, |
| "transformer.layers.20.2.to_q.weight": 1048576, |
| "transformer.layers.20.2.to_q.bias": 1024, |
| "transformer.layers.20.2.to_k.weight": 1048576, |
| "transformer.layers.20.2.to_k.bias": 1024, |
| "transformer.layers.20.2.to_v.weight": 1048576, |
| "transformer.layers.20.2.to_v.bias": 1024, |
| "transformer.layers.20.2.to_out.0.weight": 1048576, |
| "transformer.layers.20.2.to_out.0.bias": 1024, |
| "transformer.layers.20.3.g": 1024, |
| "transformer.layers.20.4.ff.0.0.weight": 4194304, |
| "transformer.layers.20.4.ff.0.0.bias": 4096, |
| "transformer.layers.20.4.ff.2.weight": 4194304, |
| "transformer.layers.20.4.ff.2.bias": 1024, |
| "transformer.layers.21.0.weight": 2097152, |
| "transformer.layers.21.1.g": 1024, |
| "transformer.layers.21.2.to_q.weight": 1048576, |
| "transformer.layers.21.2.to_q.bias": 1024, |
| "transformer.layers.21.2.to_k.weight": 1048576, |
| "transformer.layers.21.2.to_k.bias": 1024, |
| "transformer.layers.21.2.to_v.weight": 1048576, |
| "transformer.layers.21.2.to_v.bias": 1024, |
| "transformer.layers.21.2.to_out.0.weight": 1048576, |
| "transformer.layers.21.2.to_out.0.bias": 1024, |
| "transformer.layers.21.3.g": 1024, |
| "transformer.layers.21.4.ff.0.0.weight": 4194304, |
| "transformer.layers.21.4.ff.0.0.bias": 4096, |
| "transformer.layers.21.4.ff.2.weight": 4194304, |
| "transformer.layers.21.4.ff.2.bias": 1024, |
| "transformer.layers.22.0.weight": 2097152, |
| "transformer.layers.22.1.g": 1024, |
| "transformer.layers.22.2.to_q.weight": 1048576, |
| "transformer.layers.22.2.to_q.bias": 1024, |
| "transformer.layers.22.2.to_k.weight": 1048576, |
| "transformer.layers.22.2.to_k.bias": 1024, |
| "transformer.layers.22.2.to_v.weight": 1048576, |
| "transformer.layers.22.2.to_v.bias": 1024, |
| "transformer.layers.22.2.to_out.0.weight": 1048576, |
| "transformer.layers.22.2.to_out.0.bias": 1024, |
| "transformer.layers.22.3.g": 1024, |
| "transformer.layers.22.4.ff.0.0.weight": 4194304, |
| "transformer.layers.22.4.ff.0.0.bias": 4096, |
| "transformer.layers.22.4.ff.2.weight": 4194304, |
| "transformer.layers.22.4.ff.2.bias": 1024, |
| "transformer.layers.23.0.weight": 2097152, |
| "transformer.layers.23.1.g": 1024, |
| "transformer.layers.23.2.to_q.weight": 1048576, |
| "transformer.layers.23.2.to_q.bias": 1024, |
| "transformer.layers.23.2.to_k.weight": 1048576, |
| "transformer.layers.23.2.to_k.bias": 1024, |
| "transformer.layers.23.2.to_v.weight": 1048576, |
| "transformer.layers.23.2.to_v.bias": 1024, |
| "transformer.layers.23.2.to_out.0.weight": 1048576, |
| "transformer.layers.23.2.to_out.0.bias": 1024, |
| "transformer.layers.23.3.g": 1024, |
| "transformer.layers.23.4.ff.0.0.weight": 4194304, |
| "transformer.layers.23.4.ff.0.0.bias": 4096, |
| "transformer.layers.23.4.ff.2.weight": 4194304, |
| "transformer.layers.23.4.ff.2.bias": 1024, |
| "transformer.layers.24.0.weight": 2097152, |
| "transformer.layers.24.1.g": 1024, |
| "transformer.layers.24.2.to_q.weight": 1048576, |
| "transformer.layers.24.2.to_q.bias": 1024, |
| "transformer.layers.24.2.to_k.weight": 1048576, |
| "transformer.layers.24.2.to_k.bias": 1024, |
| "transformer.layers.24.2.to_v.weight": 1048576, |
| "transformer.layers.24.2.to_v.bias": 1024, |
| "transformer.layers.24.2.to_out.0.weight": 1048576, |
| "transformer.layers.24.2.to_out.0.bias": 1024, |
| "transformer.layers.24.3.g": 1024, |
| "transformer.layers.24.4.ff.0.0.weight": 4194304, |
| "transformer.layers.24.4.ff.0.0.bias": 4096, |
| "transformer.layers.24.4.ff.2.weight": 4194304, |
| "transformer.layers.24.4.ff.2.bias": 1024, |
| "transformer.layers.25.0.weight": 2097152, |
| "transformer.layers.25.1.g": 1024, |
| "transformer.layers.25.2.to_q.weight": 1048576, |
| "transformer.layers.25.2.to_q.bias": 1024, |
| "transformer.layers.25.2.to_k.weight": 1048576, |
| "transformer.layers.25.2.to_k.bias": 1024, |
| "transformer.layers.25.2.to_v.weight": 1048576, |
| "transformer.layers.25.2.to_v.bias": 1024, |
| "transformer.layers.25.2.to_out.0.weight": 1048576, |
| "transformer.layers.25.2.to_out.0.bias": 1024, |
| "transformer.layers.25.3.g": 1024, |
| "transformer.layers.25.4.ff.0.0.weight": 4194304, |
| "transformer.layers.25.4.ff.0.0.bias": 4096, |
| "transformer.layers.25.4.ff.2.weight": 4194304, |
| "transformer.layers.25.4.ff.2.bias": 1024, |
| "transformer.norm_out.g": 1024, |
| "transformer.proj_out.weight": 102400, |
| "transformer.proj_out.bias": 100 |
| }, |
| "important_layers": [ |
| "transformer.time_embed.time_mlp.0.weight", |
| "transformer.time_embed.time_mlp.2.weight", |
| "transformer.text_embed.text_embed.weight", |
| "transformer.input_embed.proj.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight", |
| "transformer.layers.0.2.to_q.weight", |
| "transformer.layers.0.2.to_k.weight", |
| "transformer.layers.0.2.to_v.weight", |
| "transformer.layers.0.2.to_out.0.weight", |
| "transformer.layers.0.4.ff.0.0.weight", |
| "transformer.layers.0.4.ff.2.weight", |
| "transformer.layers.1.2.to_q.weight", |
| "transformer.layers.1.2.to_k.weight", |
| "transformer.layers.1.2.to_v.weight", |
| "transformer.layers.1.2.to_out.0.weight", |
| "transformer.layers.1.4.ff.0.0.weight", |
| "transformer.layers.1.4.ff.2.weight", |
| "transformer.layers.2.2.to_q.weight", |
| "transformer.layers.2.2.to_k.weight", |
| "transformer.layers.2.2.to_v.weight", |
| "transformer.layers.2.2.to_out.0.weight", |
| "transformer.layers.2.4.ff.0.0.weight", |
| "transformer.layers.2.4.ff.2.weight", |
| "transformer.layers.3.2.to_q.weight", |
| "transformer.layers.3.2.to_k.weight", |
| "transformer.layers.3.2.to_v.weight", |
| "transformer.layers.3.2.to_out.0.weight", |
| "transformer.layers.3.4.ff.0.0.weight", |
| "transformer.layers.3.4.ff.2.weight", |
| "transformer.layers.4.2.to_q.weight", |
| "transformer.layers.4.2.to_k.weight", |
| "transformer.layers.4.2.to_v.weight", |
| "transformer.layers.4.2.to_out.0.weight", |
| "transformer.layers.4.4.ff.0.0.weight", |
| "transformer.layers.4.4.ff.2.weight", |
| "transformer.layers.5.2.to_q.weight", |
| "transformer.layers.5.2.to_k.weight", |
| "transformer.layers.5.2.to_v.weight", |
| "transformer.layers.5.2.to_out.0.weight", |
| "transformer.layers.5.4.ff.0.0.weight", |
| "transformer.layers.5.4.ff.2.weight", |
| "transformer.layers.6.2.to_q.weight", |
| "transformer.layers.6.2.to_k.weight", |
| "transformer.layers.6.2.to_v.weight", |
| "transformer.layers.6.2.to_out.0.weight", |
| "transformer.layers.6.4.ff.0.0.weight", |
| "transformer.layers.6.4.ff.2.weight", |
| "transformer.layers.7.2.to_q.weight", |
| "transformer.layers.7.2.to_k.weight", |
| "transformer.layers.7.2.to_v.weight", |
| "transformer.layers.7.2.to_out.0.weight", |
| "transformer.layers.7.4.ff.0.0.weight", |
| "transformer.layers.7.4.ff.2.weight", |
| "transformer.layers.8.4.ff.0.0.weight", |
| "transformer.layers.8.4.ff.2.weight", |
| "transformer.layers.9.4.ff.0.0.weight", |
| "transformer.layers.9.4.ff.2.weight", |
| "transformer.layers.10.4.ff.0.0.weight", |
| "transformer.layers.10.4.ff.2.weight", |
| "transformer.layers.11.4.ff.0.0.weight", |
| "transformer.layers.11.4.ff.2.weight", |
| "transformer.layers.12.4.ff.0.0.weight", |
| "transformer.layers.12.4.ff.2.weight", |
| "transformer.layers.13.0.weight", |
| "transformer.layers.13.4.ff.0.0.weight", |
| "transformer.layers.13.4.ff.2.weight", |
| "transformer.layers.14.0.weight", |
| "transformer.layers.14.4.ff.0.0.weight", |
| "transformer.layers.14.4.ff.2.weight", |
| "transformer.layers.15.0.weight", |
| "transformer.layers.15.4.ff.0.0.weight", |
| "transformer.layers.15.4.ff.2.weight", |
| "transformer.layers.16.4.ff.0.0.weight", |
| "transformer.layers.16.4.ff.2.weight", |
| "transformer.layers.17.4.ff.0.0.weight", |
| "transformer.layers.17.4.ff.2.weight", |
| "transformer.layers.18.4.ff.0.0.weight", |
| "transformer.layers.18.4.ff.2.weight", |
| "transformer.layers.19.4.ff.0.0.weight", |
| "transformer.layers.19.4.ff.2.weight", |
| "transformer.layers.20.4.ff.0.0.weight", |
| "transformer.layers.20.4.ff.2.weight", |
| "transformer.layers.21.4.ff.0.0.weight", |
| "transformer.layers.21.4.ff.2.weight", |
| "transformer.layers.22.4.ff.0.0.weight", |
| "transformer.layers.22.4.ff.2.weight", |
| "transformer.layers.23.4.ff.0.0.weight", |
| "transformer.layers.23.4.ff.2.weight", |
| "transformer.layers.24.4.ff.0.0.weight", |
| "transformer.layers.24.4.ff.2.weight", |
| "transformer.layers.25.4.ff.0.0.weight", |
| "transformer.layers.25.4.ff.2.weight" |
| ], |
| "bottleneck_layers": [], |
| "recommendations": { |
| "focus_layers": [ |
| "transformer.time_embed.time_mlp.0.weight", |
| "transformer.time_embed.time_mlp.2.weight", |
| "transformer.text_embed.text_embed.weight", |
| "transformer.input_embed.proj.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight", |
| "transformer.layers.0.2.to_q.weight", |
| "transformer.layers.0.2.to_k.weight", |
| "transformer.layers.0.2.to_v.weight", |
| "transformer.layers.0.2.to_out.0.weight", |
| "transformer.layers.0.4.ff.0.0.weight", |
| "transformer.layers.0.4.ff.2.weight", |
| "transformer.layers.1.2.to_q.weight", |
| "transformer.layers.1.2.to_k.weight", |
| "transformer.layers.1.2.to_v.weight", |
| "transformer.layers.1.2.to_out.0.weight", |
| "transformer.layers.1.4.ff.0.0.weight", |
| "transformer.layers.1.4.ff.2.weight", |
| "transformer.layers.2.2.to_q.weight", |
| "transformer.layers.2.2.to_k.weight", |
| "transformer.layers.2.2.to_v.weight", |
| "transformer.layers.2.2.to_out.0.weight", |
| "transformer.layers.2.4.ff.0.0.weight", |
| "transformer.layers.2.4.ff.2.weight", |
| "transformer.layers.3.2.to_q.weight", |
| "transformer.layers.3.2.to_k.weight", |
| "transformer.layers.3.2.to_v.weight", |
| "transformer.layers.3.2.to_out.0.weight", |
| "transformer.layers.3.4.ff.0.0.weight", |
| "transformer.layers.3.4.ff.2.weight", |
| "transformer.layers.4.2.to_q.weight", |
| "transformer.layers.4.2.to_k.weight", |
| "transformer.layers.4.2.to_v.weight", |
| "transformer.layers.4.2.to_out.0.weight", |
| "transformer.layers.4.4.ff.0.0.weight", |
| "transformer.layers.4.4.ff.2.weight", |
| "transformer.layers.5.2.to_q.weight", |
| "transformer.layers.5.2.to_k.weight", |
| "transformer.layers.5.2.to_v.weight", |
| "transformer.layers.5.2.to_out.0.weight", |
| "transformer.layers.5.4.ff.0.0.weight", |
| "transformer.layers.5.4.ff.2.weight", |
| "transformer.layers.6.2.to_q.weight", |
| "transformer.layers.6.2.to_k.weight", |
| "transformer.layers.6.2.to_v.weight", |
| "transformer.layers.6.2.to_out.0.weight", |
| "transformer.layers.6.4.ff.0.0.weight", |
| "transformer.layers.6.4.ff.2.weight", |
| "transformer.layers.7.2.to_q.weight", |
| "transformer.layers.7.2.to_k.weight", |
| "transformer.layers.7.2.to_v.weight", |
| "transformer.layers.7.2.to_out.0.weight", |
| "transformer.layers.7.4.ff.0.0.weight", |
| "transformer.layers.7.4.ff.2.weight", |
| "transformer.layers.8.4.ff.0.0.weight", |
| "transformer.layers.8.4.ff.2.weight", |
| "transformer.layers.9.4.ff.0.0.weight", |
| "transformer.layers.9.4.ff.2.weight", |
| "transformer.layers.10.4.ff.0.0.weight", |
| "transformer.layers.10.4.ff.2.weight", |
| "transformer.layers.11.4.ff.0.0.weight", |
| "transformer.layers.11.4.ff.2.weight", |
| "transformer.layers.12.4.ff.0.0.weight", |
| "transformer.layers.12.4.ff.2.weight", |
| "transformer.layers.13.0.weight", |
| "transformer.layers.13.4.ff.0.0.weight", |
| "transformer.layers.13.4.ff.2.weight", |
| "transformer.layers.14.0.weight", |
| "transformer.layers.14.4.ff.0.0.weight", |
| "transformer.layers.14.4.ff.2.weight", |
| "transformer.layers.15.0.weight", |
| "transformer.layers.15.4.ff.0.0.weight", |
| "transformer.layers.15.4.ff.2.weight", |
| "transformer.layers.16.4.ff.0.0.weight", |
| "transformer.layers.16.4.ff.2.weight", |
| "transformer.layers.17.4.ff.0.0.weight", |
| "transformer.layers.17.4.ff.2.weight", |
| "transformer.layers.18.4.ff.0.0.weight", |
| "transformer.layers.18.4.ff.2.weight", |
| "transformer.layers.19.4.ff.0.0.weight", |
| "transformer.layers.19.4.ff.2.weight", |
| "transformer.layers.20.4.ff.0.0.weight", |
| "transformer.layers.20.4.ff.2.weight", |
| "transformer.layers.21.4.ff.0.0.weight", |
| "transformer.layers.21.4.ff.2.weight", |
| "transformer.layers.22.4.ff.0.0.weight", |
| "transformer.layers.22.4.ff.2.weight", |
| "transformer.layers.23.4.ff.0.0.weight", |
| "transformer.layers.23.4.ff.2.weight", |
| "transformer.layers.24.4.ff.0.0.weight", |
| "transformer.layers.24.4.ff.2.weight", |
| "transformer.layers.25.4.ff.0.0.weight", |
| "transformer.layers.25.4.ff.2.weight" |
| ] |
| }, |
| "total_parameters": 391, |
| "total_elements": 360755948, |
| "param_ranges": { |
| "transformer.time_embed.time_mlp.0.weight": { |
| "min": -0.43005406856536865, |
| "max": 0.29851898550987244, |
| "mean": -0.0025509949773550034, |
| "std": 0.042555101215839386, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 256 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.0.bias": { |
| "min": -0.06313250213861465, |
| "max": 0.10729768127202988, |
| "mean": 0.0006133262650109828, |
| "std": 0.03408696502447128, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.2.weight": { |
| "min": -0.41268208622932434, |
| "max": 0.8365541696548462, |
| "mean": -0.00020702443725895137, |
| "std": 0.02410811372101307, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.2.bias": { |
| "min": -0.11502047628164291, |
| "max": 0.3207014203071594, |
| "mean": -0.00093841488705948, |
| "std": 0.019534854218363762, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.text_embed.text_embed.weight": { |
| "min": -2.7852821350097656, |
| "max": 2.8634164333343506, |
| "mean": -0.00036539402208290994, |
| "std": 0.615379810333252, |
| "sparsity": 0.0, |
| "shape": [ |
| 2546, |
| 100 |
| ] |
| }, |
| "transformer.input_embed.proj.weight": { |
| "min": -0.27854230999946594, |
| "max": 0.38152772188186646, |
| "mean": 0.0004230512131471187, |
| "std": 0.042748332023620605, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 300 |
| ] |
| }, |
| "transformer.input_embed.proj.bias": { |
| "min": -0.22163018584251404, |
| "max": 0.20894938707351685, |
| "mean": -0.004489985294640064, |
| "std": 0.040880318731069565, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { |
| "min": -0.4279509484767914, |
| "max": 0.47543206810951233, |
| "mean": 3.1694014523964142e-06, |
| "std": 0.02450772561132908, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 64, |
| 31 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { |
| "min": -0.32420721650123596, |
| "max": 0.15700779855251312, |
| "mean": -0.04670684412121773, |
| "std": 0.051544804126024246, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { |
| "min": -0.4101617932319641, |
| "max": 0.3544142544269562, |
| "mean": -0.00012779857206624, |
| "std": 0.02359919063746929, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 64, |
| 31 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { |
| "min": -0.2289954274892807, |
| "max": 0.26173391938209534, |
| "mean": -0.029131349176168442, |
| "std": 0.04930002987384796, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.1.g": { |
| "min": 0.25456100702285767, |
| "max": 0.818419873714447, |
| "mean": 0.5253804922103882, |
| "std": 0.08069705218076706, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_q.weight": { |
| "min": -0.2965428829193115, |
| "max": 0.26520034670829773, |
| "mean": -0.00042467008461244404, |
| "std": 0.03210080415010452, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_q.bias": { |
| "min": -0.09260489046573639, |
| "max": 0.1250484734773636, |
| "mean": 0.0006493350956588984, |
| "std": 0.025727085769176483, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_k.weight": { |
| "min": -0.2901724576950073, |
| "max": 0.281167596578598, |
| "mean": -7.525501860072836e-05, |
| "std": 0.030932163819670677, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_k.bias": { |
| "min": -5.8939008712768555, |
| "max": 5.80875825881958, |
| "mean": -0.009307368658483028, |
| "std": 1.2948225736618042, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_v.weight": { |
| "min": -0.4246821701526642, |
| "max": 0.34353208541870117, |
| "mean": 9.80871482170187e-05, |
| "std": 0.029952067881822586, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_v.bias": { |
| "min": -0.02886669710278511, |
| "max": 0.027609167620539665, |
| "mean": -0.0003159984771627933, |
| "std": 0.01256631314754486, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_out.0.weight": { |
| "min": -0.4538891911506653, |
| "max": 0.4482215344905853, |
| "mean": 2.2922111384104937e-05, |
| "std": 0.02385348081588745, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_out.0.bias": { |
| "min": -0.08867117762565613, |
| "max": 0.09104129672050476, |
| "mean": 0.0022725451271981, |
| "std": 0.019507737830281258, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.3.g": { |
| "min": 0.26674631237983704, |
| "max": 1.054079532623291, |
| "mean": 0.5310790538787842, |
| "std": 0.10425138473510742, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.4.ff.0.0.weight": { |
| "min": -0.5743944644927979, |
| "max": 0.6082407832145691, |
| "mean": -0.00042930786730721593, |
| "std": 0.03859541565179825, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.4.ff.0.0.bias": { |
| "min": -0.18188051879405975, |
| "max": 0.04570186883211136, |
| "mean": -0.029450394213199615, |
| "std": 0.04259800165891647, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.0.4.ff.2.weight": { |
| "min": -1.1662050485610962, |
| "max": 1.6339434385299683, |
| "mean": 0.00032052083406597376, |
| "std": 0.027692945674061775, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.0.4.ff.2.bias": { |
| "min": -0.16221286356449127, |
| "max": 0.2055274099111557, |
| "mean": -0.021118517965078354, |
| "std": 0.027932317927479744, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.1.g": { |
| "min": 0.22425268590450287, |
| "max": 0.8419703841209412, |
| "mean": 0.48751628398895264, |
| "std": 0.0750974491238594, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_q.weight": { |
| "min": -0.2551511526107788, |
| "max": 0.30577754974365234, |
| "mean": -8.399176294915378e-06, |
| "std": 0.03346917778253555, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_q.bias": { |
| "min": -0.09521990269422531, |
| "max": 0.11036473512649536, |
| "mean": 6.435990508180112e-05, |
| "std": 0.026954451575875282, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_k.weight": { |
| "min": -0.2969436049461365, |
| "max": 0.29559123516082764, |
| "mean": 5.0998860388062894e-05, |
| "std": 0.032539013773202896, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_k.bias": { |
| "min": -5.159433841705322, |
| "max": 5.079733371734619, |
| "mean": -0.014565235003829002, |
| "std": 1.156693696975708, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_v.weight": { |
| "min": -0.3445141315460205, |
| "max": 0.3432990610599518, |
| "mean": 7.890153938205913e-05, |
| "std": 0.03005831316113472, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_v.bias": { |
| "min": -0.03612125664949417, |
| "max": 0.03314004838466644, |
| "mean": -0.00014305136573966593, |
| "std": 0.013020108453929424, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_out.0.weight": { |
| "min": -0.3150654435157776, |
| "max": 0.3748987019062042, |
| "mean": -2.0872395907645114e-05, |
| "std": 0.02405514195561409, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_out.0.bias": { |
| "min": -0.10531895607709885, |
| "max": 0.12192098051309586, |
| "mean": -0.0019657753873616457, |
| "std": 0.028842739760875702, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.3.g": { |
| "min": 0.3119339942932129, |
| "max": 1.1190955638885498, |
| "mean": 0.6662184000015259, |
| "std": 0.09769617766141891, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.4.ff.0.0.weight": { |
| "min": -0.8722184300422668, |
| "max": 0.6274752616882324, |
| "mean": 0.0016759471036493778, |
| "std": 0.047436658293008804, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.4.ff.0.0.bias": { |
| "min": -0.27076128125190735, |
| "max": 0.034267961978912354, |
| "mean": -0.046592649072408676, |
| "std": 0.040578801184892654, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.1.4.ff.2.weight": { |
| "min": -0.9206072688102722, |
| "max": 0.96403568983078, |
| "mean": 0.0010221146512776613, |
| "std": 0.040701672434806824, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.1.4.ff.2.bias": { |
| "min": -0.14442752301692963, |
| "max": 0.0748896598815918, |
| "mean": -0.009088763035833836, |
| "std": 0.02569626271724701, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.1.g": { |
| "min": 0.23972344398498535, |
| "max": 0.7111932635307312, |
| "mean": 0.44715946912765503, |
| "std": 0.05921364948153496, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_q.weight": { |
| "min": -0.27250099182128906, |
| "max": 0.297283798456192, |
| "mean": 8.777939001447521e-06, |
| "std": 0.03547067567706108, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_q.bias": { |
| "min": -0.11882907897233963, |
| "max": 0.1182771623134613, |
| "mean": 0.0007498766062781215, |
| "std": 0.027608048170804977, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_k.weight": { |
| "min": -0.2806638181209564, |
| "max": 0.27924486994743347, |
| "mean": -7.666053716093302e-05, |
| "std": 0.03510000556707382, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_k.bias": { |
| "min": -2.5072221755981445, |
| "max": 2.5192060470581055, |
| "mean": 0.026715079322457314, |
| "std": 0.586592435836792, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_v.weight": { |
| "min": -0.22091323137283325, |
| "max": 0.2714807987213135, |
| "mean": 2.762420081126038e-06, |
| "std": 0.030731365084648132, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_v.bias": { |
| "min": -0.03329985961318016, |
| "max": 0.031178824603557587, |
| "mean": 0.00011736361193470657, |
| "std": 0.012398799881339073, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_out.0.weight": { |
| "min": -0.2350921630859375, |
| "max": 0.23149597644805908, |
| "mean": 5.688454257324338e-05, |
| "std": 0.025696979835629463, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_out.0.bias": { |
| "min": -0.13562175631523132, |
| "max": 0.1278066188097, |
| "mean": -0.00549966748803854, |
| "std": 0.039964329451322556, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.3.g": { |
| "min": 0.3545263111591339, |
| "max": 1.1705567836761475, |
| "mean": 0.7105071544647217, |
| "std": 0.10373809188604355, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.4.ff.0.0.weight": { |
| "min": -0.6171801686286926, |
| "max": 0.5549061298370361, |
| "mean": 0.0011606733314692974, |
| "std": 0.04611368104815483, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.4.ff.0.0.bias": { |
| "min": -0.1888936311006546, |
| "max": 0.024856731295585632, |
| "mean": -0.034840360283851624, |
| "std": 0.028601042926311493, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.2.4.ff.2.weight": { |
| "min": -1.1303929090499878, |
| "max": 0.9700294137001038, |
| "mean": 0.00035928928991779685, |
| "std": 0.04234178736805916, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.2.4.ff.2.bias": { |
| "min": -0.5973078012466431, |
| "max": 0.06291170418262482, |
| "mean": -0.004878643434494734, |
| "std": 0.028604039922356606, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.1.g": { |
| "min": 0.3753509521484375, |
| "max": 0.9391864538192749, |
| "mean": 0.5924164056777954, |
| "std": 0.06680406630039215, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_q.weight": { |
| "min": -0.391277939081192, |
| "max": 0.36899876594543457, |
| "mean": 7.035685848677531e-05, |
| "std": 0.03718537837266922, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_q.bias": { |
| "min": -0.11886083334684372, |
| "max": 0.1363811194896698, |
| "mean": 0.0009265001863241196, |
| "std": 0.029201578348875046, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_k.weight": { |
| "min": -0.6185654401779175, |
| "max": 0.5083082914352417, |
| "mean": 1.5324059859267436e-05, |
| "std": 0.0364382304251194, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_k.bias": { |
| "min": -8.179115295410156, |
| "max": 8.780653953552246, |
| "mean": -0.10920821875333786, |
| "std": 1.697803258895874, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_v.weight": { |
| "min": -0.27624833583831787, |
| "max": 0.23940874636173248, |
| "mean": 5.239578240434639e-05, |
| "std": 0.0326123982667923, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_v.bias": { |
| "min": -0.05171733349561691, |
| "max": 0.039454903453588486, |
| "mean": 9.008367487695068e-05, |
| "std": 0.012963240966200829, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_out.0.weight": { |
| "min": -0.2306506633758545, |
| "max": 0.23440538346767426, |
| "mean": -2.216407301602885e-05, |
| "std": 0.02938910946249962, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_out.0.bias": { |
| "min": -0.2041204422712326, |
| "max": 0.1051875501871109, |
| "mean": -0.004020026419311762, |
| "std": 0.03262867406010628, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.3.g": { |
| "min": 0.3396590054035187, |
| "max": 1.0105489492416382, |
| "mean": 0.7007004022598267, |
| "std": 0.0967300534248352, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.4.ff.0.0.weight": { |
| "min": -0.5642524361610413, |
| "max": 0.8327149152755737, |
| "mean": 0.0004152198671363294, |
| "std": 0.04229423776268959, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.4.ff.0.0.bias": { |
| "min": -0.21180973947048187, |
| "max": 0.030382230877876282, |
| "mean": -0.032180484384298325, |
| "std": 0.02649112045764923, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.3.4.ff.2.weight": { |
| "min": -0.7539102435112, |
| "max": 0.7183676958084106, |
| "mean": -1.6375699487980455e-05, |
| "std": 0.03683510050177574, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.3.4.ff.2.bias": { |
| "min": -0.26317542791366577, |
| "max": 0.10612691938877106, |
| "mean": -0.003012202214449644, |
| "std": 0.028860073536634445, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.1.g": { |
| "min": 0.28410062193870544, |
| "max": 0.6937515735626221, |
| "mean": 0.49938827753067017, |
| "std": 0.04646085575222969, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_q.weight": { |
| "min": -0.27815356850624084, |
| "max": 0.233821839094162, |
| "mean": -0.00011090396583313122, |
| "std": 0.03875657916069031, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_q.bias": { |
| "min": -0.15374495089054108, |
| "max": 0.126325324177742, |
| "mean": -0.0022300099954009056, |
| "std": 0.033342309296131134, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_k.weight": { |
| "min": -0.4138854146003723, |
| "max": 0.6591927409172058, |
| "mean": -1.8888074919232167e-05, |
| "std": 0.03909528627991676, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_k.bias": { |
| "min": -4.2339067459106445, |
| "max": 4.718007564544678, |
| "mean": -0.020461430773139, |
| "std": 1.007363200187683, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_v.weight": { |
| "min": -0.2449360489845276, |
| "max": 0.207246333360672, |
| "mean": 4.3898020521737635e-05, |
| "std": 0.033962249755859375, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_v.bias": { |
| "min": -0.03454353287816048, |
| "max": 0.04481153190135956, |
| "mean": -1.8621416529640555e-05, |
| "std": 0.01263485848903656, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_out.0.weight": { |
| "min": -0.20073898136615753, |
| "max": 0.20600160956382751, |
| "mean": -2.920800579886418e-05, |
| "std": 0.0310201458632946, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_out.0.bias": { |
| "min": -0.1997092068195343, |
| "max": 0.11323567479848862, |
| "mean": -0.002894954290241003, |
| "std": 0.0345144160091877, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.3.g": { |
| "min": 0.36691704392433167, |
| "max": 1.0552048683166504, |
| "mean": 0.670504629611969, |
| "std": 0.06634049117565155, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.4.ff.0.0.weight": { |
| "min": -0.39792558550834656, |
| "max": 0.5017094612121582, |
| "mean": -3.8320780731737614e-05, |
| "std": 0.04113030061125755, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.4.ff.0.0.bias": { |
| "min": -0.12866847217082977, |
| "max": 0.026868799701333046, |
| "mean": -0.030530910938978195, |
| "std": 0.02187257632613182, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.4.4.ff.2.weight": { |
| "min": -0.4486997127532959, |
| "max": 0.4325278401374817, |
| "mean": 7.570705201942474e-05, |
| "std": 0.03489042818546295, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.4.4.ff.2.bias": { |
| "min": -0.26739102602005005, |
| "max": 0.07290376722812653, |
| "mean": -0.001090540667064488, |
| "std": 0.023126306012272835, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.1.g": { |
| "min": 0.28740835189819336, |
| "max": 0.6838006973266602, |
| "mean": 0.5244842767715454, |
| "std": 0.04748576506972313, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_q.weight": { |
| "min": -0.22222448885440826, |
| "max": 0.22337274253368378, |
| "mean": 1.5597350284224376e-05, |
| "std": 0.038948558270931244, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_q.bias": { |
| "min": -0.1362549066543579, |
| "max": 0.1092236116528511, |
| "mean": 0.00024021141871344298, |
| "std": 0.029209597036242485, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_k.weight": { |
| "min": -0.37488552927970886, |
| "max": 0.43708565831184387, |
| "mean": -9.820145351113752e-06, |
| "std": 0.039285808801651, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_k.bias": { |
| "min": -3.8422415256500244, |
| "max": 4.994611740112305, |
| "mean": 0.009733816608786583, |
| "std": 0.8449002504348755, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_v.weight": { |
| "min": -0.22278591990470886, |
| "max": 0.21995313465595245, |
| "mean": -2.4143082555383444e-07, |
| "std": 0.03440921753644943, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_v.bias": { |
| "min": -0.04355766996741295, |
| "max": 0.03580183535814285, |
| "mean": -0.0002584094472695142, |
| "std": 0.012078197672963142, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_out.0.weight": { |
| "min": -0.21266809105873108, |
| "max": 0.18842695653438568, |
| "mean": -1.707848787191324e-05, |
| "std": 0.03153562918305397, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_out.0.bias": { |
| "min": -0.18067854642868042, |
| "max": 0.12067519873380661, |
| "mean": -0.0023923253174871206, |
| "std": 0.04126231372356415, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.3.g": { |
| "min": 0.42283520102500916, |
| "max": 0.9399095773696899, |
| "mean": 0.6626414060592651, |
| "std": 0.056763265281915665, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.4.ff.0.0.weight": { |
| "min": -0.37058448791503906, |
| "max": 0.4756770133972168, |
| "mean": -8.219464507419616e-05, |
| "std": 0.040889278054237366, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.4.ff.0.0.bias": { |
| "min": -0.20835021138191223, |
| "max": 0.027245184406638145, |
| "mean": -0.03023524209856987, |
| "std": 0.02135040983557701, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.5.4.ff.2.weight": { |
| "min": -0.3404720425605774, |
| "max": 0.7332155108451843, |
| "mean": 8.202612661989406e-05, |
| "std": 0.03476588428020477, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.5.4.ff.2.bias": { |
| "min": -0.2399250864982605, |
| "max": 0.050362419337034225, |
| "mean": -0.0011862949468195438, |
| "std": 0.020457014441490173, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.1.g": { |
| "min": 0.306090772151947, |
| "max": 0.6522687077522278, |
| "mean": 0.5250887274742126, |
| "std": 0.0460890494287014, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_q.weight": { |
| "min": -0.3040372133255005, |
| "max": 0.21722179651260376, |
| "mean": 7.015860319370404e-05, |
| "std": 0.0394948311150074, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_q.bias": { |
| "min": -0.14904865622520447, |
| "max": 0.1309719830751419, |
| "mean": 0.0003389039193280041, |
| "std": 0.03043319098651409, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_k.weight": { |
| "min": -0.2568168342113495, |
| "max": 0.20181529223918915, |
| "mean": 3.114001810899936e-05, |
| "std": 0.039484698325395584, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_k.bias": { |
| "min": -2.3340678215026855, |
| "max": 2.373654365539551, |
| "mean": -0.026232335716485977, |
| "std": 0.4496069550514221, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_v.weight": { |
| "min": -0.18832948803901672, |
| "max": 0.2102191150188446, |
| "mean": 3.7190951843513176e-05, |
| "std": 0.03479335457086563, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_v.bias": { |
| "min": -0.03177480027079582, |
| "max": 0.03555988520383835, |
| "mean": -0.00019898739992640913, |
| "std": 0.012286651879549026, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_out.0.weight": { |
| "min": -0.1882997751235962, |
| "max": 0.16997897624969482, |
| "mean": -6.833271618233994e-05, |
| "std": 0.03217003867030144, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_out.0.bias": { |
| "min": -0.13938407599925995, |
| "max": 0.1373613476753235, |
| "mean": -0.0025095485616475344, |
| "std": 0.051287971436977386, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.3.g": { |
| "min": 0.4670821726322174, |
| "max": 0.9539185762405396, |
| "mean": 0.6688235998153687, |
| "std": 0.05267348513007164, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.4.ff.0.0.weight": { |
| "min": -0.3240530490875244, |
| "max": 0.30894580483436584, |
| "mean": -9.802424756344408e-07, |
| "std": 0.04094521328806877, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.4.ff.0.0.bias": { |
| "min": -0.12482603639364243, |
| "max": 0.025560826063156128, |
| "mean": -0.030691375955939293, |
| "std": 0.01981331594288349, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.6.4.ff.2.weight": { |
| "min": -0.4391370117664337, |
| "max": 0.4447336196899414, |
| "mean": 9.505114576313645e-05, |
| "std": 0.03511868044734001, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.6.4.ff.2.bias": { |
| "min": -0.22435998916625977, |
| "max": 0.051745057106018066, |
| "mean": -0.0011790611315518618, |
| "std": 0.018466567620635033, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.1.g": { |
| "min": 0.339127779006958, |
| "max": 0.7379522323608398, |
| "mean": 0.5586450695991516, |
| "std": 0.041346412152051926, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_q.weight": { |
| "min": -0.27276721596717834, |
| "max": 0.2783542275428772, |
| "mean": 2.0316545487730764e-05, |
| "std": 0.04105677455663681, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_q.bias": { |
| "min": -0.13677620887756348, |
| "max": 0.13981792330741882, |
| "mean": 0.0004895473830401897, |
| "std": 0.026616644114255905, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_k.weight": { |
| "min": -0.4901849925518036, |
| "max": 0.3555382788181305, |
| "mean": 8.898908708943054e-05, |
| "std": 0.04069453105330467, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_k.bias": { |
| "min": -2.2957122325897217, |
| "max": 1.7441315650939941, |
| "mean": -0.02107611857354641, |
| "std": 0.5000779628753662, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_v.weight": { |
| "min": -0.2175905406475067, |
| "max": 0.19755098223686218, |
| "mean": -4.055129102198407e-05, |
| "std": 0.03423253819346428, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_v.bias": { |
| "min": -0.041273877024650574, |
| "max": 0.038862332701683044, |
| "mean": -0.0001397906889906153, |
| "std": 0.012886369600892067, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_out.0.weight": { |
| "min": -0.17747005820274353, |
| "max": 0.1828984022140503, |
| "mean": 4.791315950569697e-05, |
| "std": 0.03155587986111641, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_out.0.bias": { |
| "min": -0.17983144521713257, |
| "max": 0.1835365742444992, |
| "mean": -0.0022142226807773113, |
| "std": 0.054839469492435455, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.3.g": { |
| "min": 0.4742608368396759, |
| "max": 1.0234043598175049, |
| "mean": 0.645187497138977, |
| "std": 0.050187092274427414, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.4.ff.0.0.weight": { |
| "min": -0.2714308202266693, |
| "max": 0.3094487190246582, |
| "mean": 0.00011228019138798118, |
| "std": 0.04068155214190483, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.4.ff.0.0.bias": { |
| "min": -0.1052371934056282, |
| "max": 0.026651456952095032, |
| "mean": -0.029516855254769325, |
| "std": 0.017926618456840515, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.7.4.ff.2.weight": { |
| "min": -0.33875298500061035, |
| "max": 0.3289111852645874, |
| "mean": 5.248367233434692e-05, |
| "std": 0.03441265597939491, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.7.4.ff.2.bias": { |
| "min": -0.1814928501844406, |
| "max": 0.04225185513496399, |
| "mean": -0.0010585930431261659, |
| "std": 0.017206743359565735, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.1.g": { |
| "min": 0.325328528881073, |
| "max": 0.6851887106895447, |
| "mean": 0.5111891627311707, |
| "std": 0.03689680993556976, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_q.weight": { |
| "min": -0.2336086481809616, |
| "max": 0.2251969277858734, |
| "mean": -3.625164390541613e-05, |
| "std": 0.039176031947135925, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_q.bias": { |
| "min": -0.11540839821100235, |
| "max": 0.13177232444286346, |
| "mean": 0.00015377491945400834, |
| "std": 0.029171116650104523, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_k.weight": { |
| "min": -0.35232973098754883, |
| "max": 0.2849805951118469, |
| "mean": 6.946377197891707e-06, |
| "std": 0.0392446406185627, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_k.bias": { |
| "min": -4.128444194793701, |
| "max": 3.5404324531555176, |
| "mean": -0.011580632999539375, |
| "std": 0.6822744011878967, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_v.weight": { |
| "min": -0.21085655689239502, |
| "max": 0.20925314724445343, |
| "mean": 3.461689630057663e-05, |
| "std": 0.03448476642370224, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_v.bias": { |
| "min": -0.03582029417157173, |
| "max": 0.0481770783662796, |
| "mean": 0.000791961036156863, |
| "std": 0.012865905649960041, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_out.0.weight": { |
| "min": -0.2102348804473877, |
| "max": 0.19295428693294525, |
| "mean": -1.266141225642059e-06, |
| "std": 0.03169584646821022, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_out.0.bias": { |
| "min": -0.18637949228286743, |
| "max": 0.17694726586341858, |
| "mean": -0.0028348618652671576, |
| "std": 0.058624111115932465, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.3.g": { |
| "min": 0.47455769777297974, |
| "max": 1.0399035215377808, |
| "mean": 0.6513059735298157, |
| "std": 0.049517374485731125, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.4.ff.0.0.weight": { |
| "min": -0.2480839341878891, |
| "max": 0.32886141538619995, |
| "mean": 0.00018076057313010097, |
| "std": 0.040569957345724106, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.4.ff.0.0.bias": { |
| "min": -0.12484849989414215, |
| "max": 0.024815550073981285, |
| "mean": -0.030500907450914383, |
| "std": 0.01760847680270672, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.8.4.ff.2.weight": { |
| "min": -0.42022550106048584, |
| "max": 0.4810453951358795, |
| "mean": -1.3774351828033105e-06, |
| "std": 0.03539680689573288, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.8.4.ff.2.bias": { |
| "min": -0.15139424800872803, |
| "max": 0.04337864741683006, |
| "mean": 4.9671380111249164e-05, |
| "std": 0.014884358271956444, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.1.g": { |
| "min": 0.3155671954154968, |
| "max": 0.6806262135505676, |
| "mean": 0.5528896450996399, |
| "std": 0.04069091007113457, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_q.weight": { |
| "min": -0.20614612102508545, |
| "max": 0.2194698005914688, |
| "mean": 3.180014027748257e-05, |
| "std": 0.038299210369586945, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_q.bias": { |
| "min": -0.13776730000972748, |
| "max": 0.11263402551412582, |
| "mean": 2.7509784558787942e-05, |
| "std": 0.02582019381225109, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_k.weight": { |
| "min": -0.4022030830383301, |
| "max": 0.3703415095806122, |
| "mean": 2.5775392714422196e-05, |
| "std": 0.03817988187074661, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_k.bias": { |
| "min": -3.767340898513794, |
| "max": 2.8659963607788086, |
| "mean": 0.0011514686048030853, |
| "std": 0.5165835022926331, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_v.weight": { |
| "min": -0.20330490171909332, |
| "max": 0.1975128948688507, |
| "mean": 2.9661892767762765e-05, |
| "std": 0.03429696336388588, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_v.bias": { |
| "min": -0.05067470669746399, |
| "max": 0.03985888883471489, |
| "mean": -0.0004201547708362341, |
| "std": 0.013416973873972893, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_out.0.weight": { |
| "min": -0.19610381126403809, |
| "max": 0.20185545086860657, |
| "mean": -1.2482038982852828e-05, |
| "std": 0.031804922968149185, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_out.0.bias": { |
| "min": -0.19282294809818268, |
| "max": 0.19485345482826233, |
| "mean": -0.0029612130019813776, |
| "std": 0.06253436952829361, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.3.g": { |
| "min": 0.3490590453147888, |
| "max": 1.081492304801941, |
| "mean": 0.6670613884925842, |
| "std": 0.05502287670969963, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.4.ff.0.0.weight": { |
| "min": -0.22548414766788483, |
| "max": 0.2509278655052185, |
| "mean": 0.00035874126479029655, |
| "std": 0.04075963795185089, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.4.ff.0.0.bias": { |
| "min": -0.0911286398768425, |
| "max": 0.043736688792705536, |
| "mean": -0.03008149564266205, |
| "std": 0.017609886825084686, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.9.4.ff.2.weight": { |
| "min": -0.3527411222457886, |
| "max": 0.30355900526046753, |
| "mean": -4.3905802158406004e-05, |
| "std": 0.037122152745723724, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.9.4.ff.2.bias": { |
| "min": -0.16155573725700378, |
| "max": 0.06323426961898804, |
| "mean": -8.016945503186435e-05, |
| "std": 0.019409824162721634, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.1.g": { |
| "min": 0.34882256388664246, |
| "max": 0.7205829620361328, |
| "mean": 0.5423275232315063, |
| "std": 0.03903055191040039, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_q.weight": { |
| "min": -0.21910026669502258, |
| "max": 0.2230084389448166, |
| "mean": -1.1230863492528442e-05, |
| "std": 0.03923042118549347, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_q.bias": { |
| "min": -0.11831706017255783, |
| "max": 0.17028944194316864, |
| "mean": 0.0002854751655831933, |
| "std": 0.02510806918144226, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_k.weight": { |
| "min": -0.24612674117088318, |
| "max": 0.3002479076385498, |
| "mean": -3.693345206556842e-05, |
| "std": 0.03892989829182625, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_k.bias": { |
| "min": -3.501706838607788, |
| "max": 3.7109532356262207, |
| "mean": 0.015846284106373787, |
| "std": 0.7818700075149536, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_v.weight": { |
| "min": -0.2186352014541626, |
| "max": 0.2372058928012848, |
| "mean": -1.3363219295570161e-05, |
| "std": 0.03630276769399643, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_v.bias": { |
| "min": -0.04711708053946495, |
| "max": 0.05125221982598305, |
| "mean": 0.00047675782116129994, |
| "std": 0.013513283804059029, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_out.0.weight": { |
| "min": -0.2137574851512909, |
| "max": 0.2170482724905014, |
| "mean": 5.6474542361684144e-05, |
| "std": 0.033615030348300934, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_out.0.bias": { |
| "min": -0.21112386882305145, |
| "max": 0.23111283779144287, |
| "mean": -0.005101324524730444, |
| "std": 0.06186835095286369, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.3.g": { |
| "min": 0.36194419860839844, |
| "max": 1.0987720489501953, |
| "mean": 0.6991980671882629, |
| "std": 0.05339714512228966, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.4.ff.0.0.weight": { |
| "min": -0.23452329635620117, |
| "max": 0.24459832906723022, |
| "mean": 0.0004634420620277524, |
| "std": 0.041268572211265564, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.4.ff.0.0.bias": { |
| "min": -0.09795372933149338, |
| "max": 0.0681690126657486, |
| "mean": -0.031430941075086594, |
| "std": 0.018122123554348946, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.10.4.ff.2.weight": { |
| "min": -0.3014773726463318, |
| "max": 0.3510685861110687, |
| "mean": -8.210168743971735e-05, |
| "std": 0.04027429223060608, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.10.4.ff.2.bias": { |
| "min": -0.15211886167526245, |
| "max": 0.14952634274959564, |
| "mean": 0.0002581052831374109, |
| "std": 0.023030627518892288, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.1.g": { |
| "min": 0.9992543458938599, |
| "max": 1.000257968902588, |
| "mean": 0.9997284412384033, |
| "std": 0.00024261184444185346, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_q.weight": { |
| "min": -0.031257662922143936, |
| "max": 0.03125471994280815, |
| "mean": -1.929123027366586e-05, |
| "std": 0.018041206523776054, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_q.bias": { |
| "min": -0.03122766688466072, |
| "max": 0.030988017097115517, |
| "mean": -0.0010841797338798642, |
| "std": 0.01795079931616783, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_k.weight": { |
| "min": -0.031254444271326065, |
| "max": 0.031258873641490936, |
| "mean": 3.5479256439430173e-06, |
| "std": 0.018041614443063736, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_k.bias": { |
| "min": -0.031154906377196312, |
| "max": 0.03117496706545353, |
| "mean": 0.0003339025133755058, |
| "std": 0.018063001334667206, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_v.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_v.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_out.0.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_out.0.bias": { |
| "min": -0.0006141028716228902, |
| "max": 0.0004136512288823724, |
| "mean": 1.3743268709731638e-06, |
| "std": 0.0001376789587084204, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.3.g": { |
| "min": 0.9981284141540527, |
| "max": 1.001622200012207, |
| "mean": 0.9998474717140198, |
| "std": 0.0006079401355236769, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.4.ff.0.0.weight": { |
| "min": -0.032770540565252304, |
| "max": 0.032834719866514206, |
| "mean": -6.686397682642564e-06, |
| "std": 0.01804281771183014, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.4.ff.0.0.bias": { |
| "min": -0.032758843153715134, |
| "max": 0.03259320184588432, |
| "mean": -0.00013118298375047743, |
| "std": 0.017956331372261047, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.11.4.ff.2.weight": { |
| "min": -0.001173654804006219, |
| "max": 0.0011514672078192234, |
| "mean": 3.6397079838934587e-07, |
| "std": 0.00021431130880955607, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.11.4.ff.2.bias": { |
| "min": -0.0005246364744380116, |
| "max": 0.000398451229557395, |
| "mean": 2.265020839331555e-06, |
| "std": 0.0001267467887373641, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.1.g": { |
| "min": 0.38304001092910767, |
| "max": 0.717822790145874, |
| "mean": 0.5806512236595154, |
| "std": 0.03879348561167717, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_q.weight": { |
| "min": -0.2381902039051056, |
| "max": 0.1962050199508667, |
| "mean": 2.6112733394256793e-05, |
| "std": 0.03746553510427475, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_q.bias": { |
| "min": -0.11878937482833862, |
| "max": 0.16630207002162933, |
| "mean": 0.0009804379660636187, |
| "std": 0.027551008388400078, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_k.weight": { |
| "min": -0.24597673118114471, |
| "max": 0.499647855758667, |
| "mean": -5.027425504522398e-05, |
| "std": 0.03762295842170715, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_k.bias": { |
| "min": -3.9381461143493652, |
| "max": 3.7654519081115723, |
| "mean": -0.003569968044757843, |
| "std": 0.6810594201087952, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_v.weight": { |
| "min": -0.22724951803684235, |
| "max": 0.25177428126335144, |
| "mean": -1.1575086318771355e-05, |
| "std": 0.037434518337249756, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_v.bias": { |
| "min": -0.07160108536481857, |
| "max": 0.08055920898914337, |
| "mean": -0.0005123723531141877, |
| "std": 0.015660181641578674, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_out.0.weight": { |
| "min": -0.22791653871536255, |
| "max": 0.25741860270500183, |
| "mean": -2.8733527869917452e-05, |
| "std": 0.035421404987573624, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_out.0.bias": { |
| "min": -0.20038263499736786, |
| "max": 0.21485595405101776, |
| "mean": -0.005531632341444492, |
| "std": 0.06833721697330475, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.3.g": { |
| "min": 0.4051814377307892, |
| "max": 1.186793327331543, |
| "mean": 0.7378474473953247, |
| "std": 0.055015575140714645, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.4.ff.0.0.weight": { |
| "min": -0.2207704335451126, |
| "max": 0.24539422988891602, |
| "mean": 0.0005212163086980581, |
| "std": 0.04133594036102295, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.4.ff.0.0.bias": { |
| "min": -0.10323301702737808, |
| "max": 0.02423531748354435, |
| "mean": -0.03266426920890808, |
| "std": 0.018886635079979897, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.12.4.ff.2.weight": { |
| "min": -0.44897761940956116, |
| "max": 0.42180517315864563, |
| "mean": -0.0004341494059190154, |
| "std": 0.04689624160528183, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.12.4.ff.2.bias": { |
| "min": -0.25117069482803345, |
| "max": 0.46963006258010864, |
| "mean": 0.003201500279828906, |
| "std": 0.044517986476421356, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.0.weight": { |
| "min": -0.3168058395385742, |
| "max": 0.3330129086971283, |
| "mean": -2.5202643882948905e-05, |
| "std": 0.021287493407726288, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.13.1.g": { |
| "min": 0.32449325919151306, |
| "max": 0.6839006543159485, |
| "mean": 0.5709657073020935, |
| "std": 0.04467146471142769, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_q.weight": { |
| "min": -0.16424405574798584, |
| "max": 0.1741371899843216, |
| "mean": -4.883421570411883e-05, |
| "std": 0.033180903643369675, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_q.bias": { |
| "min": -0.18656986951828003, |
| "max": 0.14275068044662476, |
| "mean": 4.2517087422311306e-05, |
| "std": 0.029676001518964767, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_k.weight": { |
| "min": -0.3805179297924042, |
| "max": 0.24586445093154907, |
| "mean": -9.98385530692758e-06, |
| "std": 0.03276193141937256, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_k.bias": { |
| "min": -3.6520333290100098, |
| "max": 3.2866697311401367, |
| "mean": -0.01423930749297142, |
| "std": 0.984977662563324, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_v.weight": { |
| "min": -0.23466402292251587, |
| "max": 0.24725867807865143, |
| "mean": -1.800561039999593e-05, |
| "std": 0.04169729724526405, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_v.bias": { |
| "min": -0.07259472459554672, |
| "max": 0.15434128046035767, |
| "mean": 0.0006652789888903499, |
| "std": 0.02516855113208294, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_out.0.weight": { |
| "min": -0.2662595510482788, |
| "max": 0.24813267588615417, |
| "mean": -1.5347548469435424e-05, |
| "std": 0.04013809189200401, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_out.0.bias": { |
| "min": -0.18939754366874695, |
| "max": 0.19454091787338257, |
| "mean": -0.0012339097447693348, |
| "std": 0.06667902320623398, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.3.g": { |
| "min": 0.32912713289260864, |
| "max": 0.9980567097663879, |
| "mean": 0.7191190719604492, |
| "std": 0.05222564935684204, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.4.ff.0.0.weight": { |
| "min": -0.23154447972774506, |
| "max": 0.2451959252357483, |
| "mean": 0.00018269156862515956, |
| "std": 0.04089995473623276, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.4.ff.0.0.bias": { |
| "min": -0.11424808949232101, |
| "max": 0.01902252808213234, |
| "mean": -0.04247482866048813, |
| "std": 0.018848657608032227, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.13.4.ff.2.weight": { |
| "min": -0.3893679976463318, |
| "max": 0.4069530963897705, |
| "mean": -2.1458035917021334e-05, |
| "std": 0.04853350669145584, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.13.4.ff.2.bias": { |
| "min": -0.6924692392349243, |
| "max": 0.4121605455875397, |
| "mean": 0.0008477990049868822, |
| "std": 0.06026294827461243, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.0.weight": { |
| "min": -0.0010412124684080482, |
| "max": 1.00050687789917, |
| "mean": 0.00048820613301359117, |
| "std": 0.02208906039595604, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.14.1.g": { |
| "min": 0.9985182881355286, |
| "max": 1.000278115272522, |
| "mean": 0.9996296167373657, |
| "std": 0.0004832371196243912, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_q.weight": { |
| "min": -0.03125324100255966, |
| "max": 0.03125615417957306, |
| "mean": -2.1021265638410114e-05, |
| "std": 0.01803254708647728, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_q.bias": { |
| "min": -0.03121461719274521, |
| "max": 0.031231539323925972, |
| "mean": -0.0006769909523427486, |
| "std": 0.017827048897743225, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_k.weight": { |
| "min": -0.03125639632344246, |
| "max": 0.031260956078767776, |
| "mean": -8.831522791297175e-06, |
| "std": 0.018031572923064232, |
| "sparsity": 9.5367431640625e-07, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_k.bias": { |
| "min": -0.03123198263347149, |
| "max": 0.031244853511452675, |
| "mean": -0.0007297562551684678, |
| "std": 0.017941949889063835, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_v.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_v.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_out.0.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_out.0.bias": { |
| "min": -0.0004176551883574575, |
| "max": 0.0003318839881103486, |
| "mean": -3.140859689665376e-06, |
| "std": 0.00011632459791144356, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.3.g": { |
| "min": 0.9979198575019836, |
| "max": 1.0014318227767944, |
| "mean": 0.9994964599609375, |
| "std": 0.0006108160014264286, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.4.ff.0.0.weight": { |
| "min": -0.03245294839143753, |
| "max": 0.032378438860177994, |
| "mean": -1.7318175196123775e-06, |
| "std": 0.018028022721409798, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.4.ff.0.0.bias": { |
| "min": -0.03213566541671753, |
| "max": 0.03115900792181492, |
| "mean": -0.0003739359090104699, |
| "std": 0.018043629825115204, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.14.4.ff.2.weight": { |
| "min": -0.0012771300971508026, |
| "max": 0.0011123745935037732, |
| "mean": -8.958944022197102e-07, |
| "std": 0.00020973320351913571, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.14.4.ff.2.bias": { |
| "min": -0.00034164811950176954, |
| "max": 0.0002967154432553798, |
| "mean": -3.7618522128468612e-06, |
| "std": 0.00010472961730556563, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.0.weight": { |
| "min": -0.2341979742050171, |
| "max": 0.27227067947387695, |
| "mean": 6.760874839528697e-06, |
| "std": 0.01880943961441517, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.15.1.g": { |
| "min": 0.32133588194847107, |
| "max": 0.6926518678665161, |
| "mean": 0.5816141963005066, |
| "std": 0.04592034965753555, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_q.weight": { |
| "min": -0.1816624104976654, |
| "max": 0.19737666845321655, |
| "mean": -1.1567326509975828e-05, |
| "std": 0.03318365663290024, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_q.bias": { |
| "min": -0.16045045852661133, |
| "max": 0.12930794060230255, |
| "mean": -0.0010751842055469751, |
| "std": 0.03413202986121178, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_k.weight": { |
| "min": -0.3320204019546509, |
| "max": 0.31095007061958313, |
| "mean": -1.016673104459187e-05, |
| "std": 0.032234374433755875, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_k.bias": { |
| "min": -7.795230388641357, |
| "max": 8.753500938415527, |
| "mean": 0.09339793026447296, |
| "std": 1.6184653043746948, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_v.weight": { |
| "min": -0.23359645903110504, |
| "max": 0.2416210174560547, |
| "mean": 4.149888991378248e-05, |
| "std": 0.04085618630051613, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_v.bias": { |
| "min": -0.07583926618099213, |
| "max": 0.06566201150417328, |
| "mean": 0.0004832554841414094, |
| "std": 0.01940709352493286, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_out.0.weight": { |
| "min": -0.24546822905540466, |
| "max": 0.23373769223690033, |
| "mean": -3.0527116905432194e-06, |
| "std": 0.03943083807826042, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_out.0.bias": { |
| "min": -0.16301113367080688, |
| "max": 0.16089561581611633, |
| "mean": 0.0016276519745588303, |
| "std": 0.06527570635080338, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.3.g": { |
| "min": 0.556946873664856, |
| "max": 0.9415686726570129, |
| "mean": 0.7127838134765625, |
| "std": 0.03996752202510834, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.4.ff.0.0.weight": { |
| "min": -0.22765818238258362, |
| "max": 0.25477662682533264, |
| "mean": -4.5632557885255665e-05, |
| "std": 0.04057467356324196, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.4.ff.0.0.bias": { |
| "min": -0.1348292976617813, |
| "max": 0.022138668224215508, |
| "mean": -0.04134812578558922, |
| "std": 0.01838543266057968, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.15.4.ff.2.weight": { |
| "min": -0.42094686627388, |
| "max": 0.3921053111553192, |
| "mean": -4.4014304876327515e-06, |
| "std": 0.04778384044766426, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.15.4.ff.2.bias": { |
| "min": -0.6069029569625854, |
| "max": 0.6509266495704651, |
| "mean": 0.0015840512933209538, |
| "std": 0.05682184174656868, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.0.weight": { |
| "min": -0.25153595209121704, |
| "max": 0.320549339056015, |
| "mean": -6.0848738030472305e-06, |
| "std": 0.019612807780504227, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.16.1.g": { |
| "min": 0.35961607098579407, |
| "max": 0.6813214421272278, |
| "mean": 0.570705771446228, |
| "std": 0.04296967759728432, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_q.weight": { |
| "min": -0.22012382745742798, |
| "max": 0.17660681903362274, |
| "mean": -3.47153763868846e-05, |
| "std": 0.03429870679974556, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_q.bias": { |
| "min": -0.1630830317735672, |
| "max": 0.23280400037765503, |
| "mean": 0.00036220261245034635, |
| "std": 0.03281139209866524, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_k.weight": { |
| "min": -0.263581246137619, |
| "max": 0.23967352509498596, |
| "mean": -5.2856208640150726e-05, |
| "std": 0.03389754518866539, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_k.bias": { |
| "min": -4.849710464477539, |
| "max": 5.085712909698486, |
| "mean": 0.043873172253370285, |
| "std": 1.2286995649337769, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_v.weight": { |
| "min": -0.24600939452648163, |
| "max": 0.25006523728370667, |
| "mean": 7.234106305986643e-05, |
| "std": 0.04398686811327934, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_v.bias": { |
| "min": -0.06254445016384125, |
| "max": 0.054417435079813004, |
| "mean": 0.0006422345177270472, |
| "std": 0.017186632379889488, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_out.0.weight": { |
| "min": -0.28586557507514954, |
| "max": 0.2718929648399353, |
| "mean": -5.018173033022322e-05, |
| "std": 0.0429849736392498, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_out.0.bias": { |
| "min": -0.1608622968196869, |
| "max": 0.17021305859088898, |
| "mean": -0.0028866538777947426, |
| "std": 0.05928993597626686, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.3.g": { |
| "min": 0.519731879234314, |
| "max": 0.9308202266693115, |
| "mean": 0.7133743166923523, |
| "std": 0.03828318044543266, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.4.ff.0.0.weight": { |
| "min": -0.23790688812732697, |
| "max": 0.24848711490631104, |
| "mean": 0.00046475647832266986, |
| "std": 0.04045366868376732, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.4.ff.0.0.bias": { |
| "min": -0.14495447278022766, |
| "max": 0.04111183062195778, |
| "mean": -0.039693139493465424, |
| "std": 0.020540453493595123, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.16.4.ff.2.weight": { |
| "min": -0.5317410826683044, |
| "max": 0.581489622592926, |
| "mean": 5.736372258979827e-06, |
| "std": 0.04885946586728096, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.16.4.ff.2.bias": { |
| "min": -0.5184876322746277, |
| "max": 0.4928899109363556, |
| "mean": 0.002365314168855548, |
| "std": 0.05342720076441765, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.0.weight": { |
| "min": -0.27367857098579407, |
| "max": 0.3154536187648773, |
| "mean": 2.0265892999304924e-06, |
| "std": 0.020049458369612694, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.17.1.g": { |
| "min": 0.36605367064476013, |
| "max": 0.7104601860046387, |
| "mean": 0.5931398272514343, |
| "std": 0.04595194756984711, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_q.weight": { |
| "min": -0.21068720519542694, |
| "max": 0.19896060228347778, |
| "mean": 3.061807728954591e-05, |
| "std": 0.03486604616045952, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_q.bias": { |
| "min": -0.18698948621749878, |
| "max": 0.20358456671237946, |
| "mean": 0.0009543596534058452, |
| "std": 0.03149386867880821, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_k.weight": { |
| "min": -0.2894982397556305, |
| "max": 0.339619904756546, |
| "mean": -4.7122804971877486e-05, |
| "std": 0.034586917608976364, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_k.bias": { |
| "min": -3.8732118606567383, |
| "max": 3.3837733268737793, |
| "mean": 0.014458216726779938, |
| "std": 0.8580982089042664, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_v.weight": { |
| "min": -0.224315345287323, |
| "max": 0.24964982271194458, |
| "mean": -3.871130957122659e-06, |
| "std": 0.042229585349559784, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_v.bias": { |
| "min": -0.055275678634643555, |
| "max": 0.04663092643022537, |
| "mean": -1.647317549213767e-05, |
| "std": 0.015846259891986847, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_out.0.weight": { |
| "min": -0.2928326427936554, |
| "max": 0.29024964570999146, |
| "mean": -7.346136044361629e-06, |
| "std": 0.04194441810250282, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_out.0.bias": { |
| "min": -0.12488731741905212, |
| "max": 0.2587108016014099, |
| "mean": -0.0032421478535979986, |
| "std": 0.05317580699920654, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.3.g": { |
| "min": 0.4563259780406952, |
| "max": 0.8424069881439209, |
| "mean": 0.7054323554039001, |
| "std": 0.03509839251637459, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.4.ff.0.0.weight": { |
| "min": -0.5117396712303162, |
| "max": 0.34794938564300537, |
| "mean": 0.00034281908301636577, |
| "std": 0.04019879177212715, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.4.ff.0.0.bias": { |
| "min": -0.1857415735721588, |
| "max": 0.03958635777235031, |
| "mean": -0.03938839212059975, |
| "std": 0.021348465234041214, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.17.4.ff.2.weight": { |
| "min": -0.5434486865997314, |
| "max": 0.5551662445068359, |
| "mean": -7.160313543863595e-05, |
| "std": 0.050734180957078934, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.17.4.ff.2.bias": { |
| "min": -0.5112110376358032, |
| "max": 0.6635048389434814, |
| "mean": 0.002443352248519659, |
| "std": 0.04949941858649254, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.0.weight": { |
| "min": -0.3325079083442688, |
| "max": 0.2651371359825134, |
| "mean": 3.4327572393522132e-06, |
| "std": 0.019386671483516693, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.18.1.g": { |
| "min": 0.3219457268714905, |
| "max": 0.7650159597396851, |
| "mean": 0.6510248780250549, |
| "std": 0.04531543329358101, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_q.weight": { |
| "min": -0.24919819831848145, |
| "max": 0.21938340365886688, |
| "mean": -2.0984125512768514e-06, |
| "std": 0.03650059178471565, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_q.bias": { |
| "min": -0.32654333114624023, |
| "max": 0.2866538465023041, |
| "mean": -0.0006891752709634602, |
| "std": 0.03852362558245659, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_k.weight": { |
| "min": -0.30977994203567505, |
| "max": 0.36965611577033997, |
| "mean": 6.506919453386217e-05, |
| "std": 0.03624110668897629, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_k.bias": { |
| "min": -4.713971138000488, |
| "max": 5.803556442260742, |
| "mean": 0.03793709725141525, |
| "std": 1.412732481956482, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_v.weight": { |
| "min": -0.22124992311000824, |
| "max": 0.20528917014598846, |
| "mean": -7.50878534745425e-05, |
| "std": 0.042485084384679794, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_v.bias": { |
| "min": -0.07763200253248215, |
| "max": 0.05141681060194969, |
| "mean": -0.0009281833190470934, |
| "std": 0.01641252264380455, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_out.0.weight": { |
| "min": -0.33066344261169434, |
| "max": 0.32909321784973145, |
| "mean": -4.5878937271481846e-06, |
| "std": 0.04279147461056709, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_out.0.bias": { |
| "min": -0.2844299376010895, |
| "max": 0.1119050681591034, |
| "mean": -0.001205054228194058, |
| "std": 0.0470142662525177, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.3.g": { |
| "min": 0.48612144589424133, |
| "max": 0.8848820328712463, |
| "mean": 0.7373377084732056, |
| "std": 0.03814017400145531, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.4.ff.0.0.weight": { |
| "min": -0.36209484934806824, |
| "max": 0.2740732431411743, |
| "mean": 5.125169991515577e-05, |
| "std": 0.04064430668950081, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.4.ff.0.0.bias": { |
| "min": -0.2473653107881546, |
| "max": 0.046401649713516235, |
| "mean": -0.03926541656255722, |
| "std": 0.02327280305325985, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.18.4.ff.2.weight": { |
| "min": -0.6253157258033752, |
| "max": 0.5961773991584778, |
| "mean": -6.133734132163227e-05, |
| "std": 0.0531163364648819, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.18.4.ff.2.bias": { |
| "min": -0.7087676525115967, |
| "max": 0.2656005322933197, |
| "mean": 0.0009179539047181606, |
| "std": 0.05120791867375374, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.0.weight": { |
| "min": -0.3432927131652832, |
| "max": 0.3036082684993744, |
| "mean": 1.7233912785741268e-07, |
| "std": 0.01913507841527462, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.19.1.g": { |
| "min": 0.34983396530151367, |
| "max": 0.78127521276474, |
| "mean": 0.6388033628463745, |
| "std": 0.04922258108854294, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_q.weight": { |
| "min": -0.20482076704502106, |
| "max": 0.20643775165081024, |
| "mean": -5.993415470584296e-05, |
| "std": 0.037695497274398804, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_q.bias": { |
| "min": -0.2582729458808899, |
| "max": 0.2677401304244995, |
| "mean": -0.0004000938788522035, |
| "std": 0.04457787051796913, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_k.weight": { |
| "min": -0.3535555303096771, |
| "max": 0.3218846917152405, |
| "mean": -7.005222414591117e-06, |
| "std": 0.03720390424132347, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_k.bias": { |
| "min": -5.2560133934021, |
| "max": 4.200046062469482, |
| "mean": -0.026399940252304077, |
| "std": 1.0062882900238037, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_v.weight": { |
| "min": -0.2381831258535385, |
| "max": 0.24307270348072052, |
| "mean": -2.52762038144283e-05, |
| "std": 0.0432097353041172, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_v.bias": { |
| "min": -0.0622570626437664, |
| "max": 0.05666593089699745, |
| "mean": 0.0003454152902122587, |
| "std": 0.014151728712022305, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_out.0.weight": { |
| "min": -0.43709275126457214, |
| "max": 0.37350907921791077, |
| "mean": 1.4359582564793527e-05, |
| "std": 0.04412123188376427, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_out.0.bias": { |
| "min": -0.09637399762868881, |
| "max": 0.17579396069049835, |
| "mean": -0.00066028768196702, |
| "std": 0.035156894475221634, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.3.g": { |
| "min": 0.4216686189174652, |
| "max": 1.067047357559204, |
| "mean": 0.7483223080635071, |
| "std": 0.04198553413152695, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.4.ff.0.0.weight": { |
| "min": -0.26631179451942444, |
| "max": 0.2965000867843628, |
| "mean": -7.944944081827998e-05, |
| "std": 0.040804266929626465, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.4.ff.0.0.bias": { |
| "min": -0.1849687099456787, |
| "max": 0.04366198182106018, |
| "mean": -0.03681465983390808, |
| "std": 0.025593994185328484, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.19.4.ff.2.weight": { |
| "min": -0.4571255147457123, |
| "max": 0.4859236776828766, |
| "mean": 4.341108797234483e-05, |
| "std": 0.05420951172709465, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.19.4.ff.2.bias": { |
| "min": -0.28613921999931335, |
| "max": 0.5508683919906616, |
| "mean": -0.0008792161825112998, |
| "std": 0.04781510680913925, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.0.weight": { |
| "min": -0.2926841676235199, |
| "max": 0.3227182626724243, |
| "mean": 6.155195478640962e-06, |
| "std": 0.019968634471297264, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.20.1.g": { |
| "min": 0.29101473093032837, |
| "max": 0.7585480213165283, |
| "mean": 0.6508181095123291, |
| "std": 0.05212597921490669, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_q.weight": { |
| "min": -0.24345757067203522, |
| "max": 0.2612913250923157, |
| "mean": -6.02660793447285e-06, |
| "std": 0.03961166366934776, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_q.bias": { |
| "min": -0.2671639025211334, |
| "max": 0.19983193278312683, |
| "mean": -0.0008803074015304446, |
| "std": 0.05174032971262932, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_k.weight": { |
| "min": -0.2718494236469269, |
| "max": 0.25337839126586914, |
| "mean": 4.495690518524498e-06, |
| "std": 0.0387086495757103, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_k.bias": { |
| "min": -12.951557159423828, |
| "max": 15.930760383605957, |
| "mean": 0.03321323171257973, |
| "std": 1.9877210855484009, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_v.weight": { |
| "min": -0.2069142907857895, |
| "max": 0.225667342543602, |
| "mean": -7.223337888717651e-05, |
| "std": 0.04055356606841087, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_v.bias": { |
| "min": -0.06923694908618927, |
| "max": 0.06314270943403244, |
| "mean": 0.00015547810471616685, |
| "std": 0.0147401699796319, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_out.0.weight": { |
| "min": -0.4649372100830078, |
| "max": 0.3204408884048462, |
| "mean": 1.968499054783024e-05, |
| "std": 0.04058866575360298, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_out.0.bias": { |
| "min": -0.06409196555614471, |
| "max": 0.11513285338878632, |
| "mean": 0.0011910968460142612, |
| "std": 0.024711282923817635, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.3.g": { |
| "min": 0.374662309885025, |
| "max": 0.9300851821899414, |
| "mean": 0.7508615255355835, |
| "std": 0.04013195261359215, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.4.ff.0.0.weight": { |
| "min": -0.2791317403316498, |
| "max": 0.2725660502910614, |
| "mean": -0.00016837481234688312, |
| "std": 0.040994856506586075, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.4.ff.0.0.bias": { |
| "min": -0.1984652727842331, |
| "max": 0.05115879327058792, |
| "mean": -0.03202404826879501, |
| "std": 0.02509358339011669, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.20.4.ff.2.weight": { |
| "min": -0.6568311452865601, |
| "max": 0.5346067547798157, |
| "mean": -4.890329364570789e-05, |
| "std": 0.052846092730760574, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.20.4.ff.2.bias": { |
| "min": -0.19282352924346924, |
| "max": 0.5817168354988098, |
| "mean": -0.0005141475703567266, |
| "std": 0.04106360301375389, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.0.weight": { |
| "min": -0.41765207052230835, |
| "max": 0.3718544840812683, |
| "mean": 6.159986696729902e-06, |
| "std": 0.02162080444395542, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.21.1.g": { |
| "min": 0.21428614854812622, |
| "max": 0.7470263838768005, |
| "mean": 0.6495206356048584, |
| "std": 0.05435969680547714, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_q.weight": { |
| "min": -0.20919783413410187, |
| "max": 0.19538012146949768, |
| "mean": 4.023606743430719e-05, |
| "std": 0.03946175053715706, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_q.bias": { |
| "min": -0.32906630635261536, |
| "max": 0.25917014479637146, |
| "mean": -0.003227022010833025, |
| "std": 0.05624230206012726, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_k.weight": { |
| "min": -0.20558328926563263, |
| "max": 0.2543526589870453, |
| "mean": 5.4226169595494866e-05, |
| "std": 0.038564346730709076, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_k.bias": { |
| "min": -6.239154815673828, |
| "max": 6.927591800689697, |
| "mean": 0.04829341918230057, |
| "std": 1.3845902681350708, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_v.weight": { |
| "min": -0.20949970185756683, |
| "max": 0.22989487648010254, |
| "mean": -5.106569460622268e-06, |
| "std": 0.0413125716149807, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_v.bias": { |
| "min": -0.04377944767475128, |
| "max": 0.035965293645858765, |
| "mean": 6.696500349789858e-07, |
| "std": 0.012799888849258423, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_out.0.weight": { |
| "min": -0.39747685194015503, |
| "max": 0.3446802794933319, |
| "mean": -5.5516902648378164e-05, |
| "std": 0.0423889197409153, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_out.0.bias": { |
| "min": -0.05503125116229057, |
| "max": 0.06271757930517197, |
| "mean": 0.00036430457839742303, |
| "std": 0.018672339618206024, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.3.g": { |
| "min": 0.35033905506134033, |
| "max": 1.0429264307022095, |
| "mean": 0.7893730998039246, |
| "std": 0.048677314072847366, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.4.ff.0.0.weight": { |
| "min": -0.3334490656852722, |
| "max": 0.38581615686416626, |
| "mean": -0.00016950252756942064, |
| "std": 0.0414799265563488, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.4.ff.0.0.bias": { |
| "min": -0.1571725308895111, |
| "max": 0.059094030410051346, |
| "mean": -0.031832072883844376, |
| "std": 0.025125639513134956, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.21.4.ff.2.weight": { |
| "min": -0.6957246661186218, |
| "max": 0.4681403636932373, |
| "mean": -8.918362436816096e-05, |
| "std": 0.051792457699775696, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.21.4.ff.2.bias": { |
| "min": -0.24794545769691467, |
| "max": 0.32831111550331116, |
| "mean": -0.000254548795055598, |
| "std": 0.04142748937010765, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.0.weight": { |
| "min": -0.286994069814682, |
| "max": 0.35009774565696716, |
| "mean": -2.1362816369219217e-06, |
| "std": 0.0242360457777977, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.22.1.g": { |
| "min": 0.1966284215450287, |
| "max": 0.7790648937225342, |
| "mean": 0.6702556014060974, |
| "std": 0.058683399111032486, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_q.weight": { |
| "min": -0.22847537696361542, |
| "max": 0.23085317015647888, |
| "mean": -1.998914376599714e-05, |
| "std": 0.04043750837445259, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_q.bias": { |
| "min": -0.2196640521287918, |
| "max": 0.2406841218471527, |
| "mean": 0.0007778428844176233, |
| "std": 0.05581061542034149, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_k.weight": { |
| "min": -0.21546684205532074, |
| "max": 0.22625623643398285, |
| "mean": -7.170689787017182e-05, |
| "std": 0.039373625069856644, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_k.bias": { |
| "min": -8.899069786071777, |
| "max": 9.061844825744629, |
| "mean": -0.0012379959225654602, |
| "std": 1.8475514650344849, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_v.weight": { |
| "min": -0.2690274119377136, |
| "max": 0.2585972249507904, |
| "mean": 4.365673885331489e-05, |
| "std": 0.038405876606702805, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_v.bias": { |
| "min": -0.05762965977191925, |
| "max": 0.057730112224817276, |
| "mean": 0.00035032647429034114, |
| "std": 0.014716975390911102, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_out.0.weight": { |
| "min": -0.2643204629421234, |
| "max": 0.28830888867378235, |
| "mean": -6.177595059853047e-05, |
| "std": 0.03907199949026108, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_out.0.bias": { |
| "min": -0.04382891580462456, |
| "max": 0.03727584704756737, |
| "mean": -8.995864482130855e-05, |
| "std": 0.013357071205973625, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.3.g": { |
| "min": 0.3394246995449066, |
| "max": 1.0903522968292236, |
| "mean": 0.8637199997901917, |
| "std": 0.06381762027740479, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.4.ff.0.0.weight": { |
| "min": -0.4231264889240265, |
| "max": 0.41881492733955383, |
| "mean": 0.00031262467382475734, |
| "std": 0.04350043460726738, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.4.ff.0.0.bias": { |
| "min": -0.21452167630195618, |
| "max": 0.1706276834011078, |
| "mean": -0.029481077566742897, |
| "std": 0.03191966935992241, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.22.4.ff.2.weight": { |
| "min": -0.5986213088035583, |
| "max": 0.5590333342552185, |
| "mean": -0.00015086884377524257, |
| "std": 0.05344516038894653, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.22.4.ff.2.bias": { |
| "min": -0.17835262417793274, |
| "max": 0.3764508068561554, |
| "mean": 0.0013586997520178556, |
| "std": 0.03730103746056557, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.0.weight": { |
| "min": -0.3942283093929291, |
| "max": 0.3688967823982239, |
| "mean": 3.6990095395594835e-05, |
| "std": 0.028617417439818382, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.23.1.g": { |
| "min": 0.2902565002441406, |
| "max": 0.8266182541847229, |
| "mean": 0.7055412530899048, |
| "std": 0.06787826120853424, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_q.weight": { |
| "min": -0.9262580275535583, |
| "max": 1.0264337062835693, |
| "mean": -2.6147403332288377e-05, |
| "std": 0.04762481153011322, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_q.bias": { |
| "min": -0.8780329823493958, |
| "max": 0.8147000074386597, |
| "mean": -0.0003064283519051969, |
| "std": 0.09549984335899353, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_k.weight": { |
| "min": -0.2694474458694458, |
| "max": 0.2405342310667038, |
| "mean": -2.2794924007030204e-05, |
| "std": 0.03895170986652374, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_k.bias": { |
| "min": -23.725736618041992, |
| "max": 22.834732055664062, |
| "mean": -0.09184679388999939, |
| "std": 4.068049430847168, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_v.weight": { |
| "min": -0.22741694748401642, |
| "max": 0.2447165697813034, |
| "mean": -2.5723496946739033e-05, |
| "std": 0.03863721713423729, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_v.bias": { |
| "min": -0.06024840846657753, |
| "max": 0.04582807794213295, |
| "mean": -0.00014292271225713193, |
| "std": 0.014692682772874832, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_out.0.weight": { |
| "min": -0.337954580783844, |
| "max": 0.3742024004459381, |
| "mean": 7.330418156925589e-06, |
| "std": 0.04081300273537636, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_out.0.bias": { |
| "min": -0.04640491306781769, |
| "max": 0.19541829824447632, |
| "mean": 0.00027370243333280087, |
| "std": 0.013559137471020222, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.3.g": { |
| "min": 0.3744112551212311, |
| "max": 1.1277745962142944, |
| "mean": 0.8900341987609863, |
| "std": 0.06396359950304031, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.4.ff.0.0.weight": { |
| "min": -0.4476565718650818, |
| "max": 0.5421170592308044, |
| "mean": 2.477337693562731e-05, |
| "std": 0.04556567594408989, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.4.ff.0.0.bias": { |
| "min": -0.2238994538784027, |
| "max": 0.0882241502404213, |
| "mean": -0.03201638162136078, |
| "std": 0.03775238245725632, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.23.4.ff.2.weight": { |
| "min": -0.7243073582649231, |
| "max": 0.6882233619689941, |
| "mean": 3.4276417864020914e-05, |
| "std": 0.05177783966064453, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.23.4.ff.2.bias": { |
| "min": -0.17440874874591827, |
| "max": 0.2182954102754593, |
| "mean": 4.099373472854495e-05, |
| "std": 0.0317707397043705, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.0.weight": { |
| "min": -0.33985471725463867, |
| "max": 0.3734351098537445, |
| "mean": 4.3027404899476096e-05, |
| "std": 0.03413975238800049, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.24.1.g": { |
| "min": 0.31756407022476196, |
| "max": 1.2844599485397339, |
| "mean": 0.6014232039451599, |
| "std": 0.08331646770238876, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_q.weight": { |
| "min": -0.2830894887447357, |
| "max": 0.260119765996933, |
| "mean": -2.825315732479794e-06, |
| "std": 0.03598077595233917, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_q.bias": { |
| "min": -0.23531799018383026, |
| "max": 0.20526045560836792, |
| "mean": 0.00023797567700967193, |
| "std": 0.05601158365607262, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_k.weight": { |
| "min": -0.43513408303260803, |
| "max": 0.324799120426178, |
| "mean": 2.434128509776201e-05, |
| "std": 0.03413143381476402, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_k.bias": { |
| "min": -5.539924144744873, |
| "max": 7.305825233459473, |
| "mean": -0.007350243628025055, |
| "std": 0.6986610889434814, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_v.weight": { |
| "min": -0.3433971107006073, |
| "max": 0.36268630623817444, |
| "mean": 0.00010339625441702083, |
| "std": 0.047828007489442825, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_v.bias": { |
| "min": -0.07370211184024811, |
| "max": 0.06033240258693695, |
| "mean": 0.0009340607211925089, |
| "std": 0.014942350797355175, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_out.0.weight": { |
| "min": -0.2555631995201111, |
| "max": 0.28619974851608276, |
| "mean": 4.566820280160755e-06, |
| "std": 0.04155479371547699, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_out.0.bias": { |
| "min": -0.05527225881814957, |
| "max": 0.0627666711807251, |
| "mean": 0.00013802105968352407, |
| "std": 0.0071632144972682, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.3.g": { |
| "min": 0.49384805560112, |
| "max": 1.2211062908172607, |
| "mean": 1.0134272575378418, |
| "std": 0.11744718253612518, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.4.ff.0.0.weight": { |
| "min": -1.093487024307251, |
| "max": 1.046884298324585, |
| "mean": -4.944120883010328e-05, |
| "std": 0.052408553659915924, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.4.ff.0.0.bias": { |
| "min": -0.22308824956417084, |
| "max": 0.17253872752189636, |
| "mean": -0.027238916605710983, |
| "std": 0.036325786262750626, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.24.4.ff.2.weight": { |
| "min": -0.8834213018417358, |
| "max": 0.921511173248291, |
| "mean": -0.00014601324801333249, |
| "std": 0.05328161269426346, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.24.4.ff.2.bias": { |
| "min": -0.17091798782348633, |
| "max": 0.3795103430747986, |
| "mean": 0.0033677970059216022, |
| "std": 0.039878927171230316, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.0.weight": { |
| "min": -0.7767993211746216, |
| "max": 0.7229223251342773, |
| "mean": 1.8964092305395752e-05, |
| "std": 0.04616083949804306, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.25.1.g": { |
| "min": 0.3385705351829529, |
| "max": 1.4257850646972656, |
| "mean": 0.948320209980011, |
| "std": 0.20674099028110504, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_q.weight": { |
| "min": -1.7456356287002563, |
| "max": 1.7042957544326782, |
| "mean": 0.00022721664572600275, |
| "std": 0.1586850881576538, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_q.bias": { |
| "min": -1.1983858346939087, |
| "max": 1.0988513231277466, |
| "mean": -0.009531477466225624, |
| "std": 0.20368283987045288, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_k.weight": { |
| "min": -0.4208756983280182, |
| "max": 0.4265652298927307, |
| "mean": 6.4577761804685e-05, |
| "std": 0.0480157844722271, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_k.bias": { |
| "min": -19.72553825378418, |
| "max": 19.520837783813477, |
| "mean": -0.2481747567653656, |
| "std": 4.772479057312012, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_v.weight": { |
| "min": -0.32345694303512573, |
| "max": 0.4378505349159241, |
| "mean": -1.1984889169980306e-05, |
| "std": 0.04616131633520126, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_v.bias": { |
| "min": -0.03403974324464798, |
| "max": 0.03704509884119034, |
| "mean": 0.0006423466256819665, |
| "std": 0.012919273227453232, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_out.0.weight": { |
| "min": -0.7029122710227966, |
| "max": 0.6650063395500183, |
| "mean": 4.321677261032164e-05, |
| "std": 0.05788154527544975, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_out.0.bias": { |
| "min": -0.07217518985271454, |
| "max": 0.06747341901063919, |
| "mean": -0.00013201506226323545, |
| "std": 0.012908914126455784, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.3.g": { |
| "min": 0.38026899099349976, |
| "max": 1.3915380239486694, |
| "mean": 1.0665700435638428, |
| "std": 0.2197078913450241, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.4.ff.0.0.weight": { |
| "min": -0.6161525845527649, |
| "max": 0.7168518304824829, |
| "mean": 0.00011199730215594172, |
| "std": 0.058020394295454025, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.4.ff.0.0.bias": { |
| "min": -0.21944588422775269, |
| "max": 0.22491848468780518, |
| "mean": 0.00621908949688077, |
| "std": 0.049715615808963776, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.25.4.ff.2.weight": { |
| "min": -0.6295903325080872, |
| "max": 0.8891246914863586, |
| "mean": 1.184111533802934e-05, |
| "std": 0.023527733981609344, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.25.4.ff.2.bias": { |
| "min": -0.5063257217407227, |
| "max": 0.4734645485877991, |
| "mean": -0.0030142185278236866, |
| "std": 0.06923094391822815, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.norm_out.g": { |
| "min": 0.537803590297699, |
| "max": 1.1795684099197388, |
| "mean": 0.7827014327049255, |
| "std": 0.09878505766391754, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.proj_out.weight": { |
| "min": -0.2665232717990875, |
| "max": 0.21241135895252228, |
| "mean": -0.00022294482914730906, |
| "std": 0.05399605259299278, |
| "sparsity": 0.0, |
| "shape": [ |
| 100, |
| 1024 |
| ] |
| }, |
| "transformer.proj_out.bias": { |
| "min": -0.23782978951931, |
| "max": 0.014834473840892315, |
| "mean": -0.04395260661840439, |
| "std": 0.034306950867176056, |
| "sparsity": 0.0, |
| "shape": [ |
| 100 |
| ] |
| } |
| } |
| } |