| { |
| "layer_types": { |
| "transformer": 391 |
| }, |
| "parameter_counts": { |
| "transformer.time_embed.time_mlp.0.weight": 262144, |
| "transformer.time_embed.time_mlp.0.bias": 1024, |
| "transformer.time_embed.time_mlp.2.weight": 1048576, |
| "transformer.time_embed.time_mlp.2.bias": 1024, |
| "transformer.text_embed.text_embed.weight": 254600, |
| "transformer.input_embed.proj.weight": 307200, |
| "transformer.input_embed.proj.bias": 1024, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, |
| "transformer.layers.0.1.g": 1024, |
| "transformer.layers.0.2.to_q.weight": 1048576, |
| "transformer.layers.0.2.to_q.bias": 1024, |
| "transformer.layers.0.2.to_k.weight": 1048576, |
| "transformer.layers.0.2.to_k.bias": 1024, |
| "transformer.layers.0.2.to_v.weight": 1048576, |
| "transformer.layers.0.2.to_v.bias": 1024, |
| "transformer.layers.0.2.to_out.0.weight": 1048576, |
| "transformer.layers.0.2.to_out.0.bias": 1024, |
| "transformer.layers.0.3.g": 1024, |
| "transformer.layers.0.4.ff.0.0.weight": 4194304, |
| "transformer.layers.0.4.ff.0.0.bias": 4096, |
| "transformer.layers.0.4.ff.2.weight": 4194304, |
| "transformer.layers.0.4.ff.2.bias": 1024, |
| "transformer.layers.1.1.g": 1024, |
| "transformer.layers.1.2.to_q.weight": 1048576, |
| "transformer.layers.1.2.to_q.bias": 1024, |
| "transformer.layers.1.2.to_k.weight": 1048576, |
| "transformer.layers.1.2.to_k.bias": 1024, |
| "transformer.layers.1.2.to_v.weight": 1048576, |
| "transformer.layers.1.2.to_v.bias": 1024, |
| "transformer.layers.1.2.to_out.0.weight": 1048576, |
| "transformer.layers.1.2.to_out.0.bias": 1024, |
| "transformer.layers.1.3.g": 1024, |
| "transformer.layers.1.4.ff.0.0.weight": 4194304, |
| "transformer.layers.1.4.ff.0.0.bias": 4096, |
| "transformer.layers.1.4.ff.2.weight": 4194304, |
| "transformer.layers.1.4.ff.2.bias": 1024, |
| "transformer.layers.2.1.g": 1024, |
| "transformer.layers.2.2.to_q.weight": 1048576, |
| "transformer.layers.2.2.to_q.bias": 1024, |
| "transformer.layers.2.2.to_k.weight": 1048576, |
| "transformer.layers.2.2.to_k.bias": 1024, |
| "transformer.layers.2.2.to_v.weight": 1048576, |
| "transformer.layers.2.2.to_v.bias": 1024, |
| "transformer.layers.2.2.to_out.0.weight": 1048576, |
| "transformer.layers.2.2.to_out.0.bias": 1024, |
| "transformer.layers.2.3.g": 1024, |
| "transformer.layers.2.4.ff.0.0.weight": 4194304, |
| "transformer.layers.2.4.ff.0.0.bias": 4096, |
| "transformer.layers.2.4.ff.2.weight": 4194304, |
| "transformer.layers.2.4.ff.2.bias": 1024, |
| "transformer.layers.3.1.g": 1024, |
| "transformer.layers.3.2.to_q.weight": 1048576, |
| "transformer.layers.3.2.to_q.bias": 1024, |
| "transformer.layers.3.2.to_k.weight": 1048576, |
| "transformer.layers.3.2.to_k.bias": 1024, |
| "transformer.layers.3.2.to_v.weight": 1048576, |
| "transformer.layers.3.2.to_v.bias": 1024, |
| "transformer.layers.3.2.to_out.0.weight": 1048576, |
| "transformer.layers.3.2.to_out.0.bias": 1024, |
| "transformer.layers.3.3.g": 1024, |
| "transformer.layers.3.4.ff.0.0.weight": 4194304, |
| "transformer.layers.3.4.ff.0.0.bias": 4096, |
| "transformer.layers.3.4.ff.2.weight": 4194304, |
| "transformer.layers.3.4.ff.2.bias": 1024, |
| "transformer.layers.4.1.g": 1024, |
| "transformer.layers.4.2.to_q.weight": 1048576, |
| "transformer.layers.4.2.to_q.bias": 1024, |
| "transformer.layers.4.2.to_k.weight": 1048576, |
| "transformer.layers.4.2.to_k.bias": 1024, |
| "transformer.layers.4.2.to_v.weight": 1048576, |
| "transformer.layers.4.2.to_v.bias": 1024, |
| "transformer.layers.4.2.to_out.0.weight": 1048576, |
| "transformer.layers.4.2.to_out.0.bias": 1024, |
| "transformer.layers.4.3.g": 1024, |
| "transformer.layers.4.4.ff.0.0.weight": 4194304, |
| "transformer.layers.4.4.ff.0.0.bias": 4096, |
| "transformer.layers.4.4.ff.2.weight": 4194304, |
| "transformer.layers.4.4.ff.2.bias": 1024, |
| "transformer.layers.5.1.g": 1024, |
| "transformer.layers.5.2.to_q.weight": 1048576, |
| "transformer.layers.5.2.to_q.bias": 1024, |
| "transformer.layers.5.2.to_k.weight": 1048576, |
| "transformer.layers.5.2.to_k.bias": 1024, |
| "transformer.layers.5.2.to_v.weight": 1048576, |
| "transformer.layers.5.2.to_v.bias": 1024, |
| "transformer.layers.5.2.to_out.0.weight": 1048576, |
| "transformer.layers.5.2.to_out.0.bias": 1024, |
| "transformer.layers.5.3.g": 1024, |
| "transformer.layers.5.4.ff.0.0.weight": 4194304, |
| "transformer.layers.5.4.ff.0.0.bias": 4096, |
| "transformer.layers.5.4.ff.2.weight": 4194304, |
| "transformer.layers.5.4.ff.2.bias": 1024, |
| "transformer.layers.6.1.g": 1024, |
| "transformer.layers.6.2.to_q.weight": 1048576, |
| "transformer.layers.6.2.to_q.bias": 1024, |
| "transformer.layers.6.2.to_k.weight": 1048576, |
| "transformer.layers.6.2.to_k.bias": 1024, |
| "transformer.layers.6.2.to_v.weight": 1048576, |
| "transformer.layers.6.2.to_v.bias": 1024, |
| "transformer.layers.6.2.to_out.0.weight": 1048576, |
| "transformer.layers.6.2.to_out.0.bias": 1024, |
| "transformer.layers.6.3.g": 1024, |
| "transformer.layers.6.4.ff.0.0.weight": 4194304, |
| "transformer.layers.6.4.ff.0.0.bias": 4096, |
| "transformer.layers.6.4.ff.2.weight": 4194304, |
| "transformer.layers.6.4.ff.2.bias": 1024, |
| "transformer.layers.7.1.g": 1024, |
| "transformer.layers.7.2.to_q.weight": 1048576, |
| "transformer.layers.7.2.to_q.bias": 1024, |
| "transformer.layers.7.2.to_k.weight": 1048576, |
| "transformer.layers.7.2.to_k.bias": 1024, |
| "transformer.layers.7.2.to_v.weight": 1048576, |
| "transformer.layers.7.2.to_v.bias": 1024, |
| "transformer.layers.7.2.to_out.0.weight": 1048576, |
| "transformer.layers.7.2.to_out.0.bias": 1024, |
| "transformer.layers.7.3.g": 1024, |
| "transformer.layers.7.4.ff.0.0.weight": 4194304, |
| "transformer.layers.7.4.ff.0.0.bias": 4096, |
| "transformer.layers.7.4.ff.2.weight": 4194304, |
| "transformer.layers.7.4.ff.2.bias": 1024, |
| "transformer.layers.8.1.g": 1024, |
| "transformer.layers.8.2.to_q.weight": 1048576, |
| "transformer.layers.8.2.to_q.bias": 1024, |
| "transformer.layers.8.2.to_k.weight": 1048576, |
| "transformer.layers.8.2.to_k.bias": 1024, |
| "transformer.layers.8.2.to_v.weight": 1048576, |
| "transformer.layers.8.2.to_v.bias": 1024, |
| "transformer.layers.8.2.to_out.0.weight": 1048576, |
| "transformer.layers.8.2.to_out.0.bias": 1024, |
| "transformer.layers.8.3.g": 1024, |
| "transformer.layers.8.4.ff.0.0.weight": 4194304, |
| "transformer.layers.8.4.ff.0.0.bias": 4096, |
| "transformer.layers.8.4.ff.2.weight": 4194304, |
| "transformer.layers.8.4.ff.2.bias": 1024, |
| "transformer.layers.9.1.g": 1024, |
| "transformer.layers.9.2.to_q.weight": 1048576, |
| "transformer.layers.9.2.to_q.bias": 1024, |
| "transformer.layers.9.2.to_k.weight": 1048576, |
| "transformer.layers.9.2.to_k.bias": 1024, |
| "transformer.layers.9.2.to_v.weight": 1048576, |
| "transformer.layers.9.2.to_v.bias": 1024, |
| "transformer.layers.9.2.to_out.0.weight": 1048576, |
| "transformer.layers.9.2.to_out.0.bias": 1024, |
| "transformer.layers.9.3.g": 1024, |
| "transformer.layers.9.4.ff.0.0.weight": 4194304, |
| "transformer.layers.9.4.ff.0.0.bias": 4096, |
| "transformer.layers.9.4.ff.2.weight": 4194304, |
| "transformer.layers.9.4.ff.2.bias": 1024, |
| "transformer.layers.10.1.g": 1024, |
| "transformer.layers.10.2.to_q.weight": 1048576, |
| "transformer.layers.10.2.to_q.bias": 1024, |
| "transformer.layers.10.2.to_k.weight": 1048576, |
| "transformer.layers.10.2.to_k.bias": 1024, |
| "transformer.layers.10.2.to_v.weight": 1048576, |
| "transformer.layers.10.2.to_v.bias": 1024, |
| "transformer.layers.10.2.to_out.0.weight": 1048576, |
| "transformer.layers.10.2.to_out.0.bias": 1024, |
| "transformer.layers.10.3.g": 1024, |
| "transformer.layers.10.4.ff.0.0.weight": 4194304, |
| "transformer.layers.10.4.ff.0.0.bias": 4096, |
| "transformer.layers.10.4.ff.2.weight": 4194304, |
| "transformer.layers.10.4.ff.2.bias": 1024, |
| "transformer.layers.11.1.g": 1024, |
| "transformer.layers.11.2.to_q.weight": 1048576, |
| "transformer.layers.11.2.to_q.bias": 1024, |
| "transformer.layers.11.2.to_k.weight": 1048576, |
| "transformer.layers.11.2.to_k.bias": 1024, |
| "transformer.layers.11.2.to_v.weight": 1048576, |
| "transformer.layers.11.2.to_v.bias": 1024, |
| "transformer.layers.11.2.to_out.0.weight": 1048576, |
| "transformer.layers.11.2.to_out.0.bias": 1024, |
| "transformer.layers.11.3.g": 1024, |
| "transformer.layers.11.4.ff.0.0.weight": 4194304, |
| "transformer.layers.11.4.ff.0.0.bias": 4096, |
| "transformer.layers.11.4.ff.2.weight": 4194304, |
| "transformer.layers.11.4.ff.2.bias": 1024, |
| "transformer.layers.12.1.g": 1024, |
| "transformer.layers.12.2.to_q.weight": 1048576, |
| "transformer.layers.12.2.to_q.bias": 1024, |
| "transformer.layers.12.2.to_k.weight": 1048576, |
| "transformer.layers.12.2.to_k.bias": 1024, |
| "transformer.layers.12.2.to_v.weight": 1048576, |
| "transformer.layers.12.2.to_v.bias": 1024, |
| "transformer.layers.12.2.to_out.0.weight": 1048576, |
| "transformer.layers.12.2.to_out.0.bias": 1024, |
| "transformer.layers.12.3.g": 1024, |
| "transformer.layers.12.4.ff.0.0.weight": 4194304, |
| "transformer.layers.12.4.ff.0.0.bias": 4096, |
| "transformer.layers.12.4.ff.2.weight": 4194304, |
| "transformer.layers.12.4.ff.2.bias": 1024, |
| "transformer.layers.13.0.weight": 2097152, |
| "transformer.layers.13.1.g": 1024, |
| "transformer.layers.13.2.to_q.weight": 1048576, |
| "transformer.layers.13.2.to_q.bias": 1024, |
| "transformer.layers.13.2.to_k.weight": 1048576, |
| "transformer.layers.13.2.to_k.bias": 1024, |
| "transformer.layers.13.2.to_v.weight": 1048576, |
| "transformer.layers.13.2.to_v.bias": 1024, |
| "transformer.layers.13.2.to_out.0.weight": 1048576, |
| "transformer.layers.13.2.to_out.0.bias": 1024, |
| "transformer.layers.13.3.g": 1024, |
| "transformer.layers.13.4.ff.0.0.weight": 4194304, |
| "transformer.layers.13.4.ff.0.0.bias": 4096, |
| "transformer.layers.13.4.ff.2.weight": 4194304, |
| "transformer.layers.13.4.ff.2.bias": 1024, |
| "transformer.layers.14.0.weight": 2097152, |
| "transformer.layers.14.1.g": 1024, |
| "transformer.layers.14.2.to_q.weight": 1048576, |
| "transformer.layers.14.2.to_q.bias": 1024, |
| "transformer.layers.14.2.to_k.weight": 1048576, |
| "transformer.layers.14.2.to_k.bias": 1024, |
| "transformer.layers.14.2.to_v.weight": 1048576, |
| "transformer.layers.14.2.to_v.bias": 1024, |
| "transformer.layers.14.2.to_out.0.weight": 1048576, |
| "transformer.layers.14.2.to_out.0.bias": 1024, |
| "transformer.layers.14.3.g": 1024, |
| "transformer.layers.14.4.ff.0.0.weight": 4194304, |
| "transformer.layers.14.4.ff.0.0.bias": 4096, |
| "transformer.layers.14.4.ff.2.weight": 4194304, |
| "transformer.layers.14.4.ff.2.bias": 1024, |
| "transformer.layers.15.0.weight": 2097152, |
| "transformer.layers.15.1.g": 1024, |
| "transformer.layers.15.2.to_q.weight": 1048576, |
| "transformer.layers.15.2.to_q.bias": 1024, |
| "transformer.layers.15.2.to_k.weight": 1048576, |
| "transformer.layers.15.2.to_k.bias": 1024, |
| "transformer.layers.15.2.to_v.weight": 1048576, |
| "transformer.layers.15.2.to_v.bias": 1024, |
| "transformer.layers.15.2.to_out.0.weight": 1048576, |
| "transformer.layers.15.2.to_out.0.bias": 1024, |
| "transformer.layers.15.3.g": 1024, |
| "transformer.layers.15.4.ff.0.0.weight": 4194304, |
| "transformer.layers.15.4.ff.0.0.bias": 4096, |
| "transformer.layers.15.4.ff.2.weight": 4194304, |
| "transformer.layers.15.4.ff.2.bias": 1024, |
| "transformer.layers.16.0.weight": 2097152, |
| "transformer.layers.16.1.g": 1024, |
| "transformer.layers.16.2.to_q.weight": 1048576, |
| "transformer.layers.16.2.to_q.bias": 1024, |
| "transformer.layers.16.2.to_k.weight": 1048576, |
| "transformer.layers.16.2.to_k.bias": 1024, |
| "transformer.layers.16.2.to_v.weight": 1048576, |
| "transformer.layers.16.2.to_v.bias": 1024, |
| "transformer.layers.16.2.to_out.0.weight": 1048576, |
| "transformer.layers.16.2.to_out.0.bias": 1024, |
| "transformer.layers.16.3.g": 1024, |
| "transformer.layers.16.4.ff.0.0.weight": 4194304, |
| "transformer.layers.16.4.ff.0.0.bias": 4096, |
| "transformer.layers.16.4.ff.2.weight": 4194304, |
| "transformer.layers.16.4.ff.2.bias": 1024, |
| "transformer.layers.17.0.weight": 2097152, |
| "transformer.layers.17.1.g": 1024, |
| "transformer.layers.17.2.to_q.weight": 1048576, |
| "transformer.layers.17.2.to_q.bias": 1024, |
| "transformer.layers.17.2.to_k.weight": 1048576, |
| "transformer.layers.17.2.to_k.bias": 1024, |
| "transformer.layers.17.2.to_v.weight": 1048576, |
| "transformer.layers.17.2.to_v.bias": 1024, |
| "transformer.layers.17.2.to_out.0.weight": 1048576, |
| "transformer.layers.17.2.to_out.0.bias": 1024, |
| "transformer.layers.17.3.g": 1024, |
| "transformer.layers.17.4.ff.0.0.weight": 4194304, |
| "transformer.layers.17.4.ff.0.0.bias": 4096, |
| "transformer.layers.17.4.ff.2.weight": 4194304, |
| "transformer.layers.17.4.ff.2.bias": 1024, |
| "transformer.layers.18.0.weight": 2097152, |
| "transformer.layers.18.1.g": 1024, |
| "transformer.layers.18.2.to_q.weight": 1048576, |
| "transformer.layers.18.2.to_q.bias": 1024, |
| "transformer.layers.18.2.to_k.weight": 1048576, |
| "transformer.layers.18.2.to_k.bias": 1024, |
| "transformer.layers.18.2.to_v.weight": 1048576, |
| "transformer.layers.18.2.to_v.bias": 1024, |
| "transformer.layers.18.2.to_out.0.weight": 1048576, |
| "transformer.layers.18.2.to_out.0.bias": 1024, |
| "transformer.layers.18.3.g": 1024, |
| "transformer.layers.18.4.ff.0.0.weight": 4194304, |
| "transformer.layers.18.4.ff.0.0.bias": 4096, |
| "transformer.layers.18.4.ff.2.weight": 4194304, |
| "transformer.layers.18.4.ff.2.bias": 1024, |
| "transformer.layers.19.0.weight": 2097152, |
| "transformer.layers.19.1.g": 1024, |
| "transformer.layers.19.2.to_q.weight": 1048576, |
| "transformer.layers.19.2.to_q.bias": 1024, |
| "transformer.layers.19.2.to_k.weight": 1048576, |
| "transformer.layers.19.2.to_k.bias": 1024, |
| "transformer.layers.19.2.to_v.weight": 1048576, |
| "transformer.layers.19.2.to_v.bias": 1024, |
| "transformer.layers.19.2.to_out.0.weight": 1048576, |
| "transformer.layers.19.2.to_out.0.bias": 1024, |
| "transformer.layers.19.3.g": 1024, |
| "transformer.layers.19.4.ff.0.0.weight": 4194304, |
| "transformer.layers.19.4.ff.0.0.bias": 4096, |
| "transformer.layers.19.4.ff.2.weight": 4194304, |
| "transformer.layers.19.4.ff.2.bias": 1024, |
| "transformer.layers.20.0.weight": 2097152, |
| "transformer.layers.20.1.g": 1024, |
| "transformer.layers.20.2.to_q.weight": 1048576, |
| "transformer.layers.20.2.to_q.bias": 1024, |
| "transformer.layers.20.2.to_k.weight": 1048576, |
| "transformer.layers.20.2.to_k.bias": 1024, |
| "transformer.layers.20.2.to_v.weight": 1048576, |
| "transformer.layers.20.2.to_v.bias": 1024, |
| "transformer.layers.20.2.to_out.0.weight": 1048576, |
| "transformer.layers.20.2.to_out.0.bias": 1024, |
| "transformer.layers.20.3.g": 1024, |
| "transformer.layers.20.4.ff.0.0.weight": 4194304, |
| "transformer.layers.20.4.ff.0.0.bias": 4096, |
| "transformer.layers.20.4.ff.2.weight": 4194304, |
| "transformer.layers.20.4.ff.2.bias": 1024, |
| "transformer.layers.21.0.weight": 2097152, |
| "transformer.layers.21.1.g": 1024, |
| "transformer.layers.21.2.to_q.weight": 1048576, |
| "transformer.layers.21.2.to_q.bias": 1024, |
| "transformer.layers.21.2.to_k.weight": 1048576, |
| "transformer.layers.21.2.to_k.bias": 1024, |
| "transformer.layers.21.2.to_v.weight": 1048576, |
| "transformer.layers.21.2.to_v.bias": 1024, |
| "transformer.layers.21.2.to_out.0.weight": 1048576, |
| "transformer.layers.21.2.to_out.0.bias": 1024, |
| "transformer.layers.21.3.g": 1024, |
| "transformer.layers.21.4.ff.0.0.weight": 4194304, |
| "transformer.layers.21.4.ff.0.0.bias": 4096, |
| "transformer.layers.21.4.ff.2.weight": 4194304, |
| "transformer.layers.21.4.ff.2.bias": 1024, |
| "transformer.layers.22.0.weight": 2097152, |
| "transformer.layers.22.1.g": 1024, |
| "transformer.layers.22.2.to_q.weight": 1048576, |
| "transformer.layers.22.2.to_q.bias": 1024, |
| "transformer.layers.22.2.to_k.weight": 1048576, |
| "transformer.layers.22.2.to_k.bias": 1024, |
| "transformer.layers.22.2.to_v.weight": 1048576, |
| "transformer.layers.22.2.to_v.bias": 1024, |
| "transformer.layers.22.2.to_out.0.weight": 1048576, |
| "transformer.layers.22.2.to_out.0.bias": 1024, |
| "transformer.layers.22.3.g": 1024, |
| "transformer.layers.22.4.ff.0.0.weight": 4194304, |
| "transformer.layers.22.4.ff.0.0.bias": 4096, |
| "transformer.layers.22.4.ff.2.weight": 4194304, |
| "transformer.layers.22.4.ff.2.bias": 1024, |
| "transformer.layers.23.0.weight": 2097152, |
| "transformer.layers.23.1.g": 1024, |
| "transformer.layers.23.2.to_q.weight": 1048576, |
| "transformer.layers.23.2.to_q.bias": 1024, |
| "transformer.layers.23.2.to_k.weight": 1048576, |
| "transformer.layers.23.2.to_k.bias": 1024, |
| "transformer.layers.23.2.to_v.weight": 1048576, |
| "transformer.layers.23.2.to_v.bias": 1024, |
| "transformer.layers.23.2.to_out.0.weight": 1048576, |
| "transformer.layers.23.2.to_out.0.bias": 1024, |
| "transformer.layers.23.3.g": 1024, |
| "transformer.layers.23.4.ff.0.0.weight": 4194304, |
| "transformer.layers.23.4.ff.0.0.bias": 4096, |
| "transformer.layers.23.4.ff.2.weight": 4194304, |
| "transformer.layers.23.4.ff.2.bias": 1024, |
| "transformer.layers.24.0.weight": 2097152, |
| "transformer.layers.24.1.g": 1024, |
| "transformer.layers.24.2.to_q.weight": 1048576, |
| "transformer.layers.24.2.to_q.bias": 1024, |
| "transformer.layers.24.2.to_k.weight": 1048576, |
| "transformer.layers.24.2.to_k.bias": 1024, |
| "transformer.layers.24.2.to_v.weight": 1048576, |
| "transformer.layers.24.2.to_v.bias": 1024, |
| "transformer.layers.24.2.to_out.0.weight": 1048576, |
| "transformer.layers.24.2.to_out.0.bias": 1024, |
| "transformer.layers.24.3.g": 1024, |
| "transformer.layers.24.4.ff.0.0.weight": 4194304, |
| "transformer.layers.24.4.ff.0.0.bias": 4096, |
| "transformer.layers.24.4.ff.2.weight": 4194304, |
| "transformer.layers.24.4.ff.2.bias": 1024, |
| "transformer.layers.25.0.weight": 2097152, |
| "transformer.layers.25.1.g": 1024, |
| "transformer.layers.25.2.to_q.weight": 1048576, |
| "transformer.layers.25.2.to_q.bias": 1024, |
| "transformer.layers.25.2.to_k.weight": 1048576, |
| "transformer.layers.25.2.to_k.bias": 1024, |
| "transformer.layers.25.2.to_v.weight": 1048576, |
| "transformer.layers.25.2.to_v.bias": 1024, |
| "transformer.layers.25.2.to_out.0.weight": 1048576, |
| "transformer.layers.25.2.to_out.0.bias": 1024, |
| "transformer.layers.25.3.g": 1024, |
| "transformer.layers.25.4.ff.0.0.weight": 4194304, |
| "transformer.layers.25.4.ff.0.0.bias": 4096, |
| "transformer.layers.25.4.ff.2.weight": 4194304, |
| "transformer.layers.25.4.ff.2.bias": 1024, |
| "transformer.norm_out.g": 1024, |
| "transformer.proj_out.weight": 102400, |
| "transformer.proj_out.bias": 100 |
| }, |
| "important_layers": [ |
| "transformer.time_embed.time_mlp.0.weight", |
| "transformer.time_embed.time_mlp.2.weight", |
| "transformer.text_embed.text_embed.weight", |
| "transformer.input_embed.proj.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight", |
| "transformer.layers.0.2.to_q.weight", |
| "transformer.layers.0.2.to_k.weight", |
| "transformer.layers.0.2.to_v.weight", |
| "transformer.layers.0.2.to_out.0.weight", |
| "transformer.layers.0.4.ff.0.0.weight", |
| "transformer.layers.0.4.ff.2.weight", |
| "transformer.layers.1.2.to_q.weight", |
| "transformer.layers.1.2.to_k.weight", |
| "transformer.layers.1.2.to_v.weight", |
| "transformer.layers.1.2.to_out.0.weight", |
| "transformer.layers.1.4.ff.0.0.weight", |
| "transformer.layers.1.4.ff.2.weight", |
| "transformer.layers.2.2.to_q.weight", |
| "transformer.layers.2.2.to_k.weight", |
| "transformer.layers.2.2.to_v.weight", |
| "transformer.layers.2.2.to_out.0.weight", |
| "transformer.layers.2.4.ff.0.0.weight", |
| "transformer.layers.2.4.ff.2.weight", |
| "transformer.layers.3.2.to_q.weight", |
| "transformer.layers.3.2.to_k.weight", |
| "transformer.layers.3.2.to_v.weight", |
| "transformer.layers.3.2.to_out.0.weight", |
| "transformer.layers.3.4.ff.0.0.weight", |
| "transformer.layers.3.4.ff.2.weight", |
| "transformer.layers.4.2.to_q.weight", |
| "transformer.layers.4.2.to_k.weight", |
| "transformer.layers.4.2.to_v.weight", |
| "transformer.layers.4.2.to_out.0.weight", |
| "transformer.layers.4.4.ff.0.0.weight", |
| "transformer.layers.4.4.ff.2.weight", |
| "transformer.layers.5.2.to_q.weight", |
| "transformer.layers.5.2.to_k.weight", |
| "transformer.layers.5.2.to_v.weight", |
| "transformer.layers.5.2.to_out.0.weight", |
| "transformer.layers.5.4.ff.0.0.weight", |
| "transformer.layers.5.4.ff.2.weight", |
| "transformer.layers.6.2.to_q.weight", |
| "transformer.layers.6.2.to_k.weight", |
| "transformer.layers.6.2.to_v.weight", |
| "transformer.layers.6.2.to_out.0.weight", |
| "transformer.layers.6.4.ff.0.0.weight", |
| "transformer.layers.6.4.ff.2.weight", |
| "transformer.layers.7.2.to_q.weight", |
| "transformer.layers.7.2.to_k.weight", |
| "transformer.layers.7.2.to_v.weight", |
| "transformer.layers.7.2.to_out.0.weight", |
| "transformer.layers.7.4.ff.0.0.weight", |
| "transformer.layers.7.4.ff.2.weight", |
| "transformer.layers.8.4.ff.0.0.weight", |
| "transformer.layers.8.4.ff.2.weight", |
| "transformer.layers.9.4.ff.0.0.weight", |
| "transformer.layers.9.4.ff.2.weight", |
| "transformer.layers.10.4.ff.0.0.weight", |
| "transformer.layers.10.4.ff.2.weight", |
| "transformer.layers.11.4.ff.0.0.weight", |
| "transformer.layers.11.4.ff.2.weight", |
| "transformer.layers.12.4.ff.0.0.weight", |
| "transformer.layers.12.4.ff.2.weight", |
| "transformer.layers.13.0.weight", |
| "transformer.layers.13.4.ff.0.0.weight", |
| "transformer.layers.13.4.ff.2.weight", |
| "transformer.layers.14.0.weight", |
| "transformer.layers.14.4.ff.0.0.weight", |
| "transformer.layers.14.4.ff.2.weight", |
| "transformer.layers.15.0.weight", |
| "transformer.layers.15.4.ff.0.0.weight", |
| "transformer.layers.15.4.ff.2.weight", |
| "transformer.layers.16.4.ff.0.0.weight", |
| "transformer.layers.16.4.ff.2.weight", |
| "transformer.layers.17.4.ff.0.0.weight", |
| "transformer.layers.17.4.ff.2.weight", |
| "transformer.layers.18.4.ff.0.0.weight", |
| "transformer.layers.18.4.ff.2.weight", |
| "transformer.layers.19.4.ff.0.0.weight", |
| "transformer.layers.19.4.ff.2.weight", |
| "transformer.layers.20.4.ff.0.0.weight", |
| "transformer.layers.20.4.ff.2.weight", |
| "transformer.layers.21.4.ff.0.0.weight", |
| "transformer.layers.21.4.ff.2.weight", |
| "transformer.layers.22.4.ff.0.0.weight", |
| "transformer.layers.22.4.ff.2.weight", |
| "transformer.layers.23.4.ff.0.0.weight", |
| "transformer.layers.23.4.ff.2.weight", |
| "transformer.layers.24.4.ff.0.0.weight", |
| "transformer.layers.24.4.ff.2.weight", |
| "transformer.layers.25.4.ff.0.0.weight", |
| "transformer.layers.25.4.ff.2.weight" |
| ], |
| "bottleneck_layers": [], |
| "recommendations": { |
| "focus_layers": [ |
| "transformer.time_embed.time_mlp.0.weight", |
| "transformer.time_embed.time_mlp.2.weight", |
| "transformer.text_embed.text_embed.weight", |
| "transformer.input_embed.proj.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight", |
| "transformer.layers.0.2.to_q.weight", |
| "transformer.layers.0.2.to_k.weight", |
| "transformer.layers.0.2.to_v.weight", |
| "transformer.layers.0.2.to_out.0.weight", |
| "transformer.layers.0.4.ff.0.0.weight", |
| "transformer.layers.0.4.ff.2.weight", |
| "transformer.layers.1.2.to_q.weight", |
| "transformer.layers.1.2.to_k.weight", |
| "transformer.layers.1.2.to_v.weight", |
| "transformer.layers.1.2.to_out.0.weight", |
| "transformer.layers.1.4.ff.0.0.weight", |
| "transformer.layers.1.4.ff.2.weight", |
| "transformer.layers.2.2.to_q.weight", |
| "transformer.layers.2.2.to_k.weight", |
| "transformer.layers.2.2.to_v.weight", |
| "transformer.layers.2.2.to_out.0.weight", |
| "transformer.layers.2.4.ff.0.0.weight", |
| "transformer.layers.2.4.ff.2.weight", |
| "transformer.layers.3.2.to_q.weight", |
| "transformer.layers.3.2.to_k.weight", |
| "transformer.layers.3.2.to_v.weight", |
| "transformer.layers.3.2.to_out.0.weight", |
| "transformer.layers.3.4.ff.0.0.weight", |
| "transformer.layers.3.4.ff.2.weight", |
| "transformer.layers.4.2.to_q.weight", |
| "transformer.layers.4.2.to_k.weight", |
| "transformer.layers.4.2.to_v.weight", |
| "transformer.layers.4.2.to_out.0.weight", |
| "transformer.layers.4.4.ff.0.0.weight", |
| "transformer.layers.4.4.ff.2.weight", |
| "transformer.layers.5.2.to_q.weight", |
| "transformer.layers.5.2.to_k.weight", |
| "transformer.layers.5.2.to_v.weight", |
| "transformer.layers.5.2.to_out.0.weight", |
| "transformer.layers.5.4.ff.0.0.weight", |
| "transformer.layers.5.4.ff.2.weight", |
| "transformer.layers.6.2.to_q.weight", |
| "transformer.layers.6.2.to_k.weight", |
| "transformer.layers.6.2.to_v.weight", |
| "transformer.layers.6.2.to_out.0.weight", |
| "transformer.layers.6.4.ff.0.0.weight", |
| "transformer.layers.6.4.ff.2.weight", |
| "transformer.layers.7.2.to_q.weight", |
| "transformer.layers.7.2.to_k.weight", |
| "transformer.layers.7.2.to_v.weight", |
| "transformer.layers.7.2.to_out.0.weight", |
| "transformer.layers.7.4.ff.0.0.weight", |
| "transformer.layers.7.4.ff.2.weight", |
| "transformer.layers.8.4.ff.0.0.weight", |
| "transformer.layers.8.4.ff.2.weight", |
| "transformer.layers.9.4.ff.0.0.weight", |
| "transformer.layers.9.4.ff.2.weight", |
| "transformer.layers.10.4.ff.0.0.weight", |
| "transformer.layers.10.4.ff.2.weight", |
| "transformer.layers.11.4.ff.0.0.weight", |
| "transformer.layers.11.4.ff.2.weight", |
| "transformer.layers.12.4.ff.0.0.weight", |
| "transformer.layers.12.4.ff.2.weight", |
| "transformer.layers.13.0.weight", |
| "transformer.layers.13.4.ff.0.0.weight", |
| "transformer.layers.13.4.ff.2.weight", |
| "transformer.layers.14.0.weight", |
| "transformer.layers.14.4.ff.0.0.weight", |
| "transformer.layers.14.4.ff.2.weight", |
| "transformer.layers.15.0.weight", |
| "transformer.layers.15.4.ff.0.0.weight", |
| "transformer.layers.15.4.ff.2.weight", |
| "transformer.layers.16.4.ff.0.0.weight", |
| "transformer.layers.16.4.ff.2.weight", |
| "transformer.layers.17.4.ff.0.0.weight", |
| "transformer.layers.17.4.ff.2.weight", |
| "transformer.layers.18.4.ff.0.0.weight", |
| "transformer.layers.18.4.ff.2.weight", |
| "transformer.layers.19.4.ff.0.0.weight", |
| "transformer.layers.19.4.ff.2.weight", |
| "transformer.layers.20.4.ff.0.0.weight", |
| "transformer.layers.20.4.ff.2.weight", |
| "transformer.layers.21.4.ff.0.0.weight", |
| "transformer.layers.21.4.ff.2.weight", |
| "transformer.layers.22.4.ff.0.0.weight", |
| "transformer.layers.22.4.ff.2.weight", |
| "transformer.layers.23.4.ff.0.0.weight", |
| "transformer.layers.23.4.ff.2.weight", |
| "transformer.layers.24.4.ff.0.0.weight", |
| "transformer.layers.24.4.ff.2.weight", |
| "transformer.layers.25.4.ff.0.0.weight", |
| "transformer.layers.25.4.ff.2.weight" |
| ] |
| }, |
| "total_parameters": 391, |
| "total_elements": 360755948, |
| "param_ranges": { |
| "transformer.time_embed.time_mlp.0.weight": { |
| "min": -0.43031466007232666, |
| "max": 0.298143208026886, |
| "mean": -0.0025431362446397543, |
| "std": 0.042562514543533325, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 256 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.0.bias": { |
| "min": -0.0631568506360054, |
| "max": 0.10771193355321884, |
| "mean": 0.0006426331819966435, |
| "std": 0.03407834470272064, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.2.weight": { |
| "min": -0.4127056896686554, |
| "max": 0.8369137644767761, |
| "mean": -0.00020141302957199514, |
| "std": 0.024111632257699966, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.2.bias": { |
| "min": -0.11548846960067749, |
| "max": 0.3221578299999237, |
| "mean": -0.0009410656057298183, |
| "std": 0.019580261781811714, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.text_embed.text_embed.weight": { |
| "min": -2.7946255207061768, |
| "max": 2.873370885848999, |
| "mean": -0.0003634353051893413, |
| "std": 0.6154844164848328, |
| "sparsity": 0.0, |
| "shape": [ |
| 2546, |
| 100 |
| ] |
| }, |
| "transformer.input_embed.proj.weight": { |
| "min": -0.2794482707977295, |
| "max": 0.38173243403434753, |
| "mean": 0.0004242636787239462, |
| "std": 0.042748358100652695, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 300 |
| ] |
| }, |
| "transformer.input_embed.proj.bias": { |
| "min": -0.22289495170116425, |
| "max": 0.21001911163330078, |
| "mean": -0.004489608108997345, |
| "std": 0.040950216352939606, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { |
| "min": -0.4283224046230316, |
| "max": 0.4761110544204712, |
| "mean": 3.962942628277233e-06, |
| "std": 0.02451062761247158, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 64, |
| 31 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { |
| "min": -0.32575708627700806, |
| "max": 0.1571168750524521, |
| "mean": -0.04673216491937637, |
| "std": 0.051645807921886444, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { |
| "min": -0.4105567932128906, |
| "max": 0.3547790050506592, |
| "mean": -0.0001310346560785547, |
| "std": 0.02360442653298378, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 64, |
| 31 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { |
| "min": -0.23018451035022736, |
| "max": 0.2630932033061981, |
| "mean": -0.029156308621168137, |
| "std": 0.04940544068813324, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.1.g": { |
| "min": 0.2545531988143921, |
| "max": 0.8213090300559998, |
| "mean": 0.5256362557411194, |
| "std": 0.08106369525194168, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_q.weight": { |
| "min": -0.2971626818180084, |
| "max": 0.26604607701301575, |
| "mean": -0.0004256928223185241, |
| "std": 0.03210251033306122, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_q.bias": { |
| "min": -0.09291917830705643, |
| "max": 0.1250312328338623, |
| "mean": 0.0006477435817942023, |
| "std": 0.025753259658813477, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_k.weight": { |
| "min": -0.29085373878479004, |
| "max": 0.28159603476524353, |
| "mean": -7.506589463446289e-05, |
| "std": 0.030931703746318817, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_k.bias": { |
| "min": -5.906967639923096, |
| "max": 5.821649074554443, |
| "mean": -0.009350163862109184, |
| "std": 1.296647071838379, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_v.weight": { |
| "min": -0.42530331015586853, |
| "max": 0.3440260589122772, |
| "mean": 9.807322931010276e-05, |
| "std": 0.02995346300303936, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_v.bias": { |
| "min": -0.029081525281071663, |
| "max": 0.02767445333302021, |
| "mean": -0.00032374687725678086, |
| "std": 0.012576405890285969, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_out.0.weight": { |
| "min": -0.45424115657806396, |
| "max": 0.4482896625995636, |
| "mean": 2.3885608243290335e-05, |
| "std": 0.02385384775698185, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_out.0.bias": { |
| "min": -0.08883396536111832, |
| "max": 0.09114022552967072, |
| "mean": 0.00228882092051208, |
| "std": 0.01952745020389557, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.3.g": { |
| "min": 0.26677191257476807, |
| "max": 1.0577468872070312, |
| "mean": 0.53135746717453, |
| "std": 0.10473316162824631, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.4.ff.0.0.weight": { |
| "min": -0.5746102333068848, |
| "max": 0.6084363460540771, |
| "mean": -0.00043127068784087896, |
| "std": 0.03860073536634445, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.4.ff.0.0.bias": { |
| "min": -0.18297578394412994, |
| "max": 0.0456179715692997, |
| "mean": -0.029477983713150024, |
| "std": 0.042657021433115005, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.0.4.ff.2.weight": { |
| "min": -1.1673263311386108, |
| "max": 1.6341116428375244, |
| "mean": 0.00032315164571627975, |
| "std": 0.02769668586552143, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.0.4.ff.2.bias": { |
| "min": -0.16250400245189667, |
| "max": 0.20589958131313324, |
| "mean": -0.02113456465303898, |
| "std": 0.027959568426012993, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.1.g": { |
| "min": 0.22410069406032562, |
| "max": 0.8451111912727356, |
| "mean": 0.48777928948402405, |
| "std": 0.07542530447244644, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_q.weight": { |
| "min": -0.25582441687583923, |
| "max": 0.30595168471336365, |
| "mean": -6.705071427859366e-06, |
| "std": 0.03347504884004593, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_q.bias": { |
| "min": -0.09550327807664871, |
| "max": 0.11064136773347855, |
| "mean": 6.668796413578093e-05, |
| "std": 0.026976482942700386, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_k.weight": { |
| "min": -0.2973037660121918, |
| "max": 0.29644775390625, |
| "mean": 5.341449286788702e-05, |
| "std": 0.032546162605285645, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_k.bias": { |
| "min": -5.17097806930542, |
| "max": 5.091113090515137, |
| "mean": -0.01462231483310461, |
| "std": 1.1586002111434937, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_v.weight": { |
| "min": -0.34501704573631287, |
| "max": 0.34340131282806396, |
| "mean": 7.8546792792622e-05, |
| "std": 0.030061908066272736, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_v.bias": { |
| "min": -0.036109186708927155, |
| "max": 0.03340720757842064, |
| "mean": -0.00014173206000123173, |
| "std": 0.013041709549725056, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_out.0.weight": { |
| "min": -0.3156168460845947, |
| "max": 0.3752053380012512, |
| "mean": -2.0681722162407823e-05, |
| "std": 0.02405940182507038, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_out.0.bias": { |
| "min": -0.10555326193571091, |
| "max": 0.12231862545013428, |
| "mean": -0.0019678983371704817, |
| "std": 0.028872456401586533, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.3.g": { |
| "min": 0.3113996386528015, |
| "max": 1.1224051713943481, |
| "mean": 0.6664633750915527, |
| "std": 0.0980152115225792, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.4.ff.0.0.weight": { |
| "min": -0.8727887272834778, |
| "max": 0.6275914907455444, |
| "mean": 0.0016750607173889875, |
| "std": 0.047438763082027435, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.4.ff.0.0.bias": { |
| "min": -0.27183517813682556, |
| "max": 0.034259725362062454, |
| "mean": -0.046628981828689575, |
| "std": 0.04063701629638672, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.1.4.ff.2.weight": { |
| "min": -0.9230329394340515, |
| "max": 0.9648618102073669, |
| "mean": 0.0010213888017460704, |
| "std": 0.04070665314793587, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.1.4.ff.2.bias": { |
| "min": -0.14468412101268768, |
| "max": 0.07505139708518982, |
| "mean": -0.009096229448914528, |
| "std": 0.025706371292471886, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.1.g": { |
| "min": 0.24036771059036255, |
| "max": 0.7140315771102905, |
| "mean": 0.4473647475242615, |
| "std": 0.05951203405857086, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_q.weight": { |
| "min": -0.27264565229415894, |
| "max": 0.29809534549713135, |
| "mean": 9.332510671811178e-06, |
| "std": 0.03546958044171333, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_q.bias": { |
| "min": -0.11950661987066269, |
| "max": 0.11869802325963974, |
| "mean": 0.0007616454968228936, |
| "std": 0.02764517441391945, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_k.weight": { |
| "min": -0.2813079059123993, |
| "max": 0.28023794293403625, |
| "mean": -7.719700079178438e-05, |
| "std": 0.0350990891456604, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_k.bias": { |
| "min": -2.5128581523895264, |
| "max": 2.524867296218872, |
| "mean": 0.026786239817738533, |
| "std": 0.5873143672943115, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_v.weight": { |
| "min": -0.2213059961795807, |
| "max": 0.2717853784561157, |
| "mean": 2.9610819183290005e-06, |
| "std": 0.030732687562704086, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_v.bias": { |
| "min": -0.03361261636018753, |
| "max": 0.03129349276423454, |
| "mean": 0.00011305588122922927, |
| "std": 0.012413612566888332, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_out.0.weight": { |
| "min": -0.23544403910636902, |
| "max": 0.23186075687408447, |
| "mean": 5.69116891711019e-05, |
| "std": 0.025696195662021637, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_out.0.bias": { |
| "min": -0.13601461052894592, |
| "max": 0.12754406034946442, |
| "mean": -0.005499254446476698, |
| "std": 0.03998684883117676, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.3.g": { |
| "min": 0.35436785221099854, |
| "max": 1.1737076044082642, |
| "mean": 0.7108283638954163, |
| "std": 0.10403098911046982, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.4.ff.0.0.weight": { |
| "min": -0.6176053881645203, |
| "max": 0.5545136332511902, |
| "mean": 0.0011602240847423673, |
| "std": 0.04611964151263237, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.4.ff.0.0.bias": { |
| "min": -0.18876661360263824, |
| "max": 0.024967461824417114, |
| "mean": -0.03485583886504173, |
| "std": 0.028641268610954285, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.2.4.ff.2.weight": { |
| "min": -1.1323436498641968, |
| "max": 0.9720706939697266, |
| "mean": 0.00035946519346907735, |
| "std": 0.042347442358732224, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.2.4.ff.2.bias": { |
| "min": -0.598772406578064, |
| "max": 0.06287988275289536, |
| "mean": -0.004880873020738363, |
| "std": 0.028635544702410698, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.1.g": { |
| "min": 0.3750710189342499, |
| "max": 0.9418790340423584, |
| "mean": 0.5926927328109741, |
| "std": 0.06721659004688263, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_q.weight": { |
| "min": -0.3915771543979645, |
| "max": 0.3692559599876404, |
| "mean": 7.123942486941814e-05, |
| "std": 0.03718866407871246, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_q.bias": { |
| "min": -0.11907870322465897, |
| "max": 0.13665802776813507, |
| "mean": 0.0009319179225713015, |
| "std": 0.02926611341536045, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_k.weight": { |
| "min": -0.619708240032196, |
| "max": 0.5092929005622864, |
| "mean": 1.5245183021761477e-05, |
| "std": 0.03644217178225517, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_k.bias": { |
| "min": -8.197783470153809, |
| "max": 8.800565719604492, |
| "mean": -0.10938873887062073, |
| "std": 1.7007076740264893, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_v.weight": { |
| "min": -0.2768491804599762, |
| "max": 0.2400088757276535, |
| "mean": 5.314283407642506e-05, |
| "std": 0.032615404576063156, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_v.bias": { |
| "min": -0.05213421210646629, |
| "max": 0.03957239165902138, |
| "mean": 9.133941057370976e-05, |
| "std": 0.012963276356458664, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_out.0.weight": { |
| "min": -0.23089444637298584, |
| "max": 0.2348451018333435, |
| "mean": -2.176157067879103e-05, |
| "std": 0.029391760006546974, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_out.0.bias": { |
| "min": -0.20456741750240326, |
| "max": 0.10572919249534607, |
| "mean": -0.00402758177369833, |
| "std": 0.03263704851269722, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.3.g": { |
| "min": 0.3400026261806488, |
| "max": 1.0141218900680542, |
| "mean": 0.7010252475738525, |
| "std": 0.09696138650178909, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.4.ff.0.0.weight": { |
| "min": -0.5654259324073792, |
| "max": 0.8335409760475159, |
| "mean": 0.0004151407047174871, |
| "std": 0.04230234771966934, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.4.ff.0.0.bias": { |
| "min": -0.2119237780570984, |
| "max": 0.030580509454011917, |
| "mean": -0.03220224380493164, |
| "std": 0.026535935699939728, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.3.4.ff.2.weight": { |
| "min": -0.7552511096000671, |
| "max": 0.7191816568374634, |
| "mean": -9.422379662282765e-06, |
| "std": 0.036842163652181625, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.3.4.ff.2.bias": { |
| "min": -0.26383838057518005, |
| "max": 0.10599514842033386, |
| "mean": -0.0030335707124322653, |
| "std": 0.028880203142762184, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.1.g": { |
| "min": 0.28429752588272095, |
| "max": 0.6961002945899963, |
| "mean": 0.49966490268707275, |
| "std": 0.046708256006240845, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_q.weight": { |
| "min": -0.27950623631477356, |
| "max": 0.23444026708602905, |
| "mean": -0.0001112212921725586, |
| "std": 0.03876311331987381, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_q.bias": { |
| "min": -0.15468573570251465, |
| "max": 0.12698474526405334, |
| "mean": -0.0022345406468957663, |
| "std": 0.033433251082897186, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_k.weight": { |
| "min": -0.41459208726882935, |
| "max": 0.6603645086288452, |
| "mean": -1.977803731278982e-05, |
| "std": 0.03910015523433685, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_k.bias": { |
| "min": -4.243562698364258, |
| "max": 4.728666305541992, |
| "mean": -0.020446542650461197, |
| "std": 1.0085786581039429, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_v.weight": { |
| "min": -0.24519944190979004, |
| "max": 0.2077825665473938, |
| "mean": 4.388581874081865e-05, |
| "std": 0.033966176211833954, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_v.bias": { |
| "min": -0.034593358635902405, |
| "max": 0.04485077038407326, |
| "mean": -1.7529440810903907e-05, |
| "std": 0.012629235163331032, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_out.0.weight": { |
| "min": -0.20095357298851013, |
| "max": 0.20613527297973633, |
| "mean": -2.959615085273981e-05, |
| "std": 0.03102371282875538, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_out.0.bias": { |
| "min": -0.20019184052944183, |
| "max": 0.11357004940509796, |
| "mean": -0.0029205437749624252, |
| "std": 0.034529101103544235, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.3.g": { |
| "min": 0.36704930663108826, |
| "max": 1.058448076248169, |
| "mean": 0.6707465052604675, |
| "std": 0.0665469765663147, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.4.ff.0.0.weight": { |
| "min": -0.3986629843711853, |
| "max": 0.5028019547462463, |
| "mean": -3.858951822621748e-05, |
| "std": 0.04113718494772911, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.4.ff.0.0.bias": { |
| "min": -0.12819068133831024, |
| "max": 0.026764869689941406, |
| "mean": -0.03055746480822563, |
| "std": 0.021891731768846512, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.4.4.ff.2.weight": { |
| "min": -0.44944334030151367, |
| "max": 0.43338072299957275, |
| "mean": 8.373618766199797e-05, |
| "std": 0.03489609435200691, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.4.4.ff.2.bias": { |
| "min": -0.2679402530193329, |
| "max": 0.07267966121435165, |
| "mean": -0.0011121004354208708, |
| "std": 0.023136794567108154, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.1.g": { |
| "min": 0.2874027192592621, |
| "max": 0.6862822771072388, |
| "mean": 0.5247019529342651, |
| "std": 0.047706179320812225, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_q.weight": { |
| "min": -0.22247114777565002, |
| "max": 0.2237931489944458, |
| "mean": 1.5673409507144243e-05, |
| "std": 0.03895280137658119, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_q.bias": { |
| "min": -0.13664273917675018, |
| "max": 0.10935632139444351, |
| "mean": 0.00023680762387812138, |
| "std": 0.029263831675052643, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_k.weight": { |
| "min": -0.37552782893180847, |
| "max": 0.43765556812286377, |
| "mean": -9.529509043204598e-06, |
| "std": 0.0392889641225338, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_k.bias": { |
| "min": -3.8507211208343506, |
| "max": 5.005820274353027, |
| "mean": 0.00975782610476017, |
| "std": 0.8459950685501099, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_v.weight": { |
| "min": -0.2234737128019333, |
| "max": 0.22026528418064117, |
| "mean": -2.2568747226614505e-07, |
| "std": 0.03441343083977699, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_v.bias": { |
| "min": -0.043700210750103, |
| "max": 0.0358847938477993, |
| "mean": -0.0002585579641163349, |
| "std": 0.012083812616765499, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_out.0.weight": { |
| "min": -0.21352721750736237, |
| "max": 0.1891147494316101, |
| "mean": -1.673133192525711e-05, |
| "std": 0.031540192663669586, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_out.0.bias": { |
| "min": -0.18098995089530945, |
| "max": 0.12096531689167023, |
| "mean": -0.0024120290763676167, |
| "std": 0.04128490760922432, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.3.g": { |
| "min": 0.4226054847240448, |
| "max": 0.9433368444442749, |
| "mean": 0.6629081964492798, |
| "std": 0.056974004954099655, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.4.ff.0.0.weight": { |
| "min": -0.37151336669921875, |
| "max": 0.4759024977684021, |
| "mean": -8.223902113968506e-05, |
| "std": 0.040896181017160416, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.4.ff.0.0.bias": { |
| "min": -0.20840628445148468, |
| "max": 0.02712824009358883, |
| "mean": -0.030254749581217766, |
| "std": 0.02136547490954399, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.5.4.ff.2.weight": { |
| "min": -0.3405216336250305, |
| "max": 0.7342746257781982, |
| "mean": 8.478653035126626e-05, |
| "std": 0.03477146103978157, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.5.4.ff.2.bias": { |
| "min": -0.2405085265636444, |
| "max": 0.05050582066178322, |
| "mean": -0.0011980931740254164, |
| "std": 0.02047325111925602, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.1.g": { |
| "min": 0.305998831987381, |
| "max": 0.6545577049255371, |
| "mean": 0.525275707244873, |
| "std": 0.0462840236723423, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_q.weight": { |
| "min": -0.30443698167800903, |
| "max": 0.2175063043832779, |
| "mean": 6.991640839260072e-05, |
| "std": 0.03949848935008049, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_q.bias": { |
| "min": -0.1496177613735199, |
| "max": 0.1315852701663971, |
| "mean": 0.00034793667146004736, |
| "std": 0.030498284846544266, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_k.weight": { |
| "min": -0.25779959559440613, |
| "max": 0.2024526447057724, |
| "mean": 3.095036663580686e-05, |
| "std": 0.039487626403570175, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_k.bias": { |
| "min": -2.3393359184265137, |
| "max": 2.3790037631988525, |
| "mean": -0.02626325562596321, |
| "std": 0.4501512348651886, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_v.weight": { |
| "min": -0.189274862408638, |
| "max": 0.2107497602701187, |
| "mean": 3.7229168810881674e-05, |
| "std": 0.03479816019535065, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_v.bias": { |
| "min": -0.03176194056868553, |
| "max": 0.035539623349905014, |
| "mean": -0.00020054224296472967, |
| "std": 0.012292396277189255, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_out.0.weight": { |
| "min": -0.18866902589797974, |
| "max": 0.17066700756549835, |
| "mean": -6.797373498557135e-05, |
| "std": 0.032174721360206604, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_out.0.bias": { |
| "min": -0.13956007361412048, |
| "max": 0.13746821880340576, |
| "mean": -0.0025175614282488823, |
| "std": 0.0513296015560627, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.3.g": { |
| "min": 0.4674248695373535, |
| "max": 0.957923948764801, |
| "mean": 0.6691091656684875, |
| "std": 0.052978649735450745, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.4.ff.0.0.weight": { |
| "min": -0.32444727420806885, |
| "max": 0.3098219633102417, |
| "mean": -1.5040723155834712e-06, |
| "std": 0.040952056646347046, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.4.ff.0.0.bias": { |
| "min": -0.12495888024568558, |
| "max": 0.025304077193140984, |
| "mean": -0.03072468377649784, |
| "std": 0.019833404570817947, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.6.4.ff.2.weight": { |
| "min": -0.44051459431648254, |
| "max": 0.44567734003067017, |
| "mean": 9.530649549560621e-05, |
| "std": 0.03512415289878845, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.6.4.ff.2.bias": { |
| "min": -0.2248658984899521, |
| "max": 0.05171418562531471, |
| "mean": -0.0011846581473946571, |
| "std": 0.018478091806173325, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.1.g": { |
| "min": 0.33937862515449524, |
| "max": 0.7403524518013, |
| "mean": 0.5588580369949341, |
| "std": 0.041548021137714386, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_q.weight": { |
| "min": -0.27266961336135864, |
| "max": 0.2785436511039734, |
| "mean": 1.9886707377736457e-05, |
| "std": 0.041062381118535995, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_q.bias": { |
| "min": -0.13720278441905975, |
| "max": 0.1400555521249771, |
| "mean": 0.0004891848657280207, |
| "std": 0.026654429733753204, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_k.weight": { |
| "min": -0.4912598729133606, |
| "max": 0.3564285337924957, |
| "mean": 8.880282985046506e-05, |
| "std": 0.040700383484363556, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_k.bias": { |
| "min": -2.3000996112823486, |
| "max": 1.7473976612091064, |
| "mean": -0.021102074533700943, |
| "std": 0.5005303025245667, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_v.weight": { |
| "min": -0.21771195530891418, |
| "max": 0.19800876080989838, |
| "mean": -4.054907913086936e-05, |
| "std": 0.03423738107085228, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_v.bias": { |
| "min": -0.04137791320681572, |
| "max": 0.03871942684054375, |
| "mean": -0.00014505225408356637, |
| "std": 0.012883453629910946, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_out.0.weight": { |
| "min": -0.1777161806821823, |
| "max": 0.1839223951101303, |
| "mean": 4.761077434523031e-05, |
| "std": 0.03156030550599098, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_out.0.bias": { |
| "min": -0.1801164597272873, |
| "max": 0.18409180641174316, |
| "mean": -0.002218745881691575, |
| "std": 0.05486130341887474, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.3.g": { |
| "min": 0.4742484390735626, |
| "max": 1.027018666267395, |
| "mean": 0.6454694271087646, |
| "std": 0.050571199506521225, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.4.ff.0.0.weight": { |
| "min": -0.27197960019111633, |
| "max": 0.3094431757926941, |
| "mean": 0.00011241070023970678, |
| "std": 0.0406884104013443, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.4.ff.0.0.bias": { |
| "min": -0.10593951493501663, |
| "max": 0.026867138221859932, |
| "mean": -0.02952626720070839, |
| "std": 0.0179454255849123, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.7.4.ff.2.weight": { |
| "min": -0.339232474565506, |
| "max": 0.32961946725845337, |
| "mean": 5.7173179811798036e-05, |
| "std": 0.03441809490323067, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.7.4.ff.2.bias": { |
| "min": -0.1818968504667282, |
| "max": 0.04209613800048828, |
| "mean": -0.001073765684850514, |
| "std": 0.017224203795194626, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.1.g": { |
| "min": 0.3253825902938843, |
| "max": 0.6876205801963806, |
| "mean": 0.5113766193389893, |
| "std": 0.03712678700685501, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_q.weight": { |
| "min": -0.2340041846036911, |
| "max": 0.22588428854942322, |
| "mean": -3.603727600420825e-05, |
| "std": 0.03918161243200302, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_q.bias": { |
| "min": -0.11547420918941498, |
| "max": 0.13177312910556793, |
| "mean": 0.00015100545715540648, |
| "std": 0.029211556538939476, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_k.weight": { |
| "min": -0.353280246257782, |
| "max": 0.28580334782600403, |
| "mean": 7.311312401725445e-06, |
| "std": 0.03925010561943054, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_k.bias": { |
| "min": -4.137877941131592, |
| "max": 3.5483016967773438, |
| "mean": -0.011621923185884953, |
| "std": 0.6833143830299377, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_v.weight": { |
| "min": -0.21149367094039917, |
| "max": 0.20919673144817352, |
| "mean": 3.474394543445669e-05, |
| "std": 0.034489404410123825, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_v.bias": { |
| "min": -0.0357508510351181, |
| "max": 0.048132169991731644, |
| "mean": 0.0007945147808641195, |
| "std": 0.012859269045293331, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_out.0.weight": { |
| "min": -0.21085014939308167, |
| "max": 0.19338075816631317, |
| "mean": -1.279619482374983e-06, |
| "std": 0.03169989585876465, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_out.0.bias": { |
| "min": -0.18688012659549713, |
| "max": 0.17741110920906067, |
| "mean": -0.0028487846720963717, |
| "std": 0.05866115912795067, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.3.g": { |
| "min": 0.4747392237186432, |
| "max": 1.0433117151260376, |
| "mean": 0.6515810489654541, |
| "std": 0.04988763853907585, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.4.ff.0.0.weight": { |
| "min": -0.2485654354095459, |
| "max": 0.32921651005744934, |
| "mean": 0.00018060754518955946, |
| "std": 0.04057681933045387, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.4.ff.0.0.bias": { |
| "min": -0.12461096793413162, |
| "max": 0.024597609415650368, |
| "mean": -0.030512426048517227, |
| "std": 0.017616724595427513, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.8.4.ff.2.weight": { |
| "min": -0.42169103026390076, |
| "max": 0.4825250208377838, |
| "mean": 2.1487815047294134e-06, |
| "std": 0.03540307283401489, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.8.4.ff.2.bias": { |
| "min": -0.15202857553958893, |
| "max": 0.04342101141810417, |
| "mean": 3.956547880079597e-05, |
| "std": 0.014885293319821358, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.1.g": { |
| "min": 0.315530002117157, |
| "max": 0.6829717755317688, |
| "mean": 0.5530707240104675, |
| "std": 0.04085434973239899, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_q.weight": { |
| "min": -0.20659124851226807, |
| "max": 0.2201390564441681, |
| "mean": 3.096506407018751e-05, |
| "std": 0.03830333426594734, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_q.bias": { |
| "min": -0.1380155086517334, |
| "max": 0.11290067434310913, |
| "mean": 2.059592225123197e-05, |
| "std": 0.025836361572146416, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_k.weight": { |
| "min": -0.40320274233818054, |
| "max": 0.37160059809684753, |
| "mean": 2.6222376618534327e-05, |
| "std": 0.03818517550826073, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_k.bias": { |
| "min": -3.775665044784546, |
| "max": 2.872361421585083, |
| "mean": 0.0011700298637151718, |
| "std": 0.5173272490501404, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_v.weight": { |
| "min": -0.2030554711818695, |
| "max": 0.19753621518611908, |
| "mean": 2.9474727853084914e-05, |
| "std": 0.03430046886205673, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_v.bias": { |
| "min": -0.05103779584169388, |
| "max": 0.04008523374795914, |
| "mean": -0.000419780844822526, |
| "std": 0.013429902493953705, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_out.0.weight": { |
| "min": -0.19672255218029022, |
| "max": 0.20196260511875153, |
| "mean": -1.2339524801063817e-05, |
| "std": 0.03180818632245064, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_out.0.bias": { |
| "min": -0.19336175918579102, |
| "max": 0.19535411894321442, |
| "mean": -0.0029691390227526426, |
| "std": 0.06259549409151077, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.3.g": { |
| "min": 0.34919390082359314, |
| "max": 1.0855821371078491, |
| "mean": 0.6673611998558044, |
| "std": 0.055458005517721176, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.4.ff.0.0.weight": { |
| "min": -0.22532346844673157, |
| "max": 0.2517567276954651, |
| "mean": 0.0003590356500353664, |
| "std": 0.04076584428548813, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.4.ff.0.0.bias": { |
| "min": -0.09113647788763046, |
| "max": 0.04372163116931915, |
| "mean": -0.030099857598543167, |
| "std": 0.01762346550822258, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.9.4.ff.2.weight": { |
| "min": -0.3537713587284088, |
| "max": 0.3043927252292633, |
| "mean": -4.351784446043894e-05, |
| "std": 0.03712814301252365, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.9.4.ff.2.bias": { |
| "min": -0.1622427999973297, |
| "max": 0.0636076033115387, |
| "mean": -8.386171248275787e-05, |
| "std": 0.019415445625782013, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.1.g": { |
| "min": 0.34875378012657166, |
| "max": 0.7230772972106934, |
| "mean": 0.542546272277832, |
| "std": 0.03922481834888458, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_q.weight": { |
| "min": -0.21956898272037506, |
| "max": 0.22326983511447906, |
| "mean": -1.1109572369605303e-05, |
| "std": 0.03923607990145683, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_q.bias": { |
| "min": -0.11858610808849335, |
| "max": 0.1710456758737564, |
| "mean": 0.00028452256810851395, |
| "std": 0.025138530880212784, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_k.weight": { |
| "min": -0.24716253578662872, |
| "max": 0.30147185921669006, |
| "mean": -3.647191624622792e-05, |
| "std": 0.03893563523888588, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_k.bias": { |
| "min": -3.5094945430755615, |
| "max": 3.7191741466522217, |
| "mean": 0.015858110040426254, |
| "std": 0.7832505702972412, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_v.weight": { |
| "min": -0.21879123151302338, |
| "max": 0.2377484291791916, |
| "mean": -1.353577317786403e-05, |
| "std": 0.03630785644054413, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_v.bias": { |
| "min": -0.04725177586078644, |
| "max": 0.05147033557295799, |
| "mean": 0.00048084836453199387, |
| "std": 0.01352026965469122, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_out.0.weight": { |
| "min": -0.21421915292739868, |
| "max": 0.21782870590686798, |
| "mean": 5.651723040500656e-05, |
| "std": 0.03361982852220535, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_out.0.bias": { |
| "min": -0.2116560935974121, |
| "max": 0.23178474605083466, |
| "mean": -0.005108034238219261, |
| "std": 0.06190710514783859, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.3.g": { |
| "min": 0.3619433343410492, |
| "max": 1.1028457880020142, |
| "mean": 0.6994728446006775, |
| "std": 0.05383099243044853, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.4.ff.0.0.weight": { |
| "min": -0.2347707897424698, |
| "max": 0.24507476389408112, |
| "mean": 0.00046346502494998276, |
| "std": 0.041274722665548325, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.4.ff.0.0.bias": { |
| "min": -0.098201684653759, |
| "max": 0.06837960332632065, |
| "mean": -0.031449105590581894, |
| "std": 0.01813678629696369, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.10.4.ff.2.weight": { |
| "min": -0.3019881546497345, |
| "max": 0.351855993270874, |
| "mean": -8.162805897882208e-05, |
| "std": 0.040280550718307495, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.10.4.ff.2.bias": { |
| "min": -0.1525154411792755, |
| "max": 0.14985136687755585, |
| "mean": 0.0002546610194258392, |
| "std": 0.02304759994149208, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.1.g": { |
| "min": 0.9988279342651367, |
| "max": 1.0030174255371094, |
| "mean": 1.0003814697265625, |
| "std": 0.0010646688751876354, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_q.weight": { |
| "min": -0.03128192201256752, |
| "max": 0.031278640031814575, |
| "mean": -1.9287415852886625e-05, |
| "std": 0.01804400235414505, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_q.bias": { |
| "min": -0.031218387186527252, |
| "max": 0.03101835958659649, |
| "mean": -0.0010843591298907995, |
| "std": 0.01795342192053795, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_k.weight": { |
| "min": -0.031292207539081573, |
| "max": 0.03128044679760933, |
| "mean": 3.544726496329531e-06, |
| "std": 0.018044408410787582, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_k.bias": { |
| "min": -0.031148849055171013, |
| "max": 0.031187163665890694, |
| "mean": 0.000333936681272462, |
| "std": 0.01806570589542389, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_v.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_v.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_out.0.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_out.0.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.3.g": { |
| "min": 0.9988681674003601, |
| "max": 1.0030490159988403, |
| "mean": 1.0004115104675293, |
| "std": 0.0010549556463956833, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.4.ff.0.0.weight": { |
| "min": -0.031293854117393494, |
| "max": 0.03129155561327934, |
| "mean": -8.391638402827084e-06, |
| "std": 0.018043123185634613, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.4.ff.0.0.bias": { |
| "min": -0.031248562037944794, |
| "max": 0.03123636171221733, |
| "mean": 0.00015367052401416004, |
| "std": 0.017994463443756104, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.11.4.ff.2.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.11.4.ff.2.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.1.g": { |
| "min": 0.382835328578949, |
| "max": 0.7205657362937927, |
| "mean": 0.5808628797531128, |
| "std": 0.03902854025363922, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_q.weight": { |
| "min": -0.23823925852775574, |
| "max": 0.1967414915561676, |
| "mean": 2.6552535928203724e-05, |
| "std": 0.03746962919831276, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_q.bias": { |
| "min": -0.11881034076213837, |
| "max": 0.16626670956611633, |
| "mean": 0.000991516513749957, |
| "std": 0.027575215324759483, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_k.weight": { |
| "min": -0.24632981419563293, |
| "max": 0.5012024641036987, |
| "mean": -5.04429881402757e-05, |
| "std": 0.03762752190232277, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_k.bias": { |
| "min": -3.946824312210083, |
| "max": 3.773773670196533, |
| "mean": -0.0035694693215191364, |
| "std": 0.6819667816162109, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_v.weight": { |
| "min": -0.22745896875858307, |
| "max": 0.2515793740749359, |
| "mean": -1.1545061170181725e-05, |
| "std": 0.03743903711438179, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_v.bias": { |
| "min": -0.07179750502109528, |
| "max": 0.0807880237698555, |
| "mean": -0.0005204002372920513, |
| "std": 0.015668606385588646, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_out.0.weight": { |
| "min": -0.22822564840316772, |
| "max": 0.25826144218444824, |
| "mean": -2.862494147848338e-05, |
| "std": 0.03542570024728775, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_out.0.bias": { |
| "min": -0.2006409764289856, |
| "max": 0.21548894047737122, |
| "mean": -0.005540885496884584, |
| "std": 0.06836719810962677, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.3.g": { |
| "min": 0.40525123476982117, |
| "max": 1.1910948753356934, |
| "mean": 0.7381879091262817, |
| "std": 0.05550322309136391, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.4.ff.0.0.weight": { |
| "min": -0.2213674634695053, |
| "max": 0.2461645007133484, |
| "mean": 0.0005210727686062455, |
| "std": 0.04134247452020645, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.4.ff.0.0.bias": { |
| "min": -0.10357673466205597, |
| "max": 0.02419574372470379, |
| "mean": -0.03268023580312729, |
| "std": 0.01890200562775135, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.12.4.ff.2.weight": { |
| "min": -0.44974827766418457, |
| "max": 0.42273736000061035, |
| "mean": -0.00043248123256489635, |
| "std": 0.046903859823942184, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.12.4.ff.2.bias": { |
| "min": -0.2517695128917694, |
| "max": 0.4706769287586212, |
| "mean": 0.003199656493961811, |
| "std": 0.04457153007388115, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.0.weight": { |
| "min": -0.3170992434024811, |
| "max": 0.333298921585083, |
| "mean": -2.5289473342127167e-05, |
| "std": 0.021290816366672516, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.13.1.g": { |
| "min": 0.32478415966033936, |
| "max": 0.6863877177238464, |
| "mean": 0.5711605548858643, |
| "std": 0.04484730586409569, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_q.weight": { |
| "min": -0.1647796630859375, |
| "max": 0.17416934669017792, |
| "mean": -4.8634105041855946e-05, |
| "std": 0.03318461403250694, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_q.bias": { |
| "min": -0.1870798021554947, |
| "max": 0.14308109879493713, |
| "mean": 3.898901923093945e-05, |
| "std": 0.02971462905406952, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_k.weight": { |
| "min": -0.38088855147361755, |
| "max": 0.2463647872209549, |
| "mean": -9.938010407495312e-06, |
| "std": 0.03276585787534714, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_k.bias": { |
| "min": -3.6601390838623047, |
| "max": 3.2940189838409424, |
| "mean": -0.01424746960401535, |
| "std": 0.9857901930809021, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_v.weight": { |
| "min": -0.2351982444524765, |
| "max": 0.24773260951042175, |
| "mean": -1.7793041479308158e-05, |
| "std": 0.04170281067490578, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_v.bias": { |
| "min": -0.07287801802158356, |
| "max": 0.15471716225147247, |
| "mean": 0.0006660239887423813, |
| "std": 0.025180837139487267, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_out.0.weight": { |
| "min": -0.26665613055229187, |
| "max": 0.24858269095420837, |
| "mean": -1.5366244042525068e-05, |
| "std": 0.04014318436384201, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_out.0.bias": { |
| "min": -0.18983444571495056, |
| "max": 0.1949683576822281, |
| "mean": -0.0012304731644690037, |
| "std": 0.06671547889709473, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.3.g": { |
| "min": 0.32925084233283997, |
| "max": 1.0009599924087524, |
| "mean": 0.7193903923034668, |
| "std": 0.052590519189834595, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.4.ff.0.0.weight": { |
| "min": -0.23175209760665894, |
| "max": 0.24594298005104065, |
| "mean": 0.00018278483184985816, |
| "std": 0.04090619832277298, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.4.ff.0.0.bias": { |
| "min": -0.11433617770671844, |
| "max": 0.018662281334400177, |
| "mean": -0.04249466210603714, |
| "std": 0.01887579821050167, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.13.4.ff.2.weight": { |
| "min": -0.3903564512729645, |
| "max": 0.4076610803604126, |
| "mean": -2.190250415878836e-05, |
| "std": 0.04854064807295799, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.13.4.ff.2.bias": { |
| "min": -0.694047212600708, |
| "max": 0.413125216960907, |
| "mean": 0.000851891003549099, |
| "std": 0.06033211946487427, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.0.weight": { |
| "min": 0.0, |
| "max": 0.9999971389770508, |
| "mean": 0.0004882798530161381, |
| "std": 0.022091632708907127, |
| "sparsity": 0.99951171875, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.14.1.g": { |
| "min": 0.9987401366233826, |
| "max": 1.0030049085617065, |
| "mean": 1.0003970861434937, |
| "std": 0.0010890224948525429, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_q.weight": { |
| "min": -0.03128720819950104, |
| "max": 0.03127748519182205, |
| "mean": -2.1021871361881495e-05, |
| "std": 0.018035341054201126, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_q.bias": { |
| "min": -0.031208951026201248, |
| "max": 0.0312366746366024, |
| "mean": -0.0006772055057808757, |
| "std": 0.01782999187707901, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_k.weight": { |
| "min": -0.03128505125641823, |
| "max": 0.0312827005982399, |
| "mean": -8.840423106448725e-06, |
| "std": 0.01803436689078808, |
| "sparsity": 9.5367431640625e-07, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_k.bias": { |
| "min": -0.031223762780427933, |
| "max": 0.031257808208465576, |
| "mean": -0.0007298105047084391, |
| "std": 0.017944179475307465, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_v.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_v.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_out.0.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_out.0.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.3.g": { |
| "min": 0.9988026022911072, |
| "max": 1.0031852722167969, |
| "mean": 1.0003986358642578, |
| "std": 0.0010702211875468493, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.4.ff.0.0.weight": { |
| "min": -0.03128661960363388, |
| "max": 0.03128815069794655, |
| "mean": 3.5941102396463975e-06, |
| "std": 0.01804072968661785, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.4.ff.0.0.bias": { |
| "min": -0.03123682737350464, |
| "max": 0.03124977834522724, |
| "mean": 0.00019563926616683602, |
| "std": 0.018076641485095024, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.14.4.ff.2.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.14.4.ff.2.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.0.weight": { |
| "min": -0.2346186488866806, |
| "max": 0.27259576320648193, |
| "mean": 6.985836080275476e-06, |
| "std": 0.01881217770278454, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.15.1.g": { |
| "min": 0.3213435411453247, |
| "max": 0.6945998072624207, |
| "mean": 0.5817909240722656, |
| "std": 0.04608319699764252, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_q.weight": { |
| "min": -0.18191689252853394, |
| "max": 0.19781433045864105, |
| "mean": -1.1746024938474875e-05, |
| "std": 0.03318719565868378, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_q.bias": { |
| "min": -0.16086804866790771, |
| "max": 0.1296302229166031, |
| "mean": -0.0010684699518606067, |
| "std": 0.034163739532232285, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_k.weight": { |
| "min": -0.33239439129829407, |
| "max": 0.31163647770881653, |
| "mean": -1.0337707863072865e-05, |
| "std": 0.03223792091012001, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_k.bias": { |
| "min": -7.812414169311523, |
| "max": 8.773359298706055, |
| "mean": 0.09355923533439636, |
| "std": 1.6210812330245972, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_v.weight": { |
| "min": -0.23393671214580536, |
| "max": 0.24211150407791138, |
| "mean": 4.141662793699652e-05, |
| "std": 0.04086197167634964, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_v.bias": { |
| "min": -0.07609452307224274, |
| "max": 0.06586258113384247, |
| "mean": 0.00047865102533251047, |
| "std": 0.01942458190023899, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_out.0.weight": { |
| "min": -0.24614335596561432, |
| "max": 0.23432280123233795, |
| "mean": -2.907749149017036e-06, |
| "std": 0.03943663462996483, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_out.0.bias": { |
| "min": -0.16305704414844513, |
| "max": 0.1610053926706314, |
| "mean": 0.0016310829669237137, |
| "std": 0.06529799103736877, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.3.g": { |
| "min": 0.5568323135375977, |
| "max": 0.9453117847442627, |
| "mean": 0.7130987644195557, |
| "std": 0.040391918271780014, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.4.ff.0.0.weight": { |
| "min": -0.2288832664489746, |
| "max": 0.25533148646354675, |
| "mean": -4.5479209802579135e-05, |
| "std": 0.04058132693171501, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.4.ff.0.0.bias": { |
| "min": -0.13495926558971405, |
| "max": 0.022289777174592018, |
| "mean": -0.0413689985871315, |
| "std": 0.018403179943561554, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.15.4.ff.2.weight": { |
| "min": -0.4220907390117645, |
| "max": 0.3925161063671112, |
| "mean": -4.4413791329134256e-06, |
| "std": 0.04779106378555298, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.15.4.ff.2.bias": { |
| "min": -0.6081769466400146, |
| "max": 0.652148425579071, |
| "mean": 0.001585810212418437, |
| "std": 0.05687166377902031, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.0.weight": { |
| "min": -0.2517467141151428, |
| "max": 0.32074928283691406, |
| "mean": -6.074779776099604e-06, |
| "std": 0.019615592435002327, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.16.1.g": { |
| "min": 0.36013174057006836, |
| "max": 0.6833459138870239, |
| "mean": 0.570884644985199, |
| "std": 0.04308824613690376, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_q.weight": { |
| "min": -0.22070643305778503, |
| "max": 0.17717598378658295, |
| "mean": -3.468842260190286e-05, |
| "std": 0.03430233895778656, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_q.bias": { |
| "min": -0.16383720934391022, |
| "max": 0.23332805931568146, |
| "mean": 0.0003637468325905502, |
| "std": 0.032890770584344864, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_k.weight": { |
| "min": -0.26396337151527405, |
| "max": 0.2400342971086502, |
| "mean": -5.2375002269400284e-05, |
| "std": 0.03390149027109146, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_k.bias": { |
| "min": -4.860640525817871, |
| "max": 5.097131252288818, |
| "mean": 0.04391013830900192, |
| "std": 1.2302772998809814, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_v.weight": { |
| "min": -0.24682015180587769, |
| "max": 0.25062263011932373, |
| "mean": 7.221732084872201e-05, |
| "std": 0.043993160128593445, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_v.bias": { |
| "min": -0.06271186470985413, |
| "max": 0.05459222570061684, |
| "mean": 0.0006507715443149209, |
| "std": 0.017198268324136734, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_out.0.weight": { |
| "min": -0.2868800759315491, |
| "max": 0.271938681602478, |
| "mean": -4.989939043298364e-05, |
| "std": 0.04299154132604599, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_out.0.bias": { |
| "min": -0.16084273159503937, |
| "max": 0.1707206517457962, |
| "mean": -0.002884692046791315, |
| "std": 0.059305742383003235, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.3.g": { |
| "min": 0.51964271068573, |
| "max": 0.9341827630996704, |
| "mean": 0.7137263417243958, |
| "std": 0.038649603724479675, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.4.ff.0.0.weight": { |
| "min": -0.23825131356716156, |
| "max": 0.24959467351436615, |
| "mean": 0.00046492042019963264, |
| "std": 0.04046143591403961, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.4.ff.0.0.bias": { |
| "min": -0.14443093538284302, |
| "max": 0.04144603759050369, |
| "mean": -0.039705902338027954, |
| "std": 0.020563002675771713, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.16.4.ff.2.weight": { |
| "min": -0.5333583354949951, |
| "max": 0.5836927890777588, |
| "mean": 5.9018666433985345e-06, |
| "std": 0.048868328332901, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.16.4.ff.2.bias": { |
| "min": -0.5197700262069702, |
| "max": 0.4940829873085022, |
| "mean": 0.0023609776981174946, |
| "std": 0.05347929149866104, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.0.weight": { |
| "min": -0.27364596724510193, |
| "max": 0.3152502179145813, |
| "mean": 1.8441196516505443e-06, |
| "std": 0.02005275897681713, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.17.1.g": { |
| "min": 0.36628827452659607, |
| "max": 0.7126691937446594, |
| "mean": 0.5933467149734497, |
| "std": 0.046086061745882034, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_q.weight": { |
| "min": -0.21118636429309845, |
| "max": 0.19975997507572174, |
| "mean": 3.079167436226271e-05, |
| "std": 0.0348685048520565, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_q.bias": { |
| "min": -0.18748052418231964, |
| "max": 0.2042539119720459, |
| "mean": 0.000956728239543736, |
| "std": 0.03154991194605827, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_k.weight": { |
| "min": -0.28994736075401306, |
| "max": 0.3401152789592743, |
| "mean": -4.7362642362713814e-05, |
| "std": 0.03458964452147484, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_k.bias": { |
| "min": -3.881758451461792, |
| "max": 3.3913075923919678, |
| "mean": 0.014463461004197598, |
| "std": 0.8590267896652222, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_v.weight": { |
| "min": -0.22456875443458557, |
| "max": 0.2500464916229248, |
| "mean": -3.998348802269902e-06, |
| "std": 0.042235810309648514, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_v.bias": { |
| "min": -0.05513551086187363, |
| "max": 0.046896424144506454, |
| "mean": -1.89729908015579e-05, |
| "std": 0.01585385575890541, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_out.0.weight": { |
| "min": -0.2930184602737427, |
| "max": 0.2910744249820709, |
| "mean": -7.35160028852988e-06, |
| "std": 0.041950810700654984, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_out.0.bias": { |
| "min": -0.12501806020736694, |
| "max": 0.2597162425518036, |
| "mean": -0.003234931267797947, |
| "std": 0.05317143350839615, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.3.g": { |
| "min": 0.4562249779701233, |
| "max": 0.8457176685333252, |
| "mean": 0.705817699432373, |
| "std": 0.035453151911497116, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.4.ff.0.0.weight": { |
| "min": -0.5114459991455078, |
| "max": 0.3485345244407654, |
| "mean": 0.0003425978356972337, |
| "std": 0.04020640254020691, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.4.ff.0.0.bias": { |
| "min": -0.1872977465391159, |
| "max": 0.039509162306785583, |
| "mean": -0.03940243646502495, |
| "std": 0.02136845327913761, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.17.4.ff.2.weight": { |
| "min": -0.5449100136756897, |
| "max": 0.5570695400238037, |
| "mean": -7.181215914897621e-05, |
| "std": 0.05074289068579674, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.17.4.ff.2.bias": { |
| "min": -0.5124268531799316, |
| "max": 0.6651233434677124, |
| "mean": 0.002447479637339711, |
| "std": 0.04955451935529709, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.0.weight": { |
| "min": -0.33246591687202454, |
| "max": 0.2658751308917999, |
| "mean": 3.69829467672389e-06, |
| "std": 0.019390346482396126, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.18.1.g": { |
| "min": 0.3222673833370209, |
| "max": 0.7674033641815186, |
| "mean": 0.6512042284011841, |
| "std": 0.04545491561293602, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_q.weight": { |
| "min": -0.2496984899044037, |
| "max": 0.21969059109687805, |
| "mean": -2.5450863176956773e-06, |
| "std": 0.03650245815515518, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_q.bias": { |
| "min": -0.32755619287490845, |
| "max": 0.28763604164123535, |
| "mean": -0.0006797901587560773, |
| "std": 0.03858839347958565, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_k.weight": { |
| "min": -0.3103632628917694, |
| "max": 0.3702820837497711, |
| "mean": 6.481494347099215e-05, |
| "std": 0.03624306991696358, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_k.bias": { |
| "min": -4.7229533195495605, |
| "max": 5.8144097328186035, |
| "mean": 0.03798435255885124, |
| "std": 1.4144145250320435, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_v.weight": { |
| "min": -0.2220195233821869, |
| "max": 0.20613467693328857, |
| "mean": -7.503894448745996e-05, |
| "std": 0.04249141365289688, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_v.bias": { |
| "min": -0.07768063247203827, |
| "max": 0.051408518105745316, |
| "mean": -0.0009253580356016755, |
| "std": 0.01641588658094406, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_out.0.weight": { |
| "min": -0.3309858441352844, |
| "max": 0.3291884660720825, |
| "mean": -4.9612558541412e-06, |
| "std": 0.04279816150665283, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_out.0.bias": { |
| "min": -0.2853319048881531, |
| "max": 0.11173354089260101, |
| "mean": -0.001206716988235712, |
| "std": 0.04702756926417351, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.3.g": { |
| "min": 0.48654904961586, |
| "max": 0.88804692029953, |
| "mean": 0.7376827001571655, |
| "std": 0.03842971473932266, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.4.ff.0.0.weight": { |
| "min": -0.3613007962703705, |
| "max": 0.27439025044441223, |
| "mean": 5.118318586028181e-05, |
| "std": 0.04065314307808876, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.4.ff.0.0.bias": { |
| "min": -0.2479037493467331, |
| "max": 0.046517688781023026, |
| "mean": -0.039281267672777176, |
| "std": 0.023276478052139282, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.18.4.ff.2.weight": { |
| "min": -0.6269151568412781, |
| "max": 0.5976049900054932, |
| "mean": -6.191668217070401e-05, |
| "std": 0.053125977516174316, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.18.4.ff.2.bias": { |
| "min": -0.7105785608291626, |
| "max": 0.26612961292266846, |
| "mean": 0.0009194647427648306, |
| "std": 0.051263753324747086, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.0.weight": { |
| "min": -0.3433726131916046, |
| "max": 0.3034554719924927, |
| "mean": 2.0521497390291188e-07, |
| "std": 0.019139625132083893, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.19.1.g": { |
| "min": 0.3501395285129547, |
| "max": 0.783959686756134, |
| "mean": 0.6390355825424194, |
| "std": 0.049371764063835144, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_q.weight": { |
| "min": -0.20602361857891083, |
| "max": 0.20698852837085724, |
| "mean": -5.9928101109107956e-05, |
| "std": 0.037698496133089066, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_q.bias": { |
| "min": -0.25897642970085144, |
| "max": 0.268706738948822, |
| "mean": -0.00040520128095522523, |
| "std": 0.044660814106464386, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_k.weight": { |
| "min": -0.35453060269355774, |
| "max": 0.3229123651981354, |
| "mean": -7.312092748179566e-06, |
| "std": 0.03720676898956299, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_k.bias": { |
| "min": -5.267762184143066, |
| "max": 4.20961332321167, |
| "mean": -0.026448804885149002, |
| "std": 1.0076419115066528, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_v.weight": { |
| "min": -0.23904970288276672, |
| "max": 0.24397821724414825, |
| "mean": -2.552817386458628e-05, |
| "std": 0.04321575164794922, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_v.bias": { |
| "min": -0.06249221786856651, |
| "max": 0.05668818950653076, |
| "mean": 0.0003517880686558783, |
| "std": 0.01415390707552433, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_out.0.weight": { |
| "min": -0.43751028180122375, |
| "max": 0.3737626075744629, |
| "mean": 1.4619375178881455e-05, |
| "std": 0.04412780702114105, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_out.0.bias": { |
| "min": -0.0962304174900055, |
| "max": 0.1764947772026062, |
| "mean": -0.0006597821484319866, |
| "std": 0.03515012562274933, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.3.g": { |
| "min": 0.4218544661998749, |
| "max": 1.0707522630691528, |
| "mean": 0.7486886978149414, |
| "std": 0.04222184792160988, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.4.ff.0.0.weight": { |
| "min": -0.2660444378852844, |
| "max": 0.2971097230911255, |
| "mean": -7.88940378697589e-05, |
| "std": 0.04081380367279053, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.4.ff.0.0.bias": { |
| "min": -0.18505463004112244, |
| "max": 0.04312760382890701, |
| "mean": -0.03682396560907364, |
| "std": 0.025607850402593613, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.19.4.ff.2.weight": { |
| "min": -0.4577805697917938, |
| "max": 0.48729538917541504, |
| "mean": 4.396865551825613e-05, |
| "std": 0.05422099307179451, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.19.4.ff.2.bias": { |
| "min": -0.2866191267967224, |
| "max": 0.5523927807807922, |
| "mean": -0.0008822673698887229, |
| "std": 0.04786074161529541, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.0.weight": { |
| "min": -0.29266098141670227, |
| "max": 0.3227379322052002, |
| "mean": 6.034013495082036e-06, |
| "std": 0.01997271552681923, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.20.1.g": { |
| "min": 0.2912082076072693, |
| "max": 0.7611724734306335, |
| "mean": 0.6509549617767334, |
| "std": 0.05223819240927696, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_q.weight": { |
| "min": -0.2437622845172882, |
| "max": 0.2617740035057068, |
| "mean": -5.626710844808258e-06, |
| "std": 0.03961407393217087, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_q.bias": { |
| "min": -0.2678508758544922, |
| "max": 0.20037643611431122, |
| "mean": -0.0008778825285844505, |
| "std": 0.051807109266519547, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_k.weight": { |
| "min": -0.2725064158439636, |
| "max": 0.2540656328201294, |
| "mean": 5.306316325004445e-06, |
| "std": 0.03871078044176102, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_k.bias": { |
| "min": -12.980466842651367, |
| "max": 15.965588569641113, |
| "mean": 0.03327019512653351, |
| "std": 1.9910999536514282, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_v.weight": { |
| "min": -0.20688198506832123, |
| "max": 0.22597242891788483, |
| "mean": -7.254729280248284e-05, |
| "std": 0.04055875167250633, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_v.bias": { |
| "min": -0.069511778652668, |
| "max": 0.06321422755718231, |
| "mean": 0.00015925483603496104, |
| "std": 0.01475309394299984, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_out.0.weight": { |
| "min": -0.46553534269332886, |
| "max": 0.32018300890922546, |
| "mean": 1.9559764041332528e-05, |
| "std": 0.040594302117824554, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_out.0.bias": { |
| "min": -0.06415701657533646, |
| "max": 0.11569144576787949, |
| "mean": 0.0011994449887424707, |
| "std": 0.024716829881072044, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.3.g": { |
| "min": 0.37491846084594727, |
| "max": 0.9332267045974731, |
| "mean": 0.7511833310127258, |
| "std": 0.04030444473028183, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.4.ff.0.0.weight": { |
| "min": -0.2793797552585602, |
| "max": 0.2735174894332886, |
| "mean": -0.00016838237934280187, |
| "std": 0.04100488871335983, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.4.ff.0.0.bias": { |
| "min": -0.19866259396076202, |
| "max": 0.05138175189495087, |
| "mean": -0.03203893452882767, |
| "std": 0.025100193917751312, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.20.4.ff.2.weight": { |
| "min": -0.6591871976852417, |
| "max": 0.5361859798431396, |
| "mean": -5.0474118324927986e-05, |
| "std": 0.0528571642935276, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.20.4.ff.2.bias": { |
| "min": -0.19288860261440277, |
| "max": 0.582888662815094, |
| "mean": -0.0005087298923172057, |
| "std": 0.0411086231470108, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.0.weight": { |
| "min": -0.41760918498039246, |
| "max": 0.3719828724861145, |
| "mean": 6.52037670079153e-06, |
| "std": 0.02162792719900608, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.21.1.g": { |
| "min": 0.21464084088802338, |
| "max": 0.7477675080299377, |
| "mean": 0.6495819687843323, |
| "std": 0.054441265761852264, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_q.weight": { |
| "min": -0.20966503024101257, |
| "max": 0.1956944614648819, |
| "mean": 4.008584801340476e-05, |
| "std": 0.039459552615880966, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_q.bias": { |
| "min": -0.32997503876686096, |
| "max": 0.25995907187461853, |
| "mean": -0.0032368863467127085, |
| "std": 0.05632346495985985, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_k.weight": { |
| "min": -0.20606832206249237, |
| "max": 0.2548881471157074, |
| "mean": 5.397828499553725e-05, |
| "std": 0.03856222704052925, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_k.bias": { |
| "min": -6.250948905944824, |
| "max": 6.940567493438721, |
| "mean": 0.048394568264484406, |
| "std": 1.3862435817718506, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_v.weight": { |
| "min": -0.20990008115768433, |
| "max": 0.23062950372695923, |
| "mean": -4.797322617378086e-06, |
| "std": 0.04131775721907616, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_v.bias": { |
| "min": -0.043879762291908264, |
| "max": 0.03602854162454605, |
| "mean": -6.735368515364826e-06, |
| "std": 0.012802576646208763, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_out.0.weight": { |
| "min": -0.3975800573825836, |
| "max": 0.3450191617012024, |
| "mean": -5.543587758438662e-05, |
| "std": 0.04239463433623314, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_out.0.bias": { |
| "min": -0.055230122059583664, |
| "max": 0.06288789957761765, |
| "mean": 0.00035758066223934293, |
| "std": 0.018682915717363358, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.3.g": { |
| "min": 0.35092663764953613, |
| "max": 1.0465692281723022, |
| "mean": 0.7897400856018066, |
| "std": 0.04884057492017746, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.4.ff.0.0.weight": { |
| "min": -0.33373889327049255, |
| "max": 0.3863142132759094, |
| "mean": -0.00016909500118345022, |
| "std": 0.04149040952324867, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.4.ff.0.0.bias": { |
| "min": -0.15769430994987488, |
| "max": 0.059132885187864304, |
| "mean": -0.03183465823531151, |
| "std": 0.025120330974459648, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.21.4.ff.2.weight": { |
| "min": -0.6965411305427551, |
| "max": 0.46967917680740356, |
| "mean": -8.504216384608299e-05, |
| "std": 0.05180637910962105, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.21.4.ff.2.bias": { |
| "min": -0.24813847243785858, |
| "max": 0.3292423188686371, |
| "mean": -0.00026213712408207357, |
| "std": 0.041475165635347366, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.0.weight": { |
| "min": -0.2870347499847412, |
| "max": 0.3504159152507782, |
| "mean": -2.7635057904262794e-06, |
| "std": 0.024241114035248756, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.22.1.g": { |
| "min": 0.1968143880367279, |
| "max": 0.7801634073257446, |
| "mean": 0.67032390832901, |
| "std": 0.058765437453985214, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_q.weight": { |
| "min": -0.22936369478702545, |
| "max": 0.23155838251113892, |
| "mean": -2.0868072169832885e-05, |
| "std": 0.0404399111866951, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_q.bias": { |
| "min": -0.22028712928295135, |
| "max": 0.2412400096654892, |
| "mean": 0.0007798401638865471, |
| "std": 0.05588255077600479, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_k.weight": { |
| "min": -0.21693190932273865, |
| "max": 0.2265695184469223, |
| "mean": -7.217879465315491e-05, |
| "std": 0.039374105632305145, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_k.bias": { |
| "min": -8.916163444519043, |
| "max": 9.079217910766602, |
| "mean": -0.0012825923040509224, |
| "std": 1.8500556945800781, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_v.weight": { |
| "min": -0.269673228263855, |
| "max": 0.2592774033546448, |
| "mean": 4.366856592241675e-05, |
| "std": 0.038410674780607224, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_v.bias": { |
| "min": -0.05804191157221794, |
| "max": 0.05804998800158501, |
| "mean": 0.0003545111685525626, |
| "std": 0.014721807092428207, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_out.0.weight": { |
| "min": -0.2641296982765198, |
| "max": 0.2882002294063568, |
| "mean": -6.158516043797135e-05, |
| "std": 0.039077457040548325, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_out.0.bias": { |
| "min": -0.044157613068819046, |
| "max": 0.03739722818136215, |
| "mean": -9.842761210165918e-05, |
| "std": 0.013352800160646439, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.3.g": { |
| "min": 0.3394981026649475, |
| "max": 1.0940546989440918, |
| "mean": 0.8640274405479431, |
| "std": 0.06395779550075531, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.4.ff.0.0.weight": { |
| "min": -0.42318135499954224, |
| "max": 0.41912782192230225, |
| "mean": 0.0003136250888928771, |
| "std": 0.04351290315389633, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.4.ff.0.0.bias": { |
| "min": -0.215034618973732, |
| "max": 0.17091527581214905, |
| "mean": -0.02945549227297306, |
| "std": 0.031898606568574905, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.22.4.ff.2.weight": { |
| "min": -0.5991082191467285, |
| "max": 0.5603575706481934, |
| "mean": -0.0001479926722822711, |
| "std": 0.05346138775348663, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.22.4.ff.2.bias": { |
| "min": -0.17912201583385468, |
| "max": 0.3778008818626404, |
| "mean": 0.0013520645443350077, |
| "std": 0.037332892417907715, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.0.weight": { |
| "min": -0.3943796157836914, |
| "max": 0.3688676655292511, |
| "mean": 3.761224070331082e-05, |
| "std": 0.028617393225431442, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.23.1.g": { |
| "min": 0.2908812463283539, |
| "max": 0.8286238312721252, |
| "mean": 0.7055914402008057, |
| "std": 0.06791043281555176, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_q.weight": { |
| "min": -0.9266071319580078, |
| "max": 1.0270264148712158, |
| "mean": -2.7955527912126854e-05, |
| "std": 0.0476437471807003, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_q.bias": { |
| "min": -0.8803294897079468, |
| "max": 0.8167775273323059, |
| "mean": -0.0002962773141916841, |
| "std": 0.09563106298446655, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_k.weight": { |
| "min": -0.27031898498535156, |
| "max": 0.24110636115074158, |
| "mean": -2.252469494123943e-05, |
| "std": 0.03894982486963272, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_k.bias": { |
| "min": -23.770000457763672, |
| "max": 22.87746810913086, |
| "mean": -0.09194529056549072, |
| "std": 4.074869632720947, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_v.weight": { |
| "min": -0.22796331346035004, |
| "max": 0.2458551675081253, |
| "mean": -2.5422079488635063e-05, |
| "std": 0.038641415536403656, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_v.bias": { |
| "min": -0.060239437967538834, |
| "max": 0.045478228479623795, |
| "mean": -0.00013640533143188804, |
| "std": 0.01469514612108469, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_out.0.weight": { |
| "min": -0.33809611201286316, |
| "max": 0.3752952516078949, |
| "mean": 7.530758921348024e-06, |
| "std": 0.040820345282554626, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_out.0.bias": { |
| "min": -0.04625128582119942, |
| "max": 0.1955953687429428, |
| "mean": 0.0002734389272518456, |
| "std": 0.013558450154960155, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.3.g": { |
| "min": 0.37381020188331604, |
| "max": 1.1318634748458862, |
| "mean": 0.8903213143348694, |
| "std": 0.0641312375664711, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.4.ff.0.0.weight": { |
| "min": -0.447549432516098, |
| "max": 0.5427570939064026, |
| "mean": 2.5110648493864574e-05, |
| "std": 0.04558061435818672, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.4.ff.0.0.bias": { |
| "min": -0.22403661906719208, |
| "max": 0.08747347444295883, |
| "mean": -0.03202786669135094, |
| "std": 0.037772756069898605, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.23.4.ff.2.weight": { |
| "min": -0.7269205451011658, |
| "max": 0.6894555687904358, |
| "mean": 3.6393928894540295e-05, |
| "std": 0.05179436132311821, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.23.4.ff.2.bias": { |
| "min": -0.1745767593383789, |
| "max": 0.21847710013389587, |
| "mean": 3.5673321690410376e-05, |
| "std": 0.03179144486784935, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.0.weight": { |
| "min": -0.339706152677536, |
| "max": 0.37326323986053467, |
| "mean": 4.3032145185861737e-05, |
| "std": 0.03413531556725502, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.24.1.g": { |
| "min": 0.3174583911895752, |
| "max": 1.2890191078186035, |
| "mean": 0.601619303226471, |
| "std": 0.08366930484771729, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_q.weight": { |
| "min": -0.2832256853580475, |
| "max": 0.26046571135520935, |
| "mean": -2.993364205394755e-06, |
| "std": 0.03598063439130783, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_q.bias": { |
| "min": -0.2360483556985855, |
| "max": 0.20603413879871368, |
| "mean": 0.00023948654416017234, |
| "std": 0.05606625974178314, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_k.weight": { |
| "min": -0.4355963468551636, |
| "max": 0.32496193051338196, |
| "mean": 2.4223818400059827e-05, |
| "std": 0.034124087542295456, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_k.bias": { |
| "min": -5.552776336669922, |
| "max": 7.322168350219727, |
| "mean": -0.00738462433218956, |
| "std": 0.7001185417175293, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_v.weight": { |
| "min": -0.34443002939224243, |
| "max": 0.3632832467556, |
| "mean": 0.00010313428356312215, |
| "std": 0.047836337238550186, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_v.bias": { |
| "min": -0.07385826855897903, |
| "max": 0.06043381989002228, |
| "mean": 0.0009369200561195612, |
| "std": 0.014941117726266384, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_out.0.weight": { |
| "min": -0.2565152943134308, |
| "max": 0.28712597489356995, |
| "mean": 4.846529918722808e-06, |
| "std": 0.041564520448446274, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_out.0.bias": { |
| "min": -0.05538159981369972, |
| "max": 0.06288077682256699, |
| "mean": 0.00012733059702441096, |
| "std": 0.007154808379709721, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.3.g": { |
| "min": 0.49408578872680664, |
| "max": 1.2223646640777588, |
| "mean": 1.013702154159546, |
| "std": 0.11764581501483917, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.4.ff.0.0.weight": { |
| "min": -1.0940581560134888, |
| "max": 1.0475841760635376, |
| "mean": -4.863579306402244e-05, |
| "std": 0.0524178184568882, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.4.ff.0.0.bias": { |
| "min": -0.22388966381549835, |
| "max": 0.1732550710439682, |
| "mean": -0.027240199968218803, |
| "std": 0.03634064644575119, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.24.4.ff.2.weight": { |
| "min": -0.8849446177482605, |
| "max": 0.9234321713447571, |
| "mean": -0.0001459874474676326, |
| "std": 0.05329861491918564, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.24.4.ff.2.bias": { |
| "min": -0.17124590277671814, |
| "max": 0.38005468249320984, |
| "mean": 0.0033688729163259268, |
| "std": 0.03990017995238304, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.0.weight": { |
| "min": -0.7780460119247437, |
| "max": 0.722984254360199, |
| "mean": 1.8001555872615427e-05, |
| "std": 0.046154171228408813, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.25.1.g": { |
| "min": 0.33841073513031006, |
| "max": 1.4301798343658447, |
| "mean": 0.9487167596817017, |
| "std": 0.20710234344005585, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_q.weight": { |
| "min": -1.7458388805389404, |
| "max": 1.704530119895935, |
| "mean": 0.000226972799282521, |
| "std": 0.15870548784732819, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_q.bias": { |
| "min": -1.2008311748504639, |
| "max": 1.1021909713745117, |
| "mean": -0.009556617587804794, |
| "std": 0.20411409437656403, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_k.weight": { |
| "min": -0.4210456311702728, |
| "max": 0.4282980263233185, |
| "mean": 6.39081554254517e-05, |
| "std": 0.04802015796303749, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_k.bias": { |
| "min": -19.769929885864258, |
| "max": 19.564817428588867, |
| "mean": -0.24858255684375763, |
| "std": 4.782279968261719, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_v.weight": { |
| "min": -0.3241115212440491, |
| "max": 0.43888670206069946, |
| "mean": -1.1728005119948648e-05, |
| "std": 0.04616701602935791, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_v.bias": { |
| "min": -0.03380877524614334, |
| "max": 0.036888398230075836, |
| "mean": 0.0006396375247277319, |
| "std": 0.012913818471133709, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_out.0.weight": { |
| "min": -0.7038182020187378, |
| "max": 0.6691953539848328, |
| "mean": 4.2681567720137537e-05, |
| "std": 0.05789203941822052, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_out.0.bias": { |
| "min": -0.07242082059383392, |
| "max": 0.06784311681985855, |
| "mean": -0.000134931382490322, |
| "std": 0.01290101557970047, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.3.g": { |
| "min": 0.3802341818809509, |
| "max": 1.39493727684021, |
| "mean": 1.0668972730636597, |
| "std": 0.21994373202323914, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.4.ff.0.0.weight": { |
| "min": -0.6166523694992065, |
| "max": 0.7187345623970032, |
| "mean": 0.0001129009760916233, |
| "std": 0.0580277256667614, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.4.ff.0.0.bias": { |
| "min": -0.21905651688575745, |
| "max": 0.22523820400238037, |
| "mean": 0.006192180328071117, |
| "std": 0.049731798470020294, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.25.4.ff.2.weight": { |
| "min": -0.6298801898956299, |
| "max": 0.8897404074668884, |
| "mean": 1.237633296113927e-05, |
| "std": 0.023545268923044205, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.25.4.ff.2.bias": { |
| "min": -0.5074089765548706, |
| "max": 0.4742584228515625, |
| "mean": -0.0030243899673223495, |
| "std": 0.06931118667125702, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.norm_out.g": { |
| "min": 0.5381640791893005, |
| "max": 1.182090163230896, |
| "mean": 0.7830706238746643, |
| "std": 0.09912356734275818, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.proj_out.weight": { |
| "min": -0.2673421800136566, |
| "max": 0.21319416165351868, |
| "mean": -0.0002236190193798393, |
| "std": 0.05400572717189789, |
| "sparsity": 0.0, |
| "shape": [ |
| 100, |
| 1024 |
| ] |
| }, |
| "transformer.proj_out.bias": { |
| "min": -0.23863200843334198, |
| "max": 0.014863962307572365, |
| "mean": -0.04393288493156433, |
| "std": 0.03432033956050873, |
| "sparsity": 0.0, |
| "shape": [ |
| 100 |
| ] |
| } |
| } |
| } |