| { |
| "layer_types": { |
| "transformer": 391 |
| }, |
| "parameter_counts": { |
| "transformer.time_embed.time_mlp.0.weight": 262144, |
| "transformer.time_embed.time_mlp.0.bias": 1024, |
| "transformer.time_embed.time_mlp.2.weight": 1048576, |
| "transformer.time_embed.time_mlp.2.bias": 1024, |
| "transformer.text_embed.text_embed.weight": 254600, |
| "transformer.input_embed.proj.weight": 307200, |
| "transformer.input_embed.proj.bias": 1024, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, |
| "transformer.layers.0.1.g": 1024, |
| "transformer.layers.0.2.to_q.weight": 1048576, |
| "transformer.layers.0.2.to_q.bias": 1024, |
| "transformer.layers.0.2.to_k.weight": 1048576, |
| "transformer.layers.0.2.to_k.bias": 1024, |
| "transformer.layers.0.2.to_v.weight": 1048576, |
| "transformer.layers.0.2.to_v.bias": 1024, |
| "transformer.layers.0.2.to_out.0.weight": 1048576, |
| "transformer.layers.0.2.to_out.0.bias": 1024, |
| "transformer.layers.0.3.g": 1024, |
| "transformer.layers.0.4.ff.0.0.weight": 4194304, |
| "transformer.layers.0.4.ff.0.0.bias": 4096, |
| "transformer.layers.0.4.ff.2.weight": 4194304, |
| "transformer.layers.0.4.ff.2.bias": 1024, |
| "transformer.layers.1.1.g": 1024, |
| "transformer.layers.1.2.to_q.weight": 1048576, |
| "transformer.layers.1.2.to_q.bias": 1024, |
| "transformer.layers.1.2.to_k.weight": 1048576, |
| "transformer.layers.1.2.to_k.bias": 1024, |
| "transformer.layers.1.2.to_v.weight": 1048576, |
| "transformer.layers.1.2.to_v.bias": 1024, |
| "transformer.layers.1.2.to_out.0.weight": 1048576, |
| "transformer.layers.1.2.to_out.0.bias": 1024, |
| "transformer.layers.1.3.g": 1024, |
| "transformer.layers.1.4.ff.0.0.weight": 4194304, |
| "transformer.layers.1.4.ff.0.0.bias": 4096, |
| "transformer.layers.1.4.ff.2.weight": 4194304, |
| "transformer.layers.1.4.ff.2.bias": 1024, |
| "transformer.layers.2.1.g": 1024, |
| "transformer.layers.2.2.to_q.weight": 1048576, |
| "transformer.layers.2.2.to_q.bias": 1024, |
| "transformer.layers.2.2.to_k.weight": 1048576, |
| "transformer.layers.2.2.to_k.bias": 1024, |
| "transformer.layers.2.2.to_v.weight": 1048576, |
| "transformer.layers.2.2.to_v.bias": 1024, |
| "transformer.layers.2.2.to_out.0.weight": 1048576, |
| "transformer.layers.2.2.to_out.0.bias": 1024, |
| "transformer.layers.2.3.g": 1024, |
| "transformer.layers.2.4.ff.0.0.weight": 4194304, |
| "transformer.layers.2.4.ff.0.0.bias": 4096, |
| "transformer.layers.2.4.ff.2.weight": 4194304, |
| "transformer.layers.2.4.ff.2.bias": 1024, |
| "transformer.layers.3.1.g": 1024, |
| "transformer.layers.3.2.to_q.weight": 1048576, |
| "transformer.layers.3.2.to_q.bias": 1024, |
| "transformer.layers.3.2.to_k.weight": 1048576, |
| "transformer.layers.3.2.to_k.bias": 1024, |
| "transformer.layers.3.2.to_v.weight": 1048576, |
| "transformer.layers.3.2.to_v.bias": 1024, |
| "transformer.layers.3.2.to_out.0.weight": 1048576, |
| "transformer.layers.3.2.to_out.0.bias": 1024, |
| "transformer.layers.3.3.g": 1024, |
| "transformer.layers.3.4.ff.0.0.weight": 4194304, |
| "transformer.layers.3.4.ff.0.0.bias": 4096, |
| "transformer.layers.3.4.ff.2.weight": 4194304, |
| "transformer.layers.3.4.ff.2.bias": 1024, |
| "transformer.layers.4.1.g": 1024, |
| "transformer.layers.4.2.to_q.weight": 1048576, |
| "transformer.layers.4.2.to_q.bias": 1024, |
| "transformer.layers.4.2.to_k.weight": 1048576, |
| "transformer.layers.4.2.to_k.bias": 1024, |
| "transformer.layers.4.2.to_v.weight": 1048576, |
| "transformer.layers.4.2.to_v.bias": 1024, |
| "transformer.layers.4.2.to_out.0.weight": 1048576, |
| "transformer.layers.4.2.to_out.0.bias": 1024, |
| "transformer.layers.4.3.g": 1024, |
| "transformer.layers.4.4.ff.0.0.weight": 4194304, |
| "transformer.layers.4.4.ff.0.0.bias": 4096, |
| "transformer.layers.4.4.ff.2.weight": 4194304, |
| "transformer.layers.4.4.ff.2.bias": 1024, |
| "transformer.layers.5.1.g": 1024, |
| "transformer.layers.5.2.to_q.weight": 1048576, |
| "transformer.layers.5.2.to_q.bias": 1024, |
| "transformer.layers.5.2.to_k.weight": 1048576, |
| "transformer.layers.5.2.to_k.bias": 1024, |
| "transformer.layers.5.2.to_v.weight": 1048576, |
| "transformer.layers.5.2.to_v.bias": 1024, |
| "transformer.layers.5.2.to_out.0.weight": 1048576, |
| "transformer.layers.5.2.to_out.0.bias": 1024, |
| "transformer.layers.5.3.g": 1024, |
| "transformer.layers.5.4.ff.0.0.weight": 4194304, |
| "transformer.layers.5.4.ff.0.0.bias": 4096, |
| "transformer.layers.5.4.ff.2.weight": 4194304, |
| "transformer.layers.5.4.ff.2.bias": 1024, |
| "transformer.layers.6.1.g": 1024, |
| "transformer.layers.6.2.to_q.weight": 1048576, |
| "transformer.layers.6.2.to_q.bias": 1024, |
| "transformer.layers.6.2.to_k.weight": 1048576, |
| "transformer.layers.6.2.to_k.bias": 1024, |
| "transformer.layers.6.2.to_v.weight": 1048576, |
| "transformer.layers.6.2.to_v.bias": 1024, |
| "transformer.layers.6.2.to_out.0.weight": 1048576, |
| "transformer.layers.6.2.to_out.0.bias": 1024, |
| "transformer.layers.6.3.g": 1024, |
| "transformer.layers.6.4.ff.0.0.weight": 4194304, |
| "transformer.layers.6.4.ff.0.0.bias": 4096, |
| "transformer.layers.6.4.ff.2.weight": 4194304, |
| "transformer.layers.6.4.ff.2.bias": 1024, |
| "transformer.layers.7.1.g": 1024, |
| "transformer.layers.7.2.to_q.weight": 1048576, |
| "transformer.layers.7.2.to_q.bias": 1024, |
| "transformer.layers.7.2.to_k.weight": 1048576, |
| "transformer.layers.7.2.to_k.bias": 1024, |
| "transformer.layers.7.2.to_v.weight": 1048576, |
| "transformer.layers.7.2.to_v.bias": 1024, |
| "transformer.layers.7.2.to_out.0.weight": 1048576, |
| "transformer.layers.7.2.to_out.0.bias": 1024, |
| "transformer.layers.7.3.g": 1024, |
| "transformer.layers.7.4.ff.0.0.weight": 4194304, |
| "transformer.layers.7.4.ff.0.0.bias": 4096, |
| "transformer.layers.7.4.ff.2.weight": 4194304, |
| "transformer.layers.7.4.ff.2.bias": 1024, |
| "transformer.layers.8.1.g": 1024, |
| "transformer.layers.8.2.to_q.weight": 1048576, |
| "transformer.layers.8.2.to_q.bias": 1024, |
| "transformer.layers.8.2.to_k.weight": 1048576, |
| "transformer.layers.8.2.to_k.bias": 1024, |
| "transformer.layers.8.2.to_v.weight": 1048576, |
| "transformer.layers.8.2.to_v.bias": 1024, |
| "transformer.layers.8.2.to_out.0.weight": 1048576, |
| "transformer.layers.8.2.to_out.0.bias": 1024, |
| "transformer.layers.8.3.g": 1024, |
| "transformer.layers.8.4.ff.0.0.weight": 4194304, |
| "transformer.layers.8.4.ff.0.0.bias": 4096, |
| "transformer.layers.8.4.ff.2.weight": 4194304, |
| "transformer.layers.8.4.ff.2.bias": 1024, |
| "transformer.layers.9.1.g": 1024, |
| "transformer.layers.9.2.to_q.weight": 1048576, |
| "transformer.layers.9.2.to_q.bias": 1024, |
| "transformer.layers.9.2.to_k.weight": 1048576, |
| "transformer.layers.9.2.to_k.bias": 1024, |
| "transformer.layers.9.2.to_v.weight": 1048576, |
| "transformer.layers.9.2.to_v.bias": 1024, |
| "transformer.layers.9.2.to_out.0.weight": 1048576, |
| "transformer.layers.9.2.to_out.0.bias": 1024, |
| "transformer.layers.9.3.g": 1024, |
| "transformer.layers.9.4.ff.0.0.weight": 4194304, |
| "transformer.layers.9.4.ff.0.0.bias": 4096, |
| "transformer.layers.9.4.ff.2.weight": 4194304, |
| "transformer.layers.9.4.ff.2.bias": 1024, |
| "transformer.layers.10.1.g": 1024, |
| "transformer.layers.10.2.to_q.weight": 1048576, |
| "transformer.layers.10.2.to_q.bias": 1024, |
| "transformer.layers.10.2.to_k.weight": 1048576, |
| "transformer.layers.10.2.to_k.bias": 1024, |
| "transformer.layers.10.2.to_v.weight": 1048576, |
| "transformer.layers.10.2.to_v.bias": 1024, |
| "transformer.layers.10.2.to_out.0.weight": 1048576, |
| "transformer.layers.10.2.to_out.0.bias": 1024, |
| "transformer.layers.10.3.g": 1024, |
| "transformer.layers.10.4.ff.0.0.weight": 4194304, |
| "transformer.layers.10.4.ff.0.0.bias": 4096, |
| "transformer.layers.10.4.ff.2.weight": 4194304, |
| "transformer.layers.10.4.ff.2.bias": 1024, |
| "transformer.layers.11.1.g": 1024, |
| "transformer.layers.11.2.to_q.weight": 1048576, |
| "transformer.layers.11.2.to_q.bias": 1024, |
| "transformer.layers.11.2.to_k.weight": 1048576, |
| "transformer.layers.11.2.to_k.bias": 1024, |
| "transformer.layers.11.2.to_v.weight": 1048576, |
| "transformer.layers.11.2.to_v.bias": 1024, |
| "transformer.layers.11.2.to_out.0.weight": 1048576, |
| "transformer.layers.11.2.to_out.0.bias": 1024, |
| "transformer.layers.11.3.g": 1024, |
| "transformer.layers.11.4.ff.0.0.weight": 4194304, |
| "transformer.layers.11.4.ff.0.0.bias": 4096, |
| "transformer.layers.11.4.ff.2.weight": 4194304, |
| "transformer.layers.11.4.ff.2.bias": 1024, |
| "transformer.layers.12.1.g": 1024, |
| "transformer.layers.12.2.to_q.weight": 1048576, |
| "transformer.layers.12.2.to_q.bias": 1024, |
| "transformer.layers.12.2.to_k.weight": 1048576, |
| "transformer.layers.12.2.to_k.bias": 1024, |
| "transformer.layers.12.2.to_v.weight": 1048576, |
| "transformer.layers.12.2.to_v.bias": 1024, |
| "transformer.layers.12.2.to_out.0.weight": 1048576, |
| "transformer.layers.12.2.to_out.0.bias": 1024, |
| "transformer.layers.12.3.g": 1024, |
| "transformer.layers.12.4.ff.0.0.weight": 4194304, |
| "transformer.layers.12.4.ff.0.0.bias": 4096, |
| "transformer.layers.12.4.ff.2.weight": 4194304, |
| "transformer.layers.12.4.ff.2.bias": 1024, |
| "transformer.layers.13.0.weight": 2097152, |
| "transformer.layers.13.1.g": 1024, |
| "transformer.layers.13.2.to_q.weight": 1048576, |
| "transformer.layers.13.2.to_q.bias": 1024, |
| "transformer.layers.13.2.to_k.weight": 1048576, |
| "transformer.layers.13.2.to_k.bias": 1024, |
| "transformer.layers.13.2.to_v.weight": 1048576, |
| "transformer.layers.13.2.to_v.bias": 1024, |
| "transformer.layers.13.2.to_out.0.weight": 1048576, |
| "transformer.layers.13.2.to_out.0.bias": 1024, |
| "transformer.layers.13.3.g": 1024, |
| "transformer.layers.13.4.ff.0.0.weight": 4194304, |
| "transformer.layers.13.4.ff.0.0.bias": 4096, |
| "transformer.layers.13.4.ff.2.weight": 4194304, |
| "transformer.layers.13.4.ff.2.bias": 1024, |
| "transformer.layers.14.0.weight": 2097152, |
| "transformer.layers.14.1.g": 1024, |
| "transformer.layers.14.2.to_q.weight": 1048576, |
| "transformer.layers.14.2.to_q.bias": 1024, |
| "transformer.layers.14.2.to_k.weight": 1048576, |
| "transformer.layers.14.2.to_k.bias": 1024, |
| "transformer.layers.14.2.to_v.weight": 1048576, |
| "transformer.layers.14.2.to_v.bias": 1024, |
| "transformer.layers.14.2.to_out.0.weight": 1048576, |
| "transformer.layers.14.2.to_out.0.bias": 1024, |
| "transformer.layers.14.3.g": 1024, |
| "transformer.layers.14.4.ff.0.0.weight": 4194304, |
| "transformer.layers.14.4.ff.0.0.bias": 4096, |
| "transformer.layers.14.4.ff.2.weight": 4194304, |
| "transformer.layers.14.4.ff.2.bias": 1024, |
| "transformer.layers.15.0.weight": 2097152, |
| "transformer.layers.15.1.g": 1024, |
| "transformer.layers.15.2.to_q.weight": 1048576, |
| "transformer.layers.15.2.to_q.bias": 1024, |
| "transformer.layers.15.2.to_k.weight": 1048576, |
| "transformer.layers.15.2.to_k.bias": 1024, |
| "transformer.layers.15.2.to_v.weight": 1048576, |
| "transformer.layers.15.2.to_v.bias": 1024, |
| "transformer.layers.15.2.to_out.0.weight": 1048576, |
| "transformer.layers.15.2.to_out.0.bias": 1024, |
| "transformer.layers.15.3.g": 1024, |
| "transformer.layers.15.4.ff.0.0.weight": 4194304, |
| "transformer.layers.15.4.ff.0.0.bias": 4096, |
| "transformer.layers.15.4.ff.2.weight": 4194304, |
| "transformer.layers.15.4.ff.2.bias": 1024, |
| "transformer.layers.16.0.weight": 2097152, |
| "transformer.layers.16.1.g": 1024, |
| "transformer.layers.16.2.to_q.weight": 1048576, |
| "transformer.layers.16.2.to_q.bias": 1024, |
| "transformer.layers.16.2.to_k.weight": 1048576, |
| "transformer.layers.16.2.to_k.bias": 1024, |
| "transformer.layers.16.2.to_v.weight": 1048576, |
| "transformer.layers.16.2.to_v.bias": 1024, |
| "transformer.layers.16.2.to_out.0.weight": 1048576, |
| "transformer.layers.16.2.to_out.0.bias": 1024, |
| "transformer.layers.16.3.g": 1024, |
| "transformer.layers.16.4.ff.0.0.weight": 4194304, |
| "transformer.layers.16.4.ff.0.0.bias": 4096, |
| "transformer.layers.16.4.ff.2.weight": 4194304, |
| "transformer.layers.16.4.ff.2.bias": 1024, |
| "transformer.layers.17.0.weight": 2097152, |
| "transformer.layers.17.1.g": 1024, |
| "transformer.layers.17.2.to_q.weight": 1048576, |
| "transformer.layers.17.2.to_q.bias": 1024, |
| "transformer.layers.17.2.to_k.weight": 1048576, |
| "transformer.layers.17.2.to_k.bias": 1024, |
| "transformer.layers.17.2.to_v.weight": 1048576, |
| "transformer.layers.17.2.to_v.bias": 1024, |
| "transformer.layers.17.2.to_out.0.weight": 1048576, |
| "transformer.layers.17.2.to_out.0.bias": 1024, |
| "transformer.layers.17.3.g": 1024, |
| "transformer.layers.17.4.ff.0.0.weight": 4194304, |
| "transformer.layers.17.4.ff.0.0.bias": 4096, |
| "transformer.layers.17.4.ff.2.weight": 4194304, |
| "transformer.layers.17.4.ff.2.bias": 1024, |
| "transformer.layers.18.0.weight": 2097152, |
| "transformer.layers.18.1.g": 1024, |
| "transformer.layers.18.2.to_q.weight": 1048576, |
| "transformer.layers.18.2.to_q.bias": 1024, |
| "transformer.layers.18.2.to_k.weight": 1048576, |
| "transformer.layers.18.2.to_k.bias": 1024, |
| "transformer.layers.18.2.to_v.weight": 1048576, |
| "transformer.layers.18.2.to_v.bias": 1024, |
| "transformer.layers.18.2.to_out.0.weight": 1048576, |
| "transformer.layers.18.2.to_out.0.bias": 1024, |
| "transformer.layers.18.3.g": 1024, |
| "transformer.layers.18.4.ff.0.0.weight": 4194304, |
| "transformer.layers.18.4.ff.0.0.bias": 4096, |
| "transformer.layers.18.4.ff.2.weight": 4194304, |
| "transformer.layers.18.4.ff.2.bias": 1024, |
| "transformer.layers.19.0.weight": 2097152, |
| "transformer.layers.19.1.g": 1024, |
| "transformer.layers.19.2.to_q.weight": 1048576, |
| "transformer.layers.19.2.to_q.bias": 1024, |
| "transformer.layers.19.2.to_k.weight": 1048576, |
| "transformer.layers.19.2.to_k.bias": 1024, |
| "transformer.layers.19.2.to_v.weight": 1048576, |
| "transformer.layers.19.2.to_v.bias": 1024, |
| "transformer.layers.19.2.to_out.0.weight": 1048576, |
| "transformer.layers.19.2.to_out.0.bias": 1024, |
| "transformer.layers.19.3.g": 1024, |
| "transformer.layers.19.4.ff.0.0.weight": 4194304, |
| "transformer.layers.19.4.ff.0.0.bias": 4096, |
| "transformer.layers.19.4.ff.2.weight": 4194304, |
| "transformer.layers.19.4.ff.2.bias": 1024, |
| "transformer.layers.20.0.weight": 2097152, |
| "transformer.layers.20.1.g": 1024, |
| "transformer.layers.20.2.to_q.weight": 1048576, |
| "transformer.layers.20.2.to_q.bias": 1024, |
| "transformer.layers.20.2.to_k.weight": 1048576, |
| "transformer.layers.20.2.to_k.bias": 1024, |
| "transformer.layers.20.2.to_v.weight": 1048576, |
| "transformer.layers.20.2.to_v.bias": 1024, |
| "transformer.layers.20.2.to_out.0.weight": 1048576, |
| "transformer.layers.20.2.to_out.0.bias": 1024, |
| "transformer.layers.20.3.g": 1024, |
| "transformer.layers.20.4.ff.0.0.weight": 4194304, |
| "transformer.layers.20.4.ff.0.0.bias": 4096, |
| "transformer.layers.20.4.ff.2.weight": 4194304, |
| "transformer.layers.20.4.ff.2.bias": 1024, |
| "transformer.layers.21.0.weight": 2097152, |
| "transformer.layers.21.1.g": 1024, |
| "transformer.layers.21.2.to_q.weight": 1048576, |
| "transformer.layers.21.2.to_q.bias": 1024, |
| "transformer.layers.21.2.to_k.weight": 1048576, |
| "transformer.layers.21.2.to_k.bias": 1024, |
| "transformer.layers.21.2.to_v.weight": 1048576, |
| "transformer.layers.21.2.to_v.bias": 1024, |
| "transformer.layers.21.2.to_out.0.weight": 1048576, |
| "transformer.layers.21.2.to_out.0.bias": 1024, |
| "transformer.layers.21.3.g": 1024, |
| "transformer.layers.21.4.ff.0.0.weight": 4194304, |
| "transformer.layers.21.4.ff.0.0.bias": 4096, |
| "transformer.layers.21.4.ff.2.weight": 4194304, |
| "transformer.layers.21.4.ff.2.bias": 1024, |
| "transformer.layers.22.0.weight": 2097152, |
| "transformer.layers.22.1.g": 1024, |
| "transformer.layers.22.2.to_q.weight": 1048576, |
| "transformer.layers.22.2.to_q.bias": 1024, |
| "transformer.layers.22.2.to_k.weight": 1048576, |
| "transformer.layers.22.2.to_k.bias": 1024, |
| "transformer.layers.22.2.to_v.weight": 1048576, |
| "transformer.layers.22.2.to_v.bias": 1024, |
| "transformer.layers.22.2.to_out.0.weight": 1048576, |
| "transformer.layers.22.2.to_out.0.bias": 1024, |
| "transformer.layers.22.3.g": 1024, |
| "transformer.layers.22.4.ff.0.0.weight": 4194304, |
| "transformer.layers.22.4.ff.0.0.bias": 4096, |
| "transformer.layers.22.4.ff.2.weight": 4194304, |
| "transformer.layers.22.4.ff.2.bias": 1024, |
| "transformer.layers.23.0.weight": 2097152, |
| "transformer.layers.23.1.g": 1024, |
| "transformer.layers.23.2.to_q.weight": 1048576, |
| "transformer.layers.23.2.to_q.bias": 1024, |
| "transformer.layers.23.2.to_k.weight": 1048576, |
| "transformer.layers.23.2.to_k.bias": 1024, |
| "transformer.layers.23.2.to_v.weight": 1048576, |
| "transformer.layers.23.2.to_v.bias": 1024, |
| "transformer.layers.23.2.to_out.0.weight": 1048576, |
| "transformer.layers.23.2.to_out.0.bias": 1024, |
| "transformer.layers.23.3.g": 1024, |
| "transformer.layers.23.4.ff.0.0.weight": 4194304, |
| "transformer.layers.23.4.ff.0.0.bias": 4096, |
| "transformer.layers.23.4.ff.2.weight": 4194304, |
| "transformer.layers.23.4.ff.2.bias": 1024, |
| "transformer.layers.24.0.weight": 2097152, |
| "transformer.layers.24.1.g": 1024, |
| "transformer.layers.24.2.to_q.weight": 1048576, |
| "transformer.layers.24.2.to_q.bias": 1024, |
| "transformer.layers.24.2.to_k.weight": 1048576, |
| "transformer.layers.24.2.to_k.bias": 1024, |
| "transformer.layers.24.2.to_v.weight": 1048576, |
| "transformer.layers.24.2.to_v.bias": 1024, |
| "transformer.layers.24.2.to_out.0.weight": 1048576, |
| "transformer.layers.24.2.to_out.0.bias": 1024, |
| "transformer.layers.24.3.g": 1024, |
| "transformer.layers.24.4.ff.0.0.weight": 4194304, |
| "transformer.layers.24.4.ff.0.0.bias": 4096, |
| "transformer.layers.24.4.ff.2.weight": 4194304, |
| "transformer.layers.24.4.ff.2.bias": 1024, |
| "transformer.layers.25.0.weight": 2097152, |
| "transformer.layers.25.1.g": 1024, |
| "transformer.layers.25.2.to_q.weight": 1048576, |
| "transformer.layers.25.2.to_q.bias": 1024, |
| "transformer.layers.25.2.to_k.weight": 1048576, |
| "transformer.layers.25.2.to_k.bias": 1024, |
| "transformer.layers.25.2.to_v.weight": 1048576, |
| "transformer.layers.25.2.to_v.bias": 1024, |
| "transformer.layers.25.2.to_out.0.weight": 1048576, |
| "transformer.layers.25.2.to_out.0.bias": 1024, |
| "transformer.layers.25.3.g": 1024, |
| "transformer.layers.25.4.ff.0.0.weight": 4194304, |
| "transformer.layers.25.4.ff.0.0.bias": 4096, |
| "transformer.layers.25.4.ff.2.weight": 4194304, |
| "transformer.layers.25.4.ff.2.bias": 1024, |
| "transformer.norm_out.g": 1024, |
| "transformer.proj_out.weight": 102400, |
| "transformer.proj_out.bias": 100 |
| }, |
| "important_layers": [ |
| "transformer.time_embed.time_mlp.0.weight", |
| "transformer.time_embed.time_mlp.2.weight", |
| "transformer.text_embed.text_embed.weight", |
| "transformer.input_embed.proj.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight", |
| "transformer.layers.0.2.to_q.weight", |
| "transformer.layers.0.2.to_k.weight", |
| "transformer.layers.0.2.to_v.weight", |
| "transformer.layers.0.2.to_out.0.weight", |
| "transformer.layers.0.4.ff.0.0.weight", |
| "transformer.layers.0.4.ff.2.weight", |
| "transformer.layers.1.2.to_q.weight", |
| "transformer.layers.1.2.to_k.weight", |
| "transformer.layers.1.2.to_v.weight", |
| "transformer.layers.1.2.to_out.0.weight", |
| "transformer.layers.1.4.ff.0.0.weight", |
| "transformer.layers.1.4.ff.2.weight", |
| "transformer.layers.2.2.to_q.weight", |
| "transformer.layers.2.2.to_k.weight", |
| "transformer.layers.2.2.to_v.weight", |
| "transformer.layers.2.2.to_out.0.weight", |
| "transformer.layers.2.4.ff.0.0.weight", |
| "transformer.layers.2.4.ff.2.weight", |
| "transformer.layers.3.2.to_q.weight", |
| "transformer.layers.3.2.to_k.weight", |
| "transformer.layers.3.2.to_v.weight", |
| "transformer.layers.3.2.to_out.0.weight", |
| "transformer.layers.3.4.ff.0.0.weight", |
| "transformer.layers.3.4.ff.2.weight", |
| "transformer.layers.4.2.to_q.weight", |
| "transformer.layers.4.2.to_k.weight", |
| "transformer.layers.4.2.to_v.weight", |
| "transformer.layers.4.2.to_out.0.weight", |
| "transformer.layers.4.4.ff.0.0.weight", |
| "transformer.layers.4.4.ff.2.weight", |
| "transformer.layers.5.2.to_q.weight", |
| "transformer.layers.5.2.to_k.weight", |
| "transformer.layers.5.2.to_v.weight", |
| "transformer.layers.5.2.to_out.0.weight", |
| "transformer.layers.5.4.ff.0.0.weight", |
| "transformer.layers.5.4.ff.2.weight", |
| "transformer.layers.6.2.to_q.weight", |
| "transformer.layers.6.2.to_k.weight", |
| "transformer.layers.6.2.to_v.weight", |
| "transformer.layers.6.2.to_out.0.weight", |
| "transformer.layers.6.4.ff.0.0.weight", |
| "transformer.layers.6.4.ff.2.weight", |
| "transformer.layers.7.2.to_q.weight", |
| "transformer.layers.7.2.to_k.weight", |
| "transformer.layers.7.2.to_v.weight", |
| "transformer.layers.7.2.to_out.0.weight", |
| "transformer.layers.7.4.ff.0.0.weight", |
| "transformer.layers.7.4.ff.2.weight", |
| "transformer.layers.8.4.ff.0.0.weight", |
| "transformer.layers.8.4.ff.2.weight", |
| "transformer.layers.9.4.ff.0.0.weight", |
| "transformer.layers.9.4.ff.2.weight", |
| "transformer.layers.10.4.ff.0.0.weight", |
| "transformer.layers.10.4.ff.2.weight", |
| "transformer.layers.11.4.ff.0.0.weight", |
| "transformer.layers.11.4.ff.2.weight", |
| "transformer.layers.12.4.ff.0.0.weight", |
| "transformer.layers.12.4.ff.2.weight", |
| "transformer.layers.13.0.weight", |
| "transformer.layers.13.4.ff.0.0.weight", |
| "transformer.layers.13.4.ff.2.weight", |
| "transformer.layers.14.0.weight", |
| "transformer.layers.14.4.ff.0.0.weight", |
| "transformer.layers.14.4.ff.2.weight", |
| "transformer.layers.15.0.weight", |
| "transformer.layers.15.4.ff.0.0.weight", |
| "transformer.layers.15.4.ff.2.weight", |
| "transformer.layers.16.4.ff.0.0.weight", |
| "transformer.layers.16.4.ff.2.weight", |
| "transformer.layers.17.4.ff.0.0.weight", |
| "transformer.layers.17.4.ff.2.weight", |
| "transformer.layers.18.4.ff.0.0.weight", |
| "transformer.layers.18.4.ff.2.weight", |
| "transformer.layers.19.4.ff.0.0.weight", |
| "transformer.layers.19.4.ff.2.weight", |
| "transformer.layers.20.4.ff.0.0.weight", |
| "transformer.layers.20.4.ff.2.weight", |
| "transformer.layers.21.4.ff.0.0.weight", |
| "transformer.layers.21.4.ff.2.weight", |
| "transformer.layers.22.4.ff.0.0.weight", |
| "transformer.layers.22.4.ff.2.weight", |
| "transformer.layers.23.4.ff.0.0.weight", |
| "transformer.layers.23.4.ff.2.weight", |
| "transformer.layers.24.4.ff.0.0.weight", |
| "transformer.layers.24.4.ff.2.weight", |
| "transformer.layers.25.4.ff.0.0.weight", |
| "transformer.layers.25.4.ff.2.weight" |
| ], |
| "bottleneck_layers": [], |
| "recommendations": { |
| "focus_layers": [ |
| "transformer.time_embed.time_mlp.0.weight", |
| "transformer.time_embed.time_mlp.2.weight", |
| "transformer.text_embed.text_embed.weight", |
| "transformer.input_embed.proj.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight", |
| "transformer.layers.0.2.to_q.weight", |
| "transformer.layers.0.2.to_k.weight", |
| "transformer.layers.0.2.to_v.weight", |
| "transformer.layers.0.2.to_out.0.weight", |
| "transformer.layers.0.4.ff.0.0.weight", |
| "transformer.layers.0.4.ff.2.weight", |
| "transformer.layers.1.2.to_q.weight", |
| "transformer.layers.1.2.to_k.weight", |
| "transformer.layers.1.2.to_v.weight", |
| "transformer.layers.1.2.to_out.0.weight", |
| "transformer.layers.1.4.ff.0.0.weight", |
| "transformer.layers.1.4.ff.2.weight", |
| "transformer.layers.2.2.to_q.weight", |
| "transformer.layers.2.2.to_k.weight", |
| "transformer.layers.2.2.to_v.weight", |
| "transformer.layers.2.2.to_out.0.weight", |
| "transformer.layers.2.4.ff.0.0.weight", |
| "transformer.layers.2.4.ff.2.weight", |
| "transformer.layers.3.2.to_q.weight", |
| "transformer.layers.3.2.to_k.weight", |
| "transformer.layers.3.2.to_v.weight", |
| "transformer.layers.3.2.to_out.0.weight", |
| "transformer.layers.3.4.ff.0.0.weight", |
| "transformer.layers.3.4.ff.2.weight", |
| "transformer.layers.4.2.to_q.weight", |
| "transformer.layers.4.2.to_k.weight", |
| "transformer.layers.4.2.to_v.weight", |
| "transformer.layers.4.2.to_out.0.weight", |
| "transformer.layers.4.4.ff.0.0.weight", |
| "transformer.layers.4.4.ff.2.weight", |
| "transformer.layers.5.2.to_q.weight", |
| "transformer.layers.5.2.to_k.weight", |
| "transformer.layers.5.2.to_v.weight", |
| "transformer.layers.5.2.to_out.0.weight", |
| "transformer.layers.5.4.ff.0.0.weight", |
| "transformer.layers.5.4.ff.2.weight", |
| "transformer.layers.6.2.to_q.weight", |
| "transformer.layers.6.2.to_k.weight", |
| "transformer.layers.6.2.to_v.weight", |
| "transformer.layers.6.2.to_out.0.weight", |
| "transformer.layers.6.4.ff.0.0.weight", |
| "transformer.layers.6.4.ff.2.weight", |
| "transformer.layers.7.2.to_q.weight", |
| "transformer.layers.7.2.to_k.weight", |
| "transformer.layers.7.2.to_v.weight", |
| "transformer.layers.7.2.to_out.0.weight", |
| "transformer.layers.7.4.ff.0.0.weight", |
| "transformer.layers.7.4.ff.2.weight", |
| "transformer.layers.8.4.ff.0.0.weight", |
| "transformer.layers.8.4.ff.2.weight", |
| "transformer.layers.9.4.ff.0.0.weight", |
| "transformer.layers.9.4.ff.2.weight", |
| "transformer.layers.10.4.ff.0.0.weight", |
| "transformer.layers.10.4.ff.2.weight", |
| "transformer.layers.11.4.ff.0.0.weight", |
| "transformer.layers.11.4.ff.2.weight", |
| "transformer.layers.12.4.ff.0.0.weight", |
| "transformer.layers.12.4.ff.2.weight", |
| "transformer.layers.13.0.weight", |
| "transformer.layers.13.4.ff.0.0.weight", |
| "transformer.layers.13.4.ff.2.weight", |
| "transformer.layers.14.0.weight", |
| "transformer.layers.14.4.ff.0.0.weight", |
| "transformer.layers.14.4.ff.2.weight", |
| "transformer.layers.15.0.weight", |
| "transformer.layers.15.4.ff.0.0.weight", |
| "transformer.layers.15.4.ff.2.weight", |
| "transformer.layers.16.4.ff.0.0.weight", |
| "transformer.layers.16.4.ff.2.weight", |
| "transformer.layers.17.4.ff.0.0.weight", |
| "transformer.layers.17.4.ff.2.weight", |
| "transformer.layers.18.4.ff.0.0.weight", |
| "transformer.layers.18.4.ff.2.weight", |
| "transformer.layers.19.4.ff.0.0.weight", |
| "transformer.layers.19.4.ff.2.weight", |
| "transformer.layers.20.4.ff.0.0.weight", |
| "transformer.layers.20.4.ff.2.weight", |
| "transformer.layers.21.4.ff.0.0.weight", |
| "transformer.layers.21.4.ff.2.weight", |
| "transformer.layers.22.4.ff.0.0.weight", |
| "transformer.layers.22.4.ff.2.weight", |
| "transformer.layers.23.4.ff.0.0.weight", |
| "transformer.layers.23.4.ff.2.weight", |
| "transformer.layers.24.4.ff.0.0.weight", |
| "transformer.layers.24.4.ff.2.weight", |
| "transformer.layers.25.4.ff.0.0.weight", |
| "transformer.layers.25.4.ff.2.weight" |
| ] |
| }, |
| "total_parameters": 391, |
| "total_elements": 360755948, |
| "param_ranges": { |
| "transformer.time_embed.time_mlp.0.weight": { |
| "min": -0.4304574429988861, |
| "max": 0.2989666759967804, |
| "mean": -0.0025583612732589245, |
| "std": 0.042551927268505096, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 256 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.0.bias": { |
| "min": -0.06317814439535141, |
| "max": 0.10763632506132126, |
| "mean": 0.0005897035007365048, |
| "std": 0.03411067649722099, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.2.weight": { |
| "min": -0.4125778377056122, |
| "max": 0.8363006114959717, |
| "mean": -0.00021047875634394586, |
| "std": 0.024107400327920914, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.2.bias": { |
| "min": -0.1154782623052597, |
| "max": 0.32146546244621277, |
| "mean": -0.0009399052942171693, |
| "std": 0.019577190279960632, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.text_embed.text_embed.weight": { |
| "min": -2.7917730808258057, |
| "max": 2.8704917430877686, |
| "mean": -0.0003648003621492535, |
| "std": 0.6153737306594849, |
| "sparsity": 0.0, |
| "shape": [ |
| 2546, |
| 100 |
| ] |
| }, |
| "transformer.input_embed.proj.weight": { |
| "min": -0.27894294261932373, |
| "max": 0.38190174102783203, |
| "mean": 0.00042033716454170644, |
| "std": 0.042750339955091476, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 300 |
| ] |
| }, |
| "transformer.input_embed.proj.bias": { |
| "min": -0.2222987860441208, |
| "max": 0.20967179536819458, |
| "mean": -0.00449405936524272, |
| "std": 0.04091016948223114, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { |
| "min": -0.4279242753982544, |
| "max": 0.47530120611190796, |
| "mean": 2.540943796702777e-06, |
| "std": 0.024509120732545853, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 64, |
| 31 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { |
| "min": -0.32545235753059387, |
| "max": 0.15698140859603882, |
| "mean": -0.0467013455927372, |
| "std": 0.051578979939222336, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { |
| "min": -0.41039708256721497, |
| "max": 0.3545180857181549, |
| "mean": -0.00012633543519768864, |
| "std": 0.023601215332746506, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 64, |
| 31 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { |
| "min": -0.2297646850347519, |
| "max": 0.26262199878692627, |
| "mean": -0.029148615896701813, |
| "std": 0.049347542226314545, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.1.g": { |
| "min": 0.2546185553073883, |
| "max": 0.8200821876525879, |
| "mean": 0.5254418849945068, |
| "std": 0.08080805093050003, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_q.weight": { |
| "min": -0.29693102836608887, |
| "max": 0.26530489325523376, |
| "mean": -0.00042408728040754795, |
| "std": 0.032104212790727615, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_q.bias": { |
| "min": -0.09274528920650482, |
| "max": 0.12482056021690369, |
| "mean": 0.0006486810743808746, |
| "std": 0.025742707774043083, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_k.weight": { |
| "min": -0.29047587513923645, |
| "max": 0.28141430020332336, |
| "mean": -7.6991505920887e-05, |
| "std": 0.03093625046312809, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_k.bias": { |
| "min": -5.899471282958984, |
| "max": 5.8142476081848145, |
| "mean": -0.009332108311355114, |
| "std": 1.2954597473144531, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_v.weight": { |
| "min": -0.42482444643974304, |
| "max": 0.34377753734588623, |
| "mean": 9.762628906173632e-05, |
| "std": 0.02995302341878414, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_v.bias": { |
| "min": -0.028968220576643944, |
| "max": 0.027649197727441788, |
| "mean": -0.0003115592699032277, |
| "std": 0.012572345323860645, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_out.0.weight": { |
| "min": -0.45394477248191833, |
| "max": 0.44869503378868103, |
| "mean": 2.2737156541552395e-05, |
| "std": 0.023855075240135193, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_out.0.bias": { |
| "min": -0.08868509531021118, |
| "max": 0.0911499559879303, |
| "mean": 0.002273137215524912, |
| "std": 0.019512129947543144, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.3.g": { |
| "min": 0.2666190564632416, |
| "max": 1.0562766790390015, |
| "mean": 0.531130313873291, |
| "std": 0.1044141948223114, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.4.ff.0.0.weight": { |
| "min": -0.5744591355323792, |
| "max": 0.6083897948265076, |
| "mean": -0.00043104952783323824, |
| "std": 0.03859502077102661, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.4.ff.0.0.bias": { |
| "min": -0.1818137913942337, |
| "max": 0.045760128647089005, |
| "mean": -0.029441693797707558, |
| "std": 0.042590487748384476, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.0.4.ff.2.weight": { |
| "min": -1.166682481765747, |
| "max": 1.634623646736145, |
| "mean": 0.0003185438981745392, |
| "std": 0.02769385650753975, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.0.4.ff.2.bias": { |
| "min": -0.16253960132598877, |
| "max": 0.2057240754365921, |
| "mean": -0.021116681396961212, |
| "std": 0.027940358966588974, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.1.g": { |
| "min": 0.2244873046875, |
| "max": 0.8436590433120728, |
| "mean": 0.48752978444099426, |
| "std": 0.07519952952861786, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_q.weight": { |
| "min": -0.25530415773391724, |
| "max": 0.3058406710624695, |
| "mean": -9.383336873725057e-06, |
| "std": 0.03347048535943031, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_q.bias": { |
| "min": -0.09549209475517273, |
| "max": 0.11042480170726776, |
| "mean": 5.650718230754137e-05, |
| "std": 0.02698545530438423, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_k.weight": { |
| "min": -0.2974618077278137, |
| "max": 0.295981764793396, |
| "mean": 5.020356547902338e-05, |
| "std": 0.03253836929798126, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_k.bias": { |
| "min": -5.164300918579102, |
| "max": 5.084524154663086, |
| "mean": -0.0145945493131876, |
| "std": 1.1573816537857056, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_v.weight": { |
| "min": -0.3448942005634308, |
| "max": 0.3434945046901703, |
| "mean": 7.886815001256764e-05, |
| "std": 0.030058231204748154, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_v.bias": { |
| "min": -0.036158282309770584, |
| "max": 0.03324951231479645, |
| "mean": -0.00014386117982212454, |
| "std": 0.013023010455071926, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_out.0.weight": { |
| "min": -0.31528618931770325, |
| "max": 0.3752082884311676, |
| "mean": -2.1654177544405684e-05, |
| "std": 0.024055516347289085, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_out.0.bias": { |
| "min": -0.10527443885803223, |
| "max": 0.12188493460416794, |
| "mean": -0.001954286126419902, |
| "std": 0.0288428645581007, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.3.g": { |
| "min": 0.31180328130722046, |
| "max": 1.120958685874939, |
| "mean": 0.6662410497665405, |
| "std": 0.09774944931268692, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.4.ff.0.0.weight": { |
| "min": -0.872490644454956, |
| "max": 0.627565324306488, |
| "mean": 0.0016757093835622072, |
| "std": 0.047438349574804306, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.4.ff.0.0.bias": { |
| "min": -0.27100008726119995, |
| "max": 0.03407798707485199, |
| "mean": -0.04660271108150482, |
| "std": 0.04059542715549469, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.1.4.ff.2.weight": { |
| "min": -0.9202945232391357, |
| "max": 0.9643993973731995, |
| "mean": 0.0010207913583144546, |
| "std": 0.04070187732577324, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.1.4.ff.2.bias": { |
| "min": -0.14455102384090424, |
| "max": 0.07482050359249115, |
| "mean": -0.009084243327379227, |
| "std": 0.025694938376545906, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.1.g": { |
| "min": 0.23976297676563263, |
| "max": 0.7124081254005432, |
| "mean": 0.4472041726112366, |
| "std": 0.05932378023862839, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_q.weight": { |
| "min": -0.27300503849983215, |
| "max": 0.297477126121521, |
| "mean": 8.662666004966013e-06, |
| "std": 0.035474397242069244, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_q.bias": { |
| "min": -0.11903306841850281, |
| "max": 0.11846816539764404, |
| "mean": 0.0007502126973122358, |
| "std": 0.02760804258286953, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_k.weight": { |
| "min": -0.28101953864097595, |
| "max": 0.27942612767219543, |
| "mean": -7.648450991837308e-05, |
| "std": 0.03510245680809021, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_k.bias": { |
| "min": -2.5096001625061035, |
| "max": 2.5215961933135986, |
| "mean": 0.026745397597551346, |
| "std": 0.586780309677124, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_v.weight": { |
| "min": -0.22110240161418915, |
| "max": 0.27161508798599243, |
| "mean": 2.438401679683011e-06, |
| "std": 0.030731581151485443, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_v.bias": { |
| "min": -0.033151235431432724, |
| "max": 0.031146494671702385, |
| "mean": 0.00011706411896739155, |
| "std": 0.012394252233207226, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_out.0.weight": { |
| "min": -0.23539645969867706, |
| "max": 0.23185278475284576, |
| "mean": 5.7256078434875235e-05, |
| "std": 0.025697633624076843, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_out.0.bias": { |
| "min": -0.13603141903877258, |
| "max": 0.1280086189508438, |
| "mean": -0.005497735925018787, |
| "std": 0.03996264934539795, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.3.g": { |
| "min": 0.3547299802303314, |
| "max": 1.1723523139953613, |
| "mean": 0.7105399370193481, |
| "std": 0.10377444326877594, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.4.ff.0.0.weight": { |
| "min": -0.6173876523971558, |
| "max": 0.5556272268295288, |
| "mean": 0.001160334562882781, |
| "std": 0.046114034950733185, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.4.ff.0.0.bias": { |
| "min": -0.18945953249931335, |
| "max": 0.024937259033322334, |
| "mean": -0.034846723079681396, |
| "std": 0.028622858226299286, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.2.4.ff.2.weight": { |
| "min": -1.1309547424316406, |
| "max": 0.97038733959198, |
| "mean": 0.00035909086000174284, |
| "std": 0.04234256222844124, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.2.4.ff.2.bias": { |
| "min": -0.5978560447692871, |
| "max": 0.06273925304412842, |
| "mean": -0.0048814816400408745, |
| "std": 0.028621360659599304, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.1.g": { |
| "min": 0.3753381073474884, |
| "max": 0.9404851794242859, |
| "mean": 0.592466413974762, |
| "std": 0.06694933772087097, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_q.weight": { |
| "min": -0.3917763829231262, |
| "max": 0.36936038732528687, |
| "mean": 7.001425547059625e-05, |
| "std": 0.0371866449713707, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_q.bias": { |
| "min": -0.11900075525045395, |
| "max": 0.13653883337974548, |
| "mean": 0.0009160788613371551, |
| "std": 0.029187612235546112, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_k.weight": { |
| "min": -0.6190802454948425, |
| "max": 0.508792519569397, |
| "mean": 1.5223037735268008e-05, |
| "std": 0.036439377814531326, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_k.bias": { |
| "min": -8.18681526184082, |
| "max": 8.788924217224121, |
| "mean": -0.10927566885948181, |
| "std": 1.6988582611083984, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_v.weight": { |
| "min": -0.27652865648269653, |
| "max": 0.2397209107875824, |
| "mean": 5.228666486800648e-05, |
| "std": 0.03261314332485199, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_v.bias": { |
| "min": -0.0514988899230957, |
| "max": 0.03946297615766525, |
| "mean": 9.359161776956171e-05, |
| "std": 0.012969369068741798, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_out.0.weight": { |
| "min": -0.23075971007347107, |
| "max": 0.23487111926078796, |
| "mean": -2.203527037636377e-05, |
| "std": 0.029389776289463043, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_out.0.bias": { |
| "min": -0.20423616468906403, |
| "max": 0.1052512601017952, |
| "mean": -0.004020487889647484, |
| "std": 0.03263992816209793, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.3.g": { |
| "min": 0.33965712785720825, |
| "max": 1.012444019317627, |
| "mean": 0.7007054090499878, |
| "std": 0.09675901383161545, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.4.ff.0.0.weight": { |
| "min": -0.5647616982460022, |
| "max": 0.8335906267166138, |
| "mean": 0.0004150677123107016, |
| "std": 0.04229460284113884, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.4.ff.0.0.bias": { |
| "min": -0.21212875843048096, |
| "max": 0.029963094741106033, |
| "mean": -0.03217349201440811, |
| "std": 0.026498712599277496, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.3.4.ff.2.weight": { |
| "min": -0.7548851370811462, |
| "max": 0.719126284122467, |
| "mean": -1.581827746122144e-05, |
| "std": 0.036835212260484695, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.3.4.ff.2.bias": { |
| "min": -0.2634251117706299, |
| "max": 0.1063019409775734, |
| "mean": -0.0030143139883875847, |
| "std": 0.028873277828097343, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.1.g": { |
| "min": 0.28394702076911926, |
| "max": 0.6950414180755615, |
| "mean": 0.4993884563446045, |
| "std": 0.04653454199433327, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_q.weight": { |
| "min": -0.2782432436943054, |
| "max": 0.2338251918554306, |
| "mean": -0.00011091169290011749, |
| "std": 0.03875752165913582, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_q.bias": { |
| "min": -0.15358327329158783, |
| "max": 0.12643983960151672, |
| "mean": -0.0022276192903518677, |
| "std": 0.033326249569654465, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_k.weight": { |
| "min": -0.41438740491867065, |
| "max": 0.6594708561897278, |
| "mean": -1.851528577390127e-05, |
| "std": 0.039096731692552567, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_k.bias": { |
| "min": -4.237917423248291, |
| "max": 4.722480773925781, |
| "mean": -0.020456865429878235, |
| "std": 1.0076923370361328, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_v.weight": { |
| "min": -0.245052769780159, |
| "max": 0.20759740471839905, |
| "mean": 4.428692045621574e-05, |
| "std": 0.0339626781642437, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_v.bias": { |
| "min": -0.034463901072740555, |
| "max": 0.04485860466957092, |
| "mean": -2.209081139881164e-05, |
| "std": 0.012639513239264488, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_out.0.weight": { |
| "min": -0.2011018991470337, |
| "max": 0.20644338428974152, |
| "mean": -2.9357790481299162e-05, |
| "std": 0.03102092258632183, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_out.0.bias": { |
| "min": -0.19982746243476868, |
| "max": 0.11318917572498322, |
| "mean": -0.0028952043503522873, |
| "std": 0.03453591465950012, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.3.g": { |
| "min": 0.36675214767456055, |
| "max": 1.0576648712158203, |
| "mean": 0.6704948544502258, |
| "std": 0.06640778481960297, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.4.ff.0.0.weight": { |
| "min": -0.39844217896461487, |
| "max": 0.5021068453788757, |
| "mean": -3.8750327803427354e-05, |
| "std": 0.04113020375370979, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.4.ff.0.0.bias": { |
| "min": -0.12863779067993164, |
| "max": 0.026958497241139412, |
| "mean": -0.030533233657479286, |
| "std": 0.02188229374587536, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.4.4.ff.2.weight": { |
| "min": -0.449487566947937, |
| "max": 0.43325698375701904, |
| "mean": 7.53812346374616e-05, |
| "std": 0.03489059582352638, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.4.4.ff.2.bias": { |
| "min": -0.2675015926361084, |
| "max": 0.07307843118906021, |
| "mean": -0.0010904058581218123, |
| "std": 0.02313595451414585, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.1.g": { |
| "min": 0.28754422068595886, |
| "max": 0.6852768659591675, |
| "mean": 0.5245310068130493, |
| "std": 0.04753505066037178, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_q.weight": { |
| "min": -0.22266238927841187, |
| "max": 0.22331833839416504, |
| "mean": 1.5918290955596603e-05, |
| "std": 0.038949232548475266, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_q.bias": { |
| "min": -0.13635052740573883, |
| "max": 0.10933808237314224, |
| "mean": 0.00024784280685707927, |
| "std": 0.029207777231931686, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_k.weight": { |
| "min": -0.37493839859962463, |
| "max": 0.43759685754776, |
| "mean": -9.403542208019644e-06, |
| "std": 0.03928738459944725, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_k.bias": { |
| "min": -3.8458573818206787, |
| "max": 4.999326705932617, |
| "mean": 0.009741819463670254, |
| "std": 0.8452204465866089, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_v.weight": { |
| "min": -0.22270528972148895, |
| "max": 0.22029587626457214, |
| "mean": -3.1911031328490935e-07, |
| "std": 0.034410301595926285, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_v.bias": { |
| "min": -0.043785978108644485, |
| "max": 0.03592836111783981, |
| "mean": -0.0002596271806396544, |
| "std": 0.012078739702701569, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_out.0.weight": { |
| "min": -0.21270592510700226, |
| "max": 0.18842868506908417, |
| "mean": -1.7000973457470536e-05, |
| "std": 0.03153671696782112, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_out.0.bias": { |
| "min": -0.1809375286102295, |
| "max": 0.12074985355138779, |
| "mean": -0.002395304851233959, |
| "std": 0.04127994924783707, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.3.g": { |
| "min": 0.422917902469635, |
| "max": 0.9417884349822998, |
| "mean": 0.6626536250114441, |
| "std": 0.05681688338518143, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.4.ff.0.0.weight": { |
| "min": -0.3708776533603668, |
| "max": 0.4765470623970032, |
| "mean": -8.20929926703684e-05, |
| "std": 0.04088940471410751, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.4.ff.0.0.bias": { |
| "min": -0.20849654078483582, |
| "max": 0.0273736622184515, |
| "mean": -0.03023475781083107, |
| "std": 0.021363815292716026, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.5.4.ff.2.weight": { |
| "min": -0.3406715989112854, |
| "max": 0.7341561913490295, |
| "mean": 8.243846968980506e-05, |
| "std": 0.03476623818278313, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.5.4.ff.2.bias": { |
| "min": -0.24016188085079193, |
| "max": 0.05046152323484421, |
| "mean": -0.0011865879641845822, |
| "std": 0.020459504798054695, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.1.g": { |
| "min": 0.30588385462760925, |
| "max": 0.6534701585769653, |
| "mean": 0.5251248478889465, |
| "std": 0.04612228646874428, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_q.weight": { |
| "min": -0.30431559681892395, |
| "max": 0.21719232201576233, |
| "mean": 6.998516619205475e-05, |
| "std": 0.039497170597314835, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_q.bias": { |
| "min": -0.14912384748458862, |
| "max": 0.13098323345184326, |
| "mean": 0.0003266759740654379, |
| "std": 0.03045588731765747, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_k.weight": { |
| "min": -0.25694772601127625, |
| "max": 0.201896533370018, |
| "mean": 3.129036849713884e-05, |
| "std": 0.0394882932305336, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_k.bias": { |
| "min": -2.336271047592163, |
| "max": 2.375894784927368, |
| "mean": -0.026241114363074303, |
| "std": 0.44977155327796936, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_v.weight": { |
| "min": -0.18857863545417786, |
| "max": 0.21028850972652435, |
| "mean": 3.711117460625246e-05, |
| "std": 0.034793779253959656, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_v.bias": { |
| "min": -0.03168531507253647, |
| "max": 0.03566686809062958, |
| "mean": -0.00019767877529375255, |
| "std": 0.012288626283407211, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_out.0.weight": { |
| "min": -0.18829987943172455, |
| "max": 0.17024517059326172, |
| "mean": -6.836466491222382e-05, |
| "std": 0.03217046335339546, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_out.0.bias": { |
| "min": -0.13942238688468933, |
| "max": 0.1372329592704773, |
| "mean": -0.002514950931072235, |
| "std": 0.05129847675561905, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.3.g": { |
| "min": 0.4670739769935608, |
| "max": 0.955595850944519, |
| "mean": 0.6688634157180786, |
| "std": 0.05277201533317566, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.4.ff.0.0.weight": { |
| "min": -0.3244037926197052, |
| "max": 0.309257835149765, |
| "mean": -1.045628778229002e-06, |
| "std": 0.04094540327787399, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.4.ff.0.0.bias": { |
| "min": -0.1248614490032196, |
| "max": 0.025666970759630203, |
| "mean": -0.030689720064401627, |
| "std": 0.019823001697659492, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.6.4.ff.2.weight": { |
| "min": -0.43948638439178467, |
| "max": 0.44534069299697876, |
| "mean": 9.591381240170449e-05, |
| "std": 0.035119153559207916, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.6.4.ff.2.bias": { |
| "min": -0.2246266007423401, |
| "max": 0.051820773631334305, |
| "mean": -0.0011818428756669164, |
| "std": 0.018466750159859657, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.1.g": { |
| "min": 0.33914706110954285, |
| "max": 0.7398536205291748, |
| "mean": 0.5587007999420166, |
| "std": 0.04139573872089386, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_q.weight": { |
| "min": -0.2729904353618622, |
| "max": 0.27884039282798767, |
| "mean": 2.0351768398541026e-05, |
| "std": 0.04105766862630844, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_q.bias": { |
| "min": -0.13680818676948547, |
| "max": 0.13977055251598358, |
| "mean": 0.0004918644553981721, |
| "std": 0.02663181535899639, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_k.weight": { |
| "min": -0.49051523208618164, |
| "max": 0.35575586557388306, |
| "mean": 8.911330223781988e-05, |
| "std": 0.04069535806775093, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_k.bias": { |
| "min": -2.2970781326293945, |
| "max": 1.745163917541504, |
| "mean": -0.021079789847135544, |
| "std": 0.500128984451294, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_v.weight": { |
| "min": -0.2181096374988556, |
| "max": 0.1974443644285202, |
| "mean": -4.0170674765249714e-05, |
| "std": 0.03423338383436203, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_v.bias": { |
| "min": -0.041142482310533524, |
| "max": 0.03885917738080025, |
| "mean": -0.0001360031747026369, |
| "std": 0.012883774936199188, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_out.0.weight": { |
| "min": -0.17761866748332977, |
| "max": 0.1828862875699997, |
| "mean": 4.801471368409693e-05, |
| "std": 0.03155674412846565, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_out.0.bias": { |
| "min": -0.1799207329750061, |
| "max": 0.18389682471752167, |
| "mean": -0.0022146617993712425, |
| "std": 0.05482979863882065, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.3.g": { |
| "min": 0.474190354347229, |
| "max": 1.0258487462997437, |
| "mean": 0.6452326774597168, |
| "std": 0.05035318806767464, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.4.ff.0.0.weight": { |
| "min": -0.27163514494895935, |
| "max": 0.3091295659542084, |
| "mean": 0.00011244519555475563, |
| "std": 0.04068158566951752, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.4.ff.0.0.bias": { |
| "min": -0.10526852309703827, |
| "max": 0.026741184294223785, |
| "mean": -0.029519207775592804, |
| "std": 0.01793486438691616, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.7.4.ff.2.weight": { |
| "min": -0.33932313323020935, |
| "max": 0.329169899225235, |
| "mean": 5.2667885029222816e-05, |
| "std": 0.03441279008984566, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.7.4.ff.2.bias": { |
| "min": -0.18180307745933533, |
| "max": 0.042509548366069794, |
| "mean": -0.0010597179643809795, |
| "std": 0.017209293320775032, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.1.g": { |
| "min": 0.32517459988594055, |
| "max": 0.6865665912628174, |
| "mean": 0.511164128780365, |
| "std": 0.03695276752114296, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_q.weight": { |
| "min": -0.23393133282661438, |
| "max": 0.2253761738538742, |
| "mean": -3.613880107877776e-05, |
| "std": 0.039175428450107574, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_q.bias": { |
| "min": -0.11511484533548355, |
| "max": 0.13181191682815552, |
| "mean": 0.00015029555652290583, |
| "std": 0.029160132631659508, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_k.weight": { |
| "min": -0.35229772329330444, |
| "max": 0.28487107157707214, |
| "mean": 6.5603690018178895e-06, |
| "std": 0.03924452140927315, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_k.bias": { |
| "min": -4.132349967956543, |
| "max": 3.543774366378784, |
| "mean": -0.011590607464313507, |
| "std": 0.6826151609420776, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_v.weight": { |
| "min": -0.21073584258556366, |
| "max": 0.20936711132526398, |
| "mean": 3.4690663596848026e-05, |
| "std": 0.03448447957634926, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_v.bias": { |
| "min": -0.03585724160075188, |
| "max": 0.047966208308935165, |
| "mean": 0.0007884915685281157, |
| "std": 0.012871142476797104, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_out.0.weight": { |
| "min": -0.21028311550617218, |
| "max": 0.19305972754955292, |
| "mean": -9.823215805226937e-07, |
| "std": 0.031695324927568436, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_out.0.bias": { |
| "min": -0.1864088624715805, |
| "max": 0.17721442878246307, |
| "mean": -0.0028417375870049, |
| "std": 0.058615218847990036, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.3.g": { |
| "min": 0.47462186217308044, |
| "max": 1.0414687395095825, |
| "mean": 0.651329517364502, |
| "std": 0.049656689167022705, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.4.ff.0.0.weight": { |
| "min": -0.24834677577018738, |
| "max": 0.3290989398956299, |
| "mean": 0.00018076221749652177, |
| "std": 0.04056994616985321, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.4.ff.0.0.bias": { |
| "min": -0.12541481852531433, |
| "max": 0.024957137182354927, |
| "mean": -0.030498644337058067, |
| "std": 0.017614001408219337, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.8.4.ff.2.weight": { |
| "min": -0.4203978180885315, |
| "max": 0.4814401865005493, |
| "mean": 1.1958536560996436e-06, |
| "std": 0.03539701923727989, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.8.4.ff.2.bias": { |
| "min": -0.15133719146251678, |
| "max": 0.04343123733997345, |
| "mean": 4.256972897564992e-05, |
| "std": 0.014886128716170788, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.1.g": { |
| "min": 0.31556373834609985, |
| "max": 0.6816186308860779, |
| "mean": 0.5528932809829712, |
| "std": 0.04069383069872856, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_q.weight": { |
| "min": -0.20636020600795746, |
| "max": 0.21985411643981934, |
| "mean": 3.188779010088183e-05, |
| "std": 0.03829942271113396, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_q.bias": { |
| "min": -0.13772568106651306, |
| "max": 0.1125853881239891, |
| "mean": 2.6155808882322162e-05, |
| "std": 0.025809435173869133, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_k.weight": { |
| "min": -0.40282922983169556, |
| "max": 0.37083154916763306, |
| "mean": 2.5528193873469718e-05, |
| "std": 0.03817952424287796, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_k.bias": { |
| "min": -3.7708845138549805, |
| "max": 2.868703603744507, |
| "mean": 0.0011554225347936153, |
| "std": 0.5168288946151733, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_v.weight": { |
| "min": -0.20372195541858673, |
| "max": 0.1975945085287094, |
| "mean": 2.9724978958256543e-05, |
| "std": 0.03429732471704483, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_v.bias": { |
| "min": -0.0505308173596859, |
| "max": 0.039880186319351196, |
| "mean": -0.0004213028587400913, |
| "std": 0.01341495756059885, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_out.0.weight": { |
| "min": -0.19602739810943604, |
| "max": 0.20172414183616638, |
| "mean": -1.2448943380150013e-05, |
| "std": 0.031805410981178284, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_out.0.bias": { |
| "min": -0.19294138252735138, |
| "max": 0.19508768618106842, |
| "mean": -0.0029671685770154, |
| "std": 0.06252522766590118, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.3.g": { |
| "min": 0.348909467458725, |
| "max": 1.083768367767334, |
| "mean": 0.667101263999939, |
| "std": 0.055243175476789474, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.4.ff.0.0.weight": { |
| "min": -0.22561651468276978, |
| "max": 0.2514271140098572, |
| "mean": 0.0003585518861655146, |
| "std": 0.04075947403907776, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.4.ff.0.0.bias": { |
| "min": -0.09107004851102829, |
| "max": 0.04363898187875748, |
| "mean": -0.03007982112467289, |
| "std": 0.017611678689718246, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.9.4.ff.2.weight": { |
| "min": -0.353363573551178, |
| "max": 0.3039560914039612, |
| "mean": -4.4702926970785484e-05, |
| "std": 0.037122584879398346, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.9.4.ff.2.bias": { |
| "min": -0.16167114675045013, |
| "max": 0.06346774101257324, |
| "mean": -7.894223381299525e-05, |
| "std": 0.019427189603447914, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.1.g": { |
| "min": 0.34871092438697815, |
| "max": 0.7219411134719849, |
| "mean": 0.5423486828804016, |
| "std": 0.03906320407986641, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_q.weight": { |
| "min": -0.219291090965271, |
| "max": 0.22339218854904175, |
| "mean": -1.1523573448357638e-05, |
| "std": 0.03923090174794197, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_q.bias": { |
| "min": -0.118381567299366, |
| "max": 0.17055465281009674, |
| "mean": 0.00028248116723261774, |
| "std": 0.025117389857769012, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_k.weight": { |
| "min": -0.24647831916809082, |
| "max": 0.30066463351249695, |
| "mean": -3.701161767821759e-05, |
| "std": 0.03893034905195236, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_k.bias": { |
| "min": -3.5050106048583984, |
| "max": 3.714456796646118, |
| "mean": 0.015847081318497658, |
| "std": 0.7823866009712219, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_v.weight": { |
| "min": -0.2191196233034134, |
| "max": 0.2373991161584854, |
| "mean": -1.3136124835000373e-05, |
| "std": 0.03630338981747627, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_v.bias": { |
| "min": -0.04720474034547806, |
| "max": 0.051363855600357056, |
| "mean": 0.00048070820048451424, |
| "std": 0.013523152098059654, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_out.0.weight": { |
| "min": -0.21417121589183807, |
| "max": 0.21722286939620972, |
| "mean": 5.63644825888332e-05, |
| "std": 0.0336158350110054, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_out.0.bias": { |
| "min": -0.21132777631282806, |
| "max": 0.2312006652355194, |
| "mean": -0.0050989487208426, |
| "std": 0.06185900419950485, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.3.g": { |
| "min": 0.36193206906318665, |
| "max": 1.1010645627975464, |
| "mean": 0.6992560029029846, |
| "std": 0.05359357222914696, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.4.ff.0.0.weight": { |
| "min": -0.2351117730140686, |
| "max": 0.24475757777690887, |
| "mean": 0.00046337785897776484, |
| "std": 0.041268885135650635, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.4.ff.0.0.bias": { |
| "min": -0.09809085726737976, |
| "max": 0.06809623539447784, |
| "mean": -0.0314301960170269, |
| "std": 0.018128085881471634, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.10.4.ff.2.weight": { |
| "min": -0.30171892046928406, |
| "max": 0.35163986682891846, |
| "mean": -8.267226803582162e-05, |
| "std": 0.04027453064918518, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.10.4.ff.2.bias": { |
| "min": -0.1522630751132965, |
| "max": 0.14965395629405975, |
| "mean": 0.0002633024996612221, |
| "std": 0.023038938641548157, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.1.g": { |
| "min": 0.9992594122886658, |
| "max": 1.0015419721603394, |
| "mean": 1.0000762939453125, |
| "std": 0.0006376681849360466, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_q.weight": { |
| "min": -0.03125917166471481, |
| "max": 0.03125542029738426, |
| "mean": -1.929077916429378e-05, |
| "std": 0.018040984869003296, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_q.bias": { |
| "min": -0.031228363513946533, |
| "max": 0.030987966805696487, |
| "mean": -0.0010841633193194866, |
| "std": 0.017950600013136864, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_k.weight": { |
| "min": -0.03125608712434769, |
| "max": 0.03125986456871033, |
| "mean": 3.548163931554882e-06, |
| "std": 0.018041392788290977, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_k.bias": { |
| "min": -0.03115428239107132, |
| "max": 0.031174642965197563, |
| "mean": 0.00033392058685421944, |
| "std": 0.01806280016899109, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_v.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_v.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_out.0.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_out.0.bias": { |
| "min": -0.0006233988679014146, |
| "max": 0.0007061311043798923, |
| "mean": 4.538033408607589e-06, |
| "std": 0.0001893796434160322, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.3.g": { |
| "min": 0.997599720954895, |
| "max": 1.002988576889038, |
| "mean": 0.9999969601631165, |
| "std": 0.000850954616907984, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.4.ff.0.0.weight": { |
| "min": -0.033545054495334625, |
| "max": 0.033692505210638046, |
| "mean": -6.091411705710925e-06, |
| "std": 0.018047811463475227, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.4.ff.0.0.bias": { |
| "min": -0.033063653856515884, |
| "max": 0.033412136137485504, |
| "mean": -0.00018106887000612915, |
| "std": 0.017954090610146523, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.11.4.ff.2.weight": { |
| "min": -0.001468250178731978, |
| "max": 0.0015634398441761732, |
| "mean": 1.9080666788795497e-06, |
| "std": 0.00028948785620741546, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.11.4.ff.2.bias": { |
| "min": -0.0005752606084570289, |
| "max": 0.0007690406637266278, |
| "mean": 7.6006986091670115e-06, |
| "std": 0.00017151834617834538, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.1.g": { |
| "min": 0.3833079934120178, |
| "max": 0.7191449403762817, |
| "mean": 0.5806841254234314, |
| "std": 0.03885476291179657, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_q.weight": { |
| "min": -0.23893281817436218, |
| "max": 0.19658899307250977, |
| "mean": 2.609232979011722e-05, |
| "std": 0.03746626526117325, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_q.bias": { |
| "min": -0.11880965530872345, |
| "max": 0.1667701154947281, |
| "mean": 0.000981115852482617, |
| "std": 0.02755648083984852, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_k.weight": { |
| "min": -0.2465641349554062, |
| "max": 0.49993160367012024, |
| "mean": -5.0439630285836756e-05, |
| "std": 0.03762364014983177, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_k.bias": { |
| "min": -3.9418535232543945, |
| "max": 3.7689952850341797, |
| "mean": -0.003572138026356697, |
| "std": 0.6813418865203857, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_v.weight": { |
| "min": -0.2274625599384308, |
| "max": 0.25183549523353577, |
| "mean": -1.1858754987770226e-05, |
| "std": 0.03743482381105423, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_v.bias": { |
| "min": -0.07157625257968903, |
| "max": 0.08059139549732208, |
| "mean": -0.0005097019020467997, |
| "std": 0.0156550370156765, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_out.0.weight": { |
| "min": -0.22814570367336273, |
| "max": 0.2576799690723419, |
| "mean": -2.8758266125805676e-05, |
| "std": 0.03542165458202362, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_out.0.bias": { |
| "min": -0.20052188634872437, |
| "max": 0.21483485400676727, |
| "mean": -0.0055272276513278484, |
| "std": 0.06832942366600037, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.3.g": { |
| "min": 0.40502721071243286, |
| "max": 1.189380407333374, |
| "mean": 0.7378897666931152, |
| "std": 0.05522923544049263, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.4.ff.0.0.weight": { |
| "min": -0.22088685631752014, |
| "max": 0.2456110566854477, |
| "mean": 0.0005211912211962044, |
| "std": 0.04133584350347519, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.4.ff.0.0.bias": { |
| "min": -0.10322928428649902, |
| "max": 0.024186961352825165, |
| "mean": -0.03266708552837372, |
| "std": 0.018890798091888428, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.12.4.ff.2.weight": { |
| "min": -0.44966569542884827, |
| "max": 0.42246878147125244, |
| "mean": -0.00043506931979209185, |
| "std": 0.04689610004425049, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.12.4.ff.2.bias": { |
| "min": -0.2515268921852112, |
| "max": 0.47013524174690247, |
| "mean": 0.003204584587365389, |
| "std": 0.04452726989984512, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.0.weight": { |
| "min": -0.31688186526298523, |
| "max": 0.33314481377601624, |
| "mean": -2.5167657440761104e-05, |
| "std": 0.02128784917294979, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.13.1.g": { |
| "min": 0.3244757652282715, |
| "max": 0.6856456398963928, |
| "mean": 0.5710105299949646, |
| "std": 0.044706691056489944, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_q.weight": { |
| "min": -0.16456718742847443, |
| "max": 0.17448973655700684, |
| "mean": -4.871570490649901e-05, |
| "std": 0.03318251296877861, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_q.bias": { |
| "min": -0.18692979216575623, |
| "max": 0.14325818419456482, |
| "mean": 3.459470462985337e-05, |
| "std": 0.029701216146349907, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_k.weight": { |
| "min": -0.38104259967803955, |
| "max": 0.2459549903869629, |
| "mean": -9.848581612459384e-06, |
| "std": 0.03276371210813522, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_k.bias": { |
| "min": -3.655487537384033, |
| "max": 3.2897744178771973, |
| "mean": -0.01425144076347351, |
| "std": 0.985081136226654, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_v.weight": { |
| "min": -0.23475398123264313, |
| "max": 0.24735963344573975, |
| "mean": -1.814730239857454e-05, |
| "std": 0.041698258370161057, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_v.bias": { |
| "min": -0.07251452654600143, |
| "max": 0.15445762872695923, |
| "mean": 0.0006656228797510266, |
| "std": 0.0251647736877203, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_out.0.weight": { |
| "min": -0.26630881428718567, |
| "max": 0.2481267750263214, |
| "mean": -1.5170076949289069e-05, |
| "std": 0.0401393324136734, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_out.0.bias": { |
| "min": -0.1895921230316162, |
| "max": 0.19462409615516663, |
| "mean": -0.001237674499861896, |
| "std": 0.06668463349342346, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.3.g": { |
| "min": 0.32920053601264954, |
| "max": 0.999627411365509, |
| "mean": 0.7191565632820129, |
| "std": 0.052332233637571335, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.4.ff.0.0.weight": { |
| "min": -0.23170752823352814, |
| "max": 0.24531398713588715, |
| "mean": 0.00018265214748680592, |
| "std": 0.040900230407714844, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.4.ff.0.0.bias": { |
| "min": -0.11451739817857742, |
| "max": 0.019039874896407127, |
| "mean": -0.0424770824611187, |
| "std": 0.018864724785089493, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.13.4.ff.2.weight": { |
| "min": -0.38964730501174927, |
| "max": 0.40745288133621216, |
| "mean": -2.1833995560882613e-05, |
| "std": 0.0485333576798439, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.13.4.ff.2.bias": { |
| "min": -0.6929526925086975, |
| "max": 0.4126836955547333, |
| "mean": 0.0008477572700940073, |
| "std": 0.060282669961452484, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.0.weight": { |
| "min": -0.0013933395966887474, |
| "max": 1.000746726989746, |
| "mean": 0.00048820103984326124, |
| "std": 0.022089513018727303, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.14.1.g": { |
| "min": 0.9992843866348267, |
| "max": 1.001552939414978, |
| "mean": 1.0000746250152588, |
| "std": 0.0006248687277548015, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_q.weight": { |
| "min": -0.03125389292836189, |
| "max": 0.03125779330730438, |
| "mean": -2.1020408894401044e-05, |
| "std": 0.01803232543170452, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_q.bias": { |
| "min": -0.031215354800224304, |
| "max": 0.031232187524437904, |
| "mean": -0.0006770011968910694, |
| "std": 0.017826862633228302, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_k.weight": { |
| "min": -0.03125695139169693, |
| "max": 0.03126237541437149, |
| "mean": -8.831485502014402e-06, |
| "std": 0.018031351268291473, |
| "sparsity": 9.5367431640625e-07, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_k.bias": { |
| "min": -0.03123210370540619, |
| "max": 0.03124479576945305, |
| "mean": -0.0007297537522390485, |
| "std": 0.017941787838935852, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_v.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_v.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_out.0.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_out.0.bias": { |
| "min": -0.0005147741758264601, |
| "max": 0.00041916739428415895, |
| "mean": -4.1531684473739006e-06, |
| "std": 0.0001558788208058104, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.3.g": { |
| "min": 0.997329831123352, |
| "max": 1.0023579597473145, |
| "mean": 0.9995578527450562, |
| "std": 0.0008328193798661232, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.4.ff.0.0.weight": { |
| "min": -0.033257633447647095, |
| "max": 0.03283705189824104, |
| "mean": -2.9398686365311733e-06, |
| "std": 0.01802799478173256, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.4.ff.0.0.bias": { |
| "min": -0.0324481800198555, |
| "max": 0.03130009397864342, |
| "mean": -0.000511951744556427, |
| "std": 0.01803583651781082, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.14.4.ff.2.weight": { |
| "min": -0.0017112370114773512, |
| "max": 0.0015153783606365323, |
| "mean": -1.2167475915703108e-06, |
| "std": 0.00028721734997816384, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.14.4.ff.2.bias": { |
| "min": -0.00046955313882790506, |
| "max": 0.0003882118908222765, |
| "mean": -3.8059165490267333e-06, |
| "std": 0.00014281016774475574, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.0.weight": { |
| "min": -0.23431308567523956, |
| "max": 0.2725020945072174, |
| "mean": 6.621908141823951e-06, |
| "std": 0.018810350447893143, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.15.1.g": { |
| "min": 0.32144924998283386, |
| "max": 0.6939579248428345, |
| "mean": 0.5816149711608887, |
| "std": 0.045937687158584595, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_q.weight": { |
| "min": -0.18192073702812195, |
| "max": 0.1977624148130417, |
| "mean": -1.1576559700188227e-05, |
| "std": 0.03318417817354202, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_q.bias": { |
| "min": -0.16049131751060486, |
| "max": 0.1293114274740219, |
| "mean": -0.00107291666790843, |
| "std": 0.03413516655564308, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_k.weight": { |
| "min": -0.3323962688446045, |
| "max": 0.31116873025894165, |
| "mean": -1.0262579962727614e-05, |
| "std": 0.03223471716046333, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_k.bias": { |
| "min": -7.802563190460205, |
| "max": 8.761749267578125, |
| "mean": 0.09345458447933197, |
| "std": 1.6194684505462646, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_v.weight": { |
| "min": -0.23397405445575714, |
| "max": 0.2418195903301239, |
| "mean": 4.162176628597081e-05, |
| "std": 0.04085618257522583, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_v.bias": { |
| "min": -0.07595669478178024, |
| "max": 0.0657576471567154, |
| "mean": 0.00048221880570054054, |
| "std": 0.019416553899645805, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_out.0.weight": { |
| "min": -0.2459147870540619, |
| "max": 0.23389238119125366, |
| "mean": -3.2510670280316845e-06, |
| "std": 0.03943093866109848, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_out.0.bias": { |
| "min": -0.1629837304353714, |
| "max": 0.16088047623634338, |
| "mean": 0.0016233830247074366, |
| "std": 0.06528986245393753, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.3.g": { |
| "min": 0.5571612119674683, |
| "max": 0.9436106085777283, |
| "mean": 0.7128171324729919, |
| "std": 0.04012364149093628, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.4.ff.0.0.weight": { |
| "min": -0.22801116108894348, |
| "max": 0.2548006474971771, |
| "mean": -4.5571337977889925e-05, |
| "std": 0.04057438299059868, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.4.ff.0.0.bias": { |
| "min": -0.13471974432468414, |
| "max": 0.0221097432076931, |
| "mean": -0.041352279484272, |
| "std": 0.01838749460875988, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.15.4.ff.2.weight": { |
| "min": -0.42162591218948364, |
| "max": 0.3923877477645874, |
| "mean": -4.321471351431683e-06, |
| "std": 0.04778357967734337, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.15.4.ff.2.bias": { |
| "min": -0.6071884632110596, |
| "max": 0.651282787322998, |
| "mean": 0.0015848546754568815, |
| "std": 0.0568372942507267, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.0.weight": { |
| "min": -0.25181877613067627, |
| "max": 0.32084232568740845, |
| "mean": -6.161948476801626e-06, |
| "std": 0.019613562151789665, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.16.1.g": { |
| "min": 0.35955217480659485, |
| "max": 0.6821547150611877, |
| "mean": 0.5706839561462402, |
| "std": 0.0429888591170311, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_q.weight": { |
| "min": -0.22016532719135284, |
| "max": 0.17702604830265045, |
| "mean": -3.4450480598025024e-05, |
| "std": 0.034298721700906754, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_q.bias": { |
| "min": -0.1631413698196411, |
| "max": 0.23277200758457184, |
| "mean": 0.000363422412192449, |
| "std": 0.032813675701618195, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_k.weight": { |
| "min": -0.2639073431491852, |
| "max": 0.2398279309272766, |
| "mean": -5.2961986511945724e-05, |
| "std": 0.033897411078214645, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_k.bias": { |
| "min": -4.854308605194092, |
| "max": 5.090536117553711, |
| "mean": 0.04387902468442917, |
| "std": 1.2290979623794556, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_v.weight": { |
| "min": -0.24643683433532715, |
| "max": 0.2503347098827362, |
| "mean": 7.216692029032856e-05, |
| "std": 0.04398633539676666, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_v.bias": { |
| "min": -0.06248769536614418, |
| "max": 0.05441384017467499, |
| "mean": 0.0006457050913013518, |
| "std": 0.017188573256134987, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_out.0.weight": { |
| "min": -0.2864288091659546, |
| "max": 0.2721114456653595, |
| "mean": -5.008514563087374e-05, |
| "std": 0.04298446327447891, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_out.0.bias": { |
| "min": -0.16100545227527618, |
| "max": 0.170342355966568, |
| "mean": -0.0028870203532278538, |
| "std": 0.059300076216459274, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.3.g": { |
| "min": 0.5198097229003906, |
| "max": 0.9330063462257385, |
| "mean": 0.7133984565734863, |
| "std": 0.03842313215136528, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.4.ff.0.0.weight": { |
| "min": -0.23787352442741394, |
| "max": 0.24874305725097656, |
| "mean": 0.0004645891021937132, |
| "std": 0.04045315086841583, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.4.ff.0.0.bias": { |
| "min": -0.14499974250793457, |
| "max": 0.04109013453125954, |
| "mean": -0.039695803076028824, |
| "std": 0.020541805773973465, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.16.4.ff.2.weight": { |
| "min": -0.5323729515075684, |
| "max": 0.5824694633483887, |
| "mean": 5.902071279706433e-06, |
| "std": 0.04885893687605858, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.16.4.ff.2.bias": { |
| "min": -0.5189845561981201, |
| "max": 0.4933343231678009, |
| "mean": 0.0023664908949285746, |
| "std": 0.05344504490494728, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.0.weight": { |
| "min": -0.2737047076225281, |
| "max": 0.31558480858802795, |
| "mean": 1.935944737851969e-06, |
| "std": 0.020050112158060074, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.17.1.g": { |
| "min": 0.3658909797668457, |
| "max": 0.7117034196853638, |
| "mean": 0.5931328535079956, |
| "std": 0.04596179351210594, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_q.weight": { |
| "min": -0.2108193188905716, |
| "max": 0.1990451216697693, |
| "mean": 3.062548057641834e-05, |
| "std": 0.034867268055677414, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_q.bias": { |
| "min": -0.18712614476680756, |
| "max": 0.20343470573425293, |
| "mean": 0.0009520579478703439, |
| "std": 0.031497176736593246, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_k.weight": { |
| "min": -0.2896800935268402, |
| "max": 0.3398098945617676, |
| "mean": -4.6883709728717804e-05, |
| "std": 0.03458770364522934, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_k.bias": { |
| "min": -3.8768599033355713, |
| "max": 3.3869552612304688, |
| "mean": 0.014455841854214668, |
| "std": 0.8583106398582458, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_v.weight": { |
| "min": -0.22448983788490295, |
| "max": 0.24981370568275452, |
| "mean": -3.890434527420439e-06, |
| "std": 0.042229313403367996, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_v.bias": { |
| "min": -0.05526347830891609, |
| "max": 0.046524014323949814, |
| "mean": -2.1809362806379795e-05, |
| "std": 0.01583988219499588, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_out.0.weight": { |
| "min": -0.2933104932308197, |
| "max": 0.29035091400146484, |
| "mean": -7.618443305545952e-06, |
| "std": 0.04194440320134163, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_out.0.bias": { |
| "min": -0.124831423163414, |
| "max": 0.25899115204811096, |
| "mean": -0.0032436971087008715, |
| "std": 0.05317322164773941, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.3.g": { |
| "min": 0.45623838901519775, |
| "max": 0.844422459602356, |
| "mean": 0.7054718732833862, |
| "std": 0.03522763401269913, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.4.ff.0.0.weight": { |
| "min": -0.5120505094528198, |
| "max": 0.3482021689414978, |
| "mean": 0.00034296896774321795, |
| "std": 0.04019856080412865, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.4.ff.0.0.bias": { |
| "min": -0.18573501706123352, |
| "max": 0.03954247012734413, |
| "mean": -0.039387013763189316, |
| "std": 0.02136080153286457, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.17.4.ff.2.weight": { |
| "min": -0.543980062007904, |
| "max": 0.5556398034095764, |
| "mean": -7.12752080289647e-05, |
| "std": 0.050733935087919235, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.17.4.ff.2.bias": { |
| "min": -0.5116539001464844, |
| "max": 0.6641847491264343, |
| "mean": 0.0024422036949545145, |
| "std": 0.049520041793584824, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.0.weight": { |
| "min": -0.3325117230415344, |
| "max": 0.2653426229953766, |
| "mean": 3.3086610073951306e-06, |
| "std": 0.019387137144804, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.18.1.g": { |
| "min": 0.3219893276691437, |
| "max": 0.7664631009101868, |
| "mean": 0.6510411500930786, |
| "std": 0.04532777890563011, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_q.weight": { |
| "min": -0.2498156577348709, |
| "max": 0.2198626697063446, |
| "mean": -1.886132849904243e-06, |
| "std": 0.03650164604187012, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_q.bias": { |
| "min": -0.32695695757865906, |
| "max": 0.2867416441440582, |
| "mean": -0.000684951723087579, |
| "std": 0.03855687379837036, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_k.weight": { |
| "min": -0.31001296639442444, |
| "max": 0.3700636327266693, |
| "mean": 6.516962457681075e-05, |
| "std": 0.036242250353097916, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_k.bias": { |
| "min": -4.716774940490723, |
| "max": 5.807016372680664, |
| "mean": 0.03795425221323967, |
| "std": 1.4130064249038696, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_v.weight": { |
| "min": -0.22152067720890045, |
| "max": 0.20586349070072174, |
| "mean": -7.513246237067506e-05, |
| "std": 0.042484886944293976, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_v.bias": { |
| "min": -0.0776548758149147, |
| "max": 0.05150791257619858, |
| "mean": -0.0009258093778043985, |
| "std": 0.016412504017353058, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_out.0.weight": { |
| "min": -0.33054521679878235, |
| "max": 0.32925283908843994, |
| "mean": -4.675353011407424e-06, |
| "std": 0.042791180312633514, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_out.0.bias": { |
| "min": -0.284753680229187, |
| "max": 0.1120273545384407, |
| "mean": -0.0012038055574521422, |
| "std": 0.04701421782374382, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.3.g": { |
| "min": 0.4860539734363556, |
| "max": 0.8868206739425659, |
| "mean": 0.7373669743537903, |
| "std": 0.03824283927679062, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.4.ff.0.0.weight": { |
| "min": -0.362324595451355, |
| "max": 0.27455514669418335, |
| "mean": 5.109608173370361e-05, |
| "std": 0.04064401239156723, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.4.ff.0.0.bias": { |
| "min": -0.24754445254802704, |
| "max": 0.046375077217817307, |
| "mean": -0.039263028651475906, |
| "std": 0.02328905090689659, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.18.4.ff.2.weight": { |
| "min": -0.6261394023895264, |
| "max": 0.5965179204940796, |
| "mean": -5.992384103592485e-05, |
| "std": 0.053116101771593094, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.18.4.ff.2.bias": { |
| "min": -0.7094439268112183, |
| "max": 0.2657933533191681, |
| "mean": 0.000917100696824491, |
| "std": 0.05122515559196472, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.0.weight": { |
| "min": -0.3433791399002075, |
| "max": 0.30369648337364197, |
| "mean": 2.4011274035729e-07, |
| "std": 0.019135721027851105, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.19.1.g": { |
| "min": 0.34975123405456543, |
| "max": 0.7829355597496033, |
| "mean": 0.6388096809387207, |
| "std": 0.049248941242694855, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_q.weight": { |
| "min": -0.20544706284999847, |
| "max": 0.20679640769958496, |
| "mean": -5.99185805185698e-05, |
| "std": 0.037696123123168945, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_q.bias": { |
| "min": -0.2586185336112976, |
| "max": 0.2680370807647705, |
| "mean": -0.00040146420360542834, |
| "std": 0.04459588602185249, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_k.weight": { |
| "min": -0.3540765345096588, |
| "max": 0.3223837912082672, |
| "mean": -6.969309197302209e-06, |
| "std": 0.03720474615693092, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_k.bias": { |
| "min": -5.260976791381836, |
| "max": 4.204005241394043, |
| "mean": -0.026412418112158775, |
| "std": 1.0066431760787964, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_v.weight": { |
| "min": -0.23861557245254517, |
| "max": 0.24334679543972015, |
| "mean": -2.5082641514018178e-05, |
| "std": 0.04320957139134407, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_v.bias": { |
| "min": -0.06232341378927231, |
| "max": 0.056674133986234665, |
| "mean": 0.0003426429466344416, |
| "std": 0.01415110845118761, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_out.0.weight": { |
| "min": -0.43692541122436523, |
| "max": 0.37342891097068787, |
| "mean": 1.4435072444030084e-05, |
| "std": 0.04412085935473442, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_out.0.bias": { |
| "min": -0.09643299132585526, |
| "max": 0.17589901387691498, |
| "mean": -0.0006592142744921148, |
| "std": 0.03515716642141342, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.3.g": { |
| "min": 0.4216461777687073, |
| "max": 1.0694262981414795, |
| "mean": 0.7483195662498474, |
| "std": 0.04205932468175888, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.4.ff.0.0.weight": { |
| "min": -0.2665816843509674, |
| "max": 0.2969212532043457, |
| "mean": -7.953966996865347e-05, |
| "std": 0.04080412909388542, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.4.ff.0.0.bias": { |
| "min": -0.1857525259256363, |
| "max": 0.043901920318603516, |
| "mean": -0.036818623542785645, |
| "std": 0.025608688592910767, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.19.4.ff.2.weight": { |
| "min": -0.4569249451160431, |
| "max": 0.4865773022174835, |
| "mean": 4.3881707824766636e-05, |
| "std": 0.05420896038413048, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.19.4.ff.2.bias": { |
| "min": -0.28651300072669983, |
| "max": 0.5512722134590149, |
| "mean": -0.00088057282846421, |
| "std": 0.04782658815383911, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.0.weight": { |
| "min": -0.292865514755249, |
| "max": 0.32280707359313965, |
| "mean": 6.539526111737359e-06, |
| "std": 0.019969915971159935, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.20.1.g": { |
| "min": 0.2909410893917084, |
| "max": 0.7601442337036133, |
| "mean": 0.6508233547210693, |
| "std": 0.05213604494929314, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_q.weight": { |
| "min": -0.2434738278388977, |
| "max": 0.2616451680660248, |
| "mean": -6.040764219505945e-06, |
| "std": 0.03961297869682312, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_q.bias": { |
| "min": -0.2675459682941437, |
| "max": 0.1998538225889206, |
| "mean": -0.0008808721322566271, |
| "std": 0.05175367370247841, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_k.weight": { |
| "min": -0.2721429765224457, |
| "max": 0.25373363494873047, |
| "mean": 4.028795956401154e-06, |
| "std": 0.03871006891131401, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_k.bias": { |
| "min": -12.963708877563477, |
| "max": 15.945626258850098, |
| "mean": 0.03322511166334152, |
| "std": 1.988985300064087, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_v.weight": { |
| "min": -0.2071155309677124, |
| "max": 0.22583135962486267, |
| "mean": -7.227471360238269e-05, |
| "std": 0.04055366292595863, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_v.bias": { |
| "min": -0.06934842467308044, |
| "max": 0.06322810798883438, |
| "mean": 0.00015266213449649513, |
| "std": 0.01474202610552311, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_out.0.weight": { |
| "min": -0.46502697467803955, |
| "max": 0.32068270444869995, |
| "mean": 1.9500737835187465e-05, |
| "std": 0.0405886135995388, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_out.0.bias": { |
| "min": -0.06406750530004501, |
| "max": 0.1152099147439003, |
| "mean": 0.0011921885889023542, |
| "std": 0.0247051939368248, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.3.g": { |
| "min": 0.37462663650512695, |
| "max": 0.9322708249092102, |
| "mean": 0.7508515119552612, |
| "std": 0.040188200771808624, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.4.ff.0.0.weight": { |
| "min": -0.27930018305778503, |
| "max": 0.2731732130050659, |
| "mean": -0.00016858182789292186, |
| "std": 0.040994688868522644, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.4.ff.0.0.bias": { |
| "min": -0.19882012903690338, |
| "max": 0.05084774270653725, |
| "mean": -0.03202420845627785, |
| "std": 0.025111209601163864, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.20.4.ff.2.weight": { |
| "min": -0.6573402285575867, |
| "max": 0.5352922677993774, |
| "mean": -4.871936471317895e-05, |
| "std": 0.05284557491540909, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.20.4.ff.2.bias": { |
| "min": -0.1931021511554718, |
| "max": 0.5820591449737549, |
| "mean": -0.0005149454809725285, |
| "std": 0.04106936603784561, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.0.weight": { |
| "min": -0.4177095592021942, |
| "max": 0.37194108963012695, |
| "mean": 6.037503226252738e-06, |
| "std": 0.021621696650981903, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.21.1.g": { |
| "min": 0.21426498889923096, |
| "max": 0.7471067905426025, |
| "mean": 0.6495591998100281, |
| "std": 0.05437273159623146, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_q.weight": { |
| "min": -0.20954997837543488, |
| "max": 0.19577716290950775, |
| "mean": 4.0040544263320044e-05, |
| "std": 0.03946496173739433, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_q.bias": { |
| "min": -0.3292751908302307, |
| "max": 0.25935792922973633, |
| "mean": -0.003224420826882124, |
| "std": 0.05625506490468979, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_k.weight": { |
| "min": -0.2056337594985962, |
| "max": 0.25471389293670654, |
| "mean": 5.435157800093293e-05, |
| "std": 0.038567062467336655, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_k.bias": { |
| "min": -6.24283504486084, |
| "max": 6.9316864013671875, |
| "mean": 0.048334453254938126, |
| "std": 1.3849503993988037, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_v.weight": { |
| "min": -0.20960700511932373, |
| "max": 0.23016247153282166, |
| "mean": -5.2383575166459195e-06, |
| "std": 0.04131292924284935, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_v.bias": { |
| "min": -0.043877486139535904, |
| "max": 0.035942550748586655, |
| "mean": 4.677800461649895e-06, |
| "std": 0.012800506316125393, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_out.0.weight": { |
| "min": -0.39784368872642517, |
| "max": 0.3448275029659271, |
| "mean": -5.554455128731206e-05, |
| "std": 0.04238935187458992, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_out.0.bias": { |
| "min": -0.05505242943763733, |
| "max": 0.06286512315273285, |
| "mean": 0.0003699597145896405, |
| "std": 0.018672524020075798, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.3.g": { |
| "min": 0.3501029312610626, |
| "max": 1.0451030731201172, |
| "mean": 0.7893401980400085, |
| "std": 0.04874471575021744, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.4.ff.0.0.weight": { |
| "min": -0.3334510326385498, |
| "max": 0.38586220145225525, |
| "mean": -0.0001694880920695141, |
| "std": 0.041480448096990585, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.4.ff.0.0.bias": { |
| "min": -0.15723954141139984, |
| "max": 0.05913884937763214, |
| "mean": -0.031833715736866, |
| "std": 0.025140652433037758, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.21.4.ff.2.weight": { |
| "min": -0.6964147090911865, |
| "max": 0.4686952233314514, |
| "mean": -9.150124969892204e-05, |
| "std": 0.05179166793823242, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.21.4.ff.2.bias": { |
| "min": -0.24826228618621826, |
| "max": 0.32854214310646057, |
| "mean": -0.00024761329405009747, |
| "std": 0.0414327047765255, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.0.weight": { |
| "min": -0.2872432768344879, |
| "max": 0.35023465752601624, |
| "mean": -2.1361338440328836e-06, |
| "std": 0.024239059537649155, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.22.1.g": { |
| "min": 0.19656625390052795, |
| "max": 0.7792166471481323, |
| "mean": 0.6702941060066223, |
| "std": 0.058692529797554016, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_q.weight": { |
| "min": -0.22861525416374207, |
| "max": 0.23119905591011047, |
| "mean": -1.981826062547043e-05, |
| "std": 0.04044099524617195, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_q.bias": { |
| "min": -0.21965257823467255, |
| "max": 0.24067652225494385, |
| "mean": 0.0007787347421981394, |
| "std": 0.05579977110028267, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_k.weight": { |
| "min": -0.215622216463089, |
| "max": 0.22666674852371216, |
| "mean": -7.155455386964604e-05, |
| "std": 0.03937716409564018, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_k.bias": { |
| "min": -8.904394149780273, |
| "max": 9.067266464233398, |
| "mean": -0.001250309869647026, |
| "std": 1.8481073379516602, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_v.weight": { |
| "min": -0.2693168520927429, |
| "max": 0.25895655155181885, |
| "mean": 4.356484714662656e-05, |
| "std": 0.038407694548368454, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_v.bias": { |
| "min": -0.05762461572885513, |
| "max": 0.057689178735017776, |
| "mean": 0.00034963880898430943, |
| "std": 0.014724270440638065, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_out.0.weight": { |
| "min": -0.2649986743927002, |
| "max": 0.28868991136550903, |
| "mean": -6.175809539854527e-05, |
| "std": 0.039074063301086426, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_out.0.bias": { |
| "min": -0.043768905103206635, |
| "max": 0.0373171903192997, |
| "mean": -8.572106889914721e-05, |
| "std": 0.013365655206143856, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.3.g": { |
| "min": 0.3394976556301117, |
| "max": 1.0926626920700073, |
| "mean": 0.86370849609375, |
| "std": 0.06385412812232971, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.4.ff.0.0.weight": { |
| "min": -0.42326879501342773, |
| "max": 0.419196218252182, |
| "mean": 0.00031274266075342894, |
| "std": 0.043502915650606155, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.4.ff.0.0.bias": { |
| "min": -0.21476341784000397, |
| "max": 0.17061911523342133, |
| "mean": -0.029481371864676476, |
| "std": 0.031948987394571304, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.22.4.ff.2.weight": { |
| "min": -0.5996708869934082, |
| "max": 0.5596612691879272, |
| "mean": -0.00015256566985044628, |
| "std": 0.053446218371391296, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.22.4.ff.2.bias": { |
| "min": -0.17847125232219696, |
| "max": 0.3766724169254303, |
| "mean": 0.0013643248239532113, |
| "std": 0.037309642881155014, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.0.weight": { |
| "min": -0.39427170157432556, |
| "max": 0.3689534664154053, |
| "mean": 3.643418676801957e-05, |
| "std": 0.028621334582567215, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.23.1.g": { |
| "min": 0.2903065085411072, |
| "max": 0.826573371887207, |
| "mean": 0.7055738568305969, |
| "std": 0.06789194792509079, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_q.weight": { |
| "min": -0.9261522889137268, |
| "max": 1.0264601707458496, |
| "mean": -2.5637811631895602e-05, |
| "std": 0.047625649720430374, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_q.bias": { |
| "min": -0.8783160448074341, |
| "max": 0.8149734735488892, |
| "mean": -0.00031416097772307694, |
| "std": 0.09553803503513336, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_k.weight": { |
| "min": -0.2693849802017212, |
| "max": 0.24096263945102692, |
| "mean": -2.2922044081497006e-05, |
| "std": 0.03895637020468712, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_k.bias": { |
| "min": -23.73985481262207, |
| "max": 22.84831428527832, |
| "mean": -0.09187203645706177, |
| "std": 4.069868564605713, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_v.weight": { |
| "min": -0.227765753865242, |
| "max": 0.24508675932884216, |
| "mean": -2.5811230443650857e-05, |
| "std": 0.03863935545086861, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_v.bias": { |
| "min": -0.06041998043656349, |
| "max": 0.046056248247623444, |
| "mean": -0.00014605963951908052, |
| "std": 0.014698919840157032, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_out.0.weight": { |
| "min": -0.33846333622932434, |
| "max": 0.3745211064815521, |
| "mean": 7.246726454468444e-06, |
| "std": 0.04081542044878006, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_out.0.bias": { |
| "min": -0.0464671291410923, |
| "max": 0.1957084834575653, |
| "mean": 0.0002726902603171766, |
| "std": 0.013569602742791176, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.3.g": { |
| "min": 0.3744957149028778, |
| "max": 1.1300216913223267, |
| "mean": 0.8900200724601746, |
| "std": 0.06398579478263855, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.4.ff.0.0.weight": { |
| "min": -0.4477945864200592, |
| "max": 0.5424723625183105, |
| "mean": 2.4591532564954832e-05, |
| "std": 0.04556761309504509, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.4.ff.0.0.bias": { |
| "min": -0.22407397627830505, |
| "max": 0.08826831728219986, |
| "mean": -0.03201541677117348, |
| "std": 0.03776346147060394, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.23.4.ff.2.weight": { |
| "min": -0.7253258228302002, |
| "max": 0.6892617344856262, |
| "mean": 3.4524080547271296e-05, |
| "std": 0.05177822336554527, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.23.4.ff.2.bias": { |
| "min": -0.1745493859052658, |
| "max": 0.21855643391609192, |
| "mean": 4.002213245257735e-05, |
| "std": 0.0317784883081913, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.0.weight": { |
| "min": -0.3402628004550934, |
| "max": 0.37424033880233765, |
| "mean": 4.292904486646876e-05, |
| "std": 0.03414493426680565, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.24.1.g": { |
| "min": 0.3175790011882782, |
| "max": 1.2868926525115967, |
| "mean": 0.6014685034751892, |
| "std": 0.0834617167711258, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_q.weight": { |
| "min": -0.28334787487983704, |
| "max": 0.26021766662597656, |
| "mean": -3.078439021919621e-06, |
| "std": 0.03598484769463539, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_q.bias": { |
| "min": -0.23551659286022186, |
| "max": 0.20537099242210388, |
| "mean": 0.0002320160565432161, |
| "std": 0.056010857224464417, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_k.weight": { |
| "min": -0.4354335069656372, |
| "max": 0.3252001106739044, |
| "mean": 2.4517319616279565e-05, |
| "std": 0.03413575515151024, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_k.bias": { |
| "min": -5.544912338256836, |
| "max": 7.312640190124512, |
| "mean": -0.007366480305790901, |
| "std": 0.6992346048355103, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_v.weight": { |
| "min": -0.343842089176178, |
| "max": 0.36349090933799744, |
| "mean": 0.0001033815206028521, |
| "std": 0.04782803729176521, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_v.bias": { |
| "min": -0.07375385612249374, |
| "max": 0.06036338210105896, |
| "mean": 0.0009326335857622325, |
| "std": 0.014949528500437737, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_out.0.weight": { |
| "min": -0.25554072856903076, |
| "max": 0.28654900193214417, |
| "mean": 4.4343978515826166e-06, |
| "std": 0.041555255651474, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_out.0.bias": { |
| "min": -0.05532766133546829, |
| "max": 0.06282689422369003, |
| "mean": 0.00014148413902148604, |
| "std": 0.007174154743552208, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.3.g": { |
| "min": 0.49368223547935486, |
| "max": 1.2208430767059326, |
| "mean": 1.0134273767471313, |
| "std": 0.11743992567062378, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.4.ff.0.0.weight": { |
| "min": -1.0936156511306763, |
| "max": 1.0469433069229126, |
| "mean": -4.977267235517502e-05, |
| "std": 0.05241084843873978, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.4.ff.0.0.bias": { |
| "min": -0.22367312014102936, |
| "max": 0.17280347645282745, |
| "mean": -0.02724579907953739, |
| "std": 0.03635029122233391, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.24.4.ff.2.weight": { |
| "min": -0.8845533132553101, |
| "max": 0.9224876165390015, |
| "mean": -0.000146063175634481, |
| "std": 0.053282301872968674, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.24.4.ff.2.bias": { |
| "min": -0.17102308571338654, |
| "max": 0.37991419434547424, |
| "mean": 0.003368670353665948, |
| "std": 0.03989797830581665, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.0.weight": { |
| "min": -0.7772527933120728, |
| "max": 0.7234945297241211, |
| "mean": 1.913893902383279e-05, |
| "std": 0.04616517201066017, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.25.1.g": { |
| "min": 0.3385581970214844, |
| "max": 1.4277539253234863, |
| "mean": 0.9483213424682617, |
| "std": 0.20673882961273193, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_q.weight": { |
| "min": -1.7455896139144897, |
| "max": 1.7045435905456543, |
| "mean": 0.00022695818915963173, |
| "std": 0.15868604183197021, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_q.bias": { |
| "min": -1.199622631072998, |
| "max": 1.099592685699463, |
| "mean": -0.00953536294400692, |
| "std": 0.203833669424057, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_k.weight": { |
| "min": -0.4213031232357025, |
| "max": 0.42637819051742554, |
| "mean": 6.450257205870003e-05, |
| "std": 0.048018429428339005, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_k.bias": { |
| "min": -19.743934631347656, |
| "max": 19.539039611816406, |
| "mean": -0.24830012023448944, |
| "std": 4.776192665100098, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_v.weight": { |
| "min": -0.32387086749076843, |
| "max": 0.4384032189846039, |
| "mean": -1.2015252650598995e-05, |
| "std": 0.046161383390426636, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_v.bias": { |
| "min": -0.0340605154633522, |
| "max": 0.037125036120414734, |
| "mean": 0.0006421188591048121, |
| "std": 0.012921434827148914, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_out.0.weight": { |
| "min": -0.703487753868103, |
| "max": 0.6645694375038147, |
| "mean": 4.3493168050190434e-05, |
| "std": 0.0578836165368557, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_out.0.bias": { |
| "min": -0.0722307413816452, |
| "max": 0.06750312447547913, |
| "mean": -0.00013278273399919271, |
| "std": 0.012919807806611061, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.3.g": { |
| "min": 0.3801887333393097, |
| "max": 1.3909631967544556, |
| "mean": 1.0665581226348877, |
| "std": 0.2197146713733673, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.4.ff.0.0.weight": { |
| "min": -0.6164069175720215, |
| "max": 0.7170259952545166, |
| "mean": 0.00011130621714983135, |
| "std": 0.058021292090415955, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.4.ff.0.0.bias": { |
| "min": -0.21958374977111816, |
| "max": 0.2251792550086975, |
| "mean": 0.0062429094687104225, |
| "std": 0.04972800984978676, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.25.4.ff.2.weight": { |
| "min": -0.6296579241752625, |
| "max": 0.8892135620117188, |
| "mean": 1.1699157766997814e-05, |
| "std": 0.023528022691607475, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.25.4.ff.2.bias": { |
| "min": -0.5068321824073792, |
| "max": 0.4739873707294464, |
| "mean": -0.003016006201505661, |
| "std": 0.06930257380008698, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.norm_out.g": { |
| "min": 0.5377203226089478, |
| "max": 1.1807109117507935, |
| "mean": 0.7827430367469788, |
| "std": 0.09885811805725098, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.proj_out.weight": { |
| "min": -0.2669532299041748, |
| "max": 0.2126723825931549, |
| "mean": -0.00022305321181192994, |
| "std": 0.05399656668305397, |
| "sparsity": 0.0, |
| "shape": [ |
| 100, |
| 1024 |
| ] |
| }, |
| "transformer.proj_out.bias": { |
| "min": -0.23791296780109406, |
| "max": 0.014832733199000359, |
| "mean": -0.04395970329642296, |
| "std": 0.03433232381939888, |
| "sparsity": 0.0, |
| "shape": [ |
| 100 |
| ] |
| } |
| } |
| } |