| { |
| "layer_types": { |
| "transformer": 391 |
| }, |
| "parameter_counts": { |
| "transformer.time_embed.time_mlp.0.weight": 262144, |
| "transformer.time_embed.time_mlp.0.bias": 1024, |
| "transformer.time_embed.time_mlp.2.weight": 1048576, |
| "transformer.time_embed.time_mlp.2.bias": 1024, |
| "transformer.text_embed.text_embed.weight": 254600, |
| "transformer.input_embed.proj.weight": 307200, |
| "transformer.input_embed.proj.bias": 1024, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, |
| "transformer.layers.0.1.g": 1024, |
| "transformer.layers.0.2.to_q.weight": 1048576, |
| "transformer.layers.0.2.to_q.bias": 1024, |
| "transformer.layers.0.2.to_k.weight": 1048576, |
| "transformer.layers.0.2.to_k.bias": 1024, |
| "transformer.layers.0.2.to_v.weight": 1048576, |
| "transformer.layers.0.2.to_v.bias": 1024, |
| "transformer.layers.0.2.to_out.0.weight": 1048576, |
| "transformer.layers.0.2.to_out.0.bias": 1024, |
| "transformer.layers.0.3.g": 1024, |
| "transformer.layers.0.4.ff.0.0.weight": 4194304, |
| "transformer.layers.0.4.ff.0.0.bias": 4096, |
| "transformer.layers.0.4.ff.2.weight": 4194304, |
| "transformer.layers.0.4.ff.2.bias": 1024, |
| "transformer.layers.1.1.g": 1024, |
| "transformer.layers.1.2.to_q.weight": 1048576, |
| "transformer.layers.1.2.to_q.bias": 1024, |
| "transformer.layers.1.2.to_k.weight": 1048576, |
| "transformer.layers.1.2.to_k.bias": 1024, |
| "transformer.layers.1.2.to_v.weight": 1048576, |
| "transformer.layers.1.2.to_v.bias": 1024, |
| "transformer.layers.1.2.to_out.0.weight": 1048576, |
| "transformer.layers.1.2.to_out.0.bias": 1024, |
| "transformer.layers.1.3.g": 1024, |
| "transformer.layers.1.4.ff.0.0.weight": 4194304, |
| "transformer.layers.1.4.ff.0.0.bias": 4096, |
| "transformer.layers.1.4.ff.2.weight": 4194304, |
| "transformer.layers.1.4.ff.2.bias": 1024, |
| "transformer.layers.2.1.g": 1024, |
| "transformer.layers.2.2.to_q.weight": 1048576, |
| "transformer.layers.2.2.to_q.bias": 1024, |
| "transformer.layers.2.2.to_k.weight": 1048576, |
| "transformer.layers.2.2.to_k.bias": 1024, |
| "transformer.layers.2.2.to_v.weight": 1048576, |
| "transformer.layers.2.2.to_v.bias": 1024, |
| "transformer.layers.2.2.to_out.0.weight": 1048576, |
| "transformer.layers.2.2.to_out.0.bias": 1024, |
| "transformer.layers.2.3.g": 1024, |
| "transformer.layers.2.4.ff.0.0.weight": 4194304, |
| "transformer.layers.2.4.ff.0.0.bias": 4096, |
| "transformer.layers.2.4.ff.2.weight": 4194304, |
| "transformer.layers.2.4.ff.2.bias": 1024, |
| "transformer.layers.3.1.g": 1024, |
| "transformer.layers.3.2.to_q.weight": 1048576, |
| "transformer.layers.3.2.to_q.bias": 1024, |
| "transformer.layers.3.2.to_k.weight": 1048576, |
| "transformer.layers.3.2.to_k.bias": 1024, |
| "transformer.layers.3.2.to_v.weight": 1048576, |
| "transformer.layers.3.2.to_v.bias": 1024, |
| "transformer.layers.3.2.to_out.0.weight": 1048576, |
| "transformer.layers.3.2.to_out.0.bias": 1024, |
| "transformer.layers.3.3.g": 1024, |
| "transformer.layers.3.4.ff.0.0.weight": 4194304, |
| "transformer.layers.3.4.ff.0.0.bias": 4096, |
| "transformer.layers.3.4.ff.2.weight": 4194304, |
| "transformer.layers.3.4.ff.2.bias": 1024, |
| "transformer.layers.4.1.g": 1024, |
| "transformer.layers.4.2.to_q.weight": 1048576, |
| "transformer.layers.4.2.to_q.bias": 1024, |
| "transformer.layers.4.2.to_k.weight": 1048576, |
| "transformer.layers.4.2.to_k.bias": 1024, |
| "transformer.layers.4.2.to_v.weight": 1048576, |
| "transformer.layers.4.2.to_v.bias": 1024, |
| "transformer.layers.4.2.to_out.0.weight": 1048576, |
| "transformer.layers.4.2.to_out.0.bias": 1024, |
| "transformer.layers.4.3.g": 1024, |
| "transformer.layers.4.4.ff.0.0.weight": 4194304, |
| "transformer.layers.4.4.ff.0.0.bias": 4096, |
| "transformer.layers.4.4.ff.2.weight": 4194304, |
| "transformer.layers.4.4.ff.2.bias": 1024, |
| "transformer.layers.5.1.g": 1024, |
| "transformer.layers.5.2.to_q.weight": 1048576, |
| "transformer.layers.5.2.to_q.bias": 1024, |
| "transformer.layers.5.2.to_k.weight": 1048576, |
| "transformer.layers.5.2.to_k.bias": 1024, |
| "transformer.layers.5.2.to_v.weight": 1048576, |
| "transformer.layers.5.2.to_v.bias": 1024, |
| "transformer.layers.5.2.to_out.0.weight": 1048576, |
| "transformer.layers.5.2.to_out.0.bias": 1024, |
| "transformer.layers.5.3.g": 1024, |
| "transformer.layers.5.4.ff.0.0.weight": 4194304, |
| "transformer.layers.5.4.ff.0.0.bias": 4096, |
| "transformer.layers.5.4.ff.2.weight": 4194304, |
| "transformer.layers.5.4.ff.2.bias": 1024, |
| "transformer.layers.6.1.g": 1024, |
| "transformer.layers.6.2.to_q.weight": 1048576, |
| "transformer.layers.6.2.to_q.bias": 1024, |
| "transformer.layers.6.2.to_k.weight": 1048576, |
| "transformer.layers.6.2.to_k.bias": 1024, |
| "transformer.layers.6.2.to_v.weight": 1048576, |
| "transformer.layers.6.2.to_v.bias": 1024, |
| "transformer.layers.6.2.to_out.0.weight": 1048576, |
| "transformer.layers.6.2.to_out.0.bias": 1024, |
| "transformer.layers.6.3.g": 1024, |
| "transformer.layers.6.4.ff.0.0.weight": 4194304, |
| "transformer.layers.6.4.ff.0.0.bias": 4096, |
| "transformer.layers.6.4.ff.2.weight": 4194304, |
| "transformer.layers.6.4.ff.2.bias": 1024, |
| "transformer.layers.7.1.g": 1024, |
| "transformer.layers.7.2.to_q.weight": 1048576, |
| "transformer.layers.7.2.to_q.bias": 1024, |
| "transformer.layers.7.2.to_k.weight": 1048576, |
| "transformer.layers.7.2.to_k.bias": 1024, |
| "transformer.layers.7.2.to_v.weight": 1048576, |
| "transformer.layers.7.2.to_v.bias": 1024, |
| "transformer.layers.7.2.to_out.0.weight": 1048576, |
| "transformer.layers.7.2.to_out.0.bias": 1024, |
| "transformer.layers.7.3.g": 1024, |
| "transformer.layers.7.4.ff.0.0.weight": 4194304, |
| "transformer.layers.7.4.ff.0.0.bias": 4096, |
| "transformer.layers.7.4.ff.2.weight": 4194304, |
| "transformer.layers.7.4.ff.2.bias": 1024, |
| "transformer.layers.8.1.g": 1024, |
| "transformer.layers.8.2.to_q.weight": 1048576, |
| "transformer.layers.8.2.to_q.bias": 1024, |
| "transformer.layers.8.2.to_k.weight": 1048576, |
| "transformer.layers.8.2.to_k.bias": 1024, |
| "transformer.layers.8.2.to_v.weight": 1048576, |
| "transformer.layers.8.2.to_v.bias": 1024, |
| "transformer.layers.8.2.to_out.0.weight": 1048576, |
| "transformer.layers.8.2.to_out.0.bias": 1024, |
| "transformer.layers.8.3.g": 1024, |
| "transformer.layers.8.4.ff.0.0.weight": 4194304, |
| "transformer.layers.8.4.ff.0.0.bias": 4096, |
| "transformer.layers.8.4.ff.2.weight": 4194304, |
| "transformer.layers.8.4.ff.2.bias": 1024, |
| "transformer.layers.9.1.g": 1024, |
| "transformer.layers.9.2.to_q.weight": 1048576, |
| "transformer.layers.9.2.to_q.bias": 1024, |
| "transformer.layers.9.2.to_k.weight": 1048576, |
| "transformer.layers.9.2.to_k.bias": 1024, |
| "transformer.layers.9.2.to_v.weight": 1048576, |
| "transformer.layers.9.2.to_v.bias": 1024, |
| "transformer.layers.9.2.to_out.0.weight": 1048576, |
| "transformer.layers.9.2.to_out.0.bias": 1024, |
| "transformer.layers.9.3.g": 1024, |
| "transformer.layers.9.4.ff.0.0.weight": 4194304, |
| "transformer.layers.9.4.ff.0.0.bias": 4096, |
| "transformer.layers.9.4.ff.2.weight": 4194304, |
| "transformer.layers.9.4.ff.2.bias": 1024, |
| "transformer.layers.10.1.g": 1024, |
| "transformer.layers.10.2.to_q.weight": 1048576, |
| "transformer.layers.10.2.to_q.bias": 1024, |
| "transformer.layers.10.2.to_k.weight": 1048576, |
| "transformer.layers.10.2.to_k.bias": 1024, |
| "transformer.layers.10.2.to_v.weight": 1048576, |
| "transformer.layers.10.2.to_v.bias": 1024, |
| "transformer.layers.10.2.to_out.0.weight": 1048576, |
| "transformer.layers.10.2.to_out.0.bias": 1024, |
| "transformer.layers.10.3.g": 1024, |
| "transformer.layers.10.4.ff.0.0.weight": 4194304, |
| "transformer.layers.10.4.ff.0.0.bias": 4096, |
| "transformer.layers.10.4.ff.2.weight": 4194304, |
| "transformer.layers.10.4.ff.2.bias": 1024, |
| "transformer.layers.11.1.g": 1024, |
| "transformer.layers.11.2.to_q.weight": 1048576, |
| "transformer.layers.11.2.to_q.bias": 1024, |
| "transformer.layers.11.2.to_k.weight": 1048576, |
| "transformer.layers.11.2.to_k.bias": 1024, |
| "transformer.layers.11.2.to_v.weight": 1048576, |
| "transformer.layers.11.2.to_v.bias": 1024, |
| "transformer.layers.11.2.to_out.0.weight": 1048576, |
| "transformer.layers.11.2.to_out.0.bias": 1024, |
| "transformer.layers.11.3.g": 1024, |
| "transformer.layers.11.4.ff.0.0.weight": 4194304, |
| "transformer.layers.11.4.ff.0.0.bias": 4096, |
| "transformer.layers.11.4.ff.2.weight": 4194304, |
| "transformer.layers.11.4.ff.2.bias": 1024, |
| "transformer.layers.12.1.g": 1024, |
| "transformer.layers.12.2.to_q.weight": 1048576, |
| "transformer.layers.12.2.to_q.bias": 1024, |
| "transformer.layers.12.2.to_k.weight": 1048576, |
| "transformer.layers.12.2.to_k.bias": 1024, |
| "transformer.layers.12.2.to_v.weight": 1048576, |
| "transformer.layers.12.2.to_v.bias": 1024, |
| "transformer.layers.12.2.to_out.0.weight": 1048576, |
| "transformer.layers.12.2.to_out.0.bias": 1024, |
| "transformer.layers.12.3.g": 1024, |
| "transformer.layers.12.4.ff.0.0.weight": 4194304, |
| "transformer.layers.12.4.ff.0.0.bias": 4096, |
| "transformer.layers.12.4.ff.2.weight": 4194304, |
| "transformer.layers.12.4.ff.2.bias": 1024, |
| "transformer.layers.13.0.weight": 2097152, |
| "transformer.layers.13.1.g": 1024, |
| "transformer.layers.13.2.to_q.weight": 1048576, |
| "transformer.layers.13.2.to_q.bias": 1024, |
| "transformer.layers.13.2.to_k.weight": 1048576, |
| "transformer.layers.13.2.to_k.bias": 1024, |
| "transformer.layers.13.2.to_v.weight": 1048576, |
| "transformer.layers.13.2.to_v.bias": 1024, |
| "transformer.layers.13.2.to_out.0.weight": 1048576, |
| "transformer.layers.13.2.to_out.0.bias": 1024, |
| "transformer.layers.13.3.g": 1024, |
| "transformer.layers.13.4.ff.0.0.weight": 4194304, |
| "transformer.layers.13.4.ff.0.0.bias": 4096, |
| "transformer.layers.13.4.ff.2.weight": 4194304, |
| "transformer.layers.13.4.ff.2.bias": 1024, |
| "transformer.layers.14.0.weight": 2097152, |
| "transformer.layers.14.1.g": 1024, |
| "transformer.layers.14.2.to_q.weight": 1048576, |
| "transformer.layers.14.2.to_q.bias": 1024, |
| "transformer.layers.14.2.to_k.weight": 1048576, |
| "transformer.layers.14.2.to_k.bias": 1024, |
| "transformer.layers.14.2.to_v.weight": 1048576, |
| "transformer.layers.14.2.to_v.bias": 1024, |
| "transformer.layers.14.2.to_out.0.weight": 1048576, |
| "transformer.layers.14.2.to_out.0.bias": 1024, |
| "transformer.layers.14.3.g": 1024, |
| "transformer.layers.14.4.ff.0.0.weight": 4194304, |
| "transformer.layers.14.4.ff.0.0.bias": 4096, |
| "transformer.layers.14.4.ff.2.weight": 4194304, |
| "transformer.layers.14.4.ff.2.bias": 1024, |
| "transformer.layers.15.0.weight": 2097152, |
| "transformer.layers.15.1.g": 1024, |
| "transformer.layers.15.2.to_q.weight": 1048576, |
| "transformer.layers.15.2.to_q.bias": 1024, |
| "transformer.layers.15.2.to_k.weight": 1048576, |
| "transformer.layers.15.2.to_k.bias": 1024, |
| "transformer.layers.15.2.to_v.weight": 1048576, |
| "transformer.layers.15.2.to_v.bias": 1024, |
| "transformer.layers.15.2.to_out.0.weight": 1048576, |
| "transformer.layers.15.2.to_out.0.bias": 1024, |
| "transformer.layers.15.3.g": 1024, |
| "transformer.layers.15.4.ff.0.0.weight": 4194304, |
| "transformer.layers.15.4.ff.0.0.bias": 4096, |
| "transformer.layers.15.4.ff.2.weight": 4194304, |
| "transformer.layers.15.4.ff.2.bias": 1024, |
| "transformer.layers.16.0.weight": 2097152, |
| "transformer.layers.16.1.g": 1024, |
| "transformer.layers.16.2.to_q.weight": 1048576, |
| "transformer.layers.16.2.to_q.bias": 1024, |
| "transformer.layers.16.2.to_k.weight": 1048576, |
| "transformer.layers.16.2.to_k.bias": 1024, |
| "transformer.layers.16.2.to_v.weight": 1048576, |
| "transformer.layers.16.2.to_v.bias": 1024, |
| "transformer.layers.16.2.to_out.0.weight": 1048576, |
| "transformer.layers.16.2.to_out.0.bias": 1024, |
| "transformer.layers.16.3.g": 1024, |
| "transformer.layers.16.4.ff.0.0.weight": 4194304, |
| "transformer.layers.16.4.ff.0.0.bias": 4096, |
| "transformer.layers.16.4.ff.2.weight": 4194304, |
| "transformer.layers.16.4.ff.2.bias": 1024, |
| "transformer.layers.17.0.weight": 2097152, |
| "transformer.layers.17.1.g": 1024, |
| "transformer.layers.17.2.to_q.weight": 1048576, |
| "transformer.layers.17.2.to_q.bias": 1024, |
| "transformer.layers.17.2.to_k.weight": 1048576, |
| "transformer.layers.17.2.to_k.bias": 1024, |
| "transformer.layers.17.2.to_v.weight": 1048576, |
| "transformer.layers.17.2.to_v.bias": 1024, |
| "transformer.layers.17.2.to_out.0.weight": 1048576, |
| "transformer.layers.17.2.to_out.0.bias": 1024, |
| "transformer.layers.17.3.g": 1024, |
| "transformer.layers.17.4.ff.0.0.weight": 4194304, |
| "transformer.layers.17.4.ff.0.0.bias": 4096, |
| "transformer.layers.17.4.ff.2.weight": 4194304, |
| "transformer.layers.17.4.ff.2.bias": 1024, |
| "transformer.layers.18.0.weight": 2097152, |
| "transformer.layers.18.1.g": 1024, |
| "transformer.layers.18.2.to_q.weight": 1048576, |
| "transformer.layers.18.2.to_q.bias": 1024, |
| "transformer.layers.18.2.to_k.weight": 1048576, |
| "transformer.layers.18.2.to_k.bias": 1024, |
| "transformer.layers.18.2.to_v.weight": 1048576, |
| "transformer.layers.18.2.to_v.bias": 1024, |
| "transformer.layers.18.2.to_out.0.weight": 1048576, |
| "transformer.layers.18.2.to_out.0.bias": 1024, |
| "transformer.layers.18.3.g": 1024, |
| "transformer.layers.18.4.ff.0.0.weight": 4194304, |
| "transformer.layers.18.4.ff.0.0.bias": 4096, |
| "transformer.layers.18.4.ff.2.weight": 4194304, |
| "transformer.layers.18.4.ff.2.bias": 1024, |
| "transformer.layers.19.0.weight": 2097152, |
| "transformer.layers.19.1.g": 1024, |
| "transformer.layers.19.2.to_q.weight": 1048576, |
| "transformer.layers.19.2.to_q.bias": 1024, |
| "transformer.layers.19.2.to_k.weight": 1048576, |
| "transformer.layers.19.2.to_k.bias": 1024, |
| "transformer.layers.19.2.to_v.weight": 1048576, |
| "transformer.layers.19.2.to_v.bias": 1024, |
| "transformer.layers.19.2.to_out.0.weight": 1048576, |
| "transformer.layers.19.2.to_out.0.bias": 1024, |
| "transformer.layers.19.3.g": 1024, |
| "transformer.layers.19.4.ff.0.0.weight": 4194304, |
| "transformer.layers.19.4.ff.0.0.bias": 4096, |
| "transformer.layers.19.4.ff.2.weight": 4194304, |
| "transformer.layers.19.4.ff.2.bias": 1024, |
| "transformer.layers.20.0.weight": 2097152, |
| "transformer.layers.20.1.g": 1024, |
| "transformer.layers.20.2.to_q.weight": 1048576, |
| "transformer.layers.20.2.to_q.bias": 1024, |
| "transformer.layers.20.2.to_k.weight": 1048576, |
| "transformer.layers.20.2.to_k.bias": 1024, |
| "transformer.layers.20.2.to_v.weight": 1048576, |
| "transformer.layers.20.2.to_v.bias": 1024, |
| "transformer.layers.20.2.to_out.0.weight": 1048576, |
| "transformer.layers.20.2.to_out.0.bias": 1024, |
| "transformer.layers.20.3.g": 1024, |
| "transformer.layers.20.4.ff.0.0.weight": 4194304, |
| "transformer.layers.20.4.ff.0.0.bias": 4096, |
| "transformer.layers.20.4.ff.2.weight": 4194304, |
| "transformer.layers.20.4.ff.2.bias": 1024, |
| "transformer.layers.21.0.weight": 2097152, |
| "transformer.layers.21.1.g": 1024, |
| "transformer.layers.21.2.to_q.weight": 1048576, |
| "transformer.layers.21.2.to_q.bias": 1024, |
| "transformer.layers.21.2.to_k.weight": 1048576, |
| "transformer.layers.21.2.to_k.bias": 1024, |
| "transformer.layers.21.2.to_v.weight": 1048576, |
| "transformer.layers.21.2.to_v.bias": 1024, |
| "transformer.layers.21.2.to_out.0.weight": 1048576, |
| "transformer.layers.21.2.to_out.0.bias": 1024, |
| "transformer.layers.21.3.g": 1024, |
| "transformer.layers.21.4.ff.0.0.weight": 4194304, |
| "transformer.layers.21.4.ff.0.0.bias": 4096, |
| "transformer.layers.21.4.ff.2.weight": 4194304, |
| "transformer.layers.21.4.ff.2.bias": 1024, |
| "transformer.layers.22.0.weight": 2097152, |
| "transformer.layers.22.1.g": 1024, |
| "transformer.layers.22.2.to_q.weight": 1048576, |
| "transformer.layers.22.2.to_q.bias": 1024, |
| "transformer.layers.22.2.to_k.weight": 1048576, |
| "transformer.layers.22.2.to_k.bias": 1024, |
| "transformer.layers.22.2.to_v.weight": 1048576, |
| "transformer.layers.22.2.to_v.bias": 1024, |
| "transformer.layers.22.2.to_out.0.weight": 1048576, |
| "transformer.layers.22.2.to_out.0.bias": 1024, |
| "transformer.layers.22.3.g": 1024, |
| "transformer.layers.22.4.ff.0.0.weight": 4194304, |
| "transformer.layers.22.4.ff.0.0.bias": 4096, |
| "transformer.layers.22.4.ff.2.weight": 4194304, |
| "transformer.layers.22.4.ff.2.bias": 1024, |
| "transformer.layers.23.0.weight": 2097152, |
| "transformer.layers.23.1.g": 1024, |
| "transformer.layers.23.2.to_q.weight": 1048576, |
| "transformer.layers.23.2.to_q.bias": 1024, |
| "transformer.layers.23.2.to_k.weight": 1048576, |
| "transformer.layers.23.2.to_k.bias": 1024, |
| "transformer.layers.23.2.to_v.weight": 1048576, |
| "transformer.layers.23.2.to_v.bias": 1024, |
| "transformer.layers.23.2.to_out.0.weight": 1048576, |
| "transformer.layers.23.2.to_out.0.bias": 1024, |
| "transformer.layers.23.3.g": 1024, |
| "transformer.layers.23.4.ff.0.0.weight": 4194304, |
| "transformer.layers.23.4.ff.0.0.bias": 4096, |
| "transformer.layers.23.4.ff.2.weight": 4194304, |
| "transformer.layers.23.4.ff.2.bias": 1024, |
| "transformer.layers.24.0.weight": 2097152, |
| "transformer.layers.24.1.g": 1024, |
| "transformer.layers.24.2.to_q.weight": 1048576, |
| "transformer.layers.24.2.to_q.bias": 1024, |
| "transformer.layers.24.2.to_k.weight": 1048576, |
| "transformer.layers.24.2.to_k.bias": 1024, |
| "transformer.layers.24.2.to_v.weight": 1048576, |
| "transformer.layers.24.2.to_v.bias": 1024, |
| "transformer.layers.24.2.to_out.0.weight": 1048576, |
| "transformer.layers.24.2.to_out.0.bias": 1024, |
| "transformer.layers.24.3.g": 1024, |
| "transformer.layers.24.4.ff.0.0.weight": 4194304, |
| "transformer.layers.24.4.ff.0.0.bias": 4096, |
| "transformer.layers.24.4.ff.2.weight": 4194304, |
| "transformer.layers.24.4.ff.2.bias": 1024, |
| "transformer.layers.25.0.weight": 2097152, |
| "transformer.layers.25.1.g": 1024, |
| "transformer.layers.25.2.to_q.weight": 1048576, |
| "transformer.layers.25.2.to_q.bias": 1024, |
| "transformer.layers.25.2.to_k.weight": 1048576, |
| "transformer.layers.25.2.to_k.bias": 1024, |
| "transformer.layers.25.2.to_v.weight": 1048576, |
| "transformer.layers.25.2.to_v.bias": 1024, |
| "transformer.layers.25.2.to_out.0.weight": 1048576, |
| "transformer.layers.25.2.to_out.0.bias": 1024, |
| "transformer.layers.25.3.g": 1024, |
| "transformer.layers.25.4.ff.0.0.weight": 4194304, |
| "transformer.layers.25.4.ff.0.0.bias": 4096, |
| "transformer.layers.25.4.ff.2.weight": 4194304, |
| "transformer.layers.25.4.ff.2.bias": 1024, |
| "transformer.norm_out.g": 1024, |
| "transformer.proj_out.weight": 102400, |
| "transformer.proj_out.bias": 100 |
| }, |
| "important_layers": [ |
| "transformer.time_embed.time_mlp.0.weight", |
| "transformer.time_embed.time_mlp.2.weight", |
| "transformer.text_embed.text_embed.weight", |
| "transformer.input_embed.proj.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight", |
| "transformer.layers.0.2.to_q.weight", |
| "transformer.layers.0.2.to_k.weight", |
| "transformer.layers.0.2.to_v.weight", |
| "transformer.layers.0.2.to_out.0.weight", |
| "transformer.layers.0.4.ff.0.0.weight", |
| "transformer.layers.0.4.ff.2.weight", |
| "transformer.layers.1.2.to_q.weight", |
| "transformer.layers.1.2.to_k.weight", |
| "transformer.layers.1.2.to_v.weight", |
| "transformer.layers.1.2.to_out.0.weight", |
| "transformer.layers.1.4.ff.0.0.weight", |
| "transformer.layers.1.4.ff.2.weight", |
| "transformer.layers.2.2.to_q.weight", |
| "transformer.layers.2.2.to_k.weight", |
| "transformer.layers.2.2.to_v.weight", |
| "transformer.layers.2.2.to_out.0.weight", |
| "transformer.layers.2.4.ff.0.0.weight", |
| "transformer.layers.2.4.ff.2.weight", |
| "transformer.layers.3.2.to_q.weight", |
| "transformer.layers.3.2.to_k.weight", |
| "transformer.layers.3.2.to_v.weight", |
| "transformer.layers.3.2.to_out.0.weight", |
| "transformer.layers.3.4.ff.0.0.weight", |
| "transformer.layers.3.4.ff.2.weight", |
| "transformer.layers.4.2.to_q.weight", |
| "transformer.layers.4.2.to_k.weight", |
| "transformer.layers.4.2.to_v.weight", |
| "transformer.layers.4.2.to_out.0.weight", |
| "transformer.layers.4.4.ff.0.0.weight", |
| "transformer.layers.4.4.ff.2.weight", |
| "transformer.layers.5.2.to_q.weight", |
| "transformer.layers.5.2.to_k.weight", |
| "transformer.layers.5.2.to_v.weight", |
| "transformer.layers.5.2.to_out.0.weight", |
| "transformer.layers.5.4.ff.0.0.weight", |
| "transformer.layers.5.4.ff.2.weight", |
| "transformer.layers.6.2.to_q.weight", |
| "transformer.layers.6.2.to_k.weight", |
| "transformer.layers.6.2.to_v.weight", |
| "transformer.layers.6.2.to_out.0.weight", |
| "transformer.layers.6.4.ff.0.0.weight", |
| "transformer.layers.6.4.ff.2.weight", |
| "transformer.layers.7.2.to_q.weight", |
| "transformer.layers.7.2.to_k.weight", |
| "transformer.layers.7.2.to_v.weight", |
| "transformer.layers.7.2.to_out.0.weight", |
| "transformer.layers.7.4.ff.0.0.weight", |
| "transformer.layers.7.4.ff.2.weight", |
| "transformer.layers.8.4.ff.0.0.weight", |
| "transformer.layers.8.4.ff.2.weight", |
| "transformer.layers.9.4.ff.0.0.weight", |
| "transformer.layers.9.4.ff.2.weight", |
| "transformer.layers.10.4.ff.0.0.weight", |
| "transformer.layers.10.4.ff.2.weight", |
| "transformer.layers.11.4.ff.0.0.weight", |
| "transformer.layers.11.4.ff.2.weight", |
| "transformer.layers.12.4.ff.0.0.weight", |
| "transformer.layers.12.4.ff.2.weight", |
| "transformer.layers.13.0.weight", |
| "transformer.layers.13.4.ff.0.0.weight", |
| "transformer.layers.13.4.ff.2.weight", |
| "transformer.layers.14.0.weight", |
| "transformer.layers.14.4.ff.0.0.weight", |
| "transformer.layers.14.4.ff.2.weight", |
| "transformer.layers.15.0.weight", |
| "transformer.layers.15.4.ff.0.0.weight", |
| "transformer.layers.15.4.ff.2.weight", |
| "transformer.layers.16.4.ff.0.0.weight", |
| "transformer.layers.16.4.ff.2.weight", |
| "transformer.layers.17.4.ff.0.0.weight", |
| "transformer.layers.17.4.ff.2.weight", |
| "transformer.layers.18.4.ff.0.0.weight", |
| "transformer.layers.18.4.ff.2.weight", |
| "transformer.layers.19.4.ff.0.0.weight", |
| "transformer.layers.19.4.ff.2.weight", |
| "transformer.layers.20.4.ff.0.0.weight", |
| "transformer.layers.20.4.ff.2.weight", |
| "transformer.layers.21.4.ff.0.0.weight", |
| "transformer.layers.21.4.ff.2.weight", |
| "transformer.layers.22.4.ff.0.0.weight", |
| "transformer.layers.22.4.ff.2.weight", |
| "transformer.layers.23.4.ff.0.0.weight", |
| "transformer.layers.23.4.ff.2.weight", |
| "transformer.layers.24.4.ff.0.0.weight", |
| "transformer.layers.24.4.ff.2.weight", |
| "transformer.layers.25.4.ff.0.0.weight", |
| "transformer.layers.25.4.ff.2.weight" |
| ], |
| "bottleneck_layers": [], |
| "recommendations": { |
| "focus_layers": [ |
| "transformer.time_embed.time_mlp.0.weight", |
| "transformer.time_embed.time_mlp.2.weight", |
| "transformer.text_embed.text_embed.weight", |
| "transformer.input_embed.proj.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight", |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight", |
| "transformer.layers.0.2.to_q.weight", |
| "transformer.layers.0.2.to_k.weight", |
| "transformer.layers.0.2.to_v.weight", |
| "transformer.layers.0.2.to_out.0.weight", |
| "transformer.layers.0.4.ff.0.0.weight", |
| "transformer.layers.0.4.ff.2.weight", |
| "transformer.layers.1.2.to_q.weight", |
| "transformer.layers.1.2.to_k.weight", |
| "transformer.layers.1.2.to_v.weight", |
| "transformer.layers.1.2.to_out.0.weight", |
| "transformer.layers.1.4.ff.0.0.weight", |
| "transformer.layers.1.4.ff.2.weight", |
| "transformer.layers.2.2.to_q.weight", |
| "transformer.layers.2.2.to_k.weight", |
| "transformer.layers.2.2.to_v.weight", |
| "transformer.layers.2.2.to_out.0.weight", |
| "transformer.layers.2.4.ff.0.0.weight", |
| "transformer.layers.2.4.ff.2.weight", |
| "transformer.layers.3.2.to_q.weight", |
| "transformer.layers.3.2.to_k.weight", |
| "transformer.layers.3.2.to_v.weight", |
| "transformer.layers.3.2.to_out.0.weight", |
| "transformer.layers.3.4.ff.0.0.weight", |
| "transformer.layers.3.4.ff.2.weight", |
| "transformer.layers.4.2.to_q.weight", |
| "transformer.layers.4.2.to_k.weight", |
| "transformer.layers.4.2.to_v.weight", |
| "transformer.layers.4.2.to_out.0.weight", |
| "transformer.layers.4.4.ff.0.0.weight", |
| "transformer.layers.4.4.ff.2.weight", |
| "transformer.layers.5.2.to_q.weight", |
| "transformer.layers.5.2.to_k.weight", |
| "transformer.layers.5.2.to_v.weight", |
| "transformer.layers.5.2.to_out.0.weight", |
| "transformer.layers.5.4.ff.0.0.weight", |
| "transformer.layers.5.4.ff.2.weight", |
| "transformer.layers.6.2.to_q.weight", |
| "transformer.layers.6.2.to_k.weight", |
| "transformer.layers.6.2.to_v.weight", |
| "transformer.layers.6.2.to_out.0.weight", |
| "transformer.layers.6.4.ff.0.0.weight", |
| "transformer.layers.6.4.ff.2.weight", |
| "transformer.layers.7.2.to_q.weight", |
| "transformer.layers.7.2.to_k.weight", |
| "transformer.layers.7.2.to_v.weight", |
| "transformer.layers.7.2.to_out.0.weight", |
| "transformer.layers.7.4.ff.0.0.weight", |
| "transformer.layers.7.4.ff.2.weight", |
| "transformer.layers.8.4.ff.0.0.weight", |
| "transformer.layers.8.4.ff.2.weight", |
| "transformer.layers.9.4.ff.0.0.weight", |
| "transformer.layers.9.4.ff.2.weight", |
| "transformer.layers.10.4.ff.0.0.weight", |
| "transformer.layers.10.4.ff.2.weight", |
| "transformer.layers.11.4.ff.0.0.weight", |
| "transformer.layers.11.4.ff.2.weight", |
| "transformer.layers.12.4.ff.0.0.weight", |
| "transformer.layers.12.4.ff.2.weight", |
| "transformer.layers.13.0.weight", |
| "transformer.layers.13.4.ff.0.0.weight", |
| "transformer.layers.13.4.ff.2.weight", |
| "transformer.layers.14.0.weight", |
| "transformer.layers.14.4.ff.0.0.weight", |
| "transformer.layers.14.4.ff.2.weight", |
| "transformer.layers.15.0.weight", |
| "transformer.layers.15.4.ff.0.0.weight", |
| "transformer.layers.15.4.ff.2.weight", |
| "transformer.layers.16.4.ff.0.0.weight", |
| "transformer.layers.16.4.ff.2.weight", |
| "transformer.layers.17.4.ff.0.0.weight", |
| "transformer.layers.17.4.ff.2.weight", |
| "transformer.layers.18.4.ff.0.0.weight", |
| "transformer.layers.18.4.ff.2.weight", |
| "transformer.layers.19.4.ff.0.0.weight", |
| "transformer.layers.19.4.ff.2.weight", |
| "transformer.layers.20.4.ff.0.0.weight", |
| "transformer.layers.20.4.ff.2.weight", |
| "transformer.layers.21.4.ff.0.0.weight", |
| "transformer.layers.21.4.ff.2.weight", |
| "transformer.layers.22.4.ff.0.0.weight", |
| "transformer.layers.22.4.ff.2.weight", |
| "transformer.layers.23.4.ff.0.0.weight", |
| "transformer.layers.23.4.ff.2.weight", |
| "transformer.layers.24.4.ff.0.0.weight", |
| "transformer.layers.24.4.ff.2.weight", |
| "transformer.layers.25.4.ff.0.0.weight", |
| "transformer.layers.25.4.ff.2.weight" |
| ] |
| }, |
| "total_parameters": 391, |
| "total_elements": 360755948, |
| "param_ranges": { |
| "transformer.time_embed.time_mlp.0.weight": { |
| "min": -0.4310249388217926, |
| "max": 0.29892200231552124, |
| "mean": -0.0025504794903099537, |
| "std": 0.0425548329949379, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 256 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.0.bias": { |
| "min": -0.06312082707881927, |
| "max": 0.10854886472225189, |
| "mean": 0.000634247378911823, |
| "std": 0.03414047509431839, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.2.weight": { |
| "min": -0.4126858711242676, |
| "max": 0.8365619778633118, |
| "mean": -0.00020620696886908263, |
| "std": 0.02410798706114292, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.time_embed.time_mlp.2.bias": { |
| "min": -0.1163593977689743, |
| "max": 0.32443463802337646, |
| "mean": -0.0009363778517581522, |
| "std": 0.019653797149658203, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.text_embed.text_embed.weight": { |
| "min": -2.8154137134552, |
| "max": 2.8935482501983643, |
| "mean": -0.0003568639513105154, |
| "std": 0.6153793334960938, |
| "sparsity": 0.0, |
| "shape": [ |
| 2546, |
| 100 |
| ] |
| }, |
| "transformer.input_embed.proj.weight": { |
| "min": -0.2813769578933716, |
| "max": 0.38245514035224915, |
| "mean": 0.00042411635513417423, |
| "std": 0.04274803400039673, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 300 |
| ] |
| }, |
| "transformer.input_embed.proj.bias": { |
| "min": -0.22421328723430634, |
| "max": 0.21138469874858856, |
| "mean": -0.004506870172917843, |
| "std": 0.04105628281831741, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { |
| "min": -0.4279993176460266, |
| "max": 0.47548574209213257, |
| "mean": 4.261187768861419e-06, |
| "std": 0.02450713701546192, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 64, |
| 31 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { |
| "min": -0.327997088432312, |
| "max": 0.15884317457675934, |
| "mean": -0.04679153859615326, |
| "std": 0.05176762491464615, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { |
| "min": -0.4111199676990509, |
| "max": 0.35511136054992676, |
| "mean": -0.00012967045768164098, |
| "std": 0.02359858900308609, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 64, |
| 31 |
| ] |
| }, |
| "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { |
| "min": -0.23166728019714355, |
| "max": 0.26478779315948486, |
| "mean": -0.029217107221484184, |
| "std": 0.0495423898100853, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.1.g": { |
| "min": 0.2546941041946411, |
| "max": 0.8268164992332458, |
| "mean": 0.5258853435516357, |
| "std": 0.08176200091838837, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_q.weight": { |
| "min": -0.29768767952919006, |
| "max": 0.26705101132392883, |
| "mean": -0.00042415110510773957, |
| "std": 0.03210066258907318, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_q.bias": { |
| "min": -0.09323342144489288, |
| "max": 0.12589719891548157, |
| "mean": 0.0006516888970509171, |
| "std": 0.02578314207494259, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_k.weight": { |
| "min": -0.2915492653846741, |
| "max": 0.2830723226070404, |
| "mean": -7.510973955504596e-05, |
| "std": 0.03093201108276844, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_k.bias": { |
| "min": -5.933852195739746, |
| "max": 5.848132610321045, |
| "mean": -0.009441309608519077, |
| "std": 1.2997525930404663, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_v.weight": { |
| "min": -0.4259975850582123, |
| "max": 0.34512922167778015, |
| "mean": 9.808027243707329e-05, |
| "std": 0.029951922595500946, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_v.bias": { |
| "min": -0.028870832175016403, |
| "max": 0.027608035132288933, |
| "mean": -0.0003159761254210025, |
| "std": 0.012566526420414448, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_out.0.weight": { |
| "min": -0.4554309844970703, |
| "max": 0.44925424456596375, |
| "mean": 2.2834456103737466e-05, |
| "std": 0.023853331804275513, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.2.to_out.0.bias": { |
| "min": -0.08927308022975922, |
| "max": 0.09165928512811661, |
| "mean": 0.002274596830829978, |
| "std": 0.019546369090676308, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.3.g": { |
| "min": 0.26676347851753235, |
| "max": 1.06475031375885, |
| "mean": 0.5317091345787048, |
| "std": 0.1056147962808609, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.4.ff.0.0.weight": { |
| "min": -0.5758013129234314, |
| "max": 0.60973060131073, |
| "mean": -0.00043392262887209654, |
| "std": 0.03859521821141243, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.0.4.ff.0.0.bias": { |
| "min": -0.18311595916748047, |
| "max": 0.045692577958106995, |
| "mean": -0.02953081764280796, |
| "std": 0.04277201369404793, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.0.4.ff.2.weight": { |
| "min": -1.169153094291687, |
| "max": 1.6363517045974731, |
| "mean": 0.00031960621709004045, |
| "std": 0.027692886069417, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.0.4.ff.2.bias": { |
| "min": -0.16331635415554047, |
| "max": 0.20692557096481323, |
| "mean": -0.02113202027976513, |
| "std": 0.0279996357858181, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.1.g": { |
| "min": 0.22424264252185822, |
| "max": 0.8506074547767639, |
| "mean": 0.487909197807312, |
| "std": 0.0759621262550354, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_q.weight": { |
| "min": -0.25719332695007324, |
| "max": 0.3069766163825989, |
| "mean": -8.219409210141748e-06, |
| "std": 0.033469025045633316, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_q.bias": { |
| "min": -0.0958663746714592, |
| "max": 0.1111140251159668, |
| "mean": 6.868487980682403e-05, |
| "std": 0.02699616365134716, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_k.weight": { |
| "min": -0.2987782061100006, |
| "max": 0.2982846796512604, |
| "mean": 5.100301495986059e-05, |
| "std": 0.03253886476159096, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_k.bias": { |
| "min": -5.194380760192871, |
| "max": 5.11414098739624, |
| "mean": -0.01477175671607256, |
| "std": 1.1622190475463867, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_v.weight": { |
| "min": -0.3454170525074005, |
| "max": 0.3440503478050232, |
| "mean": 7.885548257036135e-05, |
| "std": 0.03005816601216793, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_v.bias": { |
| "min": -0.036366600543260574, |
| "max": 0.033365145325660706, |
| "mean": -0.00014353547885548323, |
| "std": 0.013023492880165577, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_out.0.weight": { |
| "min": -0.3166007697582245, |
| "max": 0.37669771909713745, |
| "mean": -2.1011579519836232e-05, |
| "std": 0.024054987356066704, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.2.to_out.0.bias": { |
| "min": -0.10603390634059906, |
| "max": 0.12274863570928574, |
| "mean": -0.0019654321949929, |
| "std": 0.028894905000925064, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.3.g": { |
| "min": 0.311918169260025, |
| "max": 1.1306103467941284, |
| "mean": 0.666860818862915, |
| "std": 0.0989983081817627, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.4.ff.0.0.weight": { |
| "min": -0.8729648590087891, |
| "max": 0.6280122995376587, |
| "mean": 0.0016747020417824388, |
| "std": 0.047436561435461044, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.1.4.ff.0.0.bias": { |
| "min": -0.27260690927505493, |
| "max": 0.03427213430404663, |
| "mean": -0.04665624350309372, |
| "std": 0.04072800651192665, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.1.4.ff.2.weight": { |
| "min": -0.9236066937446594, |
| "max": 0.9658545255661011, |
| "mean": 0.0010218569077551365, |
| "std": 0.04070160537958145, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.1.4.ff.2.bias": { |
| "min": -0.14540822803974152, |
| "max": 0.07539817690849304, |
| "mean": -0.009104669094085693, |
| "std": 0.025749636813998222, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.1.g": { |
| "min": 0.23975443840026855, |
| "max": 0.7185607552528381, |
| "mean": 0.44753360748291016, |
| "std": 0.06007208302617073, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_q.weight": { |
| "min": -0.2746535837650299, |
| "max": 0.2996414601802826, |
| "mean": 8.662165782880038e-06, |
| "std": 0.03547052666544914, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_q.bias": { |
| "min": -0.11975187063217163, |
| "max": 0.11919566243886948, |
| "mean": 0.0007501145591959357, |
| "std": 0.02767573855817318, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_k.weight": { |
| "min": -0.2831306457519531, |
| "max": 0.2817768156528473, |
| "mean": -7.67814417486079e-05, |
| "std": 0.035099856555461884, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_k.bias": { |
| "min": -2.5266785621643066, |
| "max": 2.5387556552886963, |
| "mean": 0.026949256658554077, |
| "std": 0.5885584354400635, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_v.weight": { |
| "min": -0.22260574996471405, |
| "max": 0.2732996642589569, |
| "mean": 2.9508364605135284e-06, |
| "std": 0.030731212347745895, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_v.bias": { |
| "min": -0.0335291288793087, |
| "max": 0.031390510499477386, |
| "mean": 0.00011758864275179803, |
| "std": 0.012400473468005657, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_out.0.weight": { |
| "min": -0.23621369898319244, |
| "max": 0.23289528489112854, |
| "mean": 5.6726221373537555e-05, |
| "std": 0.025696825236082077, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.2.to_out.0.bias": { |
| "min": -0.13667543232440948, |
| "max": 0.12879958748817444, |
| "mean": -0.005504202097654343, |
| "std": 0.040019236505031586, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.3.g": { |
| "min": 0.35455986857414246, |
| "max": 1.1826062202453613, |
| "mean": 0.7107979655265808, |
| "std": 0.10437346249818802, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.4.ff.0.0.weight": { |
| "min": -0.6191003918647766, |
| "max": 0.5564218759536743, |
| "mean": 0.0011606740299612284, |
| "std": 0.04611353576183319, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.2.4.ff.0.0.bias": { |
| "min": -0.19018200039863586, |
| "max": 0.02485579438507557, |
| "mean": -0.03489173576235771, |
| "std": 0.028727849945425987, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.2.4.ff.2.weight": { |
| "min": -1.1358468532562256, |
| "max": 0.9746898412704468, |
| "mean": 0.00035939598456025124, |
| "std": 0.04234171286225319, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.2.4.ff.2.bias": { |
| "min": -0.6019405722618103, |
| "max": 0.06334464251995087, |
| "mean": -0.00488577876240015, |
| "std": 0.028712771832942963, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.1.g": { |
| "min": 0.3755652904510498, |
| "max": 0.9507709741592407, |
| "mean": 0.5931843519210815, |
| "std": 0.0686625987291336, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_q.weight": { |
| "min": -0.3929532766342163, |
| "max": 0.37091946601867676, |
| "mean": 7.025484228506684e-05, |
| "std": 0.03718522936105728, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_q.bias": { |
| "min": -0.11978376656770706, |
| "max": 0.13744011521339417, |
| "mean": 0.0009335688664577901, |
| "std": 0.029282478615641594, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_k.weight": { |
| "min": -0.6229383945465088, |
| "max": 0.5121926069259644, |
| "mean": 1.5349294699262828e-05, |
| "std": 0.03643808513879776, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_k.bias": { |
| "min": -8.242501258850098, |
| "max": 8.848700523376465, |
| "mean": -0.10966195166110992, |
| "std": 1.7074756622314453, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_v.weight": { |
| "min": -0.2780378460884094, |
| "max": 0.24072492122650146, |
| "mean": 5.223074913374148e-05, |
| "std": 0.03261224925518036, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_v.bias": { |
| "min": -0.05211928114295006, |
| "max": 0.03976155444979668, |
| "mean": 9.01424209587276e-05, |
| "std": 0.012970111332833767, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_out.0.weight": { |
| "min": -0.23169712722301483, |
| "max": 0.23602090775966644, |
| "mean": -2.2195828933035955e-05, |
| "std": 0.029388954862952232, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.2.to_out.0.bias": { |
| "min": -0.20550638437271118, |
| "max": 0.10590175539255142, |
| "mean": -0.004026752896606922, |
| "std": 0.03266817331314087, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.3.g": { |
| "min": 0.3396901488304138, |
| "max": 1.022835612297058, |
| "mean": 0.7008680701255798, |
| "std": 0.09710492938756943, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.4.ff.0.0.weight": { |
| "min": -0.5683938264846802, |
| "max": 0.8381193280220032, |
| "mean": 0.00041519341175444424, |
| "std": 0.04229409247636795, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.3.4.ff.0.0.bias": { |
| "min": -0.21325451135635376, |
| "max": 0.03037591464817524, |
| "mean": -0.03223013877868652, |
| "std": 0.026610074564814568, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.3.4.ff.2.weight": { |
| "min": -0.7600710391998291, |
| "max": 0.7236490845680237, |
| "mean": -1.6499760022270493e-05, |
| "std": 0.03683502599596977, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.3.4.ff.2.bias": { |
| "min": -0.26496192812919617, |
| "max": 0.10684733092784882, |
| "mean": -0.0030161943286657333, |
| "std": 0.028908496722579002, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.1.g": { |
| "min": 0.28418251872062683, |
| "max": 0.7011516094207764, |
| "mean": 0.499736487865448, |
| "std": 0.047200758010149, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_q.weight": { |
| "min": -0.28040796518325806, |
| "max": 0.23536527156829834, |
| "mean": -0.00011076986265834421, |
| "std": 0.03875643387436867, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_q.bias": { |
| "min": -0.15493866801261902, |
| "max": 0.12730616331100464, |
| "mean": -0.002237653825432062, |
| "std": 0.03343982622027397, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_k.weight": { |
| "min": -0.4170800745487213, |
| "max": 0.6621686220169067, |
| "mean": -1.8650103811523877e-05, |
| "std": 0.039095137268304825, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_k.bias": { |
| "min": -4.2626214027404785, |
| "max": 4.750005722045898, |
| "mean": -0.020378686487674713, |
| "std": 1.0105632543563843, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_v.weight": { |
| "min": -0.24659502506256104, |
| "max": 0.2085939198732376, |
| "mean": 4.402307604323141e-05, |
| "std": 0.033962100744247437, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_v.bias": { |
| "min": -0.03477818891406059, |
| "max": 0.045115940272808075, |
| "mean": -1.805905776564032e-05, |
| "std": 0.012638943269848824, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_out.0.weight": { |
| "min": -0.20247574150562286, |
| "max": 0.20785965025424957, |
| "mean": -2.8977701731491834e-05, |
| "std": 0.031019993126392365, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.2.to_out.0.bias": { |
| "min": -0.2010650485754013, |
| "max": 0.11400442570447922, |
| "mean": -0.002901929896324873, |
| "std": 0.03455876186490059, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.3.g": { |
| "min": 0.3669453561306, |
| "max": 1.068376898765564, |
| "mean": 0.6706770658493042, |
| "std": 0.06678663939237595, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.4.ff.0.0.weight": { |
| "min": -0.4009625017642975, |
| "max": 0.5047707557678223, |
| "mean": -3.825509702437557e-05, |
| "std": 0.04113015532493591, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.4.4.ff.0.0.bias": { |
| "min": -0.12967447936534882, |
| "max": 0.026864072307944298, |
| "mean": -0.03057170659303665, |
| "std": 0.021967768669128418, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.4.4.ff.2.weight": { |
| "min": -0.4517863094806671, |
| "max": 0.4363614320755005, |
| "mean": 7.544152322225273e-05, |
| "std": 0.03489035367965698, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.4.4.ff.2.bias": { |
| "min": -0.2692056894302368, |
| "max": 0.07339853048324585, |
| "mean": -0.0010960557265207171, |
| "std": 0.023164359852671623, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.1.g": { |
| "min": 0.2873815894126892, |
| "max": 0.6924071311950684, |
| "mean": 0.5248355865478516, |
| "std": 0.048200905323028564, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_q.weight": { |
| "min": -0.22408804297447205, |
| "max": 0.22555872797966003, |
| "mean": 1.55975158122601e-05, |
| "std": 0.038948412984609604, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_q.bias": { |
| "min": -0.13717913627624512, |
| "max": 0.10996447503566742, |
| "mean": 0.00024089610087685287, |
| "std": 0.02930767834186554, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_k.weight": { |
| "min": -0.37717288732528687, |
| "max": 0.43975257873535156, |
| "mean": -9.77939271251671e-06, |
| "std": 0.03928566351532936, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_k.bias": { |
| "min": -3.868288516998291, |
| "max": 5.028470516204834, |
| "mean": 0.009761041030287743, |
| "std": 0.8478302955627441, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_v.weight": { |
| "min": -0.22423577308654785, |
| "max": 0.221679225564003, |
| "mean": -3.3901324059115723e-07, |
| "std": 0.034409064799547195, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_v.bias": { |
| "min": -0.0438535250723362, |
| "max": 0.03604500740766525, |
| "mean": -0.00025803165044635534, |
| "std": 0.0120812077075243, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_out.0.weight": { |
| "min": -0.2146783322095871, |
| "max": 0.1904102861881256, |
| "mean": -1.7072843547794037e-05, |
| "std": 0.03153547644615173, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.2.to_out.0.bias": { |
| "min": -0.18190543353557587, |
| "max": 0.12149464339017868, |
| "mean": -0.0023945681750774384, |
| "std": 0.04129800572991371, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.3.g": { |
| "min": 0.4226498305797577, |
| "max": 0.9518083333969116, |
| "mean": 0.6629198789596558, |
| "std": 0.057358019053936005, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.4.ff.0.0.weight": { |
| "min": -0.372251033782959, |
| "max": 0.47781607508659363, |
| "mean": -8.197914576157928e-05, |
| "std": 0.040889132767915726, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.5.4.ff.0.0.bias": { |
| "min": -0.20997951924800873, |
| "max": 0.027235740795731544, |
| "mean": -0.030272582545876503, |
| "std": 0.021444976329803467, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.5.4.ff.2.weight": { |
| "min": -0.34334975481033325, |
| "max": 0.7389779686927795, |
| "mean": 8.186099876184016e-05, |
| "std": 0.034765809774398804, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.5.4.ff.2.bias": { |
| "min": -0.2415534406900406, |
| "max": 0.050704218447208405, |
| "mean": -0.001192720839753747, |
| "std": 0.02049700915813446, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.1.g": { |
| "min": 0.3061361312866211, |
| "max": 0.6592679023742676, |
| "mean": 0.5253557562828064, |
| "std": 0.04659049212932587, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_q.weight": { |
| "min": -0.3061867356300354, |
| "max": 0.2188880741596222, |
| "mean": 7.013476715655997e-05, |
| "std": 0.03949468210339546, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_q.bias": { |
| "min": -0.15020529925823212, |
| "max": 0.13198836147785187, |
| "mean": 0.00033842536504380405, |
| "std": 0.030562784522771835, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_k.weight": { |
| "min": -0.25926315784454346, |
| "max": 0.20377042889595032, |
| "mean": 3.10853029077407e-05, |
| "std": 0.039484549313783646, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_k.bias": { |
| "min": -2.3498988151550293, |
| "max": 2.389754056930542, |
| "mean": -0.02631671540439129, |
| "std": 0.4510843753814697, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_v.weight": { |
| "min": -0.19007518887519836, |
| "max": 0.2122075855731964, |
| "mean": 3.708741132868454e-05, |
| "std": 0.03479320555925369, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_v.bias": { |
| "min": -0.03199063614010811, |
| "max": 0.03580143302679062, |
| "mean": -0.00019849740783683956, |
| "std": 0.012292149476706982, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_out.0.weight": { |
| "min": -0.19011414051055908, |
| "max": 0.17155633866786957, |
| "mean": -6.832154031144455e-05, |
| "std": 0.0321698896586895, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.2.to_out.0.bias": { |
| "min": -0.14033056795597076, |
| "max": 0.13829410076141357, |
| "mean": -0.0025126286782324314, |
| "std": 0.05131656676530838, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.3.g": { |
| "min": 0.4672001600265503, |
| "max": 0.9642724394798279, |
| "mean": 0.6692001819610596, |
| "std": 0.05353807285428047, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.4.ff.0.0.weight": { |
| "min": -0.32512417435646057, |
| "max": 0.3099176585674286, |
| "mean": -8.536699169781059e-07, |
| "std": 0.04094506427645683, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.6.4.ff.0.0.bias": { |
| "min": -0.12580342590808868, |
| "max": 0.025558948516845703, |
| "mean": -0.030726371333003044, |
| "std": 0.019892578944563866, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.6.4.ff.2.weight": { |
| "min": -0.44301649928092957, |
| "max": 0.448657363653183, |
| "mean": 9.49525274336338e-05, |
| "std": 0.03511860594153404, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.6.4.ff.2.bias": { |
| "min": -0.22610187530517578, |
| "max": 0.0521467961370945, |
| "mean": -0.0011865891283378005, |
| "std": 0.018514476716518402, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.1.g": { |
| "min": 0.3391834497451782, |
| "max": 0.7460214495658875, |
| "mean": 0.5588462352752686, |
| "std": 0.04179359972476959, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_q.weight": { |
| "min": -0.2743752598762512, |
| "max": 0.27987486124038696, |
| "mean": 2.0352064893813804e-05, |
| "std": 0.04105662927031517, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_q.bias": { |
| "min": -0.13770411908626556, |
| "max": 0.14076648652553558, |
| "mean": 0.0004916964680887759, |
| "std": 0.026698192581534386, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_k.weight": { |
| "min": -0.4935597777366638, |
| "max": 0.3583414554595947, |
| "mean": 8.887881995178759e-05, |
| "std": 0.04069438576698303, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_k.bias": { |
| "min": -2.311286687850952, |
| "max": 1.7559641599655151, |
| "mean": -0.02118358016014099, |
| "std": 0.5012499094009399, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_v.weight": { |
| "min": -0.2191997468471527, |
| "max": 0.19883301854133606, |
| "mean": -4.048732444061898e-05, |
| "std": 0.03423238918185234, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_v.bias": { |
| "min": -0.041594695299863815, |
| "max": 0.039164409041404724, |
| "mean": -0.00013954236055724323, |
| "std": 0.012892705388367176, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_out.0.weight": { |
| "min": -0.17905071377754211, |
| "max": 0.18448761105537415, |
| "mean": 4.79043010273017e-05, |
| "std": 0.03155573084950447, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.2.to_out.0.bias": { |
| "min": -0.1810525357723236, |
| "max": 0.18478283286094666, |
| "mean": -0.0022157104685902596, |
| "std": 0.054884668439626694, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.3.g": { |
| "min": 0.47422513365745544, |
| "max": 1.034525752067566, |
| "mean": 0.6455625891685486, |
| "std": 0.05127067118883133, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.4.ff.0.0.weight": { |
| "min": -0.2727859616279602, |
| "max": 0.31039154529571533, |
| "mean": 0.00011223299225093797, |
| "std": 0.04068140313029289, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.7.4.ff.0.0.bias": { |
| "min": -0.10606198012828827, |
| "max": 0.026645641773939133, |
| "mean": -0.02954702451825142, |
| "std": 0.01799139380455017, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.7.4.ff.2.weight": { |
| "min": -0.34065425395965576, |
| "max": 0.33199548721313477, |
| "mean": 5.238396261120215e-05, |
| "std": 0.034412581473588943, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.7.4.ff.2.bias": { |
| "min": -0.18290212750434875, |
| "max": 0.042540330439805984, |
| "mean": -0.001063595642335713, |
| "std": 0.017244886606931686, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.1.g": { |
| "min": 0.32540637254714966, |
| "max": 0.6927012801170349, |
| "mean": 0.511530876159668, |
| "std": 0.037588104605674744, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_q.weight": { |
| "min": -0.23500792682170868, |
| "max": 0.22661413252353668, |
| "mean": -3.6375215131556615e-05, |
| "std": 0.039175912737846375, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_q.bias": { |
| "min": -0.11630432307720184, |
| "max": 0.1327952891588211, |
| "mean": 0.00015614689618814737, |
| "std": 0.02927626110613346, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_k.weight": { |
| "min": -0.35499081015586853, |
| "max": 0.28717586398124695, |
| "mean": 7.152914804464672e-06, |
| "std": 0.03924452140927315, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_k.bias": { |
| "min": -4.1564154624938965, |
| "max": 3.564419746398926, |
| "mean": -0.011666063219308853, |
| "std": 0.6851950883865356, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_v.weight": { |
| "min": -0.21194273233413696, |
| "max": 0.21046526730060577, |
| "mean": 3.472749813226983e-05, |
| "std": 0.0344846174120903, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_v.bias": { |
| "min": -0.03606359288096428, |
| "max": 0.0485043041408062, |
| "mean": 0.0007934037130326033, |
| "std": 0.01287116389721632, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_out.0.weight": { |
| "min": -0.21187099814414978, |
| "max": 0.19423909485340118, |
| "mean": -1.3818132629239699e-06, |
| "std": 0.03169572353363037, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.2.to_out.0.bias": { |
| "min": -0.1876450628042221, |
| "max": 0.1781487911939621, |
| "mean": -0.0028378514107316732, |
| "std": 0.05868522822856903, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.3.g": { |
| "min": 0.4746300280094147, |
| "max": 1.0532299280166626, |
| "mean": 0.6519026756286621, |
| "std": 0.0511440671980381, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.4.ff.0.0.weight": { |
| "min": -0.24888233840465546, |
| "max": 0.329919695854187, |
| "mean": 0.00018074009858537465, |
| "std": 0.04056980833411217, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.8.4.ff.0.0.bias": { |
| "min": -0.1257043331861496, |
| "max": 0.024808209389448166, |
| "mean": -0.03052573651075363, |
| "std": 0.01766115613281727, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.8.4.ff.2.weight": { |
| "min": -0.4241631031036377, |
| "max": 0.48552921414375305, |
| "mean": -1.5207942851702683e-06, |
| "std": 0.03539673238992691, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.8.4.ff.2.bias": { |
| "min": -0.15242178738117218, |
| "max": 0.0436730720102787, |
| "mean": 4.8590598453301936e-05, |
| "std": 0.01490879151970148, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.1.g": { |
| "min": 0.3154313564300537, |
| "max": 0.68807452917099, |
| "mean": 0.5530612468719482, |
| "std": 0.041024595499038696, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_q.weight": { |
| "min": -0.20784315466880798, |
| "max": 0.22137802839279175, |
| "mean": 3.199603088432923e-05, |
| "std": 0.038299061357975006, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_q.bias": { |
| "min": -0.13870200514793396, |
| "max": 0.11339821666479111, |
| "mean": 2.9128044843673706e-05, |
| "std": 0.025894545018672943, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_k.weight": { |
| "min": -0.4055723249912262, |
| "max": 0.37375950813293457, |
| "mean": 2.5988052584580146e-05, |
| "std": 0.038179732859134674, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_k.bias": { |
| "min": -3.7928740978240967, |
| "max": 2.885420560836792, |
| "mean": 0.0012225983664393425, |
| "std": 0.5186418294906616, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_v.weight": { |
| "min": -0.20435325801372528, |
| "max": 0.1985306441783905, |
| "mean": 2.9608720069518313e-05, |
| "std": 0.03429684415459633, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_v.bias": { |
| "min": -0.051018889993429184, |
| "max": 0.040129613131284714, |
| "mean": -0.00042048803879879415, |
| "std": 0.013424505479633808, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_out.0.weight": { |
| "min": -0.19798687100410461, |
| "max": 0.20356523990631104, |
| "mean": -1.2490939298004378e-05, |
| "std": 0.03180477395653725, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.2.to_out.0.bias": { |
| "min": -0.1941322237253189, |
| "max": 0.19617649912834167, |
| "mean": -0.002969961380586028, |
| "std": 0.06259642541408539, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.3.g": { |
| "min": 0.3487941026687622, |
| "max": 1.0952281951904297, |
| "mean": 0.6676215529441833, |
| "std": 0.05664284899830818, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.4.ff.0.0.weight": { |
| "min": -0.22712087631225586, |
| "max": 0.25315943360328674, |
| "mean": 0.00035851544816978276, |
| "std": 0.04075949266552925, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.9.4.ff.0.0.bias": { |
| "min": -0.09184330701828003, |
| "max": 0.04372864216566086, |
| "mean": -0.030109990388154984, |
| "std": 0.017667723819613457, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.9.4.ff.2.weight": { |
| "min": -0.35518717765808105, |
| "max": 0.30635109543800354, |
| "mean": -4.3967633246211335e-05, |
| "std": 0.037122078239917755, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.9.4.ff.2.bias": { |
| "min": -0.16265232861042023, |
| "max": 0.06366349011659622, |
| "mean": -8.268894453067333e-05, |
| "std": 0.019441038370132446, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.1.g": { |
| "min": 0.3488224744796753, |
| "max": 0.7298842668533325, |
| "mean": 0.5426357388496399, |
| "std": 0.039679452776908875, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_q.weight": { |
| "min": -0.22033143043518066, |
| "max": 0.22433431446552277, |
| "mean": -1.1077730960096233e-05, |
| "std": 0.03923030197620392, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_q.bias": { |
| "min": -0.11923559010028839, |
| "max": 0.1716114580631256, |
| "mean": 0.00028718815883621573, |
| "std": 0.025185901671648026, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_k.weight": { |
| "min": -0.2481980323791504, |
| "max": 0.3025566339492798, |
| "mean": -3.676430060295388e-05, |
| "std": 0.0389297790825367, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_k.bias": { |
| "min": -3.5254225730895996, |
| "max": 3.736085891723633, |
| "mean": 0.01585158333182335, |
| "std": 0.7859480977058411, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_v.weight": { |
| "min": -0.21972878277301788, |
| "max": 0.23833929002285004, |
| "mean": -1.325977427768521e-05, |
| "std": 0.03630264848470688, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_v.bias": { |
| "min": -0.04748326912522316, |
| "max": 0.051650550216436386, |
| "mean": 0.0004778398433700204, |
| "std": 0.01352317538112402, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_out.0.weight": { |
| "min": -0.21533912420272827, |
| "max": 0.21868844330310822, |
| "mean": 5.647652506013401e-05, |
| "std": 0.03361491113901138, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.2.to_out.0.bias": { |
| "min": -0.21255744993686676, |
| "max": 0.23268213868141174, |
| "mean": -0.005099742207676172, |
| "std": 0.06193498894572258, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.3.g": { |
| "min": 0.36217188835144043, |
| "max": 1.112847089767456, |
| "mean": 0.69975745677948, |
| "std": 0.05501763895153999, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.4.ff.0.0.weight": { |
| "min": -0.23635224997997284, |
| "max": 0.24658624827861786, |
| "mean": 0.00046343228314071894, |
| "std": 0.041268426924943924, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.10.4.ff.0.0.bias": { |
| "min": -0.09862525016069412, |
| "max": 0.06863635033369064, |
| "mean": -0.03145936504006386, |
| "std": 0.018182674422860146, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.10.4.ff.2.weight": { |
| "min": -0.30422019958496094, |
| "max": 0.3540525734424591, |
| "mean": -8.221832831623033e-05, |
| "std": 0.04027421772480011, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.10.4.ff.2.bias": { |
| "min": -0.1533002257347107, |
| "max": 0.150687575340271, |
| "mean": 0.00025470374384894967, |
| "std": 0.023078717291355133, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.1.g": { |
| "min": 0.9982896447181702, |
| "max": 1.017301082611084, |
| "mean": 1.0001298189163208, |
| "std": 0.0026745295617729425, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_q.weight": { |
| "min": -0.031271953135728836, |
| "max": 0.03127208724617958, |
| "mean": -1.929010068124626e-05, |
| "std": 0.01804104819893837, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_q.bias": { |
| "min": -0.03122810088098049, |
| "max": 0.030984606593847275, |
| "mean": -0.0010841733310371637, |
| "std": 0.0179507527500391, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_k.weight": { |
| "min": -0.03126660734415054, |
| "max": 0.03127255663275719, |
| "mean": 3.5378593565837946e-06, |
| "std": 0.018041487783193588, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_k.bias": { |
| "min": -0.031172683462500572, |
| "max": 0.031167395412921906, |
| "mean": 0.0003339074901305139, |
| "std": 0.01806284487247467, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_v.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_v.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_out.0.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.2.to_out.0.bias": { |
| "min": -0.0006182725192047656, |
| "max": 0.0004164598067291081, |
| "mean": 1.3710750863538124e-06, |
| "std": 0.0001378587185172364, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.3.g": { |
| "min": 0.9979904890060425, |
| "max": 1.0161197185516357, |
| "mean": 1.0013301372528076, |
| "std": 0.004817315377295017, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.4.ff.0.0.weight": { |
| "min": -0.032745394855737686, |
| "max": 0.03283839672803879, |
| "mean": -6.682760158582823e-06, |
| "std": 0.018042659386992455, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.11.4.ff.0.0.bias": { |
| "min": -0.03276297450065613, |
| "max": 0.0325884111225605, |
| "mean": -0.00013115988986101002, |
| "std": 0.017956366762518883, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.11.4.ff.2.weight": { |
| "min": -0.0011839725775644183, |
| "max": 0.0011610303772613406, |
| "mean": 3.635812220181833e-07, |
| "std": 0.00021423342695925385, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.11.4.ff.2.bias": { |
| "min": -0.0005281989579088986, |
| "max": 0.0004011568380519748, |
| "mean": 2.2640601855528075e-06, |
| "std": 0.00012689748837146908, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.1.g": { |
| "min": 0.383169025182724, |
| "max": 0.725769579410553, |
| "mean": 0.5810222625732422, |
| "std": 0.039563409984111786, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_q.weight": { |
| "min": -0.23967966437339783, |
| "max": 0.19745716452598572, |
| "mean": 2.6129977413802408e-05, |
| "std": 0.0374654158949852, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_q.bias": { |
| "min": -0.1195952445268631, |
| "max": 0.16743028163909912, |
| "mean": 0.0009849121561273932, |
| "std": 0.02763625606894493, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_k.weight": { |
| "min": -0.24753768742084503, |
| "max": 0.502853274345398, |
| "mean": -4.9970258260145783e-05, |
| "std": 0.0376228392124176, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_k.bias": { |
| "min": -3.9648470878601074, |
| "max": 3.7909820079803467, |
| "mean": -0.0036168191581964493, |
| "std": 0.6834573745727539, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_v.weight": { |
| "min": -0.22818903625011444, |
| "max": 0.25305306911468506, |
| "mean": -1.1425543561927043e-05, |
| "std": 0.037434399127960205, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_v.bias": { |
| "min": -0.07215739786624908, |
| "max": 0.08118511736392975, |
| "mean": -0.0005145666655153036, |
| "std": 0.015683691948652267, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_out.0.weight": { |
| "min": -0.2285011112689972, |
| "max": 0.25927454233169556, |
| "mean": -2.8810776711907238e-05, |
| "std": 0.03542128577828407, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.2.to_out.0.bias": { |
| "min": -0.20174317061901093, |
| "max": 0.21631476283073425, |
| "mean": -0.005539278965443373, |
| "std": 0.06842140108346939, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.3.g": { |
| "min": 0.4053976237773895, |
| "max": 1.1997506618499756, |
| "mean": 0.7383711338043213, |
| "std": 0.05650194734334946, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.4.ff.0.0.weight": { |
| "min": -0.2226068526506424, |
| "max": 0.24658025801181793, |
| "mean": 0.0005210487288422883, |
| "std": 0.04133579134941101, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.12.4.ff.0.0.bias": { |
| "min": -0.10394058376550674, |
| "max": 0.02423257753252983, |
| "mean": -0.032700441777706146, |
| "std": 0.018963389098644257, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.12.4.ff.2.weight": { |
| "min": -0.452515184879303, |
| "max": 0.4254130423069, |
| "mean": -0.0004341741732787341, |
| "std": 0.04689616709947586, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.12.4.ff.2.bias": { |
| "min": -0.25287455320358276, |
| "max": 0.4728158116340637, |
| "mean": 0.003204880515113473, |
| "std": 0.04463134706020355, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.0.weight": { |
| "min": -0.31750747561454773, |
| "max": 0.333750456571579, |
| "mean": -2.5235824068658985e-05, |
| "std": 0.021287381649017334, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.13.1.g": { |
| "min": 0.3244800865650177, |
| "max": 0.6913307905197144, |
| "mean": 0.5712176561355591, |
| "std": 0.045165594667196274, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_q.weight": { |
| "min": -0.16547071933746338, |
| "max": 0.1755398064851761, |
| "mean": -4.8899608373176306e-05, |
| "std": 0.033180754631757736, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_q.bias": { |
| "min": -0.18801826238632202, |
| "max": 0.1438588947057724, |
| "mean": 4.4942658860236406e-05, |
| "std": 0.029767248779535294, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_k.weight": { |
| "min": -0.38313359022140503, |
| "max": 0.24818716943264008, |
| "mean": -9.953633707482368e-06, |
| "std": 0.03276177868247032, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_k.bias": { |
| "min": -3.6768205165863037, |
| "max": 3.3089771270751953, |
| "mean": -0.014381470158696175, |
| "std": 0.9868160486221313, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_v.weight": { |
| "min": -0.23584222793579102, |
| "max": 0.24873286485671997, |
| "mean": -1.8046124750981107e-05, |
| "std": 0.0416971780359745, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_v.bias": { |
| "min": -0.07315867394208908, |
| "max": 0.15554027259349823, |
| "mean": 0.0006676731863990426, |
| "std": 0.02520027756690979, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_out.0.weight": { |
| "min": -0.2670559585094452, |
| "max": 0.24887487292289734, |
| "mean": -1.537521166028455e-05, |
| "std": 0.04013797268271446, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.2.to_out.0.bias": { |
| "min": -0.1908693015575409, |
| "max": 0.1960526406764984, |
| "mean": -0.001238689525052905, |
| "std": 0.06672189384698868, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.3.g": { |
| "min": 0.3290148973464966, |
| "max": 1.0089884996414185, |
| "mean": 0.719682514667511, |
| "std": 0.053548477590084076, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.4.ff.0.0.weight": { |
| "min": -0.23323918879032135, |
| "max": 0.2469726949930191, |
| "mean": 0.00018311971507500857, |
| "std": 0.04089980572462082, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.13.4.ff.0.0.bias": { |
| "min": -0.11503507941961288, |
| "max": 0.019024236127734184, |
| "mean": -0.04251422733068466, |
| "std": 0.018931886181235313, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.13.4.ff.2.weight": { |
| "min": -0.3927544355392456, |
| "max": 0.4104294776916504, |
| "mean": -2.164382931368891e-05, |
| "std": 0.04853343218564987, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.13.4.ff.2.bias": { |
| "min": -0.6971645355224609, |
| "max": 0.414955198764801, |
| "mean": 0.0008486253209412098, |
| "std": 0.060451194643974304, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.0.weight": { |
| "min": -0.001029345323331654, |
| "max": 1.0005033016204834, |
| "mean": 0.00048820505617186427, |
| "std": 0.022088995203375816, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.14.1.g": { |
| "min": 0.99776691198349, |
| "max": 1.0153907537460327, |
| "mean": 0.9997058510780334, |
| "std": 0.0012300637317821383, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_q.weight": { |
| "min": -0.031274545937776566, |
| "max": 0.03127707168459892, |
| "mean": -2.1027797629358247e-05, |
| "std": 0.018032420426607132, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_q.bias": { |
| "min": -0.031217729672789574, |
| "max": 0.031233638525009155, |
| "mean": -0.0006770637119188905, |
| "std": 0.017827108502388, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_k.weight": { |
| "min": -0.03128187730908394, |
| "max": 0.031268589198589325, |
| "mean": -8.834878826746717e-06, |
| "std": 0.018031446263194084, |
| "sparsity": 9.5367431640625e-07, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_k.bias": { |
| "min": -0.031228115782141685, |
| "max": 0.03124588541686535, |
| "mean": -0.0007299837889149785, |
| "std": 0.017942119389772415, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_v.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_v.bias": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_out.0.weight": { |
| "min": 0.0, |
| "max": 0.0, |
| "mean": 0.0, |
| "std": 0.0, |
| "sparsity": 1.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.2.to_out.0.bias": { |
| "min": -0.0004204909782856703, |
| "max": 0.00033413738128729165, |
| "mean": -3.152099679937237e-06, |
| "std": 0.0001164414279628545, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.3.g": { |
| "min": 0.997612476348877, |
| "max": 1.018494963645935, |
| "mean": 1.0012025833129883, |
| "std": 0.0055990261025726795, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.4.ff.0.0.weight": { |
| "min": -0.032435424625873566, |
| "max": 0.032380323857069016, |
| "mean": -1.7302188553003361e-06, |
| "std": 0.018027864396572113, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.14.4.ff.0.0.bias": { |
| "min": -0.032131362706422806, |
| "max": 0.031162748113274574, |
| "mean": -0.00037396998959593475, |
| "std": 0.01804373785853386, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.14.4.ff.2.weight": { |
| "min": -0.0012890547513961792, |
| "max": 0.001122222631238401, |
| "mean": -8.950937626650557e-07, |
| "std": 0.00020965519070159644, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.14.4.ff.2.bias": { |
| "min": -0.00034396781120449305, |
| "max": 0.00029873003950342536, |
| "mean": -3.7820796023879666e-06, |
| "std": 0.000104848513728939, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.0.weight": { |
| "min": -0.2348298579454422, |
| "max": 0.27300530672073364, |
| "mean": 6.816113909735577e-06, |
| "std": 0.018809327855706215, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.15.1.g": { |
| "min": 0.3214486837387085, |
| "max": 0.7001691460609436, |
| "mean": 0.5819005370140076, |
| "std": 0.04646027460694313, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_q.weight": { |
| "min": -0.18254612386226654, |
| "max": 0.19860517978668213, |
| "mean": -1.1607673513935879e-05, |
| "std": 0.03318353369832039, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_q.bias": { |
| "min": -0.1615392416715622, |
| "max": 0.13018541038036346, |
| "mean": -0.001078265719115734, |
| "std": 0.03421453759074211, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_k.weight": { |
| "min": -0.33349576592445374, |
| "max": 0.31233182549476624, |
| "mean": -1.0118232239619829e-05, |
| "std": 0.032234255224466324, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_k.bias": { |
| "min": -7.8480448722839355, |
| "max": 8.8128080368042, |
| "mean": 0.09380069375038147, |
| "std": 1.6259617805480957, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_v.weight": { |
| "min": -0.23474065959453583, |
| "max": 0.24273009598255157, |
| "mean": 4.155310307396576e-05, |
| "std": 0.04085606709122658, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_v.bias": { |
| "min": -0.07642843574285507, |
| "max": 0.06617211550474167, |
| "mean": 0.0004827451193705201, |
| "std": 0.01944047026336193, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_out.0.weight": { |
| "min": -0.24762944877147675, |
| "max": 0.2358739972114563, |
| "mean": -3.232937160646543e-06, |
| "std": 0.03943068906664848, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.2.to_out.0.bias": { |
| "min": -0.16411840915679932, |
| "max": 0.1619885265827179, |
| "mean": 0.001625007251277566, |
| "std": 0.06529368460178375, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.3.g": { |
| "min": 0.5569814443588257, |
| "max": 0.9541290402412415, |
| "mean": 0.7133999466896057, |
| "std": 0.04144103080034256, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.4.ff.0.0.weight": { |
| "min": -0.22980599105358124, |
| "max": 0.2567155957221985, |
| "mean": -4.5827197027392685e-05, |
| "std": 0.04057452455163002, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.15.4.ff.0.0.bias": { |
| "min": -0.13575804233551025, |
| "max": 0.02213761769235134, |
| "mean": -0.04138356074690819, |
| "std": 0.01845938339829445, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.15.4.ff.2.weight": { |
| "min": -0.4245927333831787, |
| "max": 0.39355969429016113, |
| "mean": -4.580877430271357e-06, |
| "std": 0.04778376594185829, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.15.4.ff.2.bias": { |
| "min": -0.6110193133354187, |
| "max": 0.6553415656089783, |
| "mean": 0.001590792671777308, |
| "std": 0.056976497173309326, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.0.weight": { |
| "min": -0.251875638961792, |
| "max": 0.3209821879863739, |
| "mean": -6.120833859313279e-06, |
| "std": 0.019612718373537064, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.16.1.g": { |
| "min": 0.35964423418045044, |
| "max": 0.6887573599815369, |
| "mean": 0.5708860754966736, |
| "std": 0.04330369085073471, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_q.weight": { |
| "min": -0.2213190197944641, |
| "max": 0.17759515345096588, |
| "mean": -3.466910129645839e-05, |
| "std": 0.03429858386516571, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_q.bias": { |
| "min": -0.16418921947479248, |
| "max": 0.23438312113285065, |
| "mean": 0.0003640234936028719, |
| "std": 0.03290766850113869, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_k.weight": { |
| "min": -0.2654394805431366, |
| "max": 0.24140575528144836, |
| "mean": -5.2719900850206614e-05, |
| "std": 0.03389739617705345, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_k.bias": { |
| "min": -4.882589817047119, |
| "max": 5.12019157409668, |
| "mean": 0.04409287869930267, |
| "std": 1.233181118965149, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_v.weight": { |
| "min": -0.2474043071269989, |
| "max": 0.2517080307006836, |
| "mean": 7.239622209453955e-05, |
| "std": 0.0439867228269577, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_v.bias": { |
| "min": -0.0629691556096077, |
| "max": 0.054786957800388336, |
| "mean": 0.0006426851614378393, |
| "std": 0.017202140763401985, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_out.0.weight": { |
| "min": -0.28832921385765076, |
| "max": 0.2730186879634857, |
| "mean": -5.011680332245305e-05, |
| "std": 0.04298482462763786, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.2.to_out.0.bias": { |
| "min": -0.16195480525493622, |
| "max": 0.1713690608739853, |
| "mean": -0.002885536290705204, |
| "std": 0.05930813401937485, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.3.g": { |
| "min": 0.5195947885513306, |
| "max": 0.9433215260505676, |
| "mean": 0.713985800743103, |
| "std": 0.0396861806511879, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.4.ff.0.0.weight": { |
| "min": -0.23872706294059753, |
| "max": 0.24947769939899445, |
| "mean": 0.000464944401755929, |
| "std": 0.04045351594686508, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.16.4.ff.0.0.bias": { |
| "min": -0.14595000445842743, |
| "max": 0.041102174669504166, |
| "mean": -0.03972803056240082, |
| "std": 0.020616797730326653, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.16.4.ff.2.weight": { |
| "min": -0.5366718769073486, |
| "max": 0.5868415236473083, |
| "mean": 5.812449671793729e-06, |
| "std": 0.04885939508676529, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.16.4.ff.2.bias": { |
| "min": -0.5220040678977966, |
| "max": 0.4962327182292938, |
| "mean": 0.0023680159356445074, |
| "std": 0.05358637124300003, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.0.weight": { |
| "min": -0.2740743160247803, |
| "max": 0.31590986251831055, |
| "mean": 1.968129254237283e-06, |
| "std": 0.02004937082529068, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.17.1.g": { |
| "min": 0.36616218090057373, |
| "max": 0.718187689781189, |
| "mean": 0.5934113264083862, |
| "std": 0.04643949121236801, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_q.weight": { |
| "min": -0.21206998825073242, |
| "max": 0.20034025609493256, |
| "mean": 3.0636681913165376e-05, |
| "std": 0.03486590087413788, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_q.bias": { |
| "min": -0.18825751543045044, |
| "max": 0.20496514439582825, |
| "mean": 0.000955467636231333, |
| "std": 0.03160287067294121, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_k.weight": { |
| "min": -0.2913488745689392, |
| "max": 0.34160566329956055, |
| "mean": -4.710702705779113e-05, |
| "std": 0.03458679839968681, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_k.bias": { |
| "min": -3.8994882106781006, |
| "max": 3.406729221343994, |
| "mean": 0.014544591307640076, |
| "std": 0.8605263829231262, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_v.weight": { |
| "min": -0.2257968783378601, |
| "max": 0.2514858543872833, |
| "mean": -3.6003511922899634e-06, |
| "std": 0.042229436337947845, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_v.bias": { |
| "min": -0.055651042610406876, |
| "max": 0.04694758728146553, |
| "mean": -1.666278694756329e-05, |
| "std": 0.015861017629504204, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_out.0.weight": { |
| "min": -0.2935076653957367, |
| "max": 0.2909187078475952, |
| "mean": -7.359203209489351e-06, |
| "std": 0.04194429889321327, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.2.to_out.0.bias": { |
| "min": -0.12573029100894928, |
| "max": 0.2607214152812958, |
| "mean": -0.003240898484364152, |
| "std": 0.05319065600633621, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.3.g": { |
| "min": 0.45657190680503845, |
| "max": 0.8538610339164734, |
| "mean": 0.7059471011161804, |
| "std": 0.03630220517516136, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.4.ff.0.0.weight": { |
| "min": -0.5123063325881958, |
| "max": 0.3483346104621887, |
| "mean": 0.00034276110818609595, |
| "std": 0.04019864276051521, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.17.4.ff.0.0.bias": { |
| "min": -0.18701618909835815, |
| "max": 0.03957710787653923, |
| "mean": -0.03942158818244934, |
| "std": 0.021421542391180992, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.17.4.ff.2.weight": { |
| "min": -0.5481660962104797, |
| "max": 0.5603045225143433, |
| "mean": -7.152351463446394e-05, |
| "std": 0.050734106451272964, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.17.4.ff.2.bias": { |
| "min": -0.5146781802177429, |
| "max": 0.6680049300193787, |
| "mean": 0.002443398116156459, |
| "std": 0.04963434487581253, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.0.weight": { |
| "min": -0.3329000473022461, |
| "max": 0.2665855884552002, |
| "mean": 3.3853375498438254e-06, |
| "std": 0.01938658207654953, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.18.1.g": { |
| "min": 0.32180243730545044, |
| "max": 0.7734456062316895, |
| "mean": 0.6512116193771362, |
| "std": 0.04565456882119179, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_q.weight": { |
| "min": -0.2506096363067627, |
| "max": 0.2205670177936554, |
| "mean": -2.243723429273814e-06, |
| "std": 0.0365004725754261, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_q.bias": { |
| "min": -0.32875651121139526, |
| "max": 0.28859665989875793, |
| "mean": -0.0006945514469407499, |
| "std": 0.03869060054421425, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_k.weight": { |
| "min": -0.31226253509521484, |
| "max": 0.3726266324520111, |
| "mean": 6.49260327918455e-05, |
| "std": 0.03624095767736435, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_k.bias": { |
| "min": -4.75054407119751, |
| "max": 5.848582744598389, |
| "mean": 0.0380375012755394, |
| "std": 1.4184556007385254, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_v.weight": { |
| "min": -0.22316591441631317, |
| "max": 0.2069820612668991, |
| "mean": -7.529938011430204e-05, |
| "std": 0.042484965175390244, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_v.bias": { |
| "min": -0.07815916836261749, |
| "max": 0.051765959709882736, |
| "mean": -0.0009295076015405357, |
| "std": 0.016425304114818573, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_out.0.weight": { |
| "min": -0.3312581181526184, |
| "max": 0.3296850621700287, |
| "mean": -4.723461188405054e-06, |
| "std": 0.04279135540127754, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.2.to_out.0.bias": { |
| "min": -0.2866402864456177, |
| "max": 0.11266554147005081, |
| "mean": -0.0012074881233274937, |
| "std": 0.04703830927610397, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.3.g": { |
| "min": 0.4860897958278656, |
| "max": 0.8950455784797668, |
| "mean": 0.7378093004226685, |
| "std": 0.039171766489744186, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.4.ff.0.0.weight": { |
| "min": -0.3630240857601166, |
| "max": 0.2759678065776825, |
| "mean": 5.1290608098497614e-05, |
| "std": 0.04064415767788887, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.18.4.ff.0.0.bias": { |
| "min": -0.2490600198507309, |
| "max": 0.04639717563986778, |
| "mean": -0.03930266201496124, |
| "std": 0.023369962349534035, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.18.4.ff.2.weight": { |
| "min": -0.6307172775268555, |
| "max": 0.6014147996902466, |
| "mean": -6.16723409621045e-05, |
| "std": 0.05311626195907593, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.18.4.ff.2.bias": { |
| "min": -0.7142688035964966, |
| "max": 0.267661988735199, |
| "mean": 0.0009166492964141071, |
| "std": 0.051358189433813095, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.0.weight": { |
| "min": -0.3435579240322113, |
| "max": 0.3038428723812103, |
| "mean": 1.3023259270994458e-07, |
| "std": 0.019134989008307457, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.19.1.g": { |
| "min": 0.3500676155090332, |
| "max": 0.7897790670394897, |
| "mean": 0.6390184760093689, |
| "std": 0.04962107539176941, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_q.weight": { |
| "min": -0.2066265493631363, |
| "max": 0.20817363262176514, |
| "mean": -5.989617056911811e-05, |
| "std": 0.037695348262786865, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_q.bias": { |
| "min": -0.2602774202823639, |
| "max": 0.2698180377483368, |
| "mean": -0.00039462913991883397, |
| "std": 0.04474588483572006, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_k.weight": { |
| "min": -0.3561350107192993, |
| "max": 0.32447537779808044, |
| "mean": -6.916588063177187e-06, |
| "std": 0.03720375522971153, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_k.bias": { |
| "min": -5.291650295257568, |
| "max": 4.228523254394531, |
| "mean": -0.02643691562116146, |
| "std": 1.0099413394927979, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_v.weight": { |
| "min": -0.2399577796459198, |
| "max": 0.24472706019878387, |
| "mean": -2.5193990950356238e-05, |
| "std": 0.04320961609482765, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_v.bias": { |
| "min": -0.06267981976270676, |
| "max": 0.05705071985721588, |
| "mean": 0.0003437635023146868, |
| "std": 0.014168186113238335, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_out.0.weight": { |
| "min": -0.4376278221607208, |
| "max": 0.3739663064479828, |
| "mean": 1.456045083614299e-05, |
| "std": 0.04412108287215233, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.2.to_out.0.bias": { |
| "min": -0.09702851623296738, |
| "max": 0.17698785662651062, |
| "mean": -0.0006597189931198955, |
| "std": 0.03517333045601845, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.3.g": { |
| "min": 0.4217059910297394, |
| "max": 1.0791560411453247, |
| "mean": 0.7486134767532349, |
| "std": 0.04263925552368164, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.4.ff.0.0.weight": { |
| "min": -0.26739904284477234, |
| "max": 0.298541396856308, |
| "mean": -7.951692532515153e-05, |
| "std": 0.040804121643304825, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.19.4.ff.0.0.bias": { |
| "min": -0.18641552329063416, |
| "max": 0.043663352727890015, |
| "mean": -0.036861587315797806, |
| "std": 0.0257096104323864, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.19.4.ff.2.weight": { |
| "min": -0.4583725333213806, |
| "max": 0.4902479946613312, |
| "mean": 4.34339017374441e-05, |
| "std": 0.05420944094657898, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.19.4.ff.2.bias": { |
| "min": -0.2883600890636444, |
| "max": 0.5551440119743347, |
| "mean": -0.0008822724921628833, |
| "std": 0.04795018211007118, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.0.weight": { |
| "min": -0.2930268347263336, |
| "max": 0.3230960965156555, |
| "mean": 6.1333103076322e-06, |
| "std": 0.01996854692697525, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.20.1.g": { |
| "min": 0.29084402322769165, |
| "max": 0.768223226070404, |
| "mean": 0.650917649269104, |
| "std": 0.05231805518269539, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_q.weight": { |
| "min": -0.24454748630523682, |
| "max": 0.2624610364437103, |
| "mean": -5.949783371761441e-06, |
| "std": 0.039611514657735825, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_q.bias": { |
| "min": -0.2689764201641083, |
| "max": 0.20118767023086548, |
| "mean": -0.000883190892636776, |
| "std": 0.05189211666584015, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_k.weight": { |
| "min": -0.27367931604385376, |
| "max": 0.25521987676620483, |
| "mean": 4.683277438743971e-06, |
| "std": 0.038708530366420746, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_k.bias": { |
| "min": -13.039263725280762, |
| "max": 16.03864097595215, |
| "mean": 0.03343699499964714, |
| "std": 1.9974913597106934, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_v.weight": { |
| "min": -0.2084328532218933, |
| "max": 0.2273532599210739, |
| "mean": -7.200734398793429e-05, |
| "std": 0.040553417056798935, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_v.bias": { |
| "min": -0.06970705837011337, |
| "max": 0.06357143819332123, |
| "mean": 0.00015784359129611403, |
| "std": 0.014761138707399368, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_out.0.weight": { |
| "min": -0.46569308638572693, |
| "max": 0.3209618628025055, |
| "mean": 1.970405901374761e-05, |
| "std": 0.04058854654431343, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.2.to_out.0.bias": { |
| "min": -0.06452719122171402, |
| "max": 0.11591468751430511, |
| "mean": 0.0011942506534978747, |
| "std": 0.024729805067181587, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.3.g": { |
| "min": 0.37459689378738403, |
| "max": 0.9426000118255615, |
| "mean": 0.7511058449745178, |
| "std": 0.040696173906326294, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.4.ff.0.0.weight": { |
| "min": -0.2817957103252411, |
| "max": 0.27507483959198, |
| "mean": -0.00016845125355757773, |
| "std": 0.040994707494974136, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.20.4.ff.0.0.bias": { |
| "min": -0.19982005655765533, |
| "max": 0.05116043612360954, |
| "mean": -0.03206067159771919, |
| "std": 0.025184709578752518, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.20.4.ff.2.weight": { |
| "min": -0.6629015207290649, |
| "max": 0.5394555330276489, |
| "mean": -4.886999522568658e-05, |
| "std": 0.052846018224954605, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.20.4.ff.2.bias": { |
| "min": -0.1941312849521637, |
| "max": 0.5856620669364929, |
| "mean": -0.0005102052818983793, |
| "std": 0.04117872565984726, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.0.weight": { |
| "min": -0.41802144050598145, |
| "max": 0.37218335270881653, |
| "mean": 6.143730843177764e-06, |
| "std": 0.021620716899633408, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.21.1.g": { |
| "min": 0.214231476187706, |
| "max": 0.7551652193069458, |
| "mean": 0.6496015787124634, |
| "std": 0.05449988320469856, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_q.weight": { |
| "min": -0.21102380752563477, |
| "max": 0.19707706570625305, |
| "mean": 4.027696923003532e-05, |
| "std": 0.03946160152554512, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_q.bias": { |
| "min": -0.3312985599040985, |
| "max": 0.2609282433986664, |
| "mean": -0.0032433252781629562, |
| "std": 0.05640969052910805, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_k.weight": { |
| "min": -0.20687410235404968, |
| "max": 0.25594964623451233, |
| "mean": 5.426290590548888e-05, |
| "std": 0.038564227521419525, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_k.bias": { |
| "min": -6.281450271606445, |
| "max": 6.974554538726807, |
| "mean": 0.04850253462791443, |
| "std": 1.3900896310806274, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_v.weight": { |
| "min": -0.2110043168067932, |
| "max": 0.23172873258590698, |
| "mean": -5.136051640874939e-06, |
| "std": 0.04131242260336876, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_v.bias": { |
| "min": -0.04407680407166481, |
| "max": 0.03620957210659981, |
| "mean": 5.837064236402512e-07, |
| "std": 0.012804933823645115, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_out.0.weight": { |
| "min": -0.3980613648891449, |
| "max": 0.34518715739250183, |
| "mean": -5.568802953348495e-05, |
| "std": 0.04238880053162575, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.2.to_out.0.bias": { |
| "min": -0.0554049089550972, |
| "max": 0.06314343214035034, |
| "mean": 0.00036526317126117647, |
| "std": 0.01868700049817562, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.3.g": { |
| "min": 0.35041460394859314, |
| "max": 1.054603099822998, |
| "mean": 0.7895448207855225, |
| "std": 0.04915067180991173, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.4.ff.0.0.weight": { |
| "min": -0.33399659395217896, |
| "max": 0.3868362009525299, |
| "mean": -0.00016958778724074364, |
| "std": 0.04147977754473686, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.21.4.ff.0.0.bias": { |
| "min": -0.15840038657188416, |
| "max": 0.059087082743644714, |
| "mean": -0.03186880797147751, |
| "std": 0.02521045319736004, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.21.4.ff.2.weight": { |
| "min": -0.6981510519981384, |
| "max": 0.47227516770362854, |
| "mean": -8.876612992025912e-05, |
| "std": 0.05179238319396973, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.21.4.ff.2.bias": { |
| "min": -0.2498706579208374, |
| "max": 0.33086034655570984, |
| "mean": -0.0002500821719877422, |
| "std": 0.04153008759021759, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.0.weight": { |
| "min": -0.2874675989151001, |
| "max": 0.3506753444671631, |
| "mean": -2.142998255294515e-06, |
| "std": 0.024235961958765984, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.22.1.g": { |
| "min": 0.19644968211650848, |
| "max": 0.7875264883041382, |
| "mean": 0.6702861189842224, |
| "std": 0.058757346123456955, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_q.weight": { |
| "min": -0.2307407557964325, |
| "max": 0.23255716264247894, |
| "mean": -1.9847611838486046e-05, |
| "std": 0.04043736308813095, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_q.bias": { |
| "min": -0.22115467488765717, |
| "max": 0.24231739342212677, |
| "mean": 0.0007812330732122064, |
| "std": 0.05595459043979645, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_k.weight": { |
| "min": -0.21687255799770355, |
| "max": 0.22770829498767853, |
| "mean": -7.165952411014587e-05, |
| "std": 0.03937350586056709, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_k.bias": { |
| "min": -8.959362030029297, |
| "max": 9.123239517211914, |
| "mean": -0.0011855876073241234, |
| "std": 1.8560608625411987, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_v.weight": { |
| "min": -0.2711891233921051, |
| "max": 0.2605840563774109, |
| "mean": 4.364762571640313e-05, |
| "std": 0.038405757397413254, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_v.bias": { |
| "min": -0.05802099406719208, |
| "max": 0.05812212452292442, |
| "mean": 0.0003513882402330637, |
| "std": 0.014736738055944443, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_out.0.weight": { |
| "min": -0.26627787947654724, |
| "max": 0.28912854194641113, |
| "mean": -6.142335041658953e-05, |
| "std": 0.03907188028097153, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.2.to_out.0.bias": { |
| "min": -0.04412651062011719, |
| "max": 0.03752894699573517, |
| "mean": -9.05310153029859e-05, |
| "std": 0.013374187983572483, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.3.g": { |
| "min": 0.339313268661499, |
| "max": 1.1022799015045166, |
| "mean": 0.8638956546783447, |
| "std": 0.06418420374393463, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.4.ff.0.0.weight": { |
| "min": -0.42381733655929565, |
| "max": 0.41949865221977234, |
| "mean": 0.0003125929506495595, |
| "std": 0.04350028932094574, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.22.4.ff.0.0.bias": { |
| "min": -0.2159820944070816, |
| "max": 0.1717892736196518, |
| "mean": -0.02952037751674652, |
| "std": 0.0320223867893219, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.22.4.ff.2.weight": { |
| "min": -0.6032647490501404, |
| "max": 0.5633653998374939, |
| "mean": -0.00015064005856402218, |
| "std": 0.053445085883140564, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.22.4.ff.2.bias": { |
| "min": -0.17956292629241943, |
| "max": 0.37900540232658386, |
| "mean": 0.0013650960754603148, |
| "std": 0.03737950697541237, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.0.weight": { |
| "min": -0.3949747383594513, |
| "max": 0.36959531903266907, |
| "mean": 3.693038524943404e-05, |
| "std": 0.028617311269044876, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.23.1.g": { |
| "min": 0.2902548313140869, |
| "max": 0.835411548614502, |
| "mean": 0.7055742740631104, |
| "std": 0.06795050203800201, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_q.weight": { |
| "min": -0.9264549016952515, |
| "max": 1.0266518592834473, |
| "mean": -2.6062916731461883e-05, |
| "std": 0.047624703496694565, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_q.bias": { |
| "min": -0.8848392963409424, |
| "max": 0.8210154175758362, |
| "mean": -0.00031388079514726996, |
| "std": 0.09599340707063675, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_k.weight": { |
| "min": -0.2704119086265564, |
| "max": 0.24200940132141113, |
| "mean": -2.2776041078031994e-05, |
| "std": 0.03895159065723419, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_k.bias": { |
| "min": -23.909391403198242, |
| "max": 23.011491775512695, |
| "mean": -0.09215216338634491, |
| "std": 4.095620155334473, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_v.weight": { |
| "min": -0.2288811355829239, |
| "max": 0.24590590596199036, |
| "mean": -2.564151509432122e-05, |
| "std": 0.03863710165023804, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_v.bias": { |
| "min": -0.060657572001218796, |
| "max": 0.04613931104540825, |
| "mean": -0.00014338521577883512, |
| "std": 0.014703062362968922, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_out.0.weight": { |
| "min": -0.33906009793281555, |
| "max": 0.37649407982826233, |
| "mean": 7.5478201324585825e-06, |
| "std": 0.04081288352608681, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.2.to_out.0.bias": { |
| "min": -0.04671977460384369, |
| "max": 0.19674423336982727, |
| "mean": 0.0002734751324169338, |
| "std": 0.013588963076472282, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.3.g": { |
| "min": 0.3744518756866455, |
| "max": 1.1423423290252686, |
| "mean": 0.890155553817749, |
| "std": 0.0642639547586441, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.4.ff.0.0.weight": { |
| "min": -0.44847023487091064, |
| "max": 0.5443573594093323, |
| "mean": 2.4567927539465018e-05, |
| "std": 0.04556553065776825, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.23.4.ff.0.0.bias": { |
| "min": -0.2254226952791214, |
| "max": 0.08823559433221817, |
| "mean": -0.0320654921233654, |
| "std": 0.03788232430815697, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.23.4.ff.2.weight": { |
| "min": -0.7300624251365662, |
| "max": 0.6936558485031128, |
| "mean": 3.439782449277118e-05, |
| "std": 0.05177776888012886, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.23.4.ff.2.bias": { |
| "min": -0.1755923330783844, |
| "max": 0.21977680921554565, |
| "mean": 4.2144907638430595e-05, |
| "std": 0.03183648735284805, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.0.weight": { |
| "min": -0.3417545258998871, |
| "max": 0.3754495084285736, |
| "mean": 4.2937641410389915e-05, |
| "std": 0.03413964807987213, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.24.1.g": { |
| "min": 0.3177294135093689, |
| "max": 1.2977259159088135, |
| "mean": 0.6017159223556519, |
| "std": 0.08427947759628296, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_q.weight": { |
| "min": -0.2838163673877716, |
| "max": 0.2612304091453552, |
| "mean": -2.8361523618514184e-06, |
| "std": 0.03598065674304962, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_q.bias": { |
| "min": -0.23691536486148834, |
| "max": 0.20665380358695984, |
| "mean": 0.0002377421478740871, |
| "std": 0.05610164627432823, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_k.weight": { |
| "min": -0.4367288649082184, |
| "max": 0.326652467250824, |
| "mean": 2.422912439214997e-05, |
| "std": 0.034131284803152084, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_k.bias": { |
| "min": -5.582788944244385, |
| "max": 7.362354278564453, |
| "mean": -0.007508529350161552, |
| "std": 0.7035665512084961, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_v.weight": { |
| "min": -0.34583720564842224, |
| "max": 0.3661332130432129, |
| "mean": 0.00010320795263396576, |
| "std": 0.04782785847783089, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_v.bias": { |
| "min": -0.07427486777305603, |
| "max": 0.060801248997449875, |
| "mean": 0.0009337762021459639, |
| "std": 0.014963135123252869, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_out.0.weight": { |
| "min": -0.25689101219177246, |
| "max": 0.28821247816085815, |
| "mean": 4.153083864366636e-06, |
| "std": 0.04155467450618744, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.2.to_out.0.bias": { |
| "min": -0.05564720183610916, |
| "max": 0.0631924495100975, |
| "mean": 0.0001379186287522316, |
| "std": 0.007182796951383352, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.3.g": { |
| "min": 0.49357107281684875, |
| "max": 1.2338876724243164, |
| "mean": 1.0134950876235962, |
| "std": 0.11754289269447327, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.4.ff.0.0.weight": { |
| "min": -1.0940601825714111, |
| "max": 1.0474328994750977, |
| "mean": -4.88213227072265e-05, |
| "std": 0.05240841209888458, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.24.4.ff.0.0.bias": { |
| "min": -0.2248232364654541, |
| "max": 0.17388059198856354, |
| "mean": -0.02729785442352295, |
| "std": 0.036497559398412704, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.24.4.ff.2.weight": { |
| "min": -0.8899852633476257, |
| "max": 0.9281743168830872, |
| "mean": -0.00014587071200367063, |
| "std": 0.05328153818845749, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.24.4.ff.2.bias": { |
| "min": -0.17224453389644623, |
| "max": 0.38245582580566406, |
| "mean": 0.0033820997923612595, |
| "std": 0.04001828283071518, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.0.weight": { |
| "min": -0.7799473404884338, |
| "max": 0.7260819673538208, |
| "mean": 1.8725522750173695e-05, |
| "std": 0.046160738915205, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 2048 |
| ] |
| }, |
| "transformer.layers.25.1.g": { |
| "min": 0.33860552310943604, |
| "max": 1.442690134048462, |
| "mean": 0.9484557509422302, |
| "std": 0.20696218311786652, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_q.weight": { |
| "min": -1.7459073066711426, |
| "max": 1.704575538635254, |
| "mean": 0.00022730980708729476, |
| "std": 0.15868498384952545, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_q.bias": { |
| "min": -1.2076622247695923, |
| "max": 1.1073572635650635, |
| "mean": -0.00959145836532116, |
| "std": 0.20509476959705353, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_k.weight": { |
| "min": -0.4218980371952057, |
| "max": 0.4278029203414917, |
| "mean": 6.46372718620114e-05, |
| "std": 0.048015668988227844, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_k.bias": { |
| "min": -19.878219604492188, |
| "max": 19.671934127807617, |
| "mean": -0.24954606592655182, |
| "std": 4.8062262535095215, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_v.weight": { |
| "min": -0.3252118229866028, |
| "max": 0.44012102484703064, |
| "mean": -1.1724467185558751e-05, |
| "std": 0.04616120085120201, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_v.bias": { |
| "min": -0.03427257761359215, |
| "max": 0.03733307123184204, |
| "mean": 0.0006422841688618064, |
| "std": 0.012923721224069595, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_out.0.weight": { |
| "min": -0.7051200270652771, |
| "max": 0.6666434407234192, |
| "mean": 4.353695476311259e-05, |
| "std": 0.0578814335167408, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.2.to_out.0.bias": { |
| "min": -0.07273512333631516, |
| "max": 0.06799687445163727, |
| "mean": -0.0001354652486043051, |
| "std": 0.012961134314537048, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.3.g": { |
| "min": 0.3802323043346405, |
| "max": 1.392055869102478, |
| "mean": 1.0665756464004517, |
| "std": 0.2197023183107376, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.4.ff.0.0.weight": { |
| "min": -0.6175218224525452, |
| "max": 0.7191157341003418, |
| "mean": 0.00011173778329975903, |
| "std": 0.058020252734422684, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096, |
| 1024 |
| ] |
| }, |
| "transformer.layers.25.4.ff.0.0.bias": { |
| "min": -0.22093473374843597, |
| "max": 0.22644445300102234, |
| "mean": 0.006260717287659645, |
| "std": 0.04986373335123062, |
| "sparsity": 0.0, |
| "shape": [ |
| 4096 |
| ] |
| }, |
| "transformer.layers.25.4.ff.2.weight": { |
| "min": -0.6302544474601746, |
| "max": 0.8900287747383118, |
| "mean": 1.1643458492471837e-05, |
| "std": 0.023527663201093674, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024, |
| 4096 |
| ] |
| }, |
| "transformer.layers.25.4.ff.2.bias": { |
| "min": -0.5102453231811523, |
| "max": 0.4771297872066498, |
| "mean": -0.0030403323471546173, |
| "std": 0.06969437003135681, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.norm_out.g": { |
| "min": 0.5377517342567444, |
| "max": 1.1850762367248535, |
| "mean": 0.7829766273498535, |
| "std": 0.09934176504611969, |
| "sparsity": 0.0, |
| "shape": [ |
| 1024 |
| ] |
| }, |
| "transformer.proj_out.weight": { |
| "min": -0.26876378059387207, |
| "max": 0.21405881643295288, |
| "mean": -0.00022433605045080185, |
| "std": 0.053995925933122635, |
| "sparsity": 0.0, |
| "shape": [ |
| 100, |
| 1024 |
| ] |
| }, |
| "transformer.proj_out.bias": { |
| "min": -0.23968708515167236, |
| "max": 0.014838683418929577, |
| "mean": -0.0440097339451313, |
| "std": 0.03449948504567146, |
| "sparsity": 0.0, |
| "shape": [ |
| 100 |
| ] |
| } |
| } |
| } |