{ "layer_types": { "transformer": 391 }, "parameter_counts": { "transformer.time_embed.time_mlp.0.weight": 262144, "transformer.time_embed.time_mlp.0.bias": 1024, "transformer.time_embed.time_mlp.2.weight": 1048576, "transformer.time_embed.time_mlp.2.bias": 1024, "transformer.text_embed.text_embed.weight": 254600, "transformer.input_embed.proj.weight": 307200, "transformer.input_embed.proj.bias": 1024, "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, "transformer.layers.0.1.g": 1024, "transformer.layers.0.2.to_q.weight": 1048576, "transformer.layers.0.2.to_q.bias": 1024, "transformer.layers.0.2.to_k.weight": 1048576, "transformer.layers.0.2.to_k.bias": 1024, "transformer.layers.0.2.to_v.weight": 1048576, "transformer.layers.0.2.to_v.bias": 1024, "transformer.layers.0.2.to_out.0.weight": 1048576, "transformer.layers.0.2.to_out.0.bias": 1024, "transformer.layers.0.3.g": 1024, "transformer.layers.0.4.ff.0.0.weight": 4194304, "transformer.layers.0.4.ff.0.0.bias": 4096, "transformer.layers.0.4.ff.2.weight": 4194304, "transformer.layers.0.4.ff.2.bias": 1024, "transformer.layers.1.1.g": 1024, "transformer.layers.1.2.to_q.weight": 1048576, "transformer.layers.1.2.to_q.bias": 1024, "transformer.layers.1.2.to_k.weight": 1048576, "transformer.layers.1.2.to_k.bias": 1024, "transformer.layers.1.2.to_v.weight": 1048576, "transformer.layers.1.2.to_v.bias": 1024, "transformer.layers.1.2.to_out.0.weight": 1048576, "transformer.layers.1.2.to_out.0.bias": 1024, "transformer.layers.1.3.g": 1024, "transformer.layers.1.4.ff.0.0.weight": 4194304, "transformer.layers.1.4.ff.0.0.bias": 4096, "transformer.layers.1.4.ff.2.weight": 4194304, "transformer.layers.1.4.ff.2.bias": 1024, "transformer.layers.2.1.g": 1024, "transformer.layers.2.2.to_q.weight": 1048576, "transformer.layers.2.2.to_q.bias": 1024, "transformer.layers.2.2.to_k.weight": 1048576, "transformer.layers.2.2.to_k.bias": 1024, "transformer.layers.2.2.to_v.weight": 1048576, "transformer.layers.2.2.to_v.bias": 1024, "transformer.layers.2.2.to_out.0.weight": 1048576, "transformer.layers.2.2.to_out.0.bias": 1024, "transformer.layers.2.3.g": 1024, "transformer.layers.2.4.ff.0.0.weight": 4194304, "transformer.layers.2.4.ff.0.0.bias": 4096, "transformer.layers.2.4.ff.2.weight": 4194304, "transformer.layers.2.4.ff.2.bias": 1024, "transformer.layers.3.1.g": 1024, "transformer.layers.3.2.to_q.weight": 1048576, "transformer.layers.3.2.to_q.bias": 1024, "transformer.layers.3.2.to_k.weight": 1048576, "transformer.layers.3.2.to_k.bias": 1024, "transformer.layers.3.2.to_v.weight": 1048576, "transformer.layers.3.2.to_v.bias": 1024, "transformer.layers.3.2.to_out.0.weight": 1048576, "transformer.layers.3.2.to_out.0.bias": 1024, "transformer.layers.3.3.g": 1024, "transformer.layers.3.4.ff.0.0.weight": 4194304, "transformer.layers.3.4.ff.0.0.bias": 4096, "transformer.layers.3.4.ff.2.weight": 4194304, "transformer.layers.3.4.ff.2.bias": 1024, "transformer.layers.4.1.g": 1024, "transformer.layers.4.2.to_q.weight": 1048576, "transformer.layers.4.2.to_q.bias": 1024, "transformer.layers.4.2.to_k.weight": 1048576, "transformer.layers.4.2.to_k.bias": 1024, "transformer.layers.4.2.to_v.weight": 1048576, "transformer.layers.4.2.to_v.bias": 1024, "transformer.layers.4.2.to_out.0.weight": 1048576, "transformer.layers.4.2.to_out.0.bias": 1024, "transformer.layers.4.3.g": 1024, "transformer.layers.4.4.ff.0.0.weight": 4194304, "transformer.layers.4.4.ff.0.0.bias": 4096, "transformer.layers.4.4.ff.2.weight": 4194304, "transformer.layers.4.4.ff.2.bias": 1024, "transformer.layers.5.1.g": 1024, "transformer.layers.5.2.to_q.weight": 1048576, "transformer.layers.5.2.to_q.bias": 1024, "transformer.layers.5.2.to_k.weight": 1048576, "transformer.layers.5.2.to_k.bias": 1024, "transformer.layers.5.2.to_v.weight": 1048576, "transformer.layers.5.2.to_v.bias": 1024, "transformer.layers.5.2.to_out.0.weight": 1048576, "transformer.layers.5.2.to_out.0.bias": 1024, "transformer.layers.5.3.g": 1024, "transformer.layers.5.4.ff.0.0.weight": 4194304, "transformer.layers.5.4.ff.0.0.bias": 4096, "transformer.layers.5.4.ff.2.weight": 4194304, "transformer.layers.5.4.ff.2.bias": 1024, "transformer.layers.6.1.g": 1024, "transformer.layers.6.2.to_q.weight": 1048576, "transformer.layers.6.2.to_q.bias": 1024, "transformer.layers.6.2.to_k.weight": 1048576, "transformer.layers.6.2.to_k.bias": 1024, "transformer.layers.6.2.to_v.weight": 1048576, "transformer.layers.6.2.to_v.bias": 1024, "transformer.layers.6.2.to_out.0.weight": 1048576, "transformer.layers.6.2.to_out.0.bias": 1024, "transformer.layers.6.3.g": 1024, "transformer.layers.6.4.ff.0.0.weight": 4194304, "transformer.layers.6.4.ff.0.0.bias": 4096, "transformer.layers.6.4.ff.2.weight": 4194304, "transformer.layers.6.4.ff.2.bias": 1024, "transformer.layers.7.1.g": 1024, "transformer.layers.7.2.to_q.weight": 1048576, "transformer.layers.7.2.to_q.bias": 1024, "transformer.layers.7.2.to_k.weight": 1048576, "transformer.layers.7.2.to_k.bias": 1024, "transformer.layers.7.2.to_v.weight": 1048576, "transformer.layers.7.2.to_v.bias": 1024, "transformer.layers.7.2.to_out.0.weight": 1048576, "transformer.layers.7.2.to_out.0.bias": 1024, "transformer.layers.7.3.g": 1024, "transformer.layers.7.4.ff.0.0.weight": 4194304, "transformer.layers.7.4.ff.0.0.bias": 4096, "transformer.layers.7.4.ff.2.weight": 4194304, "transformer.layers.7.4.ff.2.bias": 1024, "transformer.layers.8.1.g": 1024, "transformer.layers.8.2.to_q.weight": 1048576, "transformer.layers.8.2.to_q.bias": 1024, "transformer.layers.8.2.to_k.weight": 1048576, "transformer.layers.8.2.to_k.bias": 1024, "transformer.layers.8.2.to_v.weight": 1048576, "transformer.layers.8.2.to_v.bias": 1024, "transformer.layers.8.2.to_out.0.weight": 1048576, "transformer.layers.8.2.to_out.0.bias": 1024, "transformer.layers.8.3.g": 1024, "transformer.layers.8.4.ff.0.0.weight": 4194304, "transformer.layers.8.4.ff.0.0.bias": 4096, "transformer.layers.8.4.ff.2.weight": 4194304, "transformer.layers.8.4.ff.2.bias": 1024, "transformer.layers.9.1.g": 1024, "transformer.layers.9.2.to_q.weight": 1048576, "transformer.layers.9.2.to_q.bias": 1024, "transformer.layers.9.2.to_k.weight": 1048576, "transformer.layers.9.2.to_k.bias": 1024, "transformer.layers.9.2.to_v.weight": 1048576, "transformer.layers.9.2.to_v.bias": 1024, "transformer.layers.9.2.to_out.0.weight": 1048576, "transformer.layers.9.2.to_out.0.bias": 1024, "transformer.layers.9.3.g": 1024, "transformer.layers.9.4.ff.0.0.weight": 4194304, "transformer.layers.9.4.ff.0.0.bias": 4096, "transformer.layers.9.4.ff.2.weight": 4194304, "transformer.layers.9.4.ff.2.bias": 1024, "transformer.layers.10.1.g": 1024, "transformer.layers.10.2.to_q.weight": 1048576, "transformer.layers.10.2.to_q.bias": 1024, "transformer.layers.10.2.to_k.weight": 1048576, "transformer.layers.10.2.to_k.bias": 1024, "transformer.layers.10.2.to_v.weight": 1048576, "transformer.layers.10.2.to_v.bias": 1024, "transformer.layers.10.2.to_out.0.weight": 1048576, "transformer.layers.10.2.to_out.0.bias": 1024, "transformer.layers.10.3.g": 1024, "transformer.layers.10.4.ff.0.0.weight": 4194304, "transformer.layers.10.4.ff.0.0.bias": 4096, "transformer.layers.10.4.ff.2.weight": 4194304, "transformer.layers.10.4.ff.2.bias": 1024, "transformer.layers.11.1.g": 1024, "transformer.layers.11.2.to_q.weight": 1048576, "transformer.layers.11.2.to_q.bias": 1024, "transformer.layers.11.2.to_k.weight": 1048576, "transformer.layers.11.2.to_k.bias": 1024, "transformer.layers.11.2.to_v.weight": 1048576, "transformer.layers.11.2.to_v.bias": 1024, "transformer.layers.11.2.to_out.0.weight": 1048576, "transformer.layers.11.2.to_out.0.bias": 1024, "transformer.layers.11.3.g": 1024, "transformer.layers.11.4.ff.0.0.weight": 4194304, "transformer.layers.11.4.ff.0.0.bias": 4096, "transformer.layers.11.4.ff.2.weight": 4194304, "transformer.layers.11.4.ff.2.bias": 1024, "transformer.layers.12.1.g": 1024, "transformer.layers.12.2.to_q.weight": 1048576, "transformer.layers.12.2.to_q.bias": 1024, "transformer.layers.12.2.to_k.weight": 1048576, "transformer.layers.12.2.to_k.bias": 1024, "transformer.layers.12.2.to_v.weight": 1048576, "transformer.layers.12.2.to_v.bias": 1024, "transformer.layers.12.2.to_out.0.weight": 1048576, "transformer.layers.12.2.to_out.0.bias": 1024, "transformer.layers.12.3.g": 1024, "transformer.layers.12.4.ff.0.0.weight": 4194304, "transformer.layers.12.4.ff.0.0.bias": 4096, "transformer.layers.12.4.ff.2.weight": 4194304, "transformer.layers.12.4.ff.2.bias": 1024, "transformer.layers.13.0.weight": 2097152, "transformer.layers.13.1.g": 1024, "transformer.layers.13.2.to_q.weight": 1048576, "transformer.layers.13.2.to_q.bias": 1024, "transformer.layers.13.2.to_k.weight": 1048576, "transformer.layers.13.2.to_k.bias": 1024, "transformer.layers.13.2.to_v.weight": 1048576, "transformer.layers.13.2.to_v.bias": 1024, "transformer.layers.13.2.to_out.0.weight": 1048576, "transformer.layers.13.2.to_out.0.bias": 1024, "transformer.layers.13.3.g": 1024, "transformer.layers.13.4.ff.0.0.weight": 4194304, "transformer.layers.13.4.ff.0.0.bias": 4096, "transformer.layers.13.4.ff.2.weight": 4194304, "transformer.layers.13.4.ff.2.bias": 1024, "transformer.layers.14.0.weight": 2097152, "transformer.layers.14.1.g": 1024, "transformer.layers.14.2.to_q.weight": 1048576, "transformer.layers.14.2.to_q.bias": 1024, "transformer.layers.14.2.to_k.weight": 1048576, "transformer.layers.14.2.to_k.bias": 1024, "transformer.layers.14.2.to_v.weight": 1048576, "transformer.layers.14.2.to_v.bias": 1024, "transformer.layers.14.2.to_out.0.weight": 1048576, "transformer.layers.14.2.to_out.0.bias": 1024, "transformer.layers.14.3.g": 1024, "transformer.layers.14.4.ff.0.0.weight": 4194304, "transformer.layers.14.4.ff.0.0.bias": 4096, "transformer.layers.14.4.ff.2.weight": 4194304, "transformer.layers.14.4.ff.2.bias": 1024, "transformer.layers.15.0.weight": 2097152, "transformer.layers.15.1.g": 1024, "transformer.layers.15.2.to_q.weight": 1048576, "transformer.layers.15.2.to_q.bias": 1024, "transformer.layers.15.2.to_k.weight": 1048576, "transformer.layers.15.2.to_k.bias": 1024, "transformer.layers.15.2.to_v.weight": 1048576, "transformer.layers.15.2.to_v.bias": 1024, "transformer.layers.15.2.to_out.0.weight": 1048576, "transformer.layers.15.2.to_out.0.bias": 1024, "transformer.layers.15.3.g": 1024, "transformer.layers.15.4.ff.0.0.weight": 4194304, "transformer.layers.15.4.ff.0.0.bias": 4096, "transformer.layers.15.4.ff.2.weight": 4194304, "transformer.layers.15.4.ff.2.bias": 1024, "transformer.layers.16.0.weight": 2097152, "transformer.layers.16.1.g": 1024, "transformer.layers.16.2.to_q.weight": 1048576, "transformer.layers.16.2.to_q.bias": 1024, "transformer.layers.16.2.to_k.weight": 1048576, "transformer.layers.16.2.to_k.bias": 1024, "transformer.layers.16.2.to_v.weight": 1048576, "transformer.layers.16.2.to_v.bias": 1024, "transformer.layers.16.2.to_out.0.weight": 1048576, "transformer.layers.16.2.to_out.0.bias": 1024, "transformer.layers.16.3.g": 1024, "transformer.layers.16.4.ff.0.0.weight": 4194304, "transformer.layers.16.4.ff.0.0.bias": 4096, "transformer.layers.16.4.ff.2.weight": 4194304, "transformer.layers.16.4.ff.2.bias": 1024, "transformer.layers.17.0.weight": 2097152, "transformer.layers.17.1.g": 1024, "transformer.layers.17.2.to_q.weight": 1048576, "transformer.layers.17.2.to_q.bias": 1024, "transformer.layers.17.2.to_k.weight": 1048576, "transformer.layers.17.2.to_k.bias": 1024, "transformer.layers.17.2.to_v.weight": 1048576, "transformer.layers.17.2.to_v.bias": 1024, "transformer.layers.17.2.to_out.0.weight": 1048576, "transformer.layers.17.2.to_out.0.bias": 1024, "transformer.layers.17.3.g": 1024, "transformer.layers.17.4.ff.0.0.weight": 4194304, "transformer.layers.17.4.ff.0.0.bias": 4096, "transformer.layers.17.4.ff.2.weight": 4194304, "transformer.layers.17.4.ff.2.bias": 1024, "transformer.layers.18.0.weight": 2097152, "transformer.layers.18.1.g": 1024, "transformer.layers.18.2.to_q.weight": 1048576, "transformer.layers.18.2.to_q.bias": 1024, "transformer.layers.18.2.to_k.weight": 1048576, "transformer.layers.18.2.to_k.bias": 1024, "transformer.layers.18.2.to_v.weight": 1048576, "transformer.layers.18.2.to_v.bias": 1024, "transformer.layers.18.2.to_out.0.weight": 1048576, "transformer.layers.18.2.to_out.0.bias": 1024, "transformer.layers.18.3.g": 1024, "transformer.layers.18.4.ff.0.0.weight": 4194304, "transformer.layers.18.4.ff.0.0.bias": 4096, "transformer.layers.18.4.ff.2.weight": 4194304, "transformer.layers.18.4.ff.2.bias": 1024, "transformer.layers.19.0.weight": 2097152, "transformer.layers.19.1.g": 1024, "transformer.layers.19.2.to_q.weight": 1048576, "transformer.layers.19.2.to_q.bias": 1024, "transformer.layers.19.2.to_k.weight": 1048576, "transformer.layers.19.2.to_k.bias": 1024, "transformer.layers.19.2.to_v.weight": 1048576, "transformer.layers.19.2.to_v.bias": 1024, "transformer.layers.19.2.to_out.0.weight": 1048576, "transformer.layers.19.2.to_out.0.bias": 1024, "transformer.layers.19.3.g": 1024, "transformer.layers.19.4.ff.0.0.weight": 4194304, "transformer.layers.19.4.ff.0.0.bias": 4096, "transformer.layers.19.4.ff.2.weight": 4194304, "transformer.layers.19.4.ff.2.bias": 1024, "transformer.layers.20.0.weight": 2097152, "transformer.layers.20.1.g": 1024, "transformer.layers.20.2.to_q.weight": 1048576, "transformer.layers.20.2.to_q.bias": 1024, "transformer.layers.20.2.to_k.weight": 1048576, "transformer.layers.20.2.to_k.bias": 1024, "transformer.layers.20.2.to_v.weight": 1048576, "transformer.layers.20.2.to_v.bias": 1024, "transformer.layers.20.2.to_out.0.weight": 1048576, "transformer.layers.20.2.to_out.0.bias": 1024, "transformer.layers.20.3.g": 1024, "transformer.layers.20.4.ff.0.0.weight": 4194304, "transformer.layers.20.4.ff.0.0.bias": 4096, "transformer.layers.20.4.ff.2.weight": 4194304, "transformer.layers.20.4.ff.2.bias": 1024, "transformer.layers.21.0.weight": 2097152, "transformer.layers.21.1.g": 1024, "transformer.layers.21.2.to_q.weight": 1048576, "transformer.layers.21.2.to_q.bias": 1024, "transformer.layers.21.2.to_k.weight": 1048576, "transformer.layers.21.2.to_k.bias": 1024, "transformer.layers.21.2.to_v.weight": 1048576, "transformer.layers.21.2.to_v.bias": 1024, "transformer.layers.21.2.to_out.0.weight": 1048576, "transformer.layers.21.2.to_out.0.bias": 1024, "transformer.layers.21.3.g": 1024, "transformer.layers.21.4.ff.0.0.weight": 4194304, "transformer.layers.21.4.ff.0.0.bias": 4096, "transformer.layers.21.4.ff.2.weight": 4194304, "transformer.layers.21.4.ff.2.bias": 1024, "transformer.layers.22.0.weight": 2097152, "transformer.layers.22.1.g": 1024, "transformer.layers.22.2.to_q.weight": 1048576, "transformer.layers.22.2.to_q.bias": 1024, "transformer.layers.22.2.to_k.weight": 1048576, "transformer.layers.22.2.to_k.bias": 1024, "transformer.layers.22.2.to_v.weight": 1048576, "transformer.layers.22.2.to_v.bias": 1024, "transformer.layers.22.2.to_out.0.weight": 1048576, "transformer.layers.22.2.to_out.0.bias": 1024, "transformer.layers.22.3.g": 1024, "transformer.layers.22.4.ff.0.0.weight": 4194304, "transformer.layers.22.4.ff.0.0.bias": 4096, "transformer.layers.22.4.ff.2.weight": 4194304, "transformer.layers.22.4.ff.2.bias": 1024, "transformer.layers.23.0.weight": 2097152, "transformer.layers.23.1.g": 1024, "transformer.layers.23.2.to_q.weight": 1048576, "transformer.layers.23.2.to_q.bias": 1024, "transformer.layers.23.2.to_k.weight": 1048576, "transformer.layers.23.2.to_k.bias": 1024, "transformer.layers.23.2.to_v.weight": 1048576, "transformer.layers.23.2.to_v.bias": 1024, "transformer.layers.23.2.to_out.0.weight": 1048576, "transformer.layers.23.2.to_out.0.bias": 1024, "transformer.layers.23.3.g": 1024, "transformer.layers.23.4.ff.0.0.weight": 4194304, "transformer.layers.23.4.ff.0.0.bias": 4096, "transformer.layers.23.4.ff.2.weight": 4194304, "transformer.layers.23.4.ff.2.bias": 1024, "transformer.layers.24.0.weight": 2097152, "transformer.layers.24.1.g": 1024, "transformer.layers.24.2.to_q.weight": 1048576, "transformer.layers.24.2.to_q.bias": 1024, "transformer.layers.24.2.to_k.weight": 1048576, "transformer.layers.24.2.to_k.bias": 1024, "transformer.layers.24.2.to_v.weight": 1048576, "transformer.layers.24.2.to_v.bias": 1024, "transformer.layers.24.2.to_out.0.weight": 1048576, "transformer.layers.24.2.to_out.0.bias": 1024, "transformer.layers.24.3.g": 1024, "transformer.layers.24.4.ff.0.0.weight": 4194304, "transformer.layers.24.4.ff.0.0.bias": 4096, "transformer.layers.24.4.ff.2.weight": 4194304, "transformer.layers.24.4.ff.2.bias": 1024, "transformer.layers.25.0.weight": 2097152, "transformer.layers.25.1.g": 1024, "transformer.layers.25.2.to_q.weight": 1048576, "transformer.layers.25.2.to_q.bias": 1024, "transformer.layers.25.2.to_k.weight": 1048576, "transformer.layers.25.2.to_k.bias": 1024, "transformer.layers.25.2.to_v.weight": 1048576, "transformer.layers.25.2.to_v.bias": 1024, "transformer.layers.25.2.to_out.0.weight": 1048576, "transformer.layers.25.2.to_out.0.bias": 1024, "transformer.layers.25.3.g": 1024, "transformer.layers.25.4.ff.0.0.weight": 4194304, "transformer.layers.25.4.ff.0.0.bias": 4096, "transformer.layers.25.4.ff.2.weight": 4194304, "transformer.layers.25.4.ff.2.bias": 1024, "transformer.norm_out.g": 1024, "transformer.proj_out.weight": 102400, "transformer.proj_out.bias": 100 }, "important_layers": [ "transformer.time_embed.time_mlp.0.weight", "transformer.time_embed.time_mlp.2.weight", "transformer.text_embed.text_embed.weight", "transformer.input_embed.proj.weight", "transformer.input_embed.conv_pos_embed.conv1d.0.weight", "transformer.input_embed.conv_pos_embed.conv1d.2.weight", "transformer.layers.0.2.to_q.weight", "transformer.layers.0.2.to_k.weight", "transformer.layers.0.2.to_v.weight", "transformer.layers.0.2.to_out.0.weight", "transformer.layers.0.4.ff.0.0.weight", "transformer.layers.0.4.ff.2.weight", "transformer.layers.1.2.to_q.weight", "transformer.layers.1.2.to_k.weight", "transformer.layers.1.2.to_v.weight", "transformer.layers.1.2.to_out.0.weight", "transformer.layers.1.4.ff.0.0.weight", "transformer.layers.1.4.ff.2.weight", "transformer.layers.2.2.to_q.weight", "transformer.layers.2.2.to_k.weight", "transformer.layers.2.2.to_v.weight", "transformer.layers.2.2.to_out.0.weight", "transformer.layers.2.4.ff.0.0.weight", "transformer.layers.2.4.ff.2.weight", "transformer.layers.3.2.to_q.weight", "transformer.layers.3.2.to_k.weight", "transformer.layers.3.2.to_v.weight", "transformer.layers.3.2.to_out.0.weight", "transformer.layers.3.4.ff.0.0.weight", "transformer.layers.3.4.ff.2.weight", "transformer.layers.4.2.to_q.weight", "transformer.layers.4.2.to_k.weight", "transformer.layers.4.2.to_v.weight", "transformer.layers.4.2.to_out.0.weight", "transformer.layers.4.4.ff.0.0.weight", "transformer.layers.4.4.ff.2.weight", "transformer.layers.5.2.to_q.weight", "transformer.layers.5.2.to_k.weight", "transformer.layers.5.2.to_v.weight", "transformer.layers.5.2.to_out.0.weight", "transformer.layers.5.4.ff.0.0.weight", "transformer.layers.5.4.ff.2.weight", "transformer.layers.6.2.to_q.weight", "transformer.layers.6.2.to_k.weight", "transformer.layers.6.2.to_v.weight", "transformer.layers.6.2.to_out.0.weight", "transformer.layers.6.4.ff.0.0.weight", "transformer.layers.6.4.ff.2.weight", "transformer.layers.7.2.to_q.weight", "transformer.layers.7.2.to_k.weight", "transformer.layers.7.2.to_v.weight", "transformer.layers.7.2.to_out.0.weight", "transformer.layers.7.4.ff.0.0.weight", "transformer.layers.7.4.ff.2.weight", "transformer.layers.8.4.ff.0.0.weight", "transformer.layers.8.4.ff.2.weight", "transformer.layers.9.4.ff.0.0.weight", "transformer.layers.9.4.ff.2.weight", "transformer.layers.10.4.ff.0.0.weight", "transformer.layers.10.4.ff.2.weight", "transformer.layers.11.4.ff.0.0.weight", "transformer.layers.11.4.ff.2.weight", "transformer.layers.12.4.ff.0.0.weight", "transformer.layers.12.4.ff.2.weight", "transformer.layers.13.0.weight", "transformer.layers.13.4.ff.0.0.weight", "transformer.layers.13.4.ff.2.weight", "transformer.layers.14.0.weight", "transformer.layers.14.4.ff.0.0.weight", "transformer.layers.14.4.ff.2.weight", "transformer.layers.15.0.weight", "transformer.layers.15.4.ff.0.0.weight", "transformer.layers.15.4.ff.2.weight", "transformer.layers.16.4.ff.0.0.weight", "transformer.layers.16.4.ff.2.weight", "transformer.layers.17.4.ff.0.0.weight", "transformer.layers.17.4.ff.2.weight", "transformer.layers.18.4.ff.0.0.weight", "transformer.layers.18.4.ff.2.weight", "transformer.layers.19.4.ff.0.0.weight", "transformer.layers.19.4.ff.2.weight", "transformer.layers.20.4.ff.0.0.weight", "transformer.layers.20.4.ff.2.weight", "transformer.layers.21.4.ff.0.0.weight", "transformer.layers.21.4.ff.2.weight", "transformer.layers.22.4.ff.0.0.weight", "transformer.layers.22.4.ff.2.weight", "transformer.layers.23.4.ff.0.0.weight", "transformer.layers.23.4.ff.2.weight", "transformer.layers.24.4.ff.0.0.weight", "transformer.layers.24.4.ff.2.weight", "transformer.layers.25.4.ff.0.0.weight", "transformer.layers.25.4.ff.2.weight" ], "bottleneck_layers": [], "recommendations": { "focus_layers": [ "transformer.time_embed.time_mlp.0.weight", "transformer.time_embed.time_mlp.2.weight", "transformer.text_embed.text_embed.weight", "transformer.input_embed.proj.weight", "transformer.input_embed.conv_pos_embed.conv1d.0.weight", "transformer.input_embed.conv_pos_embed.conv1d.2.weight", "transformer.layers.0.2.to_q.weight", "transformer.layers.0.2.to_k.weight", "transformer.layers.0.2.to_v.weight", "transformer.layers.0.2.to_out.0.weight", "transformer.layers.0.4.ff.0.0.weight", "transformer.layers.0.4.ff.2.weight", "transformer.layers.1.2.to_q.weight", "transformer.layers.1.2.to_k.weight", "transformer.layers.1.2.to_v.weight", "transformer.layers.1.2.to_out.0.weight", "transformer.layers.1.4.ff.0.0.weight", "transformer.layers.1.4.ff.2.weight", "transformer.layers.2.2.to_q.weight", "transformer.layers.2.2.to_k.weight", "transformer.layers.2.2.to_v.weight", "transformer.layers.2.2.to_out.0.weight", "transformer.layers.2.4.ff.0.0.weight", "transformer.layers.2.4.ff.2.weight", "transformer.layers.3.2.to_q.weight", "transformer.layers.3.2.to_k.weight", "transformer.layers.3.2.to_v.weight", "transformer.layers.3.2.to_out.0.weight", "transformer.layers.3.4.ff.0.0.weight", "transformer.layers.3.4.ff.2.weight", "transformer.layers.4.2.to_q.weight", "transformer.layers.4.2.to_k.weight", "transformer.layers.4.2.to_v.weight", "transformer.layers.4.2.to_out.0.weight", "transformer.layers.4.4.ff.0.0.weight", "transformer.layers.4.4.ff.2.weight", "transformer.layers.5.2.to_q.weight", "transformer.layers.5.2.to_k.weight", "transformer.layers.5.2.to_v.weight", "transformer.layers.5.2.to_out.0.weight", "transformer.layers.5.4.ff.0.0.weight", "transformer.layers.5.4.ff.2.weight", "transformer.layers.6.2.to_q.weight", "transformer.layers.6.2.to_k.weight", "transformer.layers.6.2.to_v.weight", "transformer.layers.6.2.to_out.0.weight", "transformer.layers.6.4.ff.0.0.weight", "transformer.layers.6.4.ff.2.weight", "transformer.layers.7.2.to_q.weight", "transformer.layers.7.2.to_k.weight", "transformer.layers.7.2.to_v.weight", "transformer.layers.7.2.to_out.0.weight", "transformer.layers.7.4.ff.0.0.weight", "transformer.layers.7.4.ff.2.weight", "transformer.layers.8.4.ff.0.0.weight", "transformer.layers.8.4.ff.2.weight", "transformer.layers.9.4.ff.0.0.weight", "transformer.layers.9.4.ff.2.weight", "transformer.layers.10.4.ff.0.0.weight", "transformer.layers.10.4.ff.2.weight", "transformer.layers.11.4.ff.0.0.weight", "transformer.layers.11.4.ff.2.weight", "transformer.layers.12.4.ff.0.0.weight", "transformer.layers.12.4.ff.2.weight", "transformer.layers.13.0.weight", "transformer.layers.13.4.ff.0.0.weight", "transformer.layers.13.4.ff.2.weight", "transformer.layers.14.0.weight", "transformer.layers.14.4.ff.0.0.weight", "transformer.layers.14.4.ff.2.weight", "transformer.layers.15.0.weight", "transformer.layers.15.4.ff.0.0.weight", "transformer.layers.15.4.ff.2.weight", "transformer.layers.16.4.ff.0.0.weight", "transformer.layers.16.4.ff.2.weight", "transformer.layers.17.4.ff.0.0.weight", "transformer.layers.17.4.ff.2.weight", "transformer.layers.18.4.ff.0.0.weight", "transformer.layers.18.4.ff.2.weight", "transformer.layers.19.4.ff.0.0.weight", "transformer.layers.19.4.ff.2.weight", "transformer.layers.20.4.ff.0.0.weight", "transformer.layers.20.4.ff.2.weight", "transformer.layers.21.4.ff.0.0.weight", "transformer.layers.21.4.ff.2.weight", "transformer.layers.22.4.ff.0.0.weight", "transformer.layers.22.4.ff.2.weight", "transformer.layers.23.4.ff.0.0.weight", "transformer.layers.23.4.ff.2.weight", "transformer.layers.24.4.ff.0.0.weight", "transformer.layers.24.4.ff.2.weight", "transformer.layers.25.4.ff.0.0.weight", "transformer.layers.25.4.ff.2.weight" ] }, "total_parameters": 391, "total_elements": 360755948, "param_ranges": { "transformer.time_embed.time_mlp.0.weight": { "min": -0.430247962474823, "max": 0.29814788699150085, "mean": -0.0025456156581640244, "std": 0.042562179267406464, "sparsity": 0.0, "shape": [ 1024, 256 ] }, "transformer.time_embed.time_mlp.0.bias": { "min": -0.06305033713579178, "max": 0.10756707191467285, "mean": 0.0006329622119665146, "std": 0.03406817466020584, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.time_embed.time_mlp.2.weight": { "min": -0.4126828908920288, "max": 0.8368642926216125, "mean": -0.00020196933473926038, "std": 0.024113450199365616, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.time_embed.time_mlp.2.bias": { "min": -0.11526867002248764, "max": 0.3216077983379364, "mean": -0.0009404964512214065, "std": 0.019565371796488762, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.text_embed.text_embed.weight": { "min": -2.7922351360321045, "max": 2.8709537982940674, "mean": -0.0003647372650448233, "std": 0.6154845356941223, "sparsity": 0.0, "shape": [ 2546, 100 ] }, "transformer.input_embed.proj.weight": { "min": -0.27921348810195923, "max": 0.38164129853248596, "mean": 0.0004232236242387444, "std": 0.04274886101484299, "sparsity": 0.0, "shape": [ 1024, 300 ] }, "transformer.input_embed.proj.bias": { "min": -0.2224942147731781, "max": 0.20972047746181488, "mean": -0.004487486090511084, "std": 0.040916070342063904, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { "min": -0.4284340739250183, "max": 0.47617435455322266, "mean": 3.322187239973573e-06, "std": 0.024511422961950302, "sparsity": 0.0, "shape": [ 1024, 64, 31 ] }, "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { "min": -0.32528114318847656, "max": 0.15677402913570404, "mean": -0.04670446366071701, "std": 0.051589105278253555, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { "min": -0.41054657101631165, "max": 0.3546879291534424, "mean": -0.00012705953849945217, "std": 0.023604456335306168, "sparsity": 0.0, "shape": [ 1024, 64, 31 ] }, "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { "min": -0.22982755303382874, "max": 0.26271378993988037, "mean": -0.029137738049030304, "std": 0.049353621900081635, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.1.g": { "min": 0.25457319617271423, "max": 0.8201438188552856, "mean": 0.5254908800125122, "std": 0.08082503080368042, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_q.weight": { "min": -0.29710477590560913, "max": 0.26579147577285767, "mean": -0.0004257034743204713, "std": 0.03210267424583435, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_q.bias": { "min": -0.09286229312419891, "max": 0.12479868531227112, "mean": 0.0006487525533884764, "std": 0.025735046714544296, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_k.weight": { "min": -0.290811687707901, "max": 0.2813718020915985, "mean": -7.56493245717138e-05, "std": 0.030931707471609116, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_k.bias": { "min": -5.900395393371582, "max": 5.815171718597412, "mean": -0.009333105757832527, "std": 1.295695185661316, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_v.weight": { "min": -0.4251435399055481, "max": 0.3437366187572479, "mean": 9.79713149718009e-05, "std": 0.02995358221232891, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_v.bias": { "min": -0.028972996398806572, "max": 0.027724435552954674, "mean": -0.00031865754863247275, "std": 0.012574296444654465, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_out.0.weight": { "min": -0.45405057072639465, "max": 0.44834038615226746, "mean": 2.372298331465572e-05, "std": 0.02385387383401394, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_out.0.bias": { "min": -0.08870794624090195, "max": 0.09110292047262192, "mean": 0.0022859524469822645, "std": 0.01951485686004162, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.3.g": { "min": 0.26681551337242126, "max": 1.056317687034607, "mean": 0.5312033891677856, "std": 0.10443911701440811, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.4.ff.0.0.weight": { "min": -0.5745526552200317, "max": 0.6082873940467834, "mean": -0.00043126955279149115, "std": 0.03860025480389595, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.0.4.ff.0.0.bias": { "min": -0.18273141980171204, "max": 0.04556818678975105, "mean": -0.029461650177836418, "std": 0.042611170560121536, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.0.4.ff.2.weight": { "min": -1.1671894788742065, "max": 1.6339271068572998, "mean": 0.0003239789803046733, "std": 0.027696946635842323, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.0.4.ff.2.bias": { "min": -0.16238771378993988, "max": 0.20571960508823395, "mean": -0.021131085231900215, "std": 0.02794588916003704, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.1.g": { "min": 0.22399598360061646, "max": 0.8438678979873657, "mean": 0.48765647411346436, "std": 0.07522650808095932, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_q.weight": { "min": -0.2555526793003082, "max": 0.305812269449234, "mean": -6.7934306571260095e-06, "std": 0.03347478806972504, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_q.bias": { "min": -0.09538023918867111, "max": 0.11050069332122803, "mean": 6.53832103125751e-05, "std": 0.02696637623012066, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_k.weight": { "min": -0.297147661447525, "max": 0.2961280345916748, "mean": 5.286935265758075e-05, "std": 0.032545968890190125, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_k.bias": { "min": -5.165225028991699, "max": 5.085448741912842, "mean": -0.014597500674426556, "std": 1.1575955152511597, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_v.weight": { "min": -0.3449052572250366, "max": 0.34331217408180237, "mean": 7.911311695352197e-05, "std": 0.03006201609969139, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_v.bias": { "min": -0.03610210865736008, "max": 0.03328812122344971, "mean": -0.0001417656458215788, "std": 0.01303204894065857, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_out.0.weight": { "min": -0.3154510259628296, "max": 0.37501609325408936, "mean": -2.077353019558359e-05, "std": 0.024059347808361053, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_out.0.bias": { "min": -0.10547598451375961, "max": 0.1221047043800354, "mean": -0.0019677607342600822, "std": 0.028854791074991226, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.3.g": { "min": 0.31151488423347473, "max": 1.1208997964859009, "mean": 0.6663015484809875, "std": 0.09774678200483322, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.4.ff.0.0.weight": { "min": -0.8727833032608032, "max": 0.6275414824485779, "mean": 0.001675266888923943, "std": 0.04743880406022072, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.1.4.ff.0.0.bias": { "min": -0.2714674770832062, "max": 0.03427550569176674, "mean": -0.04661353677511215, "std": 0.040598493069410324, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.1.4.ff.2.weight": { "min": -0.9226045608520508, "max": 0.9647504687309265, "mean": 0.0010200842516496778, "std": 0.040706485509872437, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.1.4.ff.2.bias": { "min": -0.1445719450712204, "max": 0.07502147555351257, "mean": -0.009089105762541294, "std": 0.025694996118545532, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.1.g": { "min": 0.24015086889266968, "max": 0.7130303978919983, "mean": 0.4472612142562866, "std": 0.05932846665382385, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_q.weight": { "min": -0.27250105142593384, "max": 0.29779112339019775, "mean": 9.235942343366332e-06, "std": 0.03546915203332901, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_q.bias": { "min": -0.1193777546286583, "max": 0.11857955157756805, "mean": 0.0007589810993522406, "std": 0.02763049118220806, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_k.weight": { "min": -0.28105634450912476, "max": 0.2798849046230316, "mean": -7.697378896409646e-05, "std": 0.0350995697081089, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_k.bias": { "min": -2.5100622177124023, "max": 2.5220582485198975, "mean": 0.02675231173634529, "std": 0.5868890285491943, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_v.weight": { "min": -0.2211104929447174, "max": 0.27162447571754456, "mean": 2.60172691923799e-06, "std": 0.030733274295926094, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_v.bias": { "min": -0.033548399806022644, "max": 0.03133385255932808, "mean": 0.00011904191342182457, "std": 0.012407796457409859, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_out.0.weight": { "min": -0.23527584969997406, "max": 0.23167696595191956, "mean": 5.708727621822618e-05, "std": 0.025696981698274612, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_out.0.bias": { "min": -0.13586905598640442, "max": 0.12758414447307587, "mean": -0.0054936036467552185, "std": 0.039962876588106155, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.3.g": { "min": 0.35451188683509827, "max": 1.1720999479293823, "mean": 0.710637629032135, "std": 0.10376914590597153, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.4.ff.0.0.weight": { "min": -0.6174948811531067, "max": 0.5544577240943909, "mean": 0.0011600415455177426, "std": 0.04611966758966446, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.2.4.ff.0.0.bias": { "min": -0.1883939653635025, "max": 0.02492486871778965, "mean": -0.03484141081571579, "std": 0.028610829263925552, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.2.4.ff.2.weight": { "min": -1.131612777709961, "max": 0.9714275002479553, "mean": 0.00035819801269099116, "std": 0.04234758019447327, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.2.4.ff.2.bias": { "min": -0.5980822443962097, "max": 0.06284141540527344, "mean": -0.004877430386841297, "std": 0.028617603704333305, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.1.g": { "min": 0.37526264786720276, "max": 0.9405426383018494, "mean": 0.5925549268722534, "std": 0.0669507160782814, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_q.weight": { "min": -0.39145711064338684, "max": 0.3691279888153076, "mean": 7.120549707906321e-05, "std": 0.03718876466155052, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_q.bias": { "min": -0.11895960569381714, "max": 0.13652607798576355, "mean": 0.0009289687732234597, "std": 0.029236802831292152, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_k.weight": { "min": -0.619219183921814, "max": 0.5088949203491211, "mean": 1.4944693248253316e-05, "std": 0.036442093551158905, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_k.bias": { "min": -8.188663482666016, "max": 8.790773391723633, "mean": -0.10929473489522934, "std": 1.6991605758666992, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_v.weight": { "min": -0.2766683101654053, "max": 0.23983481526374817, "mean": 5.299611802911386e-05, "std": 0.032615721225738525, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_v.bias": { "min": -0.052095651626586914, "max": 0.039515361189842224, "mean": 9.424134623259306e-05, "std": 0.012960628606379032, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_out.0.weight": { "min": -0.23076868057250977, "max": 0.234751895070076, "mean": -2.1736430426244624e-05, "std": 0.029392007738351822, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_out.0.bias": { "min": -0.20435833930969238, "max": 0.10555171221494675, "mean": -0.004022371023893356, "std": 0.03262435272336006, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.3.g": { "min": 0.33977094292640686, "max": 1.0126755237579346, "mean": 0.7008676528930664, "std": 0.0967569425702095, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.4.ff.0.0.weight": { "min": -0.5649488568305969, "max": 0.8331477046012878, "mean": 0.00041524306288920343, "std": 0.04230210557579994, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.3.4.ff.0.0.bias": { "min": -0.21171551942825317, "max": 0.030433084815740585, "mean": -0.03218771517276764, "std": 0.026509009301662445, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.3.4.ff.2.weight": { "min": -0.7544965744018555, "max": 0.7186921834945679, "mean": -1.2556927686091512e-05, "std": 0.036842044442892075, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.3.4.ff.2.bias": { "min": -0.26356518268585205, "max": 0.10585562884807587, "mean": -0.003026221413165331, "std": 0.028868772089481354, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.1.g": { "min": 0.28427132964134216, "max": 0.6951562762260437, "mean": 0.4995492994785309, "std": 0.046537742018699646, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_q.weight": { "min": -0.27920955419540405, "max": 0.23424308001995087, "mean": -0.00011120487761218101, "std": 0.038762450218200684, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_q.bias": { "min": -0.15435229241847992, "max": 0.126743882894516, "mean": -0.002232551807537675, "std": 0.03338867425918579, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_k.weight": { "min": -0.41404443979263306, "max": 0.6600516438484192, "mean": -1.9756593246711418e-05, "std": 0.03909948095679283, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_k.bias": { "min": -4.238841533660889, "max": 4.723404884338379, "mean": -0.02046278491616249, "std": 1.0078744888305664, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_v.weight": { "min": -0.24500444531440735, "max": 0.20759114623069763, "mean": 4.401802652864717e-05, "std": 0.03396647423505783, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_v.bias": { "min": -0.03457580879330635, "max": 0.04486193135380745, "mean": -1.914246240630746e-05, "std": 0.012628658674657345, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_out.0.weight": { "min": -0.20080946385860443, "max": 0.20593363046646118, "mean": -2.9703282052651048e-05, "std": 0.03102399967610836, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_out.0.bias": { "min": -0.20000168681144714, "max": 0.11336001008749008, "mean": -0.002912652213126421, "std": 0.03451835736632347, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.3.g": { "min": 0.3670476973056793, "max": 1.0570876598358154, "mean": 0.6706215143203735, "std": 0.06639451533555984, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.4.ff.0.0.weight": { "min": -0.39835721254348755, "max": 0.5023353695869446, "mean": -3.849938002531417e-05, "std": 0.0411369614303112, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.4.4.ff.0.0.bias": { "min": -0.12806333601474762, "max": 0.026793837547302246, "mean": -0.030542662367224693, "std": 0.021876059472560883, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.4.4.ff.2.weight": { "min": -0.4490928053855896, "max": 0.4329548478126526, "mean": 7.997997454367578e-05, "std": 0.03489622473716736, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.4.4.ff.2.bias": { "min": -0.2676912248134613, "max": 0.07277432084083557, "mean": -0.0011054163333028555, "std": 0.023129144683480263, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.1.g": { "min": 0.28743863105773926, "max": 0.6852545738220215, "mean": 0.5245908498764038, "std": 0.047539178282022476, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_q.weight": { "min": -0.22235621511936188, "max": 0.2234710454940796, "mean": 1.5755222193547525e-05, "std": 0.03895283117890358, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_q.bias": { "min": -0.13644249737262726, "max": 0.10925862938165665, "mean": 0.00023633803357370198, "std": 0.029229167848825455, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_k.weight": { "min": -0.3750911056995392, "max": 0.4374293088912964, "mean": -9.469786164117977e-06, "std": 0.03928925842046738, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_k.bias": { "min": -3.8464367389678955, "max": 5.000250816345215, "mean": 0.009745623916387558, "std": 0.8453732132911682, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_v.weight": { "min": -0.22324559092521667, "max": 0.22006931900978088, "mean": -2.64663412963273e-07, "std": 0.03441375494003296, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_v.bias": { "min": -0.04371564835309982, "max": 0.03597109019756317, "mean": -0.0002580236759968102, "std": 0.012081029824912548, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_out.0.weight": { "min": -0.21329627931118011, "max": 0.1888744831085205, "mean": -1.6700443666195497e-05, "std": 0.03154045715928078, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_out.0.bias": { "min": -0.1808258593082428, "max": 0.12078980356454849, "mean": -0.002406290266662836, "std": 0.04127614200115204, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.3.g": { "min": 0.42247915267944336, "max": 0.9420861601829529, "mean": 0.6627910733222961, "std": 0.0568135567009449, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.4.ff.0.0.weight": { "min": -0.3714267611503601, "max": 0.47587329149246216, "mean": -8.246101788245142e-05, "std": 0.04089611768722534, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.5.4.ff.0.0.bias": { "min": -0.208319753408432, "max": 0.02722310833632946, "mean": -0.03024582751095295, "std": 0.021349623799324036, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.5.4.ff.2.weight": { "min": -0.34010598063468933, "max": 0.7335456013679504, "mean": 8.291324775200337e-05, "std": 0.03477157652378082, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.5.4.ff.2.bias": { "min": -0.2402523010969162, "max": 0.050502024590969086, "mean": -0.0011936500668525696, "std": 0.020464643836021423, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.1.g": { "min": 0.3060189485549927, "max": 0.6537417769432068, "mean": 0.5251810550689697, "std": 0.046129435300827026, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_q.weight": { "min": -0.3043527901172638, "max": 0.2173452079296112, "mean": 6.987799861235544e-05, "std": 0.03949924185872078, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_q.bias": { "min": -0.1495305597782135, "max": 0.13139042258262634, "mean": 0.0003452928503975272, "std": 0.03046758659183979, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_k.weight": { "min": -0.25741448998451233, "max": 0.2021329253911972, "mean": 3.105932046310045e-05, "std": 0.039488501846790314, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_k.bias": { "min": -2.336733102798462, "max": 2.376356840133667, "mean": -0.026247980073094368, "std": 0.44985267519950867, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_v.weight": { "min": -0.18904413282871246, "max": 0.2104651778936386, "mean": 3.720704626175575e-05, "std": 0.03479856252670288, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_v.bias": { "min": -0.03166992589831352, "max": 0.035564228892326355, "mean": -0.00020107123418711126, "std": 0.012294227257370949, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_out.0.weight": { "min": -0.18845464289188385, "max": 0.17046742141246796, "mean": -6.800049595767632e-05, "std": 0.03217524290084839, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_out.0.bias": { "min": -0.13940171897411346, "max": 0.13724905252456665, "mean": -0.002515769563615322, "std": 0.05131084844470024, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.3.g": { "min": 0.4671289920806885, "max": 0.9564934968948364, "mean": 0.6689913272857666, "std": 0.05279172211885452, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.4.ff.0.0.weight": { "min": -0.3243524730205536, "max": 0.30971962213516235, "mean": -1.389088538417127e-06, "std": 0.04095206782221794, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.6.4.ff.0.0.bias": { "min": -0.12475074827671051, "max": 0.02534548193216324, "mean": -0.03070956841111183, "std": 0.019817529246211052, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.6.4.ff.2.weight": { "min": -0.44013386964797974, "max": 0.44524946808815, "mean": 9.531535761198029e-05, "std": 0.03512435778975487, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.6.4.ff.2.bias": { "min": -0.22465433180332184, "max": 0.05168891325592995, "mean": -0.0011842836393043399, "std": 0.018476232886314392, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.1.g": { "min": 0.3392145037651062, "max": 0.739431619644165, "mean": 0.5587528944015503, "std": 0.04140577092766762, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_q.weight": { "min": -0.2725517153739929, "max": 0.2784435749053955, "mean": 1.987360155908391e-05, "std": 0.04106256738305092, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_q.bias": { "min": -0.13695892691612244, "max": 0.13984902203083038, "mean": 0.00048777679330669343, "std": 0.026632118970155716, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_k.weight": { "min": -0.4907291829586029, "max": 0.35599952936172485, "mean": 8.879909000825137e-05, "std": 0.0407005213201046, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_k.bias": { "min": -2.2975404262542725, "max": 1.7454535961151123, "mean": -0.02108157053589821, "std": 0.5002167820930481, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_v.weight": { "min": -0.2176651507616043, "max": 0.19791799783706665, "mean": -4.056983016198501e-05, "std": 0.03423743695020676, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_v.bias": { "min": -0.04131868854165077, "max": 0.038581475615501404, "mean": -0.00014208082575351, "std": 0.012879491783678532, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_out.0.weight": { "min": -0.17750245332717896, "max": 0.18368542194366455, "mean": 4.755006739287637e-05, "std": 0.031560346484184265, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_out.0.bias": { "min": -0.17995940148830414, "max": 0.18388336896896362, "mean": -0.0022164953406900167, "std": 0.05484570935368538, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.3.g": { "min": 0.4742797613143921, "max": 1.0257062911987305, "mean": 0.6453534960746765, "std": 0.05035950988531113, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.4.ff.0.0.weight": { "min": -0.27185168862342834, "max": 0.3093569278717041, "mean": 0.00011239617015235126, "std": 0.04068810120224953, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.7.4.ff.0.0.bias": { "min": -0.10582997649908066, "max": 0.02683391235768795, "mean": -0.029520545154809952, "std": 0.01793094538152218, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.7.4.ff.2.weight": { "min": -0.3390536606311798, "max": 0.32923397421836853, "mean": 5.560236604651436e-05, "std": 0.03441813215613365, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.7.4.ff.2.bias": { "min": -0.181716188788414, "max": 0.04217486456036568, "mean": -0.0010700200218707323, "std": 0.017213836312294006, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.1.g": { "min": 0.32544824481010437, "max": 0.6866950988769531, "mean": 0.511271595954895, "std": 0.036954350769519806, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_q.weight": { "min": -0.23384520411491394, "max": 0.22571122646331787, "mean": -3.601049320423044e-05, "std": 0.0391816720366478, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_q.bias": { "min": -0.1153523325920105, "max": 0.1316574662923813, "mean": 0.000150712497998029, "std": 0.029186168685555458, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_k.weight": { "min": -0.35289716720581055, "max": 0.285473108291626, "mean": 7.233719770738389e-06, "std": 0.03925013542175293, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_k.bias": { "min": -4.133274078369141, "max": 3.544353723526001, "mean": -0.011593173258006573, "std": 0.6827409267425537, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_v.weight": { "min": -0.21133771538734436, "max": 0.20911119878292084, "mean": 3.477419522823766e-05, "std": 0.034489333629608154, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_v.bias": { "min": -0.03563081845641136, "max": 0.04807223752140999, "mean": 0.0007964536780491471, "std": 0.012856329791247845, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_out.0.weight": { "min": -0.21064519882202148, "max": 0.19317731261253357, "mean": -1.2986236015422037e-06, "std": 0.03169986233115196, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_out.0.bias": { "min": -0.1866597682237625, "max": 0.17717307806015015, "mean": -0.002846275921911001, "std": 0.05864023044705391, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.3.g": { "min": 0.47464174032211304, "max": 1.0418421030044556, "mean": 0.6514742970466614, "std": 0.049661051481962204, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.4.ff.0.0.weight": { "min": -0.2484884411096573, "max": 0.3291080594062805, "mean": 0.00018062048184219748, "std": 0.040576666593551636, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.8.4.ff.0.0.bias": { "min": -0.12466001510620117, "max": 0.024652821943163872, "mean": -0.030505184084177017, "std": 0.01760147698223591, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.8.4.ff.2.weight": { "min": -0.42117249965667725, "max": 0.48183169960975647, "mean": 4.90086677018553e-07, "std": 0.03540300950407982, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.8.4.ff.2.bias": { "min": -0.15187376737594604, "max": 0.04340476170182228, "mean": 4.305229231249541e-05, "std": 0.014882412739098072, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.1.g": { "min": 0.31561803817749023, "max": 0.6820628046989441, "mean": 0.5529670715332031, "std": 0.04071620851755142, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_q.weight": { "min": -0.20640292763710022, "max": 0.2199181616306305, "mean": 3.100156754953787e-05, "std": 0.03830336779356003, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_q.bias": { "min": -0.13785934448242188, "max": 0.11272227019071579, "mean": 2.0263127225916833e-05, "std": 0.02582014910876751, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_k.weight": { "min": -0.4027767777442932, "max": 0.37112095952033997, "mean": 2.6220748623018153e-05, "std": 0.038185179233551025, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_k.bias": { "min": -3.7714638710021973, "max": 2.8691656589508057, "mean": 0.0011573480442166328, "std": 0.5169197916984558, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_v.weight": { "min": -0.20294718444347382, "max": 0.1975032389163971, "mean": 2.9508448278647847e-05, "std": 0.03430049493908882, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_v.bias": { "min": -0.050956204533576965, "max": 0.04001324996352196, "mean": -0.0004197848029434681, "std": 0.013423827476799488, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_out.0.weight": { "min": -0.1965385526418686, "max": 0.20179617404937744, "mean": -1.230049292644253e-05, "std": 0.03180824965238571, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_out.0.bias": { "min": -0.1932075023651123, "max": 0.19514988362789154, "mean": -0.002968719694763422, "std": 0.06257235258817673, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.3.g": { "min": 0.3494449555873871, "max": 1.084139108657837, "mean": 0.6672452688217163, "std": 0.055235255509614944, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.4.ff.0.0.weight": { "min": -0.22517867386341095, "max": 0.2515127956867218, "mean": 0.0003590761625673622, "std": 0.04076584428548813, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.9.4.ff.0.0.bias": { "min": -0.09105702489614487, "max": 0.043770160526037216, "mean": -0.030091021209955215, "std": 0.0176088884472847, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.9.4.ff.2.weight": { "min": -0.3535248339176178, "max": 0.30410754680633545, "mean": -4.392282062326558e-05, "std": 0.03712813928723335, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.9.4.ff.2.bias": { "min": -0.16202455759048462, "max": 0.06354078650474548, "mean": -8.128902118187398e-05, "std": 0.01940615102648735, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.1.g": { "min": 0.34876754879951477, "max": 0.7220309376716614, "mean": 0.5424379706382751, "std": 0.039069268852472305, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_q.weight": { "min": -0.2193686068058014, "max": 0.22314214706420898, "mean": -1.1116904715890996e-05, "std": 0.03923606500029564, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_q.bias": { "min": -0.11840695887804031, "max": 0.1707676649093628, "mean": 0.00028346438193693757, "std": 0.025122247636318207, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_k.weight": { "min": -0.24684838950634003, "max": 0.3010847866535187, "mean": -3.651722363429144e-05, "std": 0.038935575634241104, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_k.bias": { "min": -3.5055902004241943, "max": 3.715036153793335, "mean": 0.01585192233324051, "std": 0.7825286984443665, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_v.weight": { "min": -0.21871182322502136, "max": 0.2376304566860199, "mean": -1.361081376671791e-05, "std": 0.03630790859460831, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_v.bias": { "min": -0.04719124361872673, "max": 0.05140624940395355, "mean": 0.00048010991304181516, "std": 0.013516944833099842, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_out.0.weight": { "min": -0.21404245495796204, "max": 0.21762129664421082, "mean": 5.64762121939566e-05, "std": 0.03361983224749565, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_out.0.bias": { "min": -0.2114625871181488, "max": 0.231521874666214, "mean": -0.005106819327920675, "std": 0.06188430264592171, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.3.g": { "min": 0.36219048500061035, "max": 1.1013058423995972, "mean": 0.6993670463562012, "std": 0.053603965789079666, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.4.ff.0.0.weight": { "min": -0.23459365963935852, "max": 0.2449057400226593, "mean": 0.00046347593888640404, "std": 0.04127476364374161, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.10.4.ff.0.0.bias": { "min": -0.09808015823364258, "max": 0.06838114559650421, "mean": -0.03143930807709694, "std": 0.01812371425330639, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.10.4.ff.2.weight": { "min": -0.30170318484306335, "max": 0.3515554368495941, "mean": -8.153638191288337e-05, "std": 0.040280573070049286, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.10.4.ff.2.bias": { "min": -0.15233194828033447, "max": 0.14967864751815796, "mean": 0.00025540069327689707, "std": 0.023036718368530273, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.1.g": { "min": 0.99940425157547, "max": 1.0017729997634888, "mean": 1.0002546310424805, "std": 0.0006659556529484689, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_q.weight": { "min": -0.03126639127731323, "max": 0.03126263990998268, "mean": -1.9294351659482345e-05, "std": 0.018044061958789825, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_q.bias": { "min": -0.031232889741659164, "max": 0.03099249303340912, "mean": -0.001084338640794158, "std": 0.017953665927052498, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_k.weight": { "min": -0.031263306736946106, "max": 0.031267084181308746, "mean": 3.548895620042458e-06, "std": 0.018044468015432358, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_k.bias": { "min": -0.03115880861878395, "max": 0.031179169192910194, "mean": 0.0003339822869747877, "std": 0.018065886572003365, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_v.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_v.bias": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_out.0.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_out.0.bias": { "min": -0.00013742789451498538, "max": 0.00015863632143009454, "mean": 2.736554449711548e-07, "std": 4.781073585036211e-05, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.3.g": { "min": 0.9996252655982971, "max": 1.0021158456802368, "mean": 1.0004429817199707, "std": 0.0006555348518304527, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.4.ff.0.0.weight": { "min": -0.03161333501338959, "max": 0.031580716371536255, "mean": -9.014614079205785e-06, "std": 0.018046868965029716, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.11.4.ff.0.0.bias": { "min": -0.031167982146143913, "max": 0.03145414963364601, "mean": 0.0002899511018767953, "std": 0.01800374686717987, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.11.4.ff.2.weight": { "min": -0.00018904745229519904, "max": 0.00019723534933291376, "mean": 1.0521711502065045e-08, "std": 3.849043423542753e-05, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.11.4.ff.2.bias": { "min": -0.00014144052693154663, "max": 0.00015886471373960376, "mean": 2.7657870305120014e-07, "std": 4.894055746262893e-05, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.1.g": { "min": 0.38299599289894104, "max": 0.7195751070976257, "mean": 0.5807684659957886, "std": 0.03886786475777626, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_q.weight": { "min": -0.23805734515190125, "max": 0.19658388197422028, "mean": 2.6588520995574072e-05, "std": 0.037470221519470215, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_q.bias": { "min": -0.11865263432264328, "max": 0.16607660055160522, "mean": 0.0009905615588650107, "std": 0.027556024491786957, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_k.weight": { "min": -0.24617764353752136, "max": 0.5007338523864746, "mean": -5.0468875997466967e-05, "std": 0.03762808069586754, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_k.bias": { "min": -3.9424328804016113, "max": 3.7695746421813965, "mean": -0.003572134766727686, "std": 0.681464433670044, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_v.weight": { "min": -0.22736115753650665, "max": 0.2514519989490509, "mean": -1.1535179510246962e-05, "std": 0.037439387291669846, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_v.bias": { "min": -0.07172132283449173, "max": 0.08075973391532898, "mean": -0.0005193240358494222, "std": 0.0156661756336689, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_out.0.weight": { "min": -0.2282123565673828, "max": 0.25804591178894043, "mean": -2.8565638785948977e-05, "std": 0.03542618080973625, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_out.0.bias": { "min": -0.20044255256652832, "max": 0.21519678831100464, "mean": -0.005535616539418697, "std": 0.06834741681814194, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.3.g": { "min": 0.40515244007110596, "max": 1.1894633769989014, "mean": 0.7380411624908447, "std": 0.055237166583538055, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.4.ff.0.0.weight": { "min": -0.221146821975708, "max": 0.24604949355125427, "mean": 0.0005211484967730939, "std": 0.041342463344335556, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.12.4.ff.0.0.bias": { "min": -0.10338832437992096, "max": 0.02417122572660446, "mean": -0.03267121687531471, "std": 0.018886109814047813, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.12.4.ff.2.weight": { "min": -0.4494054913520813, "max": 0.4224247634410858, "mean": -0.0004330066149123013, "std": 0.046903740614652634, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.12.4.ff.2.bias": { "min": -0.2513982057571411, "max": 0.47010472416877747, "mean": 0.003200565231963992, "std": 0.04454652965068817, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.0.weight": { "min": -0.3171570301055908, "max": 0.33336329460144043, "mean": -2.526402022340335e-05, "std": 0.021290859207510948, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.13.1.g": { "min": 0.3245790898799896, "max": 0.6854778528213501, "mean": 0.5710608959197998, "std": 0.04472013935446739, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_q.weight": { "min": -0.16466441750526428, "max": 0.1739748865365982, "mean": -4.8596641136100516e-05, "std": 0.03318468853831291, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_q.bias": { "min": -0.18683482706546783, "max": 0.14287494122982025, "mean": 3.6249548429623246e-05, "std": 0.029692435637116432, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_k.weight": { "min": -0.38059577345848083, "max": 0.24607740342617035, "mean": -9.968647646019235e-06, "std": 0.03276587277650833, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_k.bias": { "min": -3.65606689453125, "max": 3.290353775024414, "mean": -0.01425391435623169, "std": 0.9852582812309265, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_v.weight": { "min": -0.23509447276592255, "max": 0.24749873578548431, "mean": -1.7839809515862726e-05, "std": 0.04170282557606697, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_v.bias": { "min": -0.07275734841823578, "max": 0.15453355014324188, "mean": 0.0006638452177867293, "std": 0.025170044973492622, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_out.0.weight": { "min": -0.26656097173690796, "max": 0.24857115745544434, "mean": -1.5359542885562405e-05, "std": 0.040143173187971115, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_out.0.bias": { "min": -0.18948662281036377, "max": 0.19466565549373627, "mean": -0.0012274996843189, "std": 0.06669430434703827, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.3.g": { "min": 0.3292614817619324, "max": 0.9995094537734985, "mean": 0.7192604541778564, "std": 0.05234057828783989, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.4.ff.0.0.weight": { "min": -0.2315857857465744, "max": 0.24574460089206696, "mean": 0.00018271194130647928, "std": 0.04090625420212746, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.13.4.ff.0.0.bias": { "min": -0.11421883851289749, "max": 0.018689358606934547, "mean": -0.04248232766985893, "std": 0.018854642286896706, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.13.4.ff.2.weight": { "min": -0.38993996381759644, "max": 0.4073200523853302, "mean": -2.1967953216517344e-05, "std": 0.04854067787528038, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.13.4.ff.2.bias": { "min": -0.6932199001312256, "max": 0.4125868082046509, "mean": 0.0008555519161745906, "std": 0.06029324233531952, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.0.weight": { "min": -0.0002173546963604167, "max": 1.0001165866851807, "mean": 0.0004882887005805969, "std": 0.0220916960388422, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.14.1.g": { "min": 0.9994292855262756, "max": 1.0017839670181274, "mean": 1.000253677368164, "std": 0.000652652932330966, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_q.weight": { "min": -0.03126111254096031, "max": 0.0312650129199028, "mean": -2.1023370209150016e-05, "std": 0.0180354006588459, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_q.bias": { "min": -0.031219881027936935, "max": 0.031236713752150536, "mean": -0.0006771213375031948, "std": 0.017829909920692444, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_k.weight": { "min": -0.03126417100429535, "max": 0.03126959502696991, "mean": -8.83279244590085e-06, "std": 0.018034426495432854, "sparsity": 9.5367431640625e-07, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_k.bias": { "min": -0.03123662993311882, "max": 0.03124932385981083, "mean": -0.0007298794225789607, "std": 0.01794484816491604, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_v.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_v.bias": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_out.0.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_out.0.bias": { "min": -0.00017386232502758503, "max": 0.00014760847261641175, "mean": 3.442557272137492e-06, "std": 5.325600432115607e-05, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.3.g": { "min": 0.9995221495628357, "max": 1.0020443201065063, "mean": 1.0004539489746094, "std": 0.000669351196847856, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.4.ff.0.0.weight": { "min": -0.03147042542695999, "max": 0.03158598765730858, "mean": 5.1154065658920445e-06, "std": 0.018045036122202873, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.14.4.ff.0.0.bias": { "min": -0.03117763064801693, "max": 0.031405530869960785, "mean": 0.00032266404014080763, "std": 0.0180798526853323, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.14.4.ff.2.weight": { "min": -0.00019398781296331435, "max": 0.0002045449218712747, "mean": 1.7092556845454965e-06, "std": 3.9782767998985946e-05, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.14.4.ff.2.bias": { "min": -0.00017830374417826533, "max": 0.0001471550203859806, "mean": 3.7268218875396997e-06, "std": 5.360128852771595e-05, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.0.weight": { "min": -0.23455342650413513, "max": 0.27251818776130676, "mean": 7.011342859186698e-06, "std": 0.018812235444784164, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.15.1.g": { "min": 0.3213299512863159, "max": 0.6936513781547546, "mean": 0.5816924571990967, "std": 0.045936986804008484, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_q.weight": { "min": -0.18183718621730804, "max": 0.19770397245883942, "mean": -1.1711626939359121e-05, "std": 0.033187560737133026, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_q.bias": { "min": -0.16069863736629486, "max": 0.12950360774993896, "mean": -0.001068056095391512, "std": 0.03414401412010193, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_k.weight": { "min": -0.33220145106315613, "max": 0.31142792105674744, "mean": -1.0354739060858265e-05, "std": 0.03223816305398941, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_k.bias": { "min": -7.803721904754639, "max": 8.76359748840332, "mean": 0.09347197413444519, "std": 1.6197658777236938, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_v.weight": { "min": -0.23378030955791473, "max": 0.24203070998191833, "mean": 4.133610491408035e-05, "std": 0.0408620685338974, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_v.bias": { "min": -0.07593037933111191, "max": 0.06580135226249695, "mean": 0.0004787116195075214, "std": 0.019414879381656647, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_out.0.weight": { "min": -0.24592415988445282, "max": 0.2340637594461441, "mean": -2.9871353035559878e-06, "std": 0.03943677991628647, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_out.0.bias": { "min": -0.1628992110490799, "max": 0.16083794832229614, "mean": 0.001633270876482129, "std": 0.06527844816446304, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.3.g": { "min": 0.5569714307785034, "max": 0.9439458250999451, "mean": 0.7129694819450378, "std": 0.04013355076313019, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.4.ff.0.0.weight": { "min": -0.2286839783191681, "max": 0.2551024854183197, "mean": -4.545085539575666e-05, "std": 0.04058132320642471, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.15.4.ff.0.0.bias": { "min": -0.13476935029029846, "max": 0.02225329726934433, "mean": -0.04135678708553314, "std": 0.018384402617812157, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.15.4.ff.2.weight": { "min": -0.42168760299682617, "max": 0.39237409830093384, "mean": -4.401172191137448e-06, "std": 0.04779110476374626, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.15.4.ff.2.bias": { "min": -0.6073517799377441, "max": 0.6513891220092773, "mean": 0.0015880158171057701, "std": 0.05683854594826698, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.0.weight": { "min": -0.2518226206302643, "max": 0.3207785189151764, "mean": -6.094680884416448e-06, "std": 0.019615668803453445, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.16.1.g": { "min": 0.3598737120628357, "max": 0.6824128031730652, "mean": 0.5707628726959229, "std": 0.0429723858833313, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_q.weight": { "min": -0.22058245539665222, "max": 0.1771002560853958, "mean": -3.480628220131621e-05, "std": 0.0343024767935276, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_q.bias": { "min": -0.16346584260463715, "max": 0.23297329246997833, "mean": 0.000366326654329896, "std": 0.03285832703113556, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_k.weight": { "min": -0.2638060748577118, "max": 0.23985332250595093, "mean": -5.253252311376855e-05, "std": 0.033901575952768326, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_k.bias": { "min": -4.8552327156066895, "max": 5.091460227966309, "mean": 0.04388260096311569, "std": 1.2293205261230469, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_v.weight": { "min": -0.24656128883361816, "max": 0.2505475580692291, "mean": 7.217615348054096e-05, "std": 0.043992768973112106, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_v.bias": { "min": -0.0626230239868164, "max": 0.054548561573028564, "mean": 0.0006508217193186283, "std": 0.017192188650369644, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_out.0.weight": { "min": -0.2865524888038635, "max": 0.2719300389289856, "mean": -4.991707464796491e-05, "std": 0.04299106448888779, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_out.0.bias": { "min": -0.1607704609632492, "max": 0.17038598656654358, "mean": -0.0028860813472419977, "std": 0.05928485840559006, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.3.g": { "min": 0.5196844339370728, "max": 0.9328820705413818, "mean": 0.7135865688323975, "std": 0.03841733559966087, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.4.ff.0.0.weight": { "min": -0.23817408084869385, "max": 0.2493610382080078, "mean": 0.00046480720629915595, "std": 0.04046126455068588, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.16.4.ff.0.0.bias": { "min": -0.14443805813789368, "max": 0.04147465527057648, "mean": -0.03969287499785423, "std": 0.020544789731502533, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.16.4.ff.2.weight": { "min": -0.5328277945518494, "max": 0.5829682350158691, "mean": 6.036185368429869e-06, "std": 0.048868391662836075, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.16.4.ff.2.bias": { "min": -0.5192180871963501, "max": 0.49342840909957886, "mean": 0.0023608917836099863, "std": 0.05344958230853081, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.0.weight": { "min": -0.2736181318759918, "max": 0.31526556611061096, "mean": 1.8652735889190808e-06, "std": 0.020052799955010414, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.17.1.g": { "min": 0.36623507738113403, "max": 0.7115861177444458, "mean": 0.5932326316833496, "std": 0.045942164957523346, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_q.weight": { "min": -0.21099260449409485, "max": 0.19959695637226105, "mean": 3.07829977828078e-05, "std": 0.034868910908699036, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_q.bias": { "min": -0.18723583221435547, "max": 0.20388372242450714, "mean": 0.000956192088779062, "std": 0.031518690288066864, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_k.weight": { "min": -0.28975075483322144, "max": 0.3398789167404175, "mean": -4.732892557512969e-05, "std": 0.034589968621730804, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_k.bias": { "min": -3.877439260482788, "max": 3.3875346183776855, "mean": 0.014458952471613884, "std": 0.858471155166626, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_v.weight": { "min": -0.22435642778873444, "max": 0.249828040599823, "mean": -4.0124336919689085e-06, "std": 0.04223557561635971, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_v.bias": { "min": -0.05512487143278122, "max": 0.046701643615961075, "mean": -1.9162820535711944e-05, "std": 0.015846921131014824, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_out.0.weight": { "min": -0.29301708936691284, "max": 0.29095572233200073, "mean": -7.334054771490628e-06, "std": 0.04195055365562439, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_out.0.bias": { "min": -0.12482845038175583, "max": 0.25941941142082214, "mean": -0.003237831173464656, "std": 0.05315971001982689, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.3.g": { "min": 0.4561736285686493, "max": 0.8445789813995361, "mean": 0.7056531310081482, "std": 0.035228051245212555, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.4.ff.0.0.weight": { "min": -0.5114014148712158, "max": 0.348456472158432, "mean": 0.00034256701474078, "std": 0.04020610451698303, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.17.4.ff.0.0.bias": { "min": -0.18698255717754364, "max": 0.03949001431465149, "mean": -0.03939007595181465, "std": 0.0213507991284132, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.17.4.ff.2.weight": { "min": -0.544358491897583, "max": 0.5564395785331726, "mean": -7.145745621528476e-05, "std": 0.05074309930205345, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.17.4.ff.2.bias": { "min": -0.5117879509925842, "max": 0.6644083857536316, "mean": 0.002445152960717678, "std": 0.04953145608305931, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.0.weight": { "min": -0.33249062299728394, "max": 0.2656247019767761, "mean": 3.6327573980088346e-06, "std": 0.019390461966395378, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.18.1.g": { "min": 0.3221387565135956, "max": 0.7663495540618896, "mean": 0.651084840297699, "std": 0.04530828446149826, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_q.weight": { "min": -0.24955259263515472, "max": 0.21952223777770996, "mean": -2.4627406673971564e-06, "std": 0.0365021638572216, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_q.bias": { "min": -0.32713782787323, "max": 0.2872367203235626, "mean": -0.0006778471870347857, "std": 0.03855384141206741, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_k.weight": { "min": -0.31010347604751587, "max": 0.36993831396102905, "mean": 6.482718890765682e-05, "std": 0.036242760717868805, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_k.bias": { "min": -4.71769905090332, "max": 5.807940483093262, "mean": 0.03795948997139931, "std": 1.4132622480392456, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_v.weight": { "min": -0.2217160314321518, "max": 0.20588469505310059, "mean": -7.503203232772648e-05, "std": 0.04249139502644539, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_v.bias": { "min": -0.07754088938236237, "max": 0.051487792283296585, "mean": -0.0009253682801499963, "std": 0.016408486291766167, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_out.0.weight": { "min": -0.3308248519897461, "max": 0.32916712760925293, "mean": -4.993749826098792e-06, "std": 0.042798057198524475, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_out.0.bias": { "min": -0.2850324213504791, "max": 0.1117776408791542, "mean": -0.0012074043042957783, "std": 0.047010280191898346, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.3.g": { "min": 0.4863123297691345, "max": 0.8869433403015137, "mean": 0.7375507354736328, "std": 0.03823651745915413, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.4.ff.0.0.weight": { "min": -0.36125612258911133, "max": 0.27433156967163086, "mean": 5.119972047396004e-05, "std": 0.04065272584557533, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.18.4.ff.0.0.bias": { "min": -0.2477303296327591, "max": 0.04647788032889366, "mean": -0.03926857188344002, "std": 0.023257533088326454, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.18.4.ff.2.weight": { "min": -0.6263415217399597, "max": 0.5970607399940491, "mean": -6.0351769207045436e-05, "std": 0.05312627553939819, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.18.4.ff.2.bias": { "min": -0.709812343120575, "max": 0.2658604085445404, "mean": 0.0009171634446829557, "std": 0.051236364990472794, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.0.weight": { "min": -0.3433721363544464, "max": 0.30349576473236084, "mean": 1.867878154371283e-07, "std": 0.019139809533953667, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.19.1.g": { "min": 0.34990525245666504, "max": 0.7829033136367798, "mean": 0.6388983726501465, "std": 0.04923005402088165, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_q.weight": { "min": -0.20573130249977112, "max": 0.2069031298160553, "mean": -5.999910717946477e-05, "std": 0.037698354572057724, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_q.bias": { "min": -0.25860944390296936, "max": 0.2683144211769104, "mean": -0.00040654174517840147, "std": 0.04462500661611557, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_k.weight": { "min": -0.3541562557220459, "max": 0.3225262761116028, "mean": -7.357165486610029e-06, "std": 0.03720669820904732, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_k.bias": { "min": -5.261901378631592, "max": 4.204929351806641, "mean": -0.026422729715704918, "std": 1.0068349838256836, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_v.weight": { "min": -0.23875762522220612, "max": 0.24374397099018097, "mean": -2.557489278842695e-05, "std": 0.04321581870317459, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_v.bias": { "min": -0.06234561279416084, "max": 0.05673680081963539, "mean": 0.00034723637509159744, "std": 0.01415068656206131, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_out.0.weight": { "min": -0.4374503195285797, "max": 0.37361523509025574, "mean": 1.4507659216178581e-05, "std": 0.044127773493528366, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_out.0.bias": { "min": -0.09634225070476532, "max": 0.17621064186096191, "mean": -0.0006586947711184621, "std": 0.035146258771419525, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.3.g": { "min": 0.421725332736969, "max": 1.0694254636764526, "mean": 0.7485451698303223, "std": 0.04206714406609535, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.4.ff.0.0.weight": { "min": -0.2659734785556793, "max": 0.2969002425670624, "mean": -7.885815284680575e-05, "std": 0.04081321880221367, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.19.4.ff.0.0.bias": { "min": -0.18494504690170288, "max": 0.043268244713544846, "mean": -0.03681334853172302, "std": 0.025581398978829384, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.19.4.ff.2.weight": { "min": -0.4577294886112213, "max": 0.4868638217449188, "mean": 4.411918780533597e-05, "std": 0.054221056401729584, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.19.4.ff.2.bias": { "min": -0.286346971988678, "max": 0.5518361330032349, "mean": -0.0008815097389742732, "std": 0.04783621430397034, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.0.weight": { "min": -0.29267972707748413, "max": 0.3227570652961731, "mean": 6.020641194481868e-06, "std": 0.019972950220108032, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.20.1.g": { "min": 0.2912514805793762, "max": 0.7601991891860962, "mean": 0.6508588194847107, "std": 0.05212089791893959, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_q.weight": { "min": -0.2437000423669815, "max": 0.26162612438201904, "mean": -5.554972631216515e-06, "std": 0.039614368230104446, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_q.bias": { "min": -0.2675025463104248, "max": 0.20013028383255005, "mean": -0.0008774266461841762, "std": 0.05176888778805733, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_k.weight": { "min": -0.27221566438674927, "max": 0.25374382734298706, "mean": 5.006398168916348e-06, "std": 0.03871097415685654, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_k.bias": { "min": -12.966026306152344, "max": 15.947824478149414, "mean": 0.03323008120059967, "std": 1.989342451095581, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_v.weight": { "min": -0.20656642317771912, "max": 0.22588562965393066, "mean": -7.24760175216943e-05, "std": 0.040559086948633194, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_v.bias": { "min": -0.06937043368816376, "max": 0.06317680329084396, "mean": 0.000156470196088776, "std": 0.014745255932211876, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_out.0.weight": { "min": -0.46550098061561584, "max": 0.32025203108787537, "mean": 1.966371200978756e-05, "std": 0.04059458151459694, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_out.0.bias": { "min": -0.06405901163816452, "max": 0.11548515409231186, "mean": 0.0011954698711633682, "std": 0.024709828197956085, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.3.g": { "min": 0.37493425607681274, "max": 0.9319035410881042, "mean": 0.7510924339294434, "std": 0.0401909314095974, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.4.ff.0.0.weight": { "min": -0.27919864654541016, "max": 0.273176908493042, "mean": -0.0001684028684394434, "std": 0.041004277765750885, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.20.4.ff.0.0.bias": { "min": -0.19848693907260895, "max": 0.05126062035560608, "mean": -0.032024383544921875, "std": 0.025078732520341873, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.20.4.ff.2.weight": { "min": -0.6584433317184448, "max": 0.5357221961021423, "mean": -4.880438791587949e-05, "std": 0.05285734310746193, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.20.4.ff.2.bias": { "min": -0.19274669885635376, "max": 0.5823217630386353, "mean": -0.0005133696831762791, "std": 0.041087545454502106, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.0.weight": { "min": -0.4175601005554199, "max": 0.37188875675201416, "mean": 6.479064722952899e-06, "std": 0.021628154441714287, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.21.1.g": { "min": 0.2145100235939026, "max": 0.7467755675315857, "mean": 0.6495225429534912, "std": 0.054342612624168396, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_q.weight": { "min": -0.20954373478889465, "max": 0.19555190205574036, "mean": 4.0139111661119387e-05, "std": 0.03946155682206154, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_q.bias": { "min": -0.32948848605155945, "max": 0.2595402002334595, "mean": -0.0032335962168872356, "std": 0.05627242103219032, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_k.weight": { "min": -0.2058991640806198, "max": 0.2547155022621155, "mean": 5.40805995115079e-05, "std": 0.03856402263045311, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_k.bias": { "min": -6.243993759155273, "max": 6.932845115661621, "mean": 0.048340216279029846, "std": 1.385199785232544, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_v.weight": { "min": -0.20978908240795135, "max": 0.23056426644325256, "mean": -4.742521468870109e-06, "std": 0.04131828248500824, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_v.bias": { "min": -0.04378769174218178, "max": 0.0359850712120533, "mean": -6.261238013394177e-06, "std": 0.012797025963664055, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_out.0.weight": { "min": -0.39764100313186646, "max": 0.34504374861717224, "mean": -5.53192148800008e-05, "std": 0.0423952080309391, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_out.0.bias": { "min": -0.05508939549326897, "max": 0.06280933320522308, "mean": 0.0003585501981433481, "std": 0.018675601109862328, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.3.g": { "min": 0.3507746756076813, "max": 1.0452601909637451, "mean": 0.7896535992622375, "std": 0.04874108359217644, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.4.ff.0.0.weight": { "min": -0.3336845338344574, "max": 0.38642778992652893, "mean": -0.00016908602265175432, "std": 0.041490186005830765, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.21.4.ff.0.0.bias": { "min": -0.1574612259864807, "max": 0.05922037363052368, "mean": -0.03182276338338852, "std": 0.025103161111474037, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.21.4.ff.2.weight": { "min": -0.6963140964508057, "max": 0.46921107172966003, "mean": -8.656673162477091e-05, "std": 0.05180606618523598, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.21.4.ff.2.bias": { "min": -0.24794527888298035, "max": 0.3287939429283142, "mean": -0.00025959889171645045, "std": 0.04145469143986702, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.0.weight": { "min": -0.28705933690071106, "max": 0.3503926694393158, "mean": -2.8700230814138195e-06, "std": 0.024241898208856583, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.22.1.g": { "min": 0.19675415754318237, "max": 0.7791337370872498, "mean": 0.6702517867088318, "std": 0.05866968631744385, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_q.weight": { "min": -0.22908443212509155, "max": 0.2313445806503296, "mean": -2.062591738649644e-05, "std": 0.040440406650304794, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_q.bias": { "min": -0.22002485394477844, "max": 0.24098847806453705, "mean": 0.00078444869723171, "std": 0.0558483712375164, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_k.weight": { "min": -0.21667493879795074, "max": 0.22645404934883118, "mean": -7.211311458377168e-05, "std": 0.03937484323978424, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_k.bias": { "min": -8.906242370605469, "max": 9.069114685058594, "mean": -0.0012534279376268387, "std": 1.8484383821487427, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_v.weight": { "min": -0.2695206105709076, "max": 0.2589607834815979, "mean": 4.368612644611858e-05, "std": 0.03841120004653931, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_v.bias": { "min": -0.05792244151234627, "max": 0.05800376832485199, "mean": 0.0003531992551870644, "std": 0.014716269448399544, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_out.0.weight": { "min": -0.2641335129737854, "max": 0.2883334755897522, "mean": -6.170988490339369e-05, "std": 0.03907797113060951, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_out.0.bias": { "min": -0.043938618153333664, "max": 0.037385016679763794, "mean": -9.84332655207254e-05, "std": 0.013347743079066277, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.3.g": { "min": 0.3393842577934265, "max": 1.0925544500350952, "mean": 0.8639589548110962, "std": 0.0638754740357399, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.4.ff.0.0.weight": { "min": -0.4231780469417572, "max": 0.41907352209091187, "mean": 0.0003135594888590276, "std": 0.04351302981376648, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.22.4.ff.0.0.bias": { "min": -0.21478679776191711, "max": 0.1706700474023819, "mean": -0.02944377437233925, "std": 0.03187936916947365, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.22.4.ff.2.weight": { "min": -0.5987504720687866, "max": 0.5598719120025635, "mean": -0.00014867217396385968, "std": 0.05346066504716873, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.22.4.ff.2.bias": { "min": -0.17880699038505554, "max": 0.37724727392196655, "mean": 0.0013524596579372883, "std": 0.037310197949409485, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.0.weight": { "min": -0.39442750811576843, "max": 0.3689110279083252, "mean": 3.764010398299433e-05, "std": 0.028617940843105316, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.23.1.g": { "min": 0.29055094718933105, "max": 0.8275657296180725, "mean": 0.7055599689483643, "std": 0.06785259395837784, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_q.weight": { "min": -0.9265406131744385, "max": 1.0269172191619873, "mean": -2.7786163627752103e-05, "std": 0.04764207825064659, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_q.bias": { "min": -0.8793070316314697, "max": 0.8158283829689026, "mean": -0.0003010375367011875, "std": 0.09555298835039139, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_k.weight": { "min": -0.26992541551589966, "max": 0.24092742800712585, "mean": -2.246434632979799e-05, "std": 0.03895093873143196, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_k.bias": { "min": -23.743555068969727, "max": 22.852014541625977, "mean": -0.09188304841518402, "std": 4.070625305175781, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_v.weight": { "min": -0.22777004539966583, "max": 0.2455480843782425, "mean": -2.5490313419140875e-05, "std": 0.03864210844039917, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_v.bias": { "min": -0.060185808688402176, "max": 0.04548603296279907, "mean": -0.00013778329594060779, "std": 0.014688468538224697, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_out.0.weight": { "min": -0.33804869651794434, "max": 0.3748103082180023, "mean": 7.576927600894123e-06, "std": 0.04082098975777626, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_out.0.bias": { "min": -0.046251166611909866, "max": 0.19543442130088806, "mean": 0.00027753060567192733, "std": 0.013553835451602936, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.3.g": { "min": 0.37363529205322266, "max": 1.1304537057876587, "mean": 0.8902342319488525, "std": 0.06401188671588898, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.4.ff.0.0.weight": { "min": -0.44750913977622986, "max": 0.5426135659217834, "mean": 2.5048013412742876e-05, "std": 0.0455806739628315, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.23.4.ff.0.0.bias": { "min": -0.22384138405323029, "max": 0.08764129132032394, "mean": -0.03201291710138321, "std": 0.03774724155664444, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.23.4.ff.2.weight": { "min": -0.7260749936103821, "max": 0.688654899597168, "mean": 3.5635155654745176e-05, "std": 0.051793280988931656, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.23.4.ff.2.bias": { "min": -0.17447420954704285, "max": 0.21816052496433258, "mean": 3.443963942117989e-05, "std": 0.03176717460155487, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.0.weight": { "min": -0.33968257904052734, "max": 0.3729552924633026, "mean": 4.328345676185563e-05, "std": 0.034136127680540085, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.24.1.g": { "min": 0.3178211450576782, "max": 1.2872322797775269, "mean": 0.6015591025352478, "std": 0.08348726481199265, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_q.weight": { "min": -0.28302425146102905, "max": 0.26023271679878235, "mean": -2.7253747703070985e-06, "std": 0.0359804667532444, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_q.bias": { "min": -0.23563744127750397, "max": 0.20571035146713257, "mean": 0.00023820970091037452, "std": 0.056028686463832855, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_k.weight": { "min": -0.43542271852493286, "max": 0.3249562382698059, "mean": 2.4268334527732804e-05, "std": 0.034124359488487244, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_k.bias": { "min": -5.546493053436279, "max": 7.314059257507324, "mean": -0.007369840517640114, "std": 0.6993855834007263, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_v.weight": { "min": -0.34410950541496277, "max": 0.36279547214508057, "mean": 0.0001030894200084731, "std": 0.04783707857131958, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_v.bias": { "min": -0.07371430099010468, "max": 0.060424793511629105, "mean": 0.0009352926863357425, "std": 0.01493847742676735, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_out.0.weight": { "min": -0.2562869191169739, "max": 0.2867131233215332, "mean": 4.736550181405619e-06, "std": 0.04156505689024925, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_out.0.bias": { "min": -0.0553305447101593, "max": 0.06281695514917374, "mean": 0.00012849000631831586, "std": 0.007162065710872412, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.3.g": { "min": 0.49391981959342957, "max": 1.220736026763916, "mean": 1.0135732889175415, "std": 0.11749263107776642, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.4.ff.0.0.weight": { "min": -1.0939218997955322, "max": 1.0474658012390137, "mean": -4.883138171862811e-05, "std": 0.05241798609495163, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.24.4.ff.0.0.bias": { "min": -0.223901629447937, "max": 0.17314252257347107, "mean": -0.027228916063904762, "std": 0.03630804270505905, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.24.4.ff.2.weight": { "min": -0.8840344548225403, "max": 0.9224310517311096, "mean": -0.00014670705422759056, "std": 0.053297851234674454, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.24.4.ff.2.bias": { "min": -0.17102152109146118, "max": 0.3797409236431122, "mean": 0.003368864767253399, "std": 0.0398765504360199, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.0.weight": { "min": -0.7776780724525452, "max": 0.7227001190185547, "mean": 1.787853761925362e-05, "std": 0.04615465924143791, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.25.1.g": { "min": 0.3386647403240204, "max": 1.4281901121139526, "mean": 0.9484964609146118, "std": 0.20680245757102966, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_q.weight": { "min": -1.745869517326355, "max": 1.7045400142669678, "mean": 0.00022709640325047076, "std": 0.15870508551597595, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_q.bias": { "min": -1.1994972229003906, "max": 1.1010137796401978, "mean": -0.009549295529723167, "std": 0.20389875769615173, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_k.weight": { "min": -0.4210166335105896, "max": 0.4279645085334778, "mean": 6.39720747130923e-05, "std": 0.04802015796303749, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_k.bias": { "min": -19.747936248779297, "max": 19.543052673339844, "mean": -0.24834343791007996, "std": 4.777070999145508, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_v.weight": { "min": -0.3238843083381653, "max": 0.4385298192501068, "mean": -1.1759563676605467e-05, "std": 0.04616716504096985, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_v.bias": { "min": -0.03387872874736786, "max": 0.036932073533535004, "mean": 0.0006410478381440043, "std": 0.01291597355157137, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_out.0.weight": { "min": -0.7035592198371887, "max": 0.6685189604759216, "mean": 4.281650763005018e-05, "std": 0.05789238214492798, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_out.0.bias": { "min": -0.07232622057199478, "max": 0.06769084185361862, "mean": -0.00013414367276709527, "std": 0.012906934134662151, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.3.g": { "min": 0.3805098831653595, "max": 1.3928314447402954, "mean": 1.0667389631271362, "std": 0.21977593004703522, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.4.ff.0.0.weight": { "min": -0.6165266633033752, "max": 0.7183749079704285, "mean": 0.00011245780478930101, "std": 0.05802787095308304, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.25.4.ff.0.0.bias": { "min": -0.21882832050323486, "max": 0.2250150591135025, "mean": 0.006199384108185768, "std": 0.049713458865880966, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.25.4.ff.2.weight": { "min": -0.6297744512557983, "max": 0.8895941972732544, "mean": 1.2031738151563331e-05, "std": 0.023544643074274063, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.25.4.ff.2.bias": { "min": -0.506857693195343, "max": 0.47375017404556274, "mean": -0.003018573159351945, "std": 0.06925369799137115, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.norm_out.g": { "min": 0.5381409525871277, "max": 1.1801701784133911, "mean": 0.7828266620635986, "std": 0.09875727444887161, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.proj_out.weight": { "min": -0.2670648992061615, "max": 0.21295404434204102, "mean": -0.0002240903995698318, "std": 0.054007235914468765, "sparsity": 0.0, "shape": [ 100, 1024 ] }, "transformer.proj_out.bias": { "min": -0.23832593858242035, "max": 0.014832383021712303, "mean": -0.043932899832725525, "std": 0.03429204970598221, "sparsity": 0.0, "shape": [ 100 ] } } }