{ "layer_types": { "transformer": 391 }, "parameter_counts": { "transformer.time_embed.time_mlp.0.weight": 262144, "transformer.time_embed.time_mlp.0.bias": 1024, "transformer.time_embed.time_mlp.2.weight": 1048576, "transformer.time_embed.time_mlp.2.bias": 1024, "transformer.text_embed.text_embed.weight": 254600, "transformer.input_embed.proj.weight": 307200, "transformer.input_embed.proj.bias": 1024, "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, "transformer.layers.0.1.g": 1024, "transformer.layers.0.2.to_q.weight": 1048576, "transformer.layers.0.2.to_q.bias": 1024, "transformer.layers.0.2.to_k.weight": 1048576, "transformer.layers.0.2.to_k.bias": 1024, "transformer.layers.0.2.to_v.weight": 1048576, "transformer.layers.0.2.to_v.bias": 1024, "transformer.layers.0.2.to_out.0.weight": 1048576, "transformer.layers.0.2.to_out.0.bias": 1024, "transformer.layers.0.3.g": 1024, "transformer.layers.0.4.ff.0.0.weight": 4194304, "transformer.layers.0.4.ff.0.0.bias": 4096, "transformer.layers.0.4.ff.2.weight": 4194304, "transformer.layers.0.4.ff.2.bias": 1024, "transformer.layers.1.1.g": 1024, "transformer.layers.1.2.to_q.weight": 1048576, "transformer.layers.1.2.to_q.bias": 1024, "transformer.layers.1.2.to_k.weight": 1048576, "transformer.layers.1.2.to_k.bias": 1024, "transformer.layers.1.2.to_v.weight": 1048576, "transformer.layers.1.2.to_v.bias": 1024, "transformer.layers.1.2.to_out.0.weight": 1048576, "transformer.layers.1.2.to_out.0.bias": 1024, "transformer.layers.1.3.g": 1024, "transformer.layers.1.4.ff.0.0.weight": 4194304, "transformer.layers.1.4.ff.0.0.bias": 4096, "transformer.layers.1.4.ff.2.weight": 4194304, "transformer.layers.1.4.ff.2.bias": 1024, "transformer.layers.2.1.g": 1024, "transformer.layers.2.2.to_q.weight": 1048576, "transformer.layers.2.2.to_q.bias": 1024, "transformer.layers.2.2.to_k.weight": 1048576, "transformer.layers.2.2.to_k.bias": 1024, "transformer.layers.2.2.to_v.weight": 1048576, "transformer.layers.2.2.to_v.bias": 1024, "transformer.layers.2.2.to_out.0.weight": 1048576, "transformer.layers.2.2.to_out.0.bias": 1024, "transformer.layers.2.3.g": 1024, "transformer.layers.2.4.ff.0.0.weight": 4194304, "transformer.layers.2.4.ff.0.0.bias": 4096, "transformer.layers.2.4.ff.2.weight": 4194304, "transformer.layers.2.4.ff.2.bias": 1024, "transformer.layers.3.1.g": 1024, "transformer.layers.3.2.to_q.weight": 1048576, "transformer.layers.3.2.to_q.bias": 1024, "transformer.layers.3.2.to_k.weight": 1048576, "transformer.layers.3.2.to_k.bias": 1024, "transformer.layers.3.2.to_v.weight": 1048576, "transformer.layers.3.2.to_v.bias": 1024, "transformer.layers.3.2.to_out.0.weight": 1048576, "transformer.layers.3.2.to_out.0.bias": 1024, "transformer.layers.3.3.g": 1024, "transformer.layers.3.4.ff.0.0.weight": 4194304, "transformer.layers.3.4.ff.0.0.bias": 4096, "transformer.layers.3.4.ff.2.weight": 4194304, "transformer.layers.3.4.ff.2.bias": 1024, "transformer.layers.4.1.g": 1024, "transformer.layers.4.2.to_q.weight": 1048576, "transformer.layers.4.2.to_q.bias": 1024, "transformer.layers.4.2.to_k.weight": 1048576, "transformer.layers.4.2.to_k.bias": 1024, "transformer.layers.4.2.to_v.weight": 1048576, "transformer.layers.4.2.to_v.bias": 1024, "transformer.layers.4.2.to_out.0.weight": 1048576, "transformer.layers.4.2.to_out.0.bias": 1024, "transformer.layers.4.3.g": 1024, "transformer.layers.4.4.ff.0.0.weight": 4194304, "transformer.layers.4.4.ff.0.0.bias": 4096, "transformer.layers.4.4.ff.2.weight": 4194304, "transformer.layers.4.4.ff.2.bias": 1024, "transformer.layers.5.1.g": 1024, "transformer.layers.5.2.to_q.weight": 1048576, "transformer.layers.5.2.to_q.bias": 1024, "transformer.layers.5.2.to_k.weight": 1048576, "transformer.layers.5.2.to_k.bias": 1024, "transformer.layers.5.2.to_v.weight": 1048576, "transformer.layers.5.2.to_v.bias": 1024, "transformer.layers.5.2.to_out.0.weight": 1048576, "transformer.layers.5.2.to_out.0.bias": 1024, "transformer.layers.5.3.g": 1024, "transformer.layers.5.4.ff.0.0.weight": 4194304, "transformer.layers.5.4.ff.0.0.bias": 4096, "transformer.layers.5.4.ff.2.weight": 4194304, "transformer.layers.5.4.ff.2.bias": 1024, "transformer.layers.6.1.g": 1024, "transformer.layers.6.2.to_q.weight": 1048576, "transformer.layers.6.2.to_q.bias": 1024, "transformer.layers.6.2.to_k.weight": 1048576, "transformer.layers.6.2.to_k.bias": 1024, "transformer.layers.6.2.to_v.weight": 1048576, "transformer.layers.6.2.to_v.bias": 1024, "transformer.layers.6.2.to_out.0.weight": 1048576, "transformer.layers.6.2.to_out.0.bias": 1024, "transformer.layers.6.3.g": 1024, "transformer.layers.6.4.ff.0.0.weight": 4194304, "transformer.layers.6.4.ff.0.0.bias": 4096, "transformer.layers.6.4.ff.2.weight": 4194304, "transformer.layers.6.4.ff.2.bias": 1024, "transformer.layers.7.1.g": 1024, "transformer.layers.7.2.to_q.weight": 1048576, "transformer.layers.7.2.to_q.bias": 1024, "transformer.layers.7.2.to_k.weight": 1048576, "transformer.layers.7.2.to_k.bias": 1024, "transformer.layers.7.2.to_v.weight": 1048576, "transformer.layers.7.2.to_v.bias": 1024, "transformer.layers.7.2.to_out.0.weight": 1048576, "transformer.layers.7.2.to_out.0.bias": 1024, "transformer.layers.7.3.g": 1024, "transformer.layers.7.4.ff.0.0.weight": 4194304, "transformer.layers.7.4.ff.0.0.bias": 4096, "transformer.layers.7.4.ff.2.weight": 4194304, "transformer.layers.7.4.ff.2.bias": 1024, "transformer.layers.8.1.g": 1024, "transformer.layers.8.2.to_q.weight": 1048576, "transformer.layers.8.2.to_q.bias": 1024, "transformer.layers.8.2.to_k.weight": 1048576, "transformer.layers.8.2.to_k.bias": 1024, "transformer.layers.8.2.to_v.weight": 1048576, "transformer.layers.8.2.to_v.bias": 1024, "transformer.layers.8.2.to_out.0.weight": 1048576, "transformer.layers.8.2.to_out.0.bias": 1024, "transformer.layers.8.3.g": 1024, "transformer.layers.8.4.ff.0.0.weight": 4194304, "transformer.layers.8.4.ff.0.0.bias": 4096, "transformer.layers.8.4.ff.2.weight": 4194304, "transformer.layers.8.4.ff.2.bias": 1024, "transformer.layers.9.1.g": 1024, "transformer.layers.9.2.to_q.weight": 1048576, "transformer.layers.9.2.to_q.bias": 1024, "transformer.layers.9.2.to_k.weight": 1048576, "transformer.layers.9.2.to_k.bias": 1024, "transformer.layers.9.2.to_v.weight": 1048576, "transformer.layers.9.2.to_v.bias": 1024, "transformer.layers.9.2.to_out.0.weight": 1048576, "transformer.layers.9.2.to_out.0.bias": 1024, "transformer.layers.9.3.g": 1024, "transformer.layers.9.4.ff.0.0.weight": 4194304, "transformer.layers.9.4.ff.0.0.bias": 4096, "transformer.layers.9.4.ff.2.weight": 4194304, "transformer.layers.9.4.ff.2.bias": 1024, "transformer.layers.10.1.g": 1024, "transformer.layers.10.2.to_q.weight": 1048576, "transformer.layers.10.2.to_q.bias": 1024, "transformer.layers.10.2.to_k.weight": 1048576, "transformer.layers.10.2.to_k.bias": 1024, "transformer.layers.10.2.to_v.weight": 1048576, "transformer.layers.10.2.to_v.bias": 1024, "transformer.layers.10.2.to_out.0.weight": 1048576, "transformer.layers.10.2.to_out.0.bias": 1024, "transformer.layers.10.3.g": 1024, "transformer.layers.10.4.ff.0.0.weight": 4194304, "transformer.layers.10.4.ff.0.0.bias": 4096, "transformer.layers.10.4.ff.2.weight": 4194304, "transformer.layers.10.4.ff.2.bias": 1024, "transformer.layers.11.1.g": 1024, "transformer.layers.11.2.to_q.weight": 1048576, "transformer.layers.11.2.to_q.bias": 1024, "transformer.layers.11.2.to_k.weight": 1048576, "transformer.layers.11.2.to_k.bias": 1024, "transformer.layers.11.2.to_v.weight": 1048576, "transformer.layers.11.2.to_v.bias": 1024, "transformer.layers.11.2.to_out.0.weight": 1048576, "transformer.layers.11.2.to_out.0.bias": 1024, "transformer.layers.11.3.g": 1024, "transformer.layers.11.4.ff.0.0.weight": 4194304, "transformer.layers.11.4.ff.0.0.bias": 4096, "transformer.layers.11.4.ff.2.weight": 4194304, "transformer.layers.11.4.ff.2.bias": 1024, "transformer.layers.12.1.g": 1024, "transformer.layers.12.2.to_q.weight": 1048576, "transformer.layers.12.2.to_q.bias": 1024, "transformer.layers.12.2.to_k.weight": 1048576, "transformer.layers.12.2.to_k.bias": 1024, "transformer.layers.12.2.to_v.weight": 1048576, "transformer.layers.12.2.to_v.bias": 1024, "transformer.layers.12.2.to_out.0.weight": 1048576, "transformer.layers.12.2.to_out.0.bias": 1024, "transformer.layers.12.3.g": 1024, "transformer.layers.12.4.ff.0.0.weight": 4194304, "transformer.layers.12.4.ff.0.0.bias": 4096, "transformer.layers.12.4.ff.2.weight": 4194304, "transformer.layers.12.4.ff.2.bias": 1024, "transformer.layers.13.0.weight": 2097152, "transformer.layers.13.1.g": 1024, "transformer.layers.13.2.to_q.weight": 1048576, "transformer.layers.13.2.to_q.bias": 1024, "transformer.layers.13.2.to_k.weight": 1048576, "transformer.layers.13.2.to_k.bias": 1024, "transformer.layers.13.2.to_v.weight": 1048576, "transformer.layers.13.2.to_v.bias": 1024, "transformer.layers.13.2.to_out.0.weight": 1048576, "transformer.layers.13.2.to_out.0.bias": 1024, "transformer.layers.13.3.g": 1024, "transformer.layers.13.4.ff.0.0.weight": 4194304, "transformer.layers.13.4.ff.0.0.bias": 4096, "transformer.layers.13.4.ff.2.weight": 4194304, "transformer.layers.13.4.ff.2.bias": 1024, "transformer.layers.14.0.weight": 2097152, "transformer.layers.14.1.g": 1024, "transformer.layers.14.2.to_q.weight": 1048576, "transformer.layers.14.2.to_q.bias": 1024, "transformer.layers.14.2.to_k.weight": 1048576, "transformer.layers.14.2.to_k.bias": 1024, "transformer.layers.14.2.to_v.weight": 1048576, "transformer.layers.14.2.to_v.bias": 1024, "transformer.layers.14.2.to_out.0.weight": 1048576, "transformer.layers.14.2.to_out.0.bias": 1024, "transformer.layers.14.3.g": 1024, "transformer.layers.14.4.ff.0.0.weight": 4194304, "transformer.layers.14.4.ff.0.0.bias": 4096, "transformer.layers.14.4.ff.2.weight": 4194304, "transformer.layers.14.4.ff.2.bias": 1024, "transformer.layers.15.0.weight": 2097152, "transformer.layers.15.1.g": 1024, "transformer.layers.15.2.to_q.weight": 1048576, "transformer.layers.15.2.to_q.bias": 1024, "transformer.layers.15.2.to_k.weight": 1048576, "transformer.layers.15.2.to_k.bias": 1024, "transformer.layers.15.2.to_v.weight": 1048576, "transformer.layers.15.2.to_v.bias": 1024, "transformer.layers.15.2.to_out.0.weight": 1048576, "transformer.layers.15.2.to_out.0.bias": 1024, "transformer.layers.15.3.g": 1024, "transformer.layers.15.4.ff.0.0.weight": 4194304, "transformer.layers.15.4.ff.0.0.bias": 4096, "transformer.layers.15.4.ff.2.weight": 4194304, "transformer.layers.15.4.ff.2.bias": 1024, "transformer.layers.16.0.weight": 2097152, "transformer.layers.16.1.g": 1024, "transformer.layers.16.2.to_q.weight": 1048576, "transformer.layers.16.2.to_q.bias": 1024, "transformer.layers.16.2.to_k.weight": 1048576, "transformer.layers.16.2.to_k.bias": 1024, "transformer.layers.16.2.to_v.weight": 1048576, "transformer.layers.16.2.to_v.bias": 1024, "transformer.layers.16.2.to_out.0.weight": 1048576, "transformer.layers.16.2.to_out.0.bias": 1024, "transformer.layers.16.3.g": 1024, "transformer.layers.16.4.ff.0.0.weight": 4194304, "transformer.layers.16.4.ff.0.0.bias": 4096, "transformer.layers.16.4.ff.2.weight": 4194304, "transformer.layers.16.4.ff.2.bias": 1024, "transformer.layers.17.0.weight": 2097152, "transformer.layers.17.1.g": 1024, "transformer.layers.17.2.to_q.weight": 1048576, "transformer.layers.17.2.to_q.bias": 1024, "transformer.layers.17.2.to_k.weight": 1048576, "transformer.layers.17.2.to_k.bias": 1024, "transformer.layers.17.2.to_v.weight": 1048576, "transformer.layers.17.2.to_v.bias": 1024, "transformer.layers.17.2.to_out.0.weight": 1048576, "transformer.layers.17.2.to_out.0.bias": 1024, "transformer.layers.17.3.g": 1024, "transformer.layers.17.4.ff.0.0.weight": 4194304, "transformer.layers.17.4.ff.0.0.bias": 4096, "transformer.layers.17.4.ff.2.weight": 4194304, "transformer.layers.17.4.ff.2.bias": 1024, "transformer.layers.18.0.weight": 2097152, "transformer.layers.18.1.g": 1024, "transformer.layers.18.2.to_q.weight": 1048576, "transformer.layers.18.2.to_q.bias": 1024, "transformer.layers.18.2.to_k.weight": 1048576, "transformer.layers.18.2.to_k.bias": 1024, "transformer.layers.18.2.to_v.weight": 1048576, "transformer.layers.18.2.to_v.bias": 1024, "transformer.layers.18.2.to_out.0.weight": 1048576, "transformer.layers.18.2.to_out.0.bias": 1024, "transformer.layers.18.3.g": 1024, "transformer.layers.18.4.ff.0.0.weight": 4194304, "transformer.layers.18.4.ff.0.0.bias": 4096, "transformer.layers.18.4.ff.2.weight": 4194304, "transformer.layers.18.4.ff.2.bias": 1024, "transformer.layers.19.0.weight": 2097152, "transformer.layers.19.1.g": 1024, "transformer.layers.19.2.to_q.weight": 1048576, "transformer.layers.19.2.to_q.bias": 1024, "transformer.layers.19.2.to_k.weight": 1048576, "transformer.layers.19.2.to_k.bias": 1024, "transformer.layers.19.2.to_v.weight": 1048576, "transformer.layers.19.2.to_v.bias": 1024, "transformer.layers.19.2.to_out.0.weight": 1048576, "transformer.layers.19.2.to_out.0.bias": 1024, "transformer.layers.19.3.g": 1024, "transformer.layers.19.4.ff.0.0.weight": 4194304, "transformer.layers.19.4.ff.0.0.bias": 4096, "transformer.layers.19.4.ff.2.weight": 4194304, "transformer.layers.19.4.ff.2.bias": 1024, "transformer.layers.20.0.weight": 2097152, "transformer.layers.20.1.g": 1024, "transformer.layers.20.2.to_q.weight": 1048576, "transformer.layers.20.2.to_q.bias": 1024, "transformer.layers.20.2.to_k.weight": 1048576, "transformer.layers.20.2.to_k.bias": 1024, "transformer.layers.20.2.to_v.weight": 1048576, "transformer.layers.20.2.to_v.bias": 1024, "transformer.layers.20.2.to_out.0.weight": 1048576, "transformer.layers.20.2.to_out.0.bias": 1024, "transformer.layers.20.3.g": 1024, "transformer.layers.20.4.ff.0.0.weight": 4194304, "transformer.layers.20.4.ff.0.0.bias": 4096, "transformer.layers.20.4.ff.2.weight": 4194304, "transformer.layers.20.4.ff.2.bias": 1024, "transformer.layers.21.0.weight": 2097152, "transformer.layers.21.1.g": 1024, "transformer.layers.21.2.to_q.weight": 1048576, "transformer.layers.21.2.to_q.bias": 1024, "transformer.layers.21.2.to_k.weight": 1048576, "transformer.layers.21.2.to_k.bias": 1024, "transformer.layers.21.2.to_v.weight": 1048576, "transformer.layers.21.2.to_v.bias": 1024, "transformer.layers.21.2.to_out.0.weight": 1048576, "transformer.layers.21.2.to_out.0.bias": 1024, "transformer.layers.21.3.g": 1024, "transformer.layers.21.4.ff.0.0.weight": 4194304, "transformer.layers.21.4.ff.0.0.bias": 4096, "transformer.layers.21.4.ff.2.weight": 4194304, "transformer.layers.21.4.ff.2.bias": 1024, "transformer.layers.22.0.weight": 2097152, "transformer.layers.22.1.g": 1024, "transformer.layers.22.2.to_q.weight": 1048576, "transformer.layers.22.2.to_q.bias": 1024, "transformer.layers.22.2.to_k.weight": 1048576, "transformer.layers.22.2.to_k.bias": 1024, "transformer.layers.22.2.to_v.weight": 1048576, "transformer.layers.22.2.to_v.bias": 1024, "transformer.layers.22.2.to_out.0.weight": 1048576, "transformer.layers.22.2.to_out.0.bias": 1024, "transformer.layers.22.3.g": 1024, "transformer.layers.22.4.ff.0.0.weight": 4194304, "transformer.layers.22.4.ff.0.0.bias": 4096, "transformer.layers.22.4.ff.2.weight": 4194304, "transformer.layers.22.4.ff.2.bias": 1024, "transformer.layers.23.0.weight": 2097152, "transformer.layers.23.1.g": 1024, "transformer.layers.23.2.to_q.weight": 1048576, "transformer.layers.23.2.to_q.bias": 1024, "transformer.layers.23.2.to_k.weight": 1048576, "transformer.layers.23.2.to_k.bias": 1024, "transformer.layers.23.2.to_v.weight": 1048576, "transformer.layers.23.2.to_v.bias": 1024, "transformer.layers.23.2.to_out.0.weight": 1048576, "transformer.layers.23.2.to_out.0.bias": 1024, "transformer.layers.23.3.g": 1024, "transformer.layers.23.4.ff.0.0.weight": 4194304, "transformer.layers.23.4.ff.0.0.bias": 4096, "transformer.layers.23.4.ff.2.weight": 4194304, "transformer.layers.23.4.ff.2.bias": 1024, "transformer.layers.24.0.weight": 2097152, "transformer.layers.24.1.g": 1024, "transformer.layers.24.2.to_q.weight": 1048576, "transformer.layers.24.2.to_q.bias": 1024, "transformer.layers.24.2.to_k.weight": 1048576, "transformer.layers.24.2.to_k.bias": 1024, "transformer.layers.24.2.to_v.weight": 1048576, "transformer.layers.24.2.to_v.bias": 1024, "transformer.layers.24.2.to_out.0.weight": 1048576, "transformer.layers.24.2.to_out.0.bias": 1024, "transformer.layers.24.3.g": 1024, "transformer.layers.24.4.ff.0.0.weight": 4194304, "transformer.layers.24.4.ff.0.0.bias": 4096, "transformer.layers.24.4.ff.2.weight": 4194304, "transformer.layers.24.4.ff.2.bias": 1024, "transformer.layers.25.0.weight": 2097152, "transformer.layers.25.1.g": 1024, "transformer.layers.25.2.to_q.weight": 1048576, "transformer.layers.25.2.to_q.bias": 1024, "transformer.layers.25.2.to_k.weight": 1048576, "transformer.layers.25.2.to_k.bias": 1024, "transformer.layers.25.2.to_v.weight": 1048576, "transformer.layers.25.2.to_v.bias": 1024, "transformer.layers.25.2.to_out.0.weight": 1048576, "transformer.layers.25.2.to_out.0.bias": 1024, "transformer.layers.25.3.g": 1024, "transformer.layers.25.4.ff.0.0.weight": 4194304, "transformer.layers.25.4.ff.0.0.bias": 4096, "transformer.layers.25.4.ff.2.weight": 4194304, "transformer.layers.25.4.ff.2.bias": 1024, "transformer.norm_out.g": 1024, "transformer.proj_out.weight": 102400, "transformer.proj_out.bias": 100 }, "important_layers": [ "transformer.time_embed.time_mlp.0.weight", "transformer.time_embed.time_mlp.2.weight", "transformer.text_embed.text_embed.weight", "transformer.input_embed.proj.weight", "transformer.input_embed.conv_pos_embed.conv1d.0.weight", "transformer.input_embed.conv_pos_embed.conv1d.2.weight", "transformer.layers.0.2.to_q.weight", "transformer.layers.0.2.to_k.weight", "transformer.layers.0.2.to_v.weight", "transformer.layers.0.2.to_out.0.weight", "transformer.layers.0.4.ff.0.0.weight", "transformer.layers.0.4.ff.2.weight", "transformer.layers.1.2.to_q.weight", "transformer.layers.1.2.to_k.weight", "transformer.layers.1.2.to_v.weight", "transformer.layers.1.2.to_out.0.weight", "transformer.layers.1.4.ff.0.0.weight", "transformer.layers.1.4.ff.2.weight", "transformer.layers.2.2.to_q.weight", "transformer.layers.2.2.to_k.weight", "transformer.layers.2.2.to_v.weight", "transformer.layers.2.2.to_out.0.weight", "transformer.layers.2.4.ff.0.0.weight", "transformer.layers.2.4.ff.2.weight", "transformer.layers.3.2.to_q.weight", "transformer.layers.3.2.to_k.weight", "transformer.layers.3.2.to_v.weight", "transformer.layers.3.2.to_out.0.weight", "transformer.layers.3.4.ff.0.0.weight", "transformer.layers.3.4.ff.2.weight", "transformer.layers.4.2.to_q.weight", "transformer.layers.4.2.to_k.weight", "transformer.layers.4.2.to_v.weight", "transformer.layers.4.2.to_out.0.weight", "transformer.layers.4.4.ff.0.0.weight", "transformer.layers.4.4.ff.2.weight", "transformer.layers.5.2.to_q.weight", "transformer.layers.5.2.to_k.weight", "transformer.layers.5.2.to_v.weight", "transformer.layers.5.2.to_out.0.weight", "transformer.layers.5.4.ff.0.0.weight", "transformer.layers.5.4.ff.2.weight", "transformer.layers.6.2.to_q.weight", "transformer.layers.6.2.to_k.weight", "transformer.layers.6.2.to_v.weight", "transformer.layers.6.2.to_out.0.weight", "transformer.layers.6.4.ff.0.0.weight", "transformer.layers.6.4.ff.2.weight", "transformer.layers.7.2.to_q.weight", "transformer.layers.7.2.to_k.weight", "transformer.layers.7.2.to_v.weight", "transformer.layers.7.2.to_out.0.weight", "transformer.layers.7.4.ff.0.0.weight", "transformer.layers.7.4.ff.2.weight", "transformer.layers.8.4.ff.0.0.weight", "transformer.layers.8.4.ff.2.weight", "transformer.layers.9.4.ff.0.0.weight", "transformer.layers.9.4.ff.2.weight", "transformer.layers.10.4.ff.0.0.weight", "transformer.layers.10.4.ff.2.weight", "transformer.layers.11.4.ff.0.0.weight", "transformer.layers.11.4.ff.2.weight", "transformer.layers.12.4.ff.0.0.weight", "transformer.layers.12.4.ff.2.weight", "transformer.layers.13.0.weight", "transformer.layers.13.4.ff.0.0.weight", "transformer.layers.13.4.ff.2.weight", "transformer.layers.14.0.weight", "transformer.layers.14.4.ff.0.0.weight", "transformer.layers.14.4.ff.2.weight", "transformer.layers.15.0.weight", "transformer.layers.15.4.ff.0.0.weight", "transformer.layers.15.4.ff.2.weight", "transformer.layers.16.4.ff.0.0.weight", "transformer.layers.16.4.ff.2.weight", "transformer.layers.17.4.ff.0.0.weight", "transformer.layers.17.4.ff.2.weight", "transformer.layers.18.4.ff.0.0.weight", "transformer.layers.18.4.ff.2.weight", "transformer.layers.19.4.ff.0.0.weight", "transformer.layers.19.4.ff.2.weight", "transformer.layers.20.4.ff.0.0.weight", "transformer.layers.20.4.ff.2.weight", "transformer.layers.21.4.ff.0.0.weight", "transformer.layers.21.4.ff.2.weight", "transformer.layers.22.4.ff.0.0.weight", "transformer.layers.22.4.ff.2.weight", "transformer.layers.23.4.ff.0.0.weight", "transformer.layers.23.4.ff.2.weight", "transformer.layers.24.4.ff.0.0.weight", "transformer.layers.24.4.ff.2.weight", "transformer.layers.25.4.ff.0.0.weight", "transformer.layers.25.4.ff.2.weight" ], "bottleneck_layers": [], "recommendations": { "focus_layers": [ "transformer.time_embed.time_mlp.0.weight", "transformer.time_embed.time_mlp.2.weight", "transformer.text_embed.text_embed.weight", "transformer.input_embed.proj.weight", "transformer.input_embed.conv_pos_embed.conv1d.0.weight", "transformer.input_embed.conv_pos_embed.conv1d.2.weight", "transformer.layers.0.2.to_q.weight", "transformer.layers.0.2.to_k.weight", "transformer.layers.0.2.to_v.weight", "transformer.layers.0.2.to_out.0.weight", "transformer.layers.0.4.ff.0.0.weight", "transformer.layers.0.4.ff.2.weight", "transformer.layers.1.2.to_q.weight", "transformer.layers.1.2.to_k.weight", "transformer.layers.1.2.to_v.weight", "transformer.layers.1.2.to_out.0.weight", "transformer.layers.1.4.ff.0.0.weight", "transformer.layers.1.4.ff.2.weight", "transformer.layers.2.2.to_q.weight", "transformer.layers.2.2.to_k.weight", "transformer.layers.2.2.to_v.weight", "transformer.layers.2.2.to_out.0.weight", "transformer.layers.2.4.ff.0.0.weight", "transformer.layers.2.4.ff.2.weight", "transformer.layers.3.2.to_q.weight", "transformer.layers.3.2.to_k.weight", "transformer.layers.3.2.to_v.weight", "transformer.layers.3.2.to_out.0.weight", "transformer.layers.3.4.ff.0.0.weight", "transformer.layers.3.4.ff.2.weight", "transformer.layers.4.2.to_q.weight", "transformer.layers.4.2.to_k.weight", "transformer.layers.4.2.to_v.weight", "transformer.layers.4.2.to_out.0.weight", "transformer.layers.4.4.ff.0.0.weight", "transformer.layers.4.4.ff.2.weight", "transformer.layers.5.2.to_q.weight", "transformer.layers.5.2.to_k.weight", "transformer.layers.5.2.to_v.weight", "transformer.layers.5.2.to_out.0.weight", "transformer.layers.5.4.ff.0.0.weight", "transformer.layers.5.4.ff.2.weight", "transformer.layers.6.2.to_q.weight", "transformer.layers.6.2.to_k.weight", "transformer.layers.6.2.to_v.weight", "transformer.layers.6.2.to_out.0.weight", "transformer.layers.6.4.ff.0.0.weight", "transformer.layers.6.4.ff.2.weight", "transformer.layers.7.2.to_q.weight", "transformer.layers.7.2.to_k.weight", "transformer.layers.7.2.to_v.weight", "transformer.layers.7.2.to_out.0.weight", "transformer.layers.7.4.ff.0.0.weight", "transformer.layers.7.4.ff.2.weight", "transformer.layers.8.4.ff.0.0.weight", "transformer.layers.8.4.ff.2.weight", "transformer.layers.9.4.ff.0.0.weight", "transformer.layers.9.4.ff.2.weight", "transformer.layers.10.4.ff.0.0.weight", "transformer.layers.10.4.ff.2.weight", "transformer.layers.11.4.ff.0.0.weight", "transformer.layers.11.4.ff.2.weight", "transformer.layers.12.4.ff.0.0.weight", "transformer.layers.12.4.ff.2.weight", "transformer.layers.13.0.weight", "transformer.layers.13.4.ff.0.0.weight", "transformer.layers.13.4.ff.2.weight", "transformer.layers.14.0.weight", "transformer.layers.14.4.ff.0.0.weight", "transformer.layers.14.4.ff.2.weight", "transformer.layers.15.0.weight", "transformer.layers.15.4.ff.0.0.weight", "transformer.layers.15.4.ff.2.weight", "transformer.layers.16.4.ff.0.0.weight", "transformer.layers.16.4.ff.2.weight", "transformer.layers.17.4.ff.0.0.weight", "transformer.layers.17.4.ff.2.weight", "transformer.layers.18.4.ff.0.0.weight", "transformer.layers.18.4.ff.2.weight", "transformer.layers.19.4.ff.0.0.weight", "transformer.layers.19.4.ff.2.weight", "transformer.layers.20.4.ff.0.0.weight", "transformer.layers.20.4.ff.2.weight", "transformer.layers.21.4.ff.0.0.weight", "transformer.layers.21.4.ff.2.weight", "transformer.layers.22.4.ff.0.0.weight", "transformer.layers.22.4.ff.2.weight", "transformer.layers.23.4.ff.0.0.weight", "transformer.layers.23.4.ff.2.weight", "transformer.layers.24.4.ff.0.0.weight", "transformer.layers.24.4.ff.2.weight", "transformer.layers.25.4.ff.0.0.weight", "transformer.layers.25.4.ff.2.weight" ] }, "total_parameters": 391, "total_elements": 360755948, "param_ranges": { "transformer.time_embed.time_mlp.0.weight": { "min": -0.429890900850296, "max": 0.2975340783596039, "mean": -0.002528043230995536, "std": 0.042567234486341476, "sparsity": 0.0, "shape": [ 1024, 256 ] }, "transformer.time_embed.time_mlp.0.bias": { "min": -0.06285920739173889, "max": 0.10713651776313782, "mean": 0.0006724470877088606, "std": 0.03401060774922371, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.time_embed.time_mlp.2.weight": { "min": -0.4127168655395508, "max": 0.8372595310211182, "mean": -0.0001970978337340057, "std": 0.024115173146128654, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.time_embed.time_mlp.2.bias": { "min": -0.11470083892345428, "max": 0.3203592598438263, "mean": -0.0009399179834872484, "std": 0.019510779529809952, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.text_embed.text_embed.weight": { "min": -2.786435842514038, "max": 2.8647964000701904, "mean": -0.00036496162647381425, "std": 0.6155204772949219, "sparsity": 0.0, "shape": [ 2546, 100 ] }, "transformer.input_embed.proj.weight": { "min": -0.2788304090499878, "max": 0.38129961490631104, "mean": 0.00042573572136461735, "std": 0.042747072875499725, "sparsity": 0.0, "shape": [ 1024, 300 ] }, "transformer.input_embed.proj.bias": { "min": -0.22175073623657227, "max": 0.208872988820076, "mean": -0.0044786068610847, "std": 0.040869712829589844, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { "min": -0.4284111559391022, "max": 0.47638577222824097, "mean": 4.7679491217422765e-06, "std": 0.024512330070137978, "sparsity": 0.0, "shape": [ 1024, 64, 31 ] }, "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { "min": -0.32299283146858215, "max": 0.15659146010875702, "mean": -0.04666333645582199, "std": 0.051485899835824966, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { "min": -0.41033437848091125, "max": 0.35466355085372925, "mean": -0.00013342559395823628, "std": 0.023606186732649803, "sparsity": 0.0, "shape": [ 1024, 64, 31 ] }, "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { "min": -0.2283795177936554, "max": 0.2609671354293823, "mean": -0.029088540002703667, "std": 0.04924432560801506, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.1.g": { "min": 0.25455695390701294, "max": 0.8167241811752319, "mean": 0.5252928733825684, "std": 0.08043710887432098, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_q.weight": { "min": -0.29693663120269775, "max": 0.26587796211242676, "mean": -0.00042661806219257414, "std": 0.03210223466157913, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_q.bias": { "min": -0.09257981181144714, "max": 0.12483392655849457, "mean": 0.0006469582440331578, "std": 0.02571757137775421, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_k.weight": { "min": -0.29060953855514526, "max": 0.281120628118515, "mean": -7.341133459703997e-05, "std": 0.030930932611227036, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_k.bias": { "min": -5.8982954025268555, "max": 5.813107013702393, "mean": -0.009337348863482475, "std": 1.2953522205352783, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_v.weight": { "min": -0.42515280842781067, "max": 0.3437501788139343, "mean": 9.81355260591954e-05, "std": 0.029954733327031136, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_v.bias": { "min": -0.028982222080230713, "max": 0.027547072619199753, "mean": -0.0003299822274129838, "std": 0.012570270337164402, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_out.0.weight": { "min": -0.4541594088077545, "max": 0.44774138927459717, "mean": 2.4147137082763948e-05, "std": 0.02385564148426056, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_out.0.bias": { "min": -0.08854468911886215, "max": 0.09074825048446655, "mean": 0.0022885985672473907, "std": 0.019506951794028282, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.3.g": { "min": 0.2667747437953949, "max": 1.0526666641235352, "mean": 0.5310115814208984, "std": 0.10401110351085663, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.4.ff.0.0.weight": { "min": -0.5744121670722961, "max": 0.6080161333084106, "mean": -0.00042898603715002537, "std": 0.038603950291872025, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.0.4.ff.0.0.bias": { "min": -0.1828344166278839, "max": 0.04558030515909195, "mean": -0.02944895066320896, "std": 0.04260854050517082, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.0.4.ff.2.weight": { "min": -1.1668061017990112, "max": 1.6334388256072998, "mean": 0.0003250878071412444, "std": 0.02769906260073185, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.0.4.ff.2.bias": { "min": -0.1617957502603531, "max": 0.20511887967586517, "mean": -0.021121997386217117, "std": 0.027915872633457184, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.1.g": { "min": 0.22389063239097595, "max": 0.8404398560523987, "mean": 0.48753583431243896, "std": 0.07487782090902328, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_q.weight": { "min": -0.25540560483932495, "max": 0.30576375126838684, "mean": -5.286063242238015e-06, "std": 0.0334775373339653, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_q.bias": { "min": -0.09518040716648102, "max": 0.11029241979122162, "mean": 7.437964086420834e-05, "std": 0.026927735656499863, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_k.weight": { "min": -0.29654812812805176, "max": 0.29580071568489075, "mean": 5.465543654281646e-05, "std": 0.03255033493041992, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_k.bias": { "min": -5.152629852294922, "max": 5.073052883148193, "mean": -0.014528467319905758, "std": 1.1556384563446045, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_v.weight": { "min": -0.34482821822166443, "max": 0.3431924283504486, "mean": 7.847632514312863e-05, "std": 0.030065450817346573, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_v.bias": { "min": -0.0359608419239521, "max": 0.03339020535349846, "mean": -0.00013936487084720284, "std": 0.013043079525232315, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_out.0.weight": { "min": -0.31543099880218506, "max": 0.37475085258483887, "mean": -1.99221267394023e-05, "std": 0.024063827469944954, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_out.0.bias": { "min": -0.1053055077791214, "max": 0.12205620855093002, "mean": -0.0019772218074649572, "std": 0.028851687908172607, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.3.g": { "min": 0.31148025393486023, "max": 1.1159186363220215, "mean": 0.6660937070846558, "std": 0.09731028974056244, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.4.ff.0.0.weight": { "min": -0.8725345730781555, "max": 0.6275786757469177, "mean": 0.0016754826065152884, "std": 0.04743966832756996, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.1.4.ff.0.0.bias": { "min": -0.27123701572418213, "max": 0.034363195300102234, "mean": -0.04658954590559006, "std": 0.040568556636571884, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.1.4.ff.2.weight": { "min": -0.9233484268188477, "max": 0.9644548296928406, "mean": 0.001022880314849317, "std": 0.040709808468818665, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.1.4.ff.2.bias": { "min": -0.14417493343353271, "max": 0.07486628741025925, "mean": -0.00909160915762186, "std": 0.025672299787402153, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.1.g": { "min": 0.24042263627052307, "max": 0.7109521627426147, "mean": 0.4471237063407898, "std": 0.05905117839574814, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_q.weight": { "min": -0.2719106674194336, "max": 0.29774755239486694, "mean": 9.55516952672042e-06, "std": 0.035470303148031235, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_q.bias": { "min": -0.11921010911464691, "max": 0.11835695803165436, "mean": 0.0007637137896381319, "std": 0.027623096480965614, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_k.weight": { "min": -0.28068092465400696, "max": 0.2797088027000427, "mean": -7.736143015790731e-05, "std": 0.03509894013404846, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_k.bias": { "min": -2.503926992416382, "max": 2.515892505645752, "mean": 0.02668764814734459, "std": 0.5862060785293579, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_v.weight": { "min": -0.22096332907676697, "max": 0.2714470624923706, "mean": 3.3548758437973447e-06, "std": 0.030734958127141, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_v.bias": { "min": -0.0337090790271759, "max": 0.03134975582361221, "mean": 0.00010986338020302355, "std": 0.012415189296007156, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_out.0.weight": { "min": -0.2351670116186142, "max": 0.23143303394317627, "mean": 5.6707456678850576e-05, "std": 0.025697972625494003, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_out.0.bias": { "min": -0.13545046746730804, "max": 0.12696555256843567, "mean": -0.00549742579460144, "std": 0.03995845839381218, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.3.g": { "min": 0.35431793332099915, "max": 1.168055772781372, "mean": 0.7104406356811523, "std": 0.10342107713222504, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.4.ff.0.0.weight": { "min": -0.6171623468399048, "max": 0.5538070201873779, "mean": 0.0011603726306930184, "std": 0.04612257331609726, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.2.4.ff.0.0.bias": { "min": -0.187709778547287, "max": 0.025375014171004295, "mean": -0.03482068330049515, "std": 0.028561368584632874, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.2.4.ff.2.weight": { "min": -1.1314054727554321, "max": 0.9714292287826538, "mean": 0.0003602738433983177, "std": 0.0423499159514904, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.2.4.ff.2.bias": { "min": -0.5970888137817383, "max": 0.06280609965324402, "mean": -0.004877342376857996, "std": 0.028585655614733696, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.1.g": { "min": 0.37514442205429077, "max": 0.9365863800048828, "mean": 0.5923141837120056, "std": 0.06635680049657822, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_q.weight": { "min": -0.3909958004951477, "max": 0.36877286434173584, "mean": 7.174501661211252e-05, "std": 0.037190962582826614, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_q.bias": { "min": -0.11852732300758362, "max": 0.13606122136116028, "mean": 0.0009374335058964789, "std": 0.02925141341984272, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_k.weight": { "min": -0.6188546419143677, "max": 0.508575975894928, "mean": 1.5391087799798697e-05, "std": 0.03644438832998276, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_k.bias": { "min": -8.168816566467285, "max": 8.769427299499512, "mean": -0.10911353677511215, "std": 1.696131944656372, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_v.weight": { "min": -0.2764376997947693, "max": 0.2397889643907547, "mean": 5.34953796886839e-05, "std": 0.03261784091591835, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_v.bias": { "min": -0.05230281502008438, "max": 0.03951656445860863, "mean": 8.823134703561664e-05, "std": 0.01295400783419609, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_out.0.weight": { "min": -0.23082277178764343, "max": 0.23429568111896515, "mean": -2.1679703422705643e-05, "std": 0.0293941181153059, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_out.0.bias": { "min": -0.20415563881397247, "max": 0.1055976152420044, "mean": -0.004027670249342918, "std": 0.03260914608836174, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.3.g": { "min": 0.3398659825325012, "max": 1.008574366569519, "mean": 0.7007372975349426, "std": 0.09649426490068436, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.4.ff.0.0.weight": { "min": -0.5645706057548523, "max": 0.8320877552032471, "mean": 0.00041511692688800395, "std": 0.042306262999773026, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.3.4.ff.0.0.bias": { "min": -0.21099911630153656, "max": 0.03097626566886902, "mean": -0.032180383801460266, "std": 0.026477735489606857, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.3.4.ff.2.weight": { "min": -0.7537994980812073, "max": 0.7179465293884277, "mean": -7.129359801183455e-06, "std": 0.03684566915035248, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.3.4.ff.2.bias": { "min": -0.2629236578941345, "max": 0.10548774898052216, "mean": -0.00303501239977777, "std": 0.028845027089118958, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.1.g": { "min": 0.28467807173728943, "max": 0.6921964883804321, "mean": 0.49945610761642456, "std": 0.04626332223415375, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_q.weight": { "min": -0.279328316450119, "max": 0.23436570167541504, "mean": -0.00011136279499623924, "std": 0.03876578062772751, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_q.bias": { "min": -0.15460819005966187, "max": 0.12665635347366333, "mean": -0.002232019789516926, "std": 0.03342032432556152, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_k.weight": { "min": -0.41363096237182617, "max": 0.6597210764884949, "mean": -2.0344648874015547e-05, "std": 0.03910161554813385, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_k.bias": { "min": -4.231404781341553, "max": 4.715085029602051, "mean": -0.020485566928982735, "std": 1.0069705247879028, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_v.weight": { "min": -0.2449151873588562, "max": 0.20747897028923035, "mean": 4.346559217083268e-05, "std": 0.033968474715948105, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_v.bias": { "min": -0.03452696651220322, "max": 0.04465686157345772, "mean": -1.5960962628014386e-05, "std": 0.012621430680155754, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_out.0.weight": { "min": -0.20041236281394958, "max": 0.20551952719688416, "mean": -2.960992424050346e-05, "std": 0.031025830656290054, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_out.0.bias": { "min": -0.19978956878185272, "max": 0.11348189413547516, "mean": -0.002926791785284877, "std": 0.034484151750802994, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.3.g": { "min": 0.36731821298599243, "max": 1.0521864891052246, "mean": 0.6705360412597656, "std": 0.06614020466804504, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.4.ff.0.0.weight": { "min": -0.39791443943977356, "max": 0.5023131966590881, "mean": -3.831370850093663e-05, "std": 0.04114069044589996, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.4.4.ff.0.0.bias": { "min": -0.1279803365468979, "max": 0.026696184650063515, "mean": -0.030547261238098145, "std": 0.021858656778931618, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.4.4.ff.2.weight": { "min": -0.44846877455711365, "max": 0.43229183554649353, "mean": 8.759970660321414e-05, "std": 0.034898921847343445, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.4.4.ff.2.bias": { "min": -0.2670278549194336, "max": 0.07220447063446045, "mean": -0.0011172632221132517, "std": 0.023101668804883957, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.1.g": { "min": 0.2872157692909241, "max": 0.6838868260383606, "mean": 0.5244971513748169, "std": 0.047394201159477234, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_q.weight": { "min": -0.22190631926059723, "max": 0.22351428866386414, "mean": 1.5601781342411414e-05, "std": 0.038955170661211014, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_q.bias": { "min": -0.13637839257717133, "max": 0.10904650390148163, "mean": 0.0002307215763721615, "std": 0.02925163321197033, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_k.weight": { "min": -0.37520402669906616, "max": 0.4367537200450897, "mean": -9.730283636599779e-06, "std": 0.03929009288549423, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_k.bias": { "min": -3.8370232582092285, "max": 4.988061904907227, "mean": 0.0097434613853693, "std": 0.8443066477775574, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_v.weight": { "min": -0.22342386841773987, "max": 0.21985094249248505, "mean": -9.139148460235447e-08, "std": 0.034415289759635925, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_v.bias": { "min": -0.04353320971131325, "max": 0.03576282411813736, "mean": -0.0002566012553870678, "std": 0.012079274281859398, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_out.0.weight": { "min": -0.2132977545261383, "max": 0.18884801864624023, "mean": -1.671975405770354e-05, "std": 0.031542494893074036, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_out.0.bias": { "min": -0.1805061399936676, "max": 0.12078476697206497, "mean": -0.0024164910428225994, "std": 0.041246652603149414, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.3.g": { "min": 0.42202678322792053, "max": 0.9410442113876343, "mean": 0.6627340912818909, "std": 0.056649643927812576, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.4.ff.0.0.weight": { "min": -0.3713216483592987, "max": 0.47501668334007263, "mean": -8.242137118941173e-05, "std": 0.04089945927262306, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.5.4.ff.0.0.bias": { "min": -0.20792357623577118, "max": 0.027002831920981407, "mean": -0.03024197369813919, "std": 0.02132386527955532, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.5.4.ff.2.weight": { "min": -0.33984270691871643, "max": 0.7327128648757935, "mean": 8.53092860779725e-05, "std": 0.03477407246828079, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.5.4.ff.2.bias": { "min": -0.23982134461402893, "max": 0.050322338938713074, "mean": -0.0011965972371399403, "std": 0.020453661680221558, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.1.g": { "min": 0.3062271773815155, "max": 0.6509252786636353, "mean": 0.5250095725059509, "std": 0.04592073708772659, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_q.weight": { "min": -0.30402758717536926, "max": 0.21729634702205658, "mean": 7.005365478107706e-05, "std": 0.03949893265962601, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_q.bias": { "min": -0.14918896555900574, "max": 0.13127601146697998, "mean": 0.00036064194864593446, "std": 0.030438335612416267, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_k.weight": { "min": -0.25730884075164795, "max": 0.20225763320922852, "mean": 3.0886923923389986e-05, "std": 0.03948678448796272, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_k.bias": { "min": -2.334343671798706, "max": 2.3739240169525146, "mean": -0.02623903937637806, "std": 0.4496191143989563, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_v.weight": { "min": -0.1891229748725891, "max": 0.21049852669239044, "mean": 3.720186577993445e-05, "std": 0.03480042889714241, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_v.bias": { "min": -0.03178652375936508, "max": 0.03553091734647751, "mean": -0.0002019420498982072, "std": 0.012286705896258354, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_out.0.weight": { "min": -0.18846523761749268, "max": 0.1703805774450302, "mean": -6.774859502911568e-05, "std": 0.032177072018384933, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_out.0.bias": { "min": -0.13940556347370148, "max": 0.13744769990444183, "mean": -0.0025155385956168175, "std": 0.051295846700668335, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.3.g": { "min": 0.4672105014324188, "max": 0.9528681039810181, "mean": 0.6688433885574341, "std": 0.05244635045528412, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.4.ff.0.0.weight": { "min": -0.3241286277770996, "max": 0.3096275329589844, "mean": -1.696625076874625e-06, "std": 0.04095519334077835, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.6.4.ff.0.0.bias": { "min": -0.1246853619813919, "max": 0.025154586881399155, "mean": -0.03071470744907856, "std": 0.019795699045062065, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.6.4.ff.2.weight": { "min": -0.43982067704200745, "max": 0.44470375776290894, "mean": 9.459229477215558e-05, "std": 0.03512655198574066, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.6.4.ff.2.bias": { "min": -0.22400110960006714, "max": 0.05141644552350044, "mean": -0.0011801186483353376, "std": 0.018454499542713165, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.1.g": { "min": 0.33955061435699463, "max": 0.7357662320137024, "mean": 0.55861496925354, "std": 0.04118064045906067, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_q.weight": { "min": -0.2722431421279907, "max": 0.27798357605934143, "mean": 1.9865790818585083e-05, "std": 0.04106421023607254, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_q.bias": { "min": -0.1370246559381485, "max": 0.1397887021303177, "mean": 0.0004894830053672194, "std": 0.026618896052241325, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_k.weight": { "min": -0.4905315637588501, "max": 0.3558432161808014, "mean": 8.873307524481788e-05, "std": 0.04070229455828667, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_k.bias": { "min": -2.291904926300049, "max": 1.7411547899246216, "mean": -0.02105572447180748, "std": 0.4997440576553345, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_v.weight": { "min": -0.2170916199684143, "max": 0.19797761738300323, "mean": -4.09621607104782e-05, "std": 0.034239448606967926, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_v.bias": { "min": -0.0413656160235405, "max": 0.038547735661268234, "mean": -0.00015065219486132264, "std": 0.012881237082183361, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_out.0.weight": { "min": -0.17731794714927673, "max": 0.18395833671092987, "mean": 4.7481313231401145e-05, "std": 0.03156236186623573, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_out.0.bias": { "min": -0.17941592633724213, "max": 0.18339262902736664, "mean": -0.0022199342492967844, "std": 0.05482170730829239, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.3.g": { "min": 0.4743531346321106, "max": 1.0208531618118286, "mean": 0.6452549695968628, "std": 0.04991196468472481, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.4.ff.0.0.weight": { "min": -0.2717994153499603, "max": 0.3095380365848541, "mean": 0.00011231788084842265, "std": 0.04069165140390396, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.7.4.ff.0.0.bias": { "min": -0.10581093281507492, "max": 0.02687394618988037, "mean": -0.029505720362067223, "std": 0.01791212521493435, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.7.4.ff.2.weight": { "min": -0.3386741280555725, "max": 0.3290008306503296, "mean": 5.870793393114582e-05, "std": 0.03442065790295601, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.7.4.ff.2.bias": { "min": -0.18140022456645966, "max": 0.041891518980264664, "mean": -0.0010755020193755627, "std": 0.017211386933922768, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.1.g": { "min": 0.32555529475212097, "max": 0.6836872696876526, "mean": 0.5111882090568542, "std": 0.03670286759734154, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_q.weight": { "min": -0.2333182841539383, "max": 0.22538095712661743, "mean": -3.595184534788132e-05, "std": 0.03918481990695, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_q.bias": { "min": -0.11544553935527802, "max": 0.13142207264900208, "mean": 0.00015133176930248737, "std": 0.029199015349149704, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_k.weight": { "min": -0.3520807921886444, "max": 0.2848276197910309, "mean": 7.631589141965378e-06, "std": 0.03925250843167305, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_k.bias": { "min": -4.123228073120117, "max": 3.5356757640838623, "mean": -0.011553899385035038, "std": 0.6816845536231995, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_v.weight": { "min": -0.2112175077199936, "max": 0.20856595039367676, "mean": 3.472584648989141e-05, "std": 0.03449223190546036, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_v.bias": { "min": -0.03566575422883034, "max": 0.0481027290225029, "mean": 0.0007965473923832178, "std": 0.01284803170710802, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_out.0.weight": { "min": -0.21010246872901917, "max": 0.19273991882801056, "mean": -1.5139250990614528e-06, "std": 0.031702835112810135, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_out.0.bias": { "min": -0.1862909346818924, "max": 0.17676132917404175, "mean": -0.0028484249487519264, "std": 0.0586179718375206, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.3.g": { "min": 0.47472548484802246, "max": 1.0383955240249634, "mean": 0.6513745784759521, "std": 0.049231819808483124, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.4.ff.0.0.weight": { "min": -0.24837423861026764, "max": 0.3289947211742401, "mean": 0.00018063507741317153, "std": 0.04057996720075607, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.8.4.ff.0.0.bias": { "min": -0.1235797256231308, "max": 0.024505803361535072, "mean": -0.0304916650056839, "std": 0.01757434755563736, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.8.4.ff.2.weight": { "min": -0.4211723804473877, "max": 0.48196032643318176, "mean": 1.983910806302447e-06, "std": 0.03540581464767456, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.8.4.ff.2.bias": { "min": -0.1518622189760208, "max": 0.04325510933995247, "mean": 3.965849464293569e-05, "std": 0.014866944402456284, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.1.g": { "min": 0.31559497117996216, "max": 0.6791313290596008, "mean": 0.552861213684082, "std": 0.040544018149375916, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_q.weight": { "min": -0.20591191947460175, "max": 0.21929602324962616, "mean": 3.05178873531986e-05, "std": 0.03830549493432045, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_q.bias": { "min": -0.13762998580932617, "max": 0.11262793093919754, "mean": 2.1001505956519395e-05, "std": 0.02581183984875679, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_k.weight": { "min": -0.4020220637321472, "max": 0.3705553412437439, "mean": 2.6537300072959624e-05, "std": 0.03818797320127487, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_k.bias": { "min": -3.767557382583618, "max": 2.8661978244781494, "mean": 0.00114790303632617, "std": 0.5165696144104004, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_v.weight": { "min": -0.2021435797214508, "max": 0.19701559841632843, "mean": 2.942326318589039e-05, "std": 0.03430229425430298, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_v.bias": { "min": -0.051028795540332794, "max": 0.03999846801161766, "mean": -0.0004189596220385283, "std": 0.01342750433832407, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_out.0.weight": { "min": -0.19608062505722046, "max": 0.20127296447753906, "mean": -1.228029668709496e-05, "std": 0.0318099670112133, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_out.0.bias": { "min": -0.19270533323287964, "max": 0.1945824921131134, "mean": -0.0029681914020329714, "std": 0.06255524605512619, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.3.g": { "min": 0.34965983033180237, "max": 1.0794146060943604, "mean": 0.6671044826507568, "std": 0.054688673466444016, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.4.ff.0.0.weight": { "min": -0.22492384910583496, "max": 0.2511879801750183, "mean": 0.0003592889988794923, "std": 0.04076888784766197, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.9.4.ff.0.0.bias": { "min": -0.0908823236823082, "max": 0.04379650950431824, "mean": -0.030081426724791527, "std": 0.01758776418864727, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.9.4.ff.2.weight": { "min": -0.35308927297592163, "max": 0.3038119673728943, "mean": -4.2369181755930185e-05, "std": 0.03713066130876541, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.9.4.ff.2.bias": { "min": -0.16173776984214783, "max": 0.06332767009735107, "mean": -8.476080256514251e-05, "std": 0.019383691251277924, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.1.g": { "min": 0.34886276721954346, "max": 0.7204337120056152, "mean": 0.5423545241355896, "std": 0.03890771418809891, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_q.weight": { "min": -0.2189498394727707, "max": 0.22237031161785126, "mean": -1.0949186616926454e-05, "std": 0.03923875838518143, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_q.bias": { "min": -0.11818630248308182, "max": 0.1705242395401001, "mean": 0.0002858135849237442, "std": 0.025103183463215828, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_k.weight": { "min": -0.24609290063381195, "max": 0.30029821395874023, "mean": -3.647123230621219e-05, "std": 0.03893830627202988, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_k.bias": { "min": -3.5019514560699463, "max": 3.711169481277466, "mean": 0.015843264758586884, "std": 0.7819090485572815, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_v.weight": { "min": -0.21829943358898163, "max": 0.23758333921432495, "mean": -1.3816705177305266e-05, "std": 0.03631007671356201, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_v.bias": { "min": -0.04714132845401764, "max": 0.051366791129112244, "mean": 0.00047747697681188583, "std": 0.01350868958979845, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_out.0.weight": { "min": -0.21323293447494507, "max": 0.2170214205980301, "mean": 5.658239751937799e-05, "std": 0.033622127026319504, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_out.0.bias": { "min": -0.21135154366493225, "max": 0.23155677318572998, "mean": -0.005110344383865595, "std": 0.06187622249126434, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.3.g": { "min": 0.36206167936325073, "max": 1.097632884979248, "mean": 0.6992448568344116, "std": 0.05318887159228325, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.4.ff.0.0.weight": { "min": -0.23417295515537262, "max": 0.2448265254497528, "mean": 0.0004635582445189357, "std": 0.04127749800682068, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.10.4.ff.0.0.bias": { "min": -0.09782810509204865, "max": 0.06829667091369629, "mean": -0.031430259346961975, "std": 0.018095970153808594, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.10.4.ff.2.weight": { "min": -0.30144715309143066, "max": 0.3511406481266022, "mean": -8.084578439593315e-05, "std": 0.04028310999274254, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.10.4.ff.2.bias": { "min": -0.15208296477794647, "max": 0.1494162231683731, "mean": 0.0002504626754671335, "std": 0.023021113127470016, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.1.g": { "min": 0.9953764081001282, "max": 1.0005042552947998, "mean": 0.9992995858192444, "std": 0.00161725003272295, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_q.weight": { "min": -0.031269513070583344, "max": 0.031265489757061005, "mean": -1.9295868696644902e-05, "std": 0.018045131117105484, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_q.bias": { "min": -0.031223546713590622, "max": 0.0309983491897583, "mean": -0.0010843857889994979, "std": 0.017954815179109573, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_k.weight": { "min": -0.03126491606235504, "max": 0.03126438334584236, "mean": 3.5442317312117666e-06, "std": 0.018045514822006226, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_k.bias": { "min": -0.031160738319158554, "max": 0.03118434175848961, "mean": 0.00033380728564225137, "std": 0.01806693710386753, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_v.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_v.bias": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_out.0.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_out.0.bias": { "min": -0.0004188704479020089, "max": 0.00032652742811478674, "mean": -3.7413692552945577e-06, "std": 9.604167280485854e-05, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.3.g": { "min": 0.9950032234191895, "max": 1.000982403755188, "mean": 0.9997574090957642, "std": 0.0010362789034843445, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.4.ff.0.0.weight": { "min": -0.03225700929760933, "max": 0.032385751605033875, "mean": -9.290525667893235e-06, "std": 0.01804504171013832, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.11.4.ff.0.0.bias": { "min": -0.03201417997479439, "max": 0.03202167525887489, "mean": 0.0002501691924408078, "std": 0.018027769401669502, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.11.4.ff.2.weight": { "min": -0.0008222123724408448, "max": 0.0007597835501655936, "mean": -1.4037771052244352e-06, "std": 0.0001422762288711965, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.11.4.ff.2.bias": { "min": -0.0004344022599980235, "max": 0.000338842801284045, "mean": -5.246626642474439e-06, "std": 8.8350752776023e-05, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.1.g": { "min": 0.3827516734600067, "max": 0.7182729244232178, "mean": 0.5806694030761719, "std": 0.03871554881334305, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_q.weight": { "min": -0.23742133378982544, "max": 0.19636878371238708, "mean": 2.6759680622490123e-05, "std": 0.037471406161785126, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_q.bias": { "min": -0.1184450015425682, "max": 0.16545724868774414, "mean": 0.0009931407403200865, "std": 0.027538597583770752, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_k.weight": { "min": -0.2451958954334259, "max": 0.49966853857040405, "mean": -5.0392896810080856e-05, "std": 0.0376293808221817, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_k.bias": { "min": -3.932778835296631, "max": 3.76035213470459, "mean": -0.003568061627447605, "std": 0.6805727481842041, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_v.weight": { "min": -0.22708982229232788, "max": 0.2511258125305176, "mean": -1.143130793934688e-05, "std": 0.037441134452819824, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_v.bias": { "min": -0.07165413349866867, "max": 0.08049532026052475, "mean": -0.0005234142299741507, "std": 0.015659447759389877, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_out.0.weight": { "min": -0.22785918414592743, "max": 0.25734248757362366, "mean": -2.8539496270241216e-05, "std": 0.035427965223789215, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_out.0.bias": { "min": -0.19991812109947205, "max": 0.214930921792984, "mean": -0.005538000259548426, "std": 0.06830835342407227, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.3.g": { "min": 0.40544652938842773, "max": 1.1868609189987183, "mean": 0.7379507422447205, "std": 0.05492096021771431, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.4.ff.0.0.weight": { "min": -0.22111627459526062, "max": 0.2460324913263321, "mean": 0.0005210894159972668, "std": 0.04134552925825119, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.12.4.ff.0.0.bias": { "min": -0.10342609882354736, "max": 0.024193264544010162, "mean": -0.03266071155667305, "std": 0.018867699429392815, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.12.4.ff.2.weight": { "min": -0.448818176984787, "max": 0.4217819571495056, "mean": -0.000431257882155478, "std": 0.04690708965063095, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.12.4.ff.2.bias": { "min": -0.2508312165737152, "max": 0.46896737813949585, "mean": 0.00319076469168067, "std": 0.04450752213597298, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.0.weight": { "min": -0.3169752359390259, "max": 0.33314692974090576, "mean": -2.5337005354231223e-05, "std": 0.021293330937623978, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.13.1.g": { "min": 0.32465165853500366, "max": 0.6822460889816284, "mean": 0.5709546208381653, "std": 0.04454142227768898, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_q.weight": { "min": -0.16416817903518677, "max": 0.1733636111021042, "mean": -4.858425018028356e-05, "std": 0.03318599984049797, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_q.bias": { "min": -0.18635453283786774, "max": 0.1423773616552353, "mean": 4.034899757243693e-05, "std": 0.02966292016208172, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_k.weight": { "min": -0.37941935658454895, "max": 0.24537599086761475, "mean": -1.0037202628154773e-05, "std": 0.03276722505688667, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_k.bias": { "min": -3.6522655487060547, "max": 3.2869510650634766, "mean": -0.014257419854402542, "std": 0.9848745465278625, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_v.weight": { "min": -0.23496949672698975, "max": 0.24738511443138123, "mean": -1.7606289475224912e-05, "std": 0.04170484468340874, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_v.bias": { "min": -0.07273464649915695, "max": 0.15422259271144867, "mean": 0.0006638166960328817, "std": 0.025166962295770645, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_out.0.weight": { "min": -0.2664797306060791, "max": 0.248508021235466, "mean": -1.5497178537771106e-05, "std": 0.04014508053660393, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_out.0.bias": { "min": -0.18958289921283722, "max": 0.19478872418403625, "mean": -0.0012272386811673641, "std": 0.06668190658092499, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.3.g": { "min": 0.32911282777786255, "max": 0.9983987808227539, "mean": 0.7191941142082214, "std": 0.0522039495408535, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.4.ff.0.0.weight": { "min": -0.23135632276535034, "max": 0.24583274126052856, "mean": 0.00018275347247254103, "std": 0.04090878367424011, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.13.4.ff.0.0.bias": { "min": -0.11377062648534775, "max": 0.018522411584854126, "mean": -0.04246858134865761, "std": 0.018818210810422897, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.13.4.ff.2.weight": { "min": -0.3897111713886261, "max": 0.40687721967697144, "mean": -2.178383874706924e-05, "std": 0.04854356870055199, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.13.4.ff.2.bias": { "min": -0.6922244429588318, "max": 0.4119531214237213, "mean": 0.0008513483917340636, "std": 0.060246195644140244, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.0.weight": { "min": -0.0007574164774268866, "max": 1.0006382465362549, "mean": 0.0004883571527898312, "std": 0.022093627601861954, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.14.1.g": { "min": 0.995638906955719, "max": 1.000357985496521, "mean": 0.9993537068367004, "std": 0.001561639248393476, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_q.weight": { "min": -0.03126733377575874, "max": 0.031276635825634, "mean": -2.102728103636764e-05, "std": 0.01803644187748432, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_q.bias": { "min": -0.03121519461274147, "max": 0.031229794025421143, "mean": -0.000677098985761404, "std": 0.017830997705459595, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_k.weight": { "min": -0.03127024322748184, "max": 0.03126488998532295, "mean": -8.836910637910478e-06, "std": 0.018035493791103363, "sparsity": 9.5367431640625e-07, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_k.bias": { "min": -0.031232407316565514, "max": 0.031246833503246307, "mean": -0.0007298535201698542, "std": 0.0179455429315567, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_v.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_v.bias": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_out.0.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_out.0.bias": { "min": -0.00021961150923743844, "max": 0.00025036477018147707, "mean": -8.001849209904321e-07, "std": 8.148775668814778e-05, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.3.g": { "min": 0.995234489440918, "max": 1.0012273788452148, "mean": 0.9999035596847534, "std": 0.001056881621479988, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.4.ff.0.0.weight": { "min": -0.03210779279470444, "max": 0.03193911164999008, "mean": 5.988833436276764e-06, "std": 0.018047882243990898, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.14.4.ff.0.0.bias": { "min": -0.031279150396585464, "max": 0.031749434769153595, "mean": 0.00044275011168792844, "std": 0.018095213919878006, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.14.4.ff.2.weight": { "min": -0.0007249970221891999, "max": 0.0007807987276464701, "mean": -3.5197314218748943e-07, "std": 0.00014107293100096285, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.14.4.ff.2.bias": { "min": -0.00022946292301639915, "max": 0.00021843933791387826, "mean": -1.2389690482450533e-06, "std": 7.586943684145808e-05, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.0.weight": { "min": -0.23457658290863037, "max": 0.2724316418170929, "mean": 7.120183454389917e-06, "std": 0.01881435327231884, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.15.1.g": { "min": 0.32128995656967163, "max": 0.692602813243866, "mean": 0.5816522836685181, "std": 0.04586285352706909, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_q.weight": { "min": -0.18137724697589874, "max": 0.19706015288829803, "mean": -1.1772945072152652e-05, "std": 0.03318871185183525, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_q.bias": { "min": -0.1606057584285736, "max": 0.12942680716514587, "mean": -0.0010653780773282051, "std": 0.03413666784763336, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_k.weight": { "min": -0.3314096927642822, "max": 0.3108590841293335, "mean": -1.029382929118583e-05, "std": 0.03223954886198044, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_k.bias": { "min": -7.800930500030518, "max": 8.760626792907715, "mean": 0.09345310181379318, "std": 1.6193360090255737, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_v.weight": { "min": -0.23322908580303192, "max": 0.24158968031406403, "mean": 4.1257830162066966e-05, "std": 0.040864504873752594, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_v.bias": { "min": -0.07589098066091537, "max": 0.06572694331407547, "mean": 0.00047726332559250295, "std": 0.019406452775001526, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_out.0.weight": { "min": -0.24502328038215637, "max": 0.23352351784706116, "mean": -2.668632077984512e-06, "std": 0.039439182728528976, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_out.0.bias": { "min": -0.16295023262500763, "max": 0.16059955954551697, "mean": 0.0016356806736439466, "std": 0.06525918841362, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.3.g": { "min": 0.556554913520813, "max": 0.9408271312713623, "mean": 0.7128406167030334, "std": 0.039769869297742844, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.4.ff.0.0.weight": { "min": -0.22860386967658997, "max": 0.25511136651039124, "mean": -4.539915607892908e-05, "std": 0.04058451950550079, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.15.4.ff.0.0.bias": { "min": -0.13515348732471466, "max": 0.02234305441379547, "mean": -0.04134881868958473, "std": 0.01836741715669632, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.15.4.ff.2.weight": { "min": -0.4212746024131775, "max": 0.39222264289855957, "mean": -4.234017978888005e-06, "std": 0.047794174402952194, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.15.4.ff.2.bias": { "min": -0.6065890789031982, "max": 0.6503084897994995, "mean": 0.0015799436951056123, "std": 0.056790802627801895, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.0.weight": { "min": -0.25144556164741516, "max": 0.3204054832458496, "mean": -5.961472197668627e-06, "std": 0.019617972895503044, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.16.1.g": { "min": 0.36011484265327454, "max": 0.6801881790161133, "mean": 0.5707067251205444, "std": 0.04279083386063576, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_q.weight": { "min": -0.22022095322608948, "max": 0.17668727040290833, "mean": -3.4830391086870804e-05, "std": 0.034304577857255936, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_q.bias": { "min": -0.16363094747066498, "max": 0.2328542321920395, "mean": 0.0003622955409809947, "std": 0.03286634013056755, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_k.weight": { "min": -0.26301464438438416, "max": 0.23922747373580933, "mean": -5.2115137805230916e-05, "std": 0.03390384837985039, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_k.bias": { "min": -4.843376159667969, "max": 5.079013824462891, "mean": 0.043839357793331146, "std": 1.2277964353561401, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_v.weight": { "min": -0.24616090953350067, "max": 0.24996501207351685, "mean": 7.23035482224077e-05, "std": 0.04399650916457176, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_v.bias": { "min": -0.06268942356109619, "max": 0.054509397596120834, "mean": 0.0006487497594207525, "std": 0.017188087105751038, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_out.0.weight": { "min": -0.2859387695789337, "max": 0.27142879366874695, "mean": -4.999006341677159e-05, "std": 0.04299502447247505, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_out.0.bias": { "min": -0.16028116643428802, "max": 0.1701924204826355, "mean": -0.00288166431710124, "std": 0.05925562232732773, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.3.g": { "min": 0.5195892453193665, "max": 0.9285021424293518, "mean": 0.71345454454422, "std": 0.03798013553023338, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.4.ff.0.0.weight": { "min": -0.23824341595172882, "max": 0.24957609176635742, "mean": 0.0004649516486097127, "std": 0.040465425699949265, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.16.4.ff.0.0.bias": { "min": -0.1440071016550064, "max": 0.041583579033613205, "mean": -0.03968297317624092, "std": 0.020529083907604218, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.16.4.ff.2.weight": { "min": -0.5325517058372498, "max": 0.5824555158615112, "mean": 5.4546726460102946e-06, "std": 0.04887215048074722, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.16.4.ff.2.bias": { "min": -0.5182770490646362, "max": 0.4927639365196228, "mean": 0.002359384670853615, "std": 0.05340024083852768, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.0.weight": { "min": -0.27337488532066345, "max": 0.3148258626461029, "mean": 1.8105949948221678e-06, "std": 0.020055659115314484, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.17.1.g": { "min": 0.36668556928634644, "max": 0.7091761827468872, "mean": 0.5931493639945984, "std": 0.04574775695800781, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_q.weight": { "min": -0.2106715887784958, "max": 0.1992705911397934, "mean": 3.0829094612272456e-05, "std": 0.03486945852637291, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_q.bias": { "min": -0.18688145279884338, "max": 0.2038576900959015, "mean": 0.0009574516443535686, "std": 0.03150374814867973, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_k.weight": { "min": -0.2888670563697815, "max": 0.33895108103752136, "mean": -4.766129131894559e-05, "std": 0.03459092602133751, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_k.bias": { "min": -3.8705790042877197, "max": 3.3815643787384033, "mean": 0.014464044943451881, "std": 0.8578398823738098, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_v.weight": { "min": -0.2241480052471161, "max": 0.24975183606147766, "mean": -4.014226306026103e-06, "std": 0.04223877936601639, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_v.bias": { "min": -0.0549103245139122, "max": 0.04695763811469078, "mean": -1.4065793948248029e-05, "std": 0.015847966074943542, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_out.0.weight": { "min": -0.2923896610736847, "max": 0.2908935844898224, "mean": -7.1035901783034205e-06, "std": 0.04195380210876465, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_out.0.bias": { "min": -0.12478198111057281, "max": 0.2591152787208557, "mean": -0.003229282796382904, "std": 0.053138162940740585, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.3.g": { "min": 0.45623326301574707, "max": 0.8426384925842285, "mean": 0.7055743336677551, "std": 0.034994304180145264, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.4.ff.0.0.weight": { "min": -0.5110324621200562, "max": 0.3488520383834839, "mean": 0.00034251363831572235, "std": 0.04021010175347328, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.17.4.ff.0.0.bias": { "min": -0.18705947697162628, "max": 0.03953401744365692, "mean": -0.03937750309705734, "std": 0.02131262607872486, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.17.4.ff.2.weight": { "min": -0.5440298318862915, "max": 0.5563207864761353, "mean": -7.213428762042895e-05, "std": 0.050746381282806396, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.17.4.ff.2.bias": { "min": -0.5106754302978516, "max": 0.662798285484314, "mean": 0.002447732724249363, "std": 0.04947002977132797, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.0.weight": { "min": -0.33220773935317993, "max": 0.2652227580547333, "mean": 3.882123110088287e-06, "std": 0.01939382590353489, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.18.1.g": { "min": 0.32238951325416565, "max": 0.764789879322052, "mean": 0.6509858965873718, "std": 0.0451430045068264, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_q.weight": { "min": -0.24893951416015625, "max": 0.219136044383049, "mean": -2.739794126682682e-06, "std": 0.036503732204437256, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_q.bias": { "min": -0.32658451795578003, "max": 0.28703945875167847, "mean": -0.0006784016732126474, "std": 0.038509681820869446, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_k.weight": { "min": -0.3096793591976166, "max": 0.3693031072616577, "mean": 6.47535634925589e-05, "std": 0.036244187504053116, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_k.bias": { "min": -4.706123352050781, "max": 5.793623447418213, "mean": 0.03790595382452011, "std": 1.4113690853118896, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_v.weight": { "min": -0.22146277129650116, "max": 0.20545163750648499, "mean": -7.498646300518885e-05, "std": 0.042494479566812515, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_v.bias": { "min": -0.07756227254867554, "max": 0.05129515379667282, "mean": -0.0009279022924602032, "std": 0.016406826674938202, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_out.0.weight": { "min": -0.33102676272392273, "max": 0.3289909064769745, "mean": -5.028288796893321e-06, "std": 0.042801517993211746, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_out.0.bias": { "min": -0.28435027599334717, "max": 0.111260324716568, "mean": -0.001205979730002582, "std": 0.04699746519327164, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.3.g": { "min": 0.4868572950363159, "max": 0.8827712535858154, "mean": 0.7374467849731445, "std": 0.03787440061569214, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.4.ff.0.0.weight": { "min": -0.3608104884624481, "max": 0.2736315429210663, "mean": 5.1337454351596534e-05, "std": 0.04065750911831856, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.18.4.ff.0.0.bias": { "min": -0.24695155024528503, "max": 0.04662873595952988, "mean": -0.039258524775505066, "std": 0.023203320801258087, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.18.4.ff.2.weight": { "min": -0.6257067322731018, "max": 0.5967472195625305, "mean": -6.336745718726888e-05, "std": 0.05312981456518173, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.18.4.ff.2.bias": { "min": -0.7091463208198547, "max": 0.26562684774398804, "mean": 0.0009212760487571359, "std": 0.051211755722761154, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.0.weight": { "min": -0.34325337409973145, "max": 0.30324116349220276, "mean": 1.430171323590912e-07, "std": 0.019143851473927498, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.19.1.g": { "min": 0.34994906187057495, "max": 0.7801994681358337, "mean": 0.6388012170791626, "std": 0.04902452602982521, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_q.weight": { "min": -0.20566730201244354, "max": 0.2065981775522232, "mean": -6.0025900893379e-05, "std": 0.03770073875784874, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_q.bias": { "min": -0.25845062732696533, "max": 0.268261194229126, "mean": -0.00040606403490528464, "std": 0.04461587592959404, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_k.weight": { "min": -0.3532998263835907, "max": 0.3217300474643707, "mean": -7.498586455767509e-06, "std": 0.037208717316389084, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_k.bias": { "min": -5.249058246612549, "max": 4.194725036621094, "mean": -0.02638459950685501, "std": 1.005539894104004, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_v.weight": { "min": -0.2386980652809143, "max": 0.24372872710227966, "mean": -2.586210030131042e-05, "std": 0.04321879521012306, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_v.bias": { "min": -0.062367696315050125, "max": 0.05657341331243515, "mean": 0.0003560591721907258, "std": 0.01414806954562664, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_out.0.weight": { "min": -0.43753641843795776, "max": 0.37365373969078064, "mean": 1.460490602767095e-05, "std": 0.044131483882665634, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_out.0.bias": { "min": -0.09578664600849152, "max": 0.17602641880512238, "mean": -0.0006584142101928592, "std": 0.0351262167096138, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.3.g": { "min": 0.42189696431159973, "max": 1.0643466711044312, "mean": 0.7485300302505493, "std": 0.04179271310567856, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.4.ff.0.0.weight": { "min": -0.265593945980072, "max": 0.29676973819732666, "mean": -7.866104715503752e-05, "std": 0.04081883281469345, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.19.4.ff.0.0.bias": { "min": -0.18380795419216156, "max": 0.04289933666586876, "mean": -0.036790553480386734, "std": 0.02553965151309967, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.19.4.ff.2.weight": { "min": -0.4579704999923706, "max": 0.4863548278808594, "mean": 4.272036676411517e-05, "std": 0.05422580987215042, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.19.4.ff.2.bias": { "min": -0.2855266034603119, "max": 0.5506117939949036, "mean": -0.0008784987148828804, "std": 0.047787394374608994, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.0.weight": { "min": -0.2924049496650696, "max": 0.32256847620010376, "mean": 5.68283303437056e-06, "std": 0.01997658796608448, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.20.1.g": { "min": 0.29146960377693176, "max": 0.7568098902702332, "mean": 0.6507450938224792, "std": 0.05195383355021477, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_q.weight": { "min": -0.2434154599905014, "max": 0.26121068000793457, "mean": -5.642844371323008e-06, "std": 0.039615679532289505, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_q.bias": { "min": -0.2669755518436432, "max": 0.19996695220470428, "mean": -0.0008783398079685867, "std": 0.051739659160375595, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_k.weight": { "min": -0.27164191007614136, "max": 0.25313133001327515, "mean": 5.889336534892209e-06, "std": 0.03871198371052742, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_k.bias": { "min": -12.952698707580566, "max": 15.9312744140625, "mean": 0.03322799503803253, "std": 1.9877989292144775, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_v.weight": { "min": -0.20647653937339783, "max": 0.2256641685962677, "mean": -7.246333552757278e-05, "std": 0.040561433881521225, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_v.bias": { "min": -0.06935624778270721, "max": 0.06306472420692444, "mean": 0.00016317634435836226, "std": 0.014748629182577133, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_out.0.weight": { "min": -0.4654642939567566, "max": 0.31973931193351746, "mean": 1.960094778041821e-05, "std": 0.04059756174683571, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_out.0.bias": { "min": -0.06414826959371567, "max": 0.11558651179075241, "mean": 0.0012002706062048674, "std": 0.024707410484552383, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.3.g": { "min": 0.3750652074813843, "max": 0.9275709390640259, "mean": 0.7511184215545654, "std": 0.03999503329396248, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.4.ff.0.0.weight": { "min": -0.2787969410419464, "max": 0.2728310525417328, "mean": -0.00016816731658764184, "std": 0.0410102978348732, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.20.4.ff.0.0.bias": { "min": -0.19773395359516144, "max": 0.05162842571735382, "mean": -0.03201429173350334, "std": 0.025033777579665184, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.20.4.ff.2.weight": { "min": -0.6583139300346375, "max": 0.5351659655570984, "mean": -5.119909474160522e-05, "std": 0.05286192148923874, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.20.4.ff.2.bias": { "min": -0.1919519156217575, "max": 0.5808603763580322, "mean": -0.0005111135542392731, "std": 0.04104519635438919, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.0.weight": { "min": -0.4174348711967468, "max": 0.3718706965446472, "mean": 6.703614417347126e-06, "std": 0.021633952856063843, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.21.1.g": { "min": 0.21479681134223938, "max": 0.7478918433189392, "mean": 0.6493618488311768, "std": 0.054201409220695496, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_q.weight": { "min": -0.20870910584926605, "max": 0.1947445124387741, "mean": 4.020327469334006e-05, "std": 0.03945876285433769, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_q.bias": { "min": -0.32888734340667725, "max": 0.25908946990966797, "mean": -0.003229741007089615, "std": 0.05623537674546242, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_k.weight": { "min": -0.2056186944246292, "max": 0.2540878653526306, "mean": 5.3863834182266146e-05, "std": 0.03856115788221359, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_k.bias": { "min": -6.228662967681885, "max": 6.915782928466797, "mean": 0.04823269695043564, "std": 1.3832472562789917, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_v.weight": { "min": -0.20932167768478394, "max": 0.22993139922618866, "mean": -4.4988796616962645e-06, "std": 0.04132062569260597, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_v.bias": { "min": -0.04368359223008156, "max": 0.035936541855335236, "mean": -1.0926916729658842e-05, "std": 0.012798542156815529, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_out.0.weight": { "min": -0.3968988060951233, "max": 0.34478238224983215, "mean": -5.5305037676589563e-05, "std": 0.04239818826317787, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_out.0.bias": { "min": -0.05508316308259964, "max": 0.06261169910430908, "mean": 0.0003532343253027648, "std": 0.018669025972485542, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.3.g": { "min": 0.3511422276496887, "max": 1.0404622554779053, "mean": 0.7897100448608398, "std": 0.048514608293771744, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.4.ff.0.0.weight": { "min": -0.3338225483894348, "max": 0.38620951771736145, "mean": -0.00016899823094718158, "std": 0.04149709641933441, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.21.4.ff.0.0.bias": { "min": -0.15740133821964264, "max": 0.058948904275894165, "mean": -0.0318116769194603, "std": 0.025069545954465866, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.21.4.ff.2.weight": { "min": -0.6960089206695557, "max": 0.46894899010658264, "mean": -8.237230940721929e-05, "std": 0.05181308463215828, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.21.4.ff.2.bias": { "min": -0.24741840362548828, "max": 0.3286932408809662, "mean": -0.00026996995438821614, "std": 0.04144337400794029, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.0.weight": { "min": -0.28653645515441895, "max": 0.35008078813552856, "mean": -2.9175917006796226e-06, "std": 0.024247299879789352, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.22.1.g": { "min": 0.19693201780319214, "max": 0.7785046696662903, "mean": 0.670115053653717, "std": 0.058539655059576035, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_q.weight": { "min": -0.228579580783844, "max": 0.23089821636676788, "mean": -2.1206951714702882e-05, "std": 0.040444690734148026, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_q.bias": { "min": -0.22008375823497772, "max": 0.24102427065372467, "mean": 0.0007767346687614918, "std": 0.055866289883852005, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_k.weight": { "min": -0.21646404266357422, "max": 0.2256259322166443, "mean": -7.261607970576733e-05, "std": 0.03937656059861183, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_k.bias": { "min": -8.884381294250488, "max": 9.046843528747559, "mean": -0.0012065814808011055, "std": 1.8454406261444092, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_v.weight": { "min": -0.2685357332229614, "max": 0.2581280469894409, "mean": 4.3568383262027055e-05, "std": 0.03841337561607361, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_v.bias": { "min": -0.057995330542325974, "max": 0.05802358686923981, "mean": 0.00035532776382751763, "std": 0.014707793481647968, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_out.0.weight": { "min": -0.2625483274459839, "max": 0.2874881625175476, "mean": -6.166227103676647e-05, "std": 0.039080966264009476, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_out.0.bias": { "min": -0.0441780760884285, "max": 0.03726305067539215, "mean": -0.00010403832129668444, "std": 0.013333701528608799, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.3.g": { "min": 0.3392186760902405, "max": 1.088745355606079, "mean": 0.8640130758285522, "std": 0.06376548111438751, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.4.ff.0.0.weight": { "min": -0.42300641536712646, "max": 0.41883379220962524, "mean": 0.00031391510856337845, "std": 0.04352227598428726, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.22.4.ff.0.0.bias": { "min": -0.21468287706375122, "max": 0.1707322746515274, "mean": -0.02942698448896408, "std": 0.03183940798044205, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.22.4.ff.2.weight": { "min": -0.5976030826568604, "max": 0.559415340423584, "mean": -0.00014561890566255897, "std": 0.05347010865807533, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.22.4.ff.2.bias": { "min": -0.17889779806137085, "max": 0.3772771656513214, "mean": 0.001343069365248084, "std": 0.03730209544301033, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.0.weight": { "min": -0.39426180720329285, "max": 0.36868590116500854, "mean": 3.8257519918261096e-05, "std": 0.0286222156137228, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.23.1.g": { "min": 0.2908227741718292, "max": 0.8264791369438171, "mean": 0.7054398655891418, "std": 0.0677274614572525, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_q.weight": { "min": -0.926691472530365, "max": 1.0270028114318848, "mean": -2.8848577130702324e-05, "std": 0.04765753820538521, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_q.bias": { "min": -0.878186821937561, "max": 0.8147233724594116, "mean": -0.0002844139817170799, "std": 0.09543365985155106, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_k.weight": { "min": -0.27030670642852783, "max": 0.24055372178554535, "mean": -2.2271982743404806e-05, "std": 0.038951653987169266, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_k.bias": { "min": -23.68506622314453, "max": 22.795772552490234, "mean": -0.09177836775779724, "std": 4.062017440795898, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_v.weight": { "min": -0.22721800208091736, "max": 0.24524104595184326, "mean": -2.5419916710234247e-05, "std": 0.038644734770059586, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_v.bias": { "min": -0.059977784752845764, "max": 0.04509967938065529, "mean": -0.00013076608593109995, "std": 0.01468411460518837, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_out.0.weight": { "min": -0.3371436893939972, "max": 0.3742288053035736, "mean": 7.546843335148878e-06, "std": 0.04082665964961052, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_out.0.bias": { "min": -0.04609467089176178, "max": 0.19514600932598114, "mean": 0.00027449309709481895, "std": 0.013541752472519875, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.3.g": { "min": 0.37357744574546814, "max": 1.125421166419983, "mean": 0.8902103900909424, "std": 0.06386467814445496, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.4.ff.0.0.weight": { "min": -0.447258859872818, "max": 0.5423630475997925, "mean": 2.548232805565931e-05, "std": 0.045591775327920914, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.23.4.ff.0.0.bias": { "min": -0.22343683242797852, "max": 0.08690512925386429, "mean": -0.03200257197022438, "std": 0.03771420195698738, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.23.4.ff.2.weight": { "min": -0.7260164022445679, "max": 0.6879873275756836, "mean": 3.631926665548235e-05, "std": 0.05180613696575165, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.23.4.ff.2.bias": { "min": -0.17385190725326538, "max": 0.21751302480697632, "mean": 3.567736712284386e-05, "std": 0.03174319490790367, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.0.weight": { "min": -0.3385016918182373, "max": 0.37161216139793396, "mean": 4.3165768147446215e-05, "std": 0.0341353677213192, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.24.1.g": { "min": 0.31760096549987793, "max": 1.2830872535705566, "mean": 0.6014329195022583, "std": 0.08317635953426361, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_q.weight": { "min": -0.28283271193504333, "max": 0.26012101769447327, "mean": -2.921331542893313e-06, "std": 0.035985857248306274, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_q.bias": { "min": -0.23526135087013245, "max": 0.20543411374092102, "mean": 0.00024757458595559, "std": 0.05601666867733002, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_k.weight": { "min": -0.4347652792930603, "max": 0.32389530539512634, "mean": 2.395988121861592e-05, "std": 0.03412287309765816, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_k.bias": { "min": -5.541207790374756, "max": 7.30653190612793, "mean": -0.00736255943775177, "std": 0.6987443566322327, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_v.weight": { "min": -0.3433501720428467, "max": 0.361217200756073, "mean": 0.0001032147411024198, "std": 0.04784071072936058, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_v.bias": { "min": -0.07378581166267395, "max": 0.060352873057127, "mean": 0.0009383288561366498, "std": 0.01492984127253294, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_out.0.weight": { "min": -0.2561882436275482, "max": 0.28616371750831604, "mean": 5.244153726380318e-06, "std": 0.04157177358865738, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_out.0.bias": { "min": -0.05515698716044426, "max": 0.062612384557724, "mean": 0.00012199293996673077, "std": 0.007132581900805235, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.3.g": { "min": 0.49441853165626526, "max": 1.2188090085983276, "mean": 1.013464331626892, "std": 0.11732637882232666, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.4.ff.0.0.weight": { "min": -1.0939745903015137, "max": 1.0476189851760864, "mean": -4.830169564229436e-05, "std": 0.05242462456226349, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.24.4.ff.0.0.bias": { "min": -0.22291964292526245, "max": 0.17299318313598633, "mean": -0.027209078893065453, "std": 0.03627277910709381, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.24.4.ff.2.weight": { "min": -0.8831630349159241, "max": 0.9219300150871277, "mean": -0.00014596671098843217, "std": 0.05330995097756386, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.24.4.ff.2.bias": { "min": -0.17071670293807983, "max": 0.3785896301269531, "mean": 0.0033629729878157377, "std": 0.03981942683458328, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.0.weight": { "min": -0.7773581147193909, "max": 0.721552848815918, "mean": 1.7906297216541134e-05, "std": 0.0461493544280529, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.25.1.g": { "min": 0.33866649866104126, "max": 1.4223623275756836, "mean": 0.9482957124710083, "std": 0.20650897920131683, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_q.weight": { "min": -1.7458724975585938, "max": 1.7043527364730835, "mean": 0.0002272979763802141, "std": 0.1587107926607132, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_q.bias": { "min": -1.1964622735977173, "max": 1.0986626148223877, "mean": -0.009530629962682724, "std": 0.20347940921783447, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_k.weight": { "min": -0.420305460691452, "max": 0.42840367555618286, "mean": 6.361818668665364e-05, "std": 0.04802125319838524, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_k.bias": { "min": -19.700023651123047, "max": 19.49565315246582, "mean": -0.24793246388435364, "std": 4.7666015625, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_v.weight": { "min": -0.3232991695404053, "max": 0.4378996789455414, "mean": -1.1727358469215687e-05, "std": 0.04616958647966385, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_v.bias": { "min": -0.033631421625614166, "max": 0.03664267063140869, "mean": 0.0006392866489477456, "std": 0.012905232608318329, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_out.0.weight": { "min": -0.7025362849235535, "max": 0.6701837778091431, "mean": 4.212657222524285e-05, "std": 0.057898350059986115, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_out.0.bias": { "min": -0.07234562933444977, "max": 0.06781232357025146, "mean": -0.00013423134805634618, "std": 0.012877929955720901, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.3.g": { "min": 0.3804936408996582, "max": 1.3917937278747559, "mean": 1.0666232109069824, "std": 0.21957866847515106, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.4.ff.0.0.weight": { "min": -0.6164886951446533, "max": 0.7186930179595947, "mean": 0.00011397639173083007, "std": 0.05803186818957329, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.25.4.ff.0.0.bias": { "min": -0.21819192171096802, "max": 0.22446297109127045, "mean": 0.006146667059510946, "std": 0.04965293034911156, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.25.4.ff.2.weight": { "min": -0.6298643350601196, "max": 0.8897628784179688, "mean": 1.269071981369052e-05, "std": 0.023556767031550407, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.25.4.ff.2.bias": { "min": -0.50624680519104, "max": 0.4730708599090576, "mean": -0.0030176215805113316, "std": 0.06914978474378586, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.norm_out.g": { "min": 0.5384271144866943, "max": 1.1763767004013062, "mean": 0.7825473546981812, "std": 0.09825034439563751, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.proj_out.weight": { "min": -0.26688942313194275, "max": 0.21287617087364197, "mean": -0.00022272299975156784, "std": 0.0540103055536747, "sparsity": 0.0, "shape": [ 100, 1024 ] }, "transformer.proj_out.bias": { "min": -0.23796546459197998, "max": 0.014876163564622402, "mean": -0.04389083757996559, "std": 0.03420323133468628, "sparsity": 0.0, "shape": [ 100 ] } } }