{ "layer_types": { "transformer": 391 }, "parameter_counts": { "transformer.time_embed.time_mlp.0.weight": 262144, "transformer.time_embed.time_mlp.0.bias": 1024, "transformer.time_embed.time_mlp.2.weight": 1048576, "transformer.time_embed.time_mlp.2.bias": 1024, "transformer.text_embed.text_embed.weight": 254600, "transformer.input_embed.proj.weight": 307200, "transformer.input_embed.proj.bias": 1024, "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, "transformer.layers.0.1.g": 1024, "transformer.layers.0.2.to_q.weight": 1048576, "transformer.layers.0.2.to_q.bias": 1024, "transformer.layers.0.2.to_k.weight": 1048576, "transformer.layers.0.2.to_k.bias": 1024, "transformer.layers.0.2.to_v.weight": 1048576, "transformer.layers.0.2.to_v.bias": 1024, "transformer.layers.0.2.to_out.0.weight": 1048576, "transformer.layers.0.2.to_out.0.bias": 1024, "transformer.layers.0.3.g": 1024, "transformer.layers.0.4.ff.0.0.weight": 4194304, "transformer.layers.0.4.ff.0.0.bias": 4096, "transformer.layers.0.4.ff.2.weight": 4194304, "transformer.layers.0.4.ff.2.bias": 1024, "transformer.layers.1.1.g": 1024, "transformer.layers.1.2.to_q.weight": 1048576, "transformer.layers.1.2.to_q.bias": 1024, "transformer.layers.1.2.to_k.weight": 1048576, "transformer.layers.1.2.to_k.bias": 1024, "transformer.layers.1.2.to_v.weight": 1048576, "transformer.layers.1.2.to_v.bias": 1024, "transformer.layers.1.2.to_out.0.weight": 1048576, "transformer.layers.1.2.to_out.0.bias": 1024, "transformer.layers.1.3.g": 1024, "transformer.layers.1.4.ff.0.0.weight": 4194304, "transformer.layers.1.4.ff.0.0.bias": 4096, "transformer.layers.1.4.ff.2.weight": 4194304, "transformer.layers.1.4.ff.2.bias": 1024, "transformer.layers.2.1.g": 1024, "transformer.layers.2.2.to_q.weight": 1048576, "transformer.layers.2.2.to_q.bias": 1024, "transformer.layers.2.2.to_k.weight": 1048576, "transformer.layers.2.2.to_k.bias": 1024, "transformer.layers.2.2.to_v.weight": 1048576, "transformer.layers.2.2.to_v.bias": 1024, "transformer.layers.2.2.to_out.0.weight": 1048576, "transformer.layers.2.2.to_out.0.bias": 1024, "transformer.layers.2.3.g": 1024, "transformer.layers.2.4.ff.0.0.weight": 4194304, "transformer.layers.2.4.ff.0.0.bias": 4096, "transformer.layers.2.4.ff.2.weight": 4194304, "transformer.layers.2.4.ff.2.bias": 1024, "transformer.layers.3.1.g": 1024, "transformer.layers.3.2.to_q.weight": 1048576, "transformer.layers.3.2.to_q.bias": 1024, "transformer.layers.3.2.to_k.weight": 1048576, "transformer.layers.3.2.to_k.bias": 1024, "transformer.layers.3.2.to_v.weight": 1048576, "transformer.layers.3.2.to_v.bias": 1024, "transformer.layers.3.2.to_out.0.weight": 1048576, "transformer.layers.3.2.to_out.0.bias": 1024, "transformer.layers.3.3.g": 1024, "transformer.layers.3.4.ff.0.0.weight": 4194304, "transformer.layers.3.4.ff.0.0.bias": 4096, "transformer.layers.3.4.ff.2.weight": 4194304, "transformer.layers.3.4.ff.2.bias": 1024, "transformer.layers.4.1.g": 1024, "transformer.layers.4.2.to_q.weight": 1048576, "transformer.layers.4.2.to_q.bias": 1024, "transformer.layers.4.2.to_k.weight": 1048576, "transformer.layers.4.2.to_k.bias": 1024, "transformer.layers.4.2.to_v.weight": 1048576, "transformer.layers.4.2.to_v.bias": 1024, "transformer.layers.4.2.to_out.0.weight": 1048576, "transformer.layers.4.2.to_out.0.bias": 1024, "transformer.layers.4.3.g": 1024, "transformer.layers.4.4.ff.0.0.weight": 4194304, "transformer.layers.4.4.ff.0.0.bias": 4096, "transformer.layers.4.4.ff.2.weight": 4194304, "transformer.layers.4.4.ff.2.bias": 1024, "transformer.layers.5.1.g": 1024, "transformer.layers.5.2.to_q.weight": 1048576, "transformer.layers.5.2.to_q.bias": 1024, "transformer.layers.5.2.to_k.weight": 1048576, "transformer.layers.5.2.to_k.bias": 1024, "transformer.layers.5.2.to_v.weight": 1048576, "transformer.layers.5.2.to_v.bias": 1024, "transformer.layers.5.2.to_out.0.weight": 1048576, "transformer.layers.5.2.to_out.0.bias": 1024, "transformer.layers.5.3.g": 1024, "transformer.layers.5.4.ff.0.0.weight": 4194304, "transformer.layers.5.4.ff.0.0.bias": 4096, "transformer.layers.5.4.ff.2.weight": 4194304, "transformer.layers.5.4.ff.2.bias": 1024, "transformer.layers.6.1.g": 1024, "transformer.layers.6.2.to_q.weight": 1048576, "transformer.layers.6.2.to_q.bias": 1024, "transformer.layers.6.2.to_k.weight": 1048576, "transformer.layers.6.2.to_k.bias": 1024, "transformer.layers.6.2.to_v.weight": 1048576, "transformer.layers.6.2.to_v.bias": 1024, "transformer.layers.6.2.to_out.0.weight": 1048576, "transformer.layers.6.2.to_out.0.bias": 1024, "transformer.layers.6.3.g": 1024, "transformer.layers.6.4.ff.0.0.weight": 4194304, "transformer.layers.6.4.ff.0.0.bias": 4096, "transformer.layers.6.4.ff.2.weight": 4194304, "transformer.layers.6.4.ff.2.bias": 1024, "transformer.layers.7.1.g": 1024, "transformer.layers.7.2.to_q.weight": 1048576, "transformer.layers.7.2.to_q.bias": 1024, "transformer.layers.7.2.to_k.weight": 1048576, "transformer.layers.7.2.to_k.bias": 1024, "transformer.layers.7.2.to_v.weight": 1048576, "transformer.layers.7.2.to_v.bias": 1024, "transformer.layers.7.2.to_out.0.weight": 1048576, "transformer.layers.7.2.to_out.0.bias": 1024, "transformer.layers.7.3.g": 1024, "transformer.layers.7.4.ff.0.0.weight": 4194304, "transformer.layers.7.4.ff.0.0.bias": 4096, "transformer.layers.7.4.ff.2.weight": 4194304, "transformer.layers.7.4.ff.2.bias": 1024, "transformer.layers.8.1.g": 1024, "transformer.layers.8.2.to_q.weight": 1048576, "transformer.layers.8.2.to_q.bias": 1024, "transformer.layers.8.2.to_k.weight": 1048576, "transformer.layers.8.2.to_k.bias": 1024, "transformer.layers.8.2.to_v.weight": 1048576, "transformer.layers.8.2.to_v.bias": 1024, "transformer.layers.8.2.to_out.0.weight": 1048576, "transformer.layers.8.2.to_out.0.bias": 1024, "transformer.layers.8.3.g": 1024, "transformer.layers.8.4.ff.0.0.weight": 4194304, "transformer.layers.8.4.ff.0.0.bias": 4096, "transformer.layers.8.4.ff.2.weight": 4194304, "transformer.layers.8.4.ff.2.bias": 1024, "transformer.layers.9.1.g": 1024, "transformer.layers.9.2.to_q.weight": 1048576, "transformer.layers.9.2.to_q.bias": 1024, "transformer.layers.9.2.to_k.weight": 1048576, "transformer.layers.9.2.to_k.bias": 1024, "transformer.layers.9.2.to_v.weight": 1048576, "transformer.layers.9.2.to_v.bias": 1024, "transformer.layers.9.2.to_out.0.weight": 1048576, "transformer.layers.9.2.to_out.0.bias": 1024, "transformer.layers.9.3.g": 1024, "transformer.layers.9.4.ff.0.0.weight": 4194304, "transformer.layers.9.4.ff.0.0.bias": 4096, "transformer.layers.9.4.ff.2.weight": 4194304, "transformer.layers.9.4.ff.2.bias": 1024, "transformer.layers.10.1.g": 1024, "transformer.layers.10.2.to_q.weight": 1048576, "transformer.layers.10.2.to_q.bias": 1024, "transformer.layers.10.2.to_k.weight": 1048576, "transformer.layers.10.2.to_k.bias": 1024, "transformer.layers.10.2.to_v.weight": 1048576, "transformer.layers.10.2.to_v.bias": 1024, "transformer.layers.10.2.to_out.0.weight": 1048576, "transformer.layers.10.2.to_out.0.bias": 1024, "transformer.layers.10.3.g": 1024, "transformer.layers.10.4.ff.0.0.weight": 4194304, "transformer.layers.10.4.ff.0.0.bias": 4096, "transformer.layers.10.4.ff.2.weight": 4194304, "transformer.layers.10.4.ff.2.bias": 1024, "transformer.layers.11.1.g": 1024, "transformer.layers.11.2.to_q.weight": 1048576, "transformer.layers.11.2.to_q.bias": 1024, "transformer.layers.11.2.to_k.weight": 1048576, "transformer.layers.11.2.to_k.bias": 1024, "transformer.layers.11.2.to_v.weight": 1048576, "transformer.layers.11.2.to_v.bias": 1024, "transformer.layers.11.2.to_out.0.weight": 1048576, "transformer.layers.11.2.to_out.0.bias": 1024, "transformer.layers.11.3.g": 1024, "transformer.layers.11.4.ff.0.0.weight": 4194304, "transformer.layers.11.4.ff.0.0.bias": 4096, "transformer.layers.11.4.ff.2.weight": 4194304, "transformer.layers.11.4.ff.2.bias": 1024, "transformer.layers.12.1.g": 1024, "transformer.layers.12.2.to_q.weight": 1048576, "transformer.layers.12.2.to_q.bias": 1024, "transformer.layers.12.2.to_k.weight": 1048576, "transformer.layers.12.2.to_k.bias": 1024, "transformer.layers.12.2.to_v.weight": 1048576, "transformer.layers.12.2.to_v.bias": 1024, "transformer.layers.12.2.to_out.0.weight": 1048576, "transformer.layers.12.2.to_out.0.bias": 1024, "transformer.layers.12.3.g": 1024, "transformer.layers.12.4.ff.0.0.weight": 4194304, "transformer.layers.12.4.ff.0.0.bias": 4096, "transformer.layers.12.4.ff.2.weight": 4194304, "transformer.layers.12.4.ff.2.bias": 1024, "transformer.layers.13.0.weight": 2097152, "transformer.layers.13.1.g": 1024, "transformer.layers.13.2.to_q.weight": 1048576, "transformer.layers.13.2.to_q.bias": 1024, "transformer.layers.13.2.to_k.weight": 1048576, "transformer.layers.13.2.to_k.bias": 1024, "transformer.layers.13.2.to_v.weight": 1048576, "transformer.layers.13.2.to_v.bias": 1024, "transformer.layers.13.2.to_out.0.weight": 1048576, "transformer.layers.13.2.to_out.0.bias": 1024, "transformer.layers.13.3.g": 1024, "transformer.layers.13.4.ff.0.0.weight": 4194304, "transformer.layers.13.4.ff.0.0.bias": 4096, "transformer.layers.13.4.ff.2.weight": 4194304, "transformer.layers.13.4.ff.2.bias": 1024, "transformer.layers.14.0.weight": 2097152, "transformer.layers.14.1.g": 1024, "transformer.layers.14.2.to_q.weight": 1048576, "transformer.layers.14.2.to_q.bias": 1024, "transformer.layers.14.2.to_k.weight": 1048576, "transformer.layers.14.2.to_k.bias": 1024, "transformer.layers.14.2.to_v.weight": 1048576, "transformer.layers.14.2.to_v.bias": 1024, "transformer.layers.14.2.to_out.0.weight": 1048576, "transformer.layers.14.2.to_out.0.bias": 1024, "transformer.layers.14.3.g": 1024, "transformer.layers.14.4.ff.0.0.weight": 4194304, "transformer.layers.14.4.ff.0.0.bias": 4096, "transformer.layers.14.4.ff.2.weight": 4194304, "transformer.layers.14.4.ff.2.bias": 1024, "transformer.layers.15.0.weight": 2097152, "transformer.layers.15.1.g": 1024, "transformer.layers.15.2.to_q.weight": 1048576, "transformer.layers.15.2.to_q.bias": 1024, "transformer.layers.15.2.to_k.weight": 1048576, "transformer.layers.15.2.to_k.bias": 1024, "transformer.layers.15.2.to_v.weight": 1048576, "transformer.layers.15.2.to_v.bias": 1024, "transformer.layers.15.2.to_out.0.weight": 1048576, "transformer.layers.15.2.to_out.0.bias": 1024, "transformer.layers.15.3.g": 1024, "transformer.layers.15.4.ff.0.0.weight": 4194304, "transformer.layers.15.4.ff.0.0.bias": 4096, "transformer.layers.15.4.ff.2.weight": 4194304, "transformer.layers.15.4.ff.2.bias": 1024, "transformer.layers.16.0.weight": 2097152, "transformer.layers.16.1.g": 1024, "transformer.layers.16.2.to_q.weight": 1048576, "transformer.layers.16.2.to_q.bias": 1024, "transformer.layers.16.2.to_k.weight": 1048576, "transformer.layers.16.2.to_k.bias": 1024, "transformer.layers.16.2.to_v.weight": 1048576, "transformer.layers.16.2.to_v.bias": 1024, "transformer.layers.16.2.to_out.0.weight": 1048576, "transformer.layers.16.2.to_out.0.bias": 1024, "transformer.layers.16.3.g": 1024, "transformer.layers.16.4.ff.0.0.weight": 4194304, "transformer.layers.16.4.ff.0.0.bias": 4096, "transformer.layers.16.4.ff.2.weight": 4194304, "transformer.layers.16.4.ff.2.bias": 1024, "transformer.layers.17.0.weight": 2097152, "transformer.layers.17.1.g": 1024, "transformer.layers.17.2.to_q.weight": 1048576, "transformer.layers.17.2.to_q.bias": 1024, "transformer.layers.17.2.to_k.weight": 1048576, "transformer.layers.17.2.to_k.bias": 1024, "transformer.layers.17.2.to_v.weight": 1048576, "transformer.layers.17.2.to_v.bias": 1024, "transformer.layers.17.2.to_out.0.weight": 1048576, "transformer.layers.17.2.to_out.0.bias": 1024, "transformer.layers.17.3.g": 1024, "transformer.layers.17.4.ff.0.0.weight": 4194304, "transformer.layers.17.4.ff.0.0.bias": 4096, "transformer.layers.17.4.ff.2.weight": 4194304, "transformer.layers.17.4.ff.2.bias": 1024, "transformer.layers.18.0.weight": 2097152, "transformer.layers.18.1.g": 1024, "transformer.layers.18.2.to_q.weight": 1048576, "transformer.layers.18.2.to_q.bias": 1024, "transformer.layers.18.2.to_k.weight": 1048576, "transformer.layers.18.2.to_k.bias": 1024, "transformer.layers.18.2.to_v.weight": 1048576, "transformer.layers.18.2.to_v.bias": 1024, "transformer.layers.18.2.to_out.0.weight": 1048576, "transformer.layers.18.2.to_out.0.bias": 1024, "transformer.layers.18.3.g": 1024, "transformer.layers.18.4.ff.0.0.weight": 4194304, "transformer.layers.18.4.ff.0.0.bias": 4096, "transformer.layers.18.4.ff.2.weight": 4194304, "transformer.layers.18.4.ff.2.bias": 1024, "transformer.layers.19.0.weight": 2097152, "transformer.layers.19.1.g": 1024, "transformer.layers.19.2.to_q.weight": 1048576, "transformer.layers.19.2.to_q.bias": 1024, "transformer.layers.19.2.to_k.weight": 1048576, "transformer.layers.19.2.to_k.bias": 1024, "transformer.layers.19.2.to_v.weight": 1048576, "transformer.layers.19.2.to_v.bias": 1024, "transformer.layers.19.2.to_out.0.weight": 1048576, "transformer.layers.19.2.to_out.0.bias": 1024, "transformer.layers.19.3.g": 1024, "transformer.layers.19.4.ff.0.0.weight": 4194304, "transformer.layers.19.4.ff.0.0.bias": 4096, "transformer.layers.19.4.ff.2.weight": 4194304, "transformer.layers.19.4.ff.2.bias": 1024, "transformer.layers.20.0.weight": 2097152, "transformer.layers.20.1.g": 1024, "transformer.layers.20.2.to_q.weight": 1048576, "transformer.layers.20.2.to_q.bias": 1024, "transformer.layers.20.2.to_k.weight": 1048576, "transformer.layers.20.2.to_k.bias": 1024, "transformer.layers.20.2.to_v.weight": 1048576, "transformer.layers.20.2.to_v.bias": 1024, "transformer.layers.20.2.to_out.0.weight": 1048576, "transformer.layers.20.2.to_out.0.bias": 1024, "transformer.layers.20.3.g": 1024, "transformer.layers.20.4.ff.0.0.weight": 4194304, "transformer.layers.20.4.ff.0.0.bias": 4096, "transformer.layers.20.4.ff.2.weight": 4194304, "transformer.layers.20.4.ff.2.bias": 1024, "transformer.layers.21.0.weight": 2097152, "transformer.layers.21.1.g": 1024, "transformer.layers.21.2.to_q.weight": 1048576, "transformer.layers.21.2.to_q.bias": 1024, "transformer.layers.21.2.to_k.weight": 1048576, "transformer.layers.21.2.to_k.bias": 1024, "transformer.layers.21.2.to_v.weight": 1048576, "transformer.layers.21.2.to_v.bias": 1024, "transformer.layers.21.2.to_out.0.weight": 1048576, "transformer.layers.21.2.to_out.0.bias": 1024, "transformer.layers.21.3.g": 1024, "transformer.layers.21.4.ff.0.0.weight": 4194304, "transformer.layers.21.4.ff.0.0.bias": 4096, "transformer.layers.21.4.ff.2.weight": 4194304, "transformer.layers.21.4.ff.2.bias": 1024, "transformer.layers.22.0.weight": 2097152, "transformer.layers.22.1.g": 1024, "transformer.layers.22.2.to_q.weight": 1048576, "transformer.layers.22.2.to_q.bias": 1024, "transformer.layers.22.2.to_k.weight": 1048576, "transformer.layers.22.2.to_k.bias": 1024, "transformer.layers.22.2.to_v.weight": 1048576, "transformer.layers.22.2.to_v.bias": 1024, "transformer.layers.22.2.to_out.0.weight": 1048576, "transformer.layers.22.2.to_out.0.bias": 1024, "transformer.layers.22.3.g": 1024, "transformer.layers.22.4.ff.0.0.weight": 4194304, "transformer.layers.22.4.ff.0.0.bias": 4096, "transformer.layers.22.4.ff.2.weight": 4194304, "transformer.layers.22.4.ff.2.bias": 1024, "transformer.layers.23.0.weight": 2097152, "transformer.layers.23.1.g": 1024, "transformer.layers.23.2.to_q.weight": 1048576, "transformer.layers.23.2.to_q.bias": 1024, "transformer.layers.23.2.to_k.weight": 1048576, "transformer.layers.23.2.to_k.bias": 1024, "transformer.layers.23.2.to_v.weight": 1048576, "transformer.layers.23.2.to_v.bias": 1024, "transformer.layers.23.2.to_out.0.weight": 1048576, "transformer.layers.23.2.to_out.0.bias": 1024, "transformer.layers.23.3.g": 1024, "transformer.layers.23.4.ff.0.0.weight": 4194304, "transformer.layers.23.4.ff.0.0.bias": 4096, "transformer.layers.23.4.ff.2.weight": 4194304, "transformer.layers.23.4.ff.2.bias": 1024, "transformer.layers.24.0.weight": 2097152, "transformer.layers.24.1.g": 1024, "transformer.layers.24.2.to_q.weight": 1048576, "transformer.layers.24.2.to_q.bias": 1024, "transformer.layers.24.2.to_k.weight": 1048576, "transformer.layers.24.2.to_k.bias": 1024, "transformer.layers.24.2.to_v.weight": 1048576, "transformer.layers.24.2.to_v.bias": 1024, "transformer.layers.24.2.to_out.0.weight": 1048576, "transformer.layers.24.2.to_out.0.bias": 1024, "transformer.layers.24.3.g": 1024, "transformer.layers.24.4.ff.0.0.weight": 4194304, "transformer.layers.24.4.ff.0.0.bias": 4096, "transformer.layers.24.4.ff.2.weight": 4194304, "transformer.layers.24.4.ff.2.bias": 1024, "transformer.layers.25.0.weight": 2097152, "transformer.layers.25.1.g": 1024, "transformer.layers.25.2.to_q.weight": 1048576, "transformer.layers.25.2.to_q.bias": 1024, "transformer.layers.25.2.to_k.weight": 1048576, "transformer.layers.25.2.to_k.bias": 1024, "transformer.layers.25.2.to_v.weight": 1048576, "transformer.layers.25.2.to_v.bias": 1024, "transformer.layers.25.2.to_out.0.weight": 1048576, "transformer.layers.25.2.to_out.0.bias": 1024, "transformer.layers.25.3.g": 1024, "transformer.layers.25.4.ff.0.0.weight": 4194304, "transformer.layers.25.4.ff.0.0.bias": 4096, "transformer.layers.25.4.ff.2.weight": 4194304, "transformer.layers.25.4.ff.2.bias": 1024, "transformer.norm_out.g": 1024, "transformer.proj_out.weight": 102400, "transformer.proj_out.bias": 100 }, "important_layers": [ "transformer.time_embed.time_mlp.0.weight", "transformer.time_embed.time_mlp.2.weight", "transformer.text_embed.text_embed.weight", "transformer.input_embed.proj.weight", "transformer.input_embed.conv_pos_embed.conv1d.0.weight", "transformer.input_embed.conv_pos_embed.conv1d.2.weight", "transformer.layers.0.2.to_q.weight", "transformer.layers.0.2.to_k.weight", "transformer.layers.0.2.to_v.weight", "transformer.layers.0.2.to_out.0.weight", "transformer.layers.0.4.ff.0.0.weight", "transformer.layers.0.4.ff.2.weight", "transformer.layers.1.2.to_q.weight", "transformer.layers.1.2.to_k.weight", "transformer.layers.1.2.to_v.weight", "transformer.layers.1.2.to_out.0.weight", "transformer.layers.1.4.ff.0.0.weight", "transformer.layers.1.4.ff.2.weight", "transformer.layers.2.2.to_q.weight", "transformer.layers.2.2.to_k.weight", "transformer.layers.2.2.to_v.weight", "transformer.layers.2.2.to_out.0.weight", "transformer.layers.2.4.ff.0.0.weight", "transformer.layers.2.4.ff.2.weight", "transformer.layers.3.2.to_q.weight", "transformer.layers.3.2.to_k.weight", "transformer.layers.3.2.to_v.weight", "transformer.layers.3.2.to_out.0.weight", "transformer.layers.3.4.ff.0.0.weight", "transformer.layers.3.4.ff.2.weight", "transformer.layers.4.2.to_q.weight", "transformer.layers.4.2.to_k.weight", "transformer.layers.4.2.to_v.weight", "transformer.layers.4.2.to_out.0.weight", "transformer.layers.4.4.ff.0.0.weight", "transformer.layers.4.4.ff.2.weight", "transformer.layers.5.2.to_q.weight", "transformer.layers.5.2.to_k.weight", "transformer.layers.5.2.to_v.weight", "transformer.layers.5.2.to_out.0.weight", "transformer.layers.5.4.ff.0.0.weight", "transformer.layers.5.4.ff.2.weight", "transformer.layers.6.2.to_q.weight", "transformer.layers.6.2.to_k.weight", "transformer.layers.6.2.to_v.weight", "transformer.layers.6.2.to_out.0.weight", "transformer.layers.6.4.ff.0.0.weight", "transformer.layers.6.4.ff.2.weight", "transformer.layers.7.2.to_q.weight", "transformer.layers.7.2.to_k.weight", "transformer.layers.7.2.to_v.weight", "transformer.layers.7.2.to_out.0.weight", "transformer.layers.7.4.ff.0.0.weight", "transformer.layers.7.4.ff.2.weight", "transformer.layers.8.4.ff.0.0.weight", "transformer.layers.8.4.ff.2.weight", "transformer.layers.9.4.ff.0.0.weight", "transformer.layers.9.4.ff.2.weight", "transformer.layers.10.4.ff.0.0.weight", "transformer.layers.10.4.ff.2.weight", "transformer.layers.11.4.ff.0.0.weight", "transformer.layers.11.4.ff.2.weight", "transformer.layers.12.4.ff.0.0.weight", "transformer.layers.12.4.ff.2.weight", "transformer.layers.13.0.weight", "transformer.layers.13.4.ff.0.0.weight", "transformer.layers.13.4.ff.2.weight", "transformer.layers.14.0.weight", "transformer.layers.14.4.ff.0.0.weight", "transformer.layers.14.4.ff.2.weight", "transformer.layers.15.0.weight", "transformer.layers.15.4.ff.0.0.weight", "transformer.layers.15.4.ff.2.weight", "transformer.layers.16.4.ff.0.0.weight", "transformer.layers.16.4.ff.2.weight", "transformer.layers.17.4.ff.0.0.weight", "transformer.layers.17.4.ff.2.weight", "transformer.layers.18.4.ff.0.0.weight", "transformer.layers.18.4.ff.2.weight", "transformer.layers.19.4.ff.0.0.weight", "transformer.layers.19.4.ff.2.weight", "transformer.layers.20.4.ff.0.0.weight", "transformer.layers.20.4.ff.2.weight", "transformer.layers.21.4.ff.0.0.weight", "transformer.layers.21.4.ff.2.weight", "transformer.layers.22.4.ff.0.0.weight", "transformer.layers.22.4.ff.2.weight", "transformer.layers.23.4.ff.0.0.weight", "transformer.layers.23.4.ff.2.weight", "transformer.layers.24.4.ff.0.0.weight", "transformer.layers.24.4.ff.2.weight", "transformer.layers.25.4.ff.0.0.weight", "transformer.layers.25.4.ff.2.weight" ], "bottleneck_layers": [], "recommendations": { "focus_layers": [ "transformer.time_embed.time_mlp.0.weight", "transformer.time_embed.time_mlp.2.weight", "transformer.text_embed.text_embed.weight", "transformer.input_embed.proj.weight", "transformer.input_embed.conv_pos_embed.conv1d.0.weight", "transformer.input_embed.conv_pos_embed.conv1d.2.weight", "transformer.layers.0.2.to_q.weight", "transformer.layers.0.2.to_k.weight", "transformer.layers.0.2.to_v.weight", "transformer.layers.0.2.to_out.0.weight", "transformer.layers.0.4.ff.0.0.weight", "transformer.layers.0.4.ff.2.weight", "transformer.layers.1.2.to_q.weight", "transformer.layers.1.2.to_k.weight", "transformer.layers.1.2.to_v.weight", "transformer.layers.1.2.to_out.0.weight", "transformer.layers.1.4.ff.0.0.weight", "transformer.layers.1.4.ff.2.weight", "transformer.layers.2.2.to_q.weight", "transformer.layers.2.2.to_k.weight", "transformer.layers.2.2.to_v.weight", "transformer.layers.2.2.to_out.0.weight", "transformer.layers.2.4.ff.0.0.weight", "transformer.layers.2.4.ff.2.weight", "transformer.layers.3.2.to_q.weight", "transformer.layers.3.2.to_k.weight", "transformer.layers.3.2.to_v.weight", "transformer.layers.3.2.to_out.0.weight", "transformer.layers.3.4.ff.0.0.weight", "transformer.layers.3.4.ff.2.weight", "transformer.layers.4.2.to_q.weight", "transformer.layers.4.2.to_k.weight", "transformer.layers.4.2.to_v.weight", "transformer.layers.4.2.to_out.0.weight", "transformer.layers.4.4.ff.0.0.weight", "transformer.layers.4.4.ff.2.weight", "transformer.layers.5.2.to_q.weight", "transformer.layers.5.2.to_k.weight", "transformer.layers.5.2.to_v.weight", "transformer.layers.5.2.to_out.0.weight", "transformer.layers.5.4.ff.0.0.weight", "transformer.layers.5.4.ff.2.weight", "transformer.layers.6.2.to_q.weight", "transformer.layers.6.2.to_k.weight", "transformer.layers.6.2.to_v.weight", "transformer.layers.6.2.to_out.0.weight", "transformer.layers.6.4.ff.0.0.weight", "transformer.layers.6.4.ff.2.weight", "transformer.layers.7.2.to_q.weight", "transformer.layers.7.2.to_k.weight", "transformer.layers.7.2.to_v.weight", "transformer.layers.7.2.to_out.0.weight", "transformer.layers.7.4.ff.0.0.weight", "transformer.layers.7.4.ff.2.weight", "transformer.layers.8.4.ff.0.0.weight", "transformer.layers.8.4.ff.2.weight", "transformer.layers.9.4.ff.0.0.weight", "transformer.layers.9.4.ff.2.weight", "transformer.layers.10.4.ff.0.0.weight", "transformer.layers.10.4.ff.2.weight", "transformer.layers.11.4.ff.0.0.weight", "transformer.layers.11.4.ff.2.weight", "transformer.layers.12.4.ff.0.0.weight", "transformer.layers.12.4.ff.2.weight", "transformer.layers.13.0.weight", "transformer.layers.13.4.ff.0.0.weight", "transformer.layers.13.4.ff.2.weight", "transformer.layers.14.0.weight", "transformer.layers.14.4.ff.0.0.weight", "transformer.layers.14.4.ff.2.weight", "transformer.layers.15.0.weight", "transformer.layers.15.4.ff.0.0.weight", "transformer.layers.15.4.ff.2.weight", "transformer.layers.16.4.ff.0.0.weight", "transformer.layers.16.4.ff.2.weight", "transformer.layers.17.4.ff.0.0.weight", "transformer.layers.17.4.ff.2.weight", "transformer.layers.18.4.ff.0.0.weight", "transformer.layers.18.4.ff.2.weight", "transformer.layers.19.4.ff.0.0.weight", "transformer.layers.19.4.ff.2.weight", "transformer.layers.20.4.ff.0.0.weight", "transformer.layers.20.4.ff.2.weight", "transformer.layers.21.4.ff.0.0.weight", "transformer.layers.21.4.ff.2.weight", "transformer.layers.22.4.ff.0.0.weight", "transformer.layers.22.4.ff.2.weight", "transformer.layers.23.4.ff.0.0.weight", "transformer.layers.23.4.ff.2.weight", "transformer.layers.24.4.ff.0.0.weight", "transformer.layers.24.4.ff.2.weight", "transformer.layers.25.4.ff.0.0.weight", "transformer.layers.25.4.ff.2.weight" ] }, "total_parameters": 391, "total_elements": 360755948, "param_ranges": { "transformer.time_embed.time_mlp.0.weight": { "min": -0.43045347929000854, "max": 0.2989708483219147, "mean": -0.002559528686106205, "std": 0.042551685124635696, "sparsity": 0.0, "shape": [ 1024, 256 ] }, "transformer.time_embed.time_mlp.0.bias": { "min": -0.06319475919008255, "max": 0.10763752460479736, "mean": 0.0005878363735973835, "std": 0.0341116227209568, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.time_embed.time_mlp.2.weight": { "min": -0.4125802516937256, "max": 0.8362879157066345, "mean": -0.00021037086844444275, "std": 0.024107296019792557, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.time_embed.time_mlp.2.bias": { "min": -0.11548256129026413, "max": 0.3214675784111023, "mean": -0.0009404525626450777, "std": 0.01957694999873638, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.text_embed.text_embed.weight": { "min": -2.7917673587799072, "max": 2.87048602104187, "mean": -0.000364800012903288, "std": 0.6153724193572998, "sparsity": 0.0, "shape": [ 2546, 100 ] }, "transformer.input_embed.proj.weight": { "min": -0.2789378762245178, "max": 0.38190650939941406, "mean": 0.00042029444011859596, "std": 0.04275033250451088, "sparsity": 0.0, "shape": [ 1024, 300 ] }, "transformer.input_embed.proj.bias": { "min": -0.22229844331741333, "max": 0.20966938138008118, "mean": -0.004494193941354752, "std": 0.04090972617268562, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { "min": -0.42792314291000366, "max": 0.4753040671348572, "mean": 2.5448428004892776e-06, "std": 0.02450907975435257, "sparsity": 0.0, "shape": [ 1024, 64, 31 ] }, "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { "min": -0.3254566490650177, "max": 0.15697774291038513, "mean": -0.046701110899448395, "std": 0.05157899484038353, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { "min": -0.41040217876434326, "max": 0.3545200824737549, "mean": -0.00012632929428946227, "std": 0.023601176217198372, "sparsity": 0.0, "shape": [ 1024, 64, 31 ] }, "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { "min": -0.22976312041282654, "max": 0.26262250542640686, "mean": -0.029148582369089127, "std": 0.049347616732120514, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.1.g": { "min": 0.25461843609809875, "max": 0.8200721740722656, "mean": 0.5254405736923218, "std": 0.08080819994211197, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_q.weight": { "min": -0.2969217598438263, "max": 0.2653011679649353, "mean": -0.00042407598812133074, "std": 0.03210418298840523, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_q.bias": { "min": -0.09274514019489288, "max": 0.12481185793876648, "mean": 0.0006486732745543122, "std": 0.025742683559656143, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_k.weight": { "min": -0.29045799374580383, "max": 0.28142276406288147, "mean": -7.696857210248709e-05, "std": 0.03093627467751503, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_k.bias": { "min": -5.8994598388671875, "max": 5.814236164093018, "mean": -0.009332070127129555, "std": 1.2954570055007935, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_v.weight": { "min": -0.4248283803462982, "max": 0.3437764346599579, "mean": 9.760602551978081e-05, "std": 0.029952971264719963, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_v.bias": { "min": -0.028973544016480446, "max": 0.027646001428365707, "mean": -0.000311461859382689, "std": 0.01257230993360281, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_out.0.weight": { "min": -0.45393574237823486, "max": 0.4486967921257019, "mean": 2.2734935555490665e-05, "std": 0.023855067789554596, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_out.0.bias": { "min": -0.08869241178035736, "max": 0.09115342795848846, "mean": 0.0022729213815182447, "std": 0.019511748105287552, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.3.g": { "min": 0.26661837100982666, "max": 1.0562738180160522, "mean": 0.5311292409896851, "std": 0.10441415756940842, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.4.ff.0.0.weight": { "min": -0.5744583010673523, "max": 0.6083983182907104, "mean": -0.0004310230724513531, "std": 0.03859498351812363, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.0.4.ff.0.0.bias": { "min": -0.18180975317955017, "max": 0.04576439782977104, "mean": -0.029441392049193382, "std": 0.0425901859998703, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.0.4.ff.2.weight": { "min": -1.1666754484176636, "max": 1.6346206665039062, "mean": 0.00031845836201682687, "std": 0.027693821117281914, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.0.4.ff.2.bias": { "min": -0.16254130005836487, "max": 0.20572608709335327, "mean": -0.021116478368639946, "std": 0.02794043906033039, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.1.g": { "min": 0.22449950873851776, "max": 0.8436615467071533, "mean": 0.48752841353416443, "std": 0.07519911974668503, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_q.weight": { "min": -0.25530511140823364, "max": 0.30584144592285156, "mean": -9.390279956278391e-06, "std": 0.03347048908472061, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_q.bias": { "min": -0.09549093246459961, "max": 0.1104247123003006, "mean": 5.642877658829093e-05, "std": 0.02698560617864132, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_k.weight": { "min": -0.29746732115745544, "max": 0.29597631096839905, "mean": 5.020098251407035e-05, "std": 0.03253835067152977, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_k.bias": { "min": -5.164289474487305, "max": 5.084513187408447, "mean": -0.014594512060284615, "std": 1.157379150390625, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_v.weight": { "min": -0.34489384293556213, "max": 0.34349551796913147, "mean": 7.88411489338614e-05, "std": 0.030058156698942184, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_v.bias": { "min": -0.03615685552358627, "max": 0.033247072249650955, "mean": -0.0001437932369299233, "std": 0.0130230151116848, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_out.0.weight": { "min": -0.31528598070144653, "max": 0.3752017617225647, "mean": -2.1658630430465564e-05, "std": 0.02405543439090252, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_out.0.bias": { "min": -0.10527704656124115, "max": 0.12188438326120377, "mean": -0.001954131992533803, "std": 0.028842832893133163, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.3.g": { "min": 0.3118007183074951, "max": 1.1209547519683838, "mean": 0.6662399172782898, "std": 0.09774922579526901, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.4.ff.0.0.weight": { "min": -0.8724845051765442, "max": 0.6275652050971985, "mean": 0.0016756996046751738, "std": 0.04743832349777222, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.1.4.ff.0.0.bias": { "min": -0.2710001766681671, "max": 0.034087300300598145, "mean": -0.04660267010331154, "std": 0.040595393627882004, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.1.4.ff.2.weight": { "min": -0.9202765226364136, "max": 0.964392364025116, "mean": 0.0010208573658019304, "std": 0.040701836347579956, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.1.4.ff.2.bias": { "min": -0.14455123245716095, "max": 0.07482488453388214, "mean": -0.009084297344088554, "std": 0.025694943964481354, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.1.g": { "min": 0.2397618293762207, "max": 0.7124034762382507, "mean": 0.4472024440765381, "std": 0.0593235045671463, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_q.weight": { "min": -0.2730177044868469, "max": 0.29747670888900757, "mean": 8.653647455503233e-06, "std": 0.03547436371445656, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_q.bias": { "min": -0.11902837455272675, "max": 0.1184682548046112, "mean": 0.0007503863889724016, "std": 0.027607794851064682, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_k.weight": { "min": -0.28101518750190735, "max": 0.27942103147506714, "mean": -7.649646431673318e-05, "std": 0.03510240092873573, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_k.bias": { "min": -2.509594440460205, "max": 2.5215904712677, "mean": 0.026745371520519257, "std": 0.5867790579795837, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_v.weight": { "min": -0.2210964858531952, "max": 0.2716039717197418, "mean": 2.442306140437722e-06, "std": 0.030731501057744026, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_v.bias": { "min": -0.03315361589193344, "max": 0.031151030212640762, "mean": 0.00011695168359437957, "std": 0.012393992394208908, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_out.0.weight": { "min": -0.23539169132709503, "max": 0.23184844851493835, "mean": 5.725533628719859e-05, "std": 0.025697585195302963, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_out.0.bias": { "min": -0.13603144884109497, "max": 0.12801550328731537, "mean": -0.005497873295098543, "std": 0.039962731301784515, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.3.g": { "min": 0.35472768545150757, "max": 1.1723560094833374, "mean": 0.7105388641357422, "std": 0.10377441346645355, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.4.ff.0.0.weight": { "min": -0.6173874735832214, "max": 0.5556294322013855, "mean": 0.0011603377060964704, "std": 0.04611397534608841, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.2.4.ff.0.0.bias": { "min": -0.18947651982307434, "max": 0.024928653612732887, "mean": -0.03484659641981125, "std": 0.028622934594750404, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.2.4.ff.2.weight": { "min": -1.1309525966644287, "max": 0.9703920483589172, "mean": 0.0003591428976505995, "std": 0.04234250634908676, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.2.4.ff.2.bias": { "min": -0.59785395860672, "max": 0.0627356544137001, "mean": -0.004881600849330425, "std": 0.028621168807148933, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.1.g": { "min": 0.3753371834754944, "max": 0.9404803514480591, "mean": 0.5924646854400635, "std": 0.06694936007261276, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_q.weight": { "min": -0.3917739689350128, "max": 0.36935487389564514, "mean": 7.001077028689906e-05, "std": 0.03718659654259682, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_q.bias": { "min": -0.11900685727596283, "max": 0.1365460306406021, "mean": 0.0009158444590866566, "std": 0.029187751933932304, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_k.weight": { "min": -0.6190850138664246, "max": 0.5087974667549133, "mean": 1.5220098248391878e-05, "std": 0.036439333111047745, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_k.bias": { "min": -8.186792373657227, "max": 8.788902282714844, "mean": -0.10927547514438629, "std": 1.698854923248291, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_v.weight": { "min": -0.2765258252620697, "max": 0.23972086608409882, "mean": 5.2279683586675674e-05, "std": 0.03261309862136841, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_v.bias": { "min": -0.051504429429769516, "max": 0.0394677110016346, "mean": 9.376452362630516e-05, "std": 0.012969755567610264, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_out.0.weight": { "min": -0.23076090216636658, "max": 0.23486877977848053, "mean": -2.2034959329175763e-05, "std": 0.02938973717391491, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_out.0.bias": { "min": -0.20423753559589386, "max": 0.10524258017539978, "mean": -0.004020648077130318, "std": 0.03263989835977554, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.3.g": { "min": 0.3396383821964264, "max": 1.0124459266662598, "mean": 0.7007039785385132, "std": 0.09675922244787216, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.4.ff.0.0.weight": { "min": -0.5647669434547424, "max": 0.8336009979248047, "mean": 0.00041507231071591377, "std": 0.042294517159461975, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.3.4.ff.0.0.bias": { "min": -0.21213513612747192, "max": 0.029952630400657654, "mean": -0.03217371925711632, "std": 0.026498902589082718, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.3.4.ff.2.weight": { "min": -0.7548895478248596, "max": 0.7191285490989685, "mean": -1.5825342416064814e-05, "std": 0.03683512657880783, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.3.4.ff.2.bias": { "min": -0.26342180371284485, "max": 0.106303870677948, "mean": -0.0030142769683152437, "std": 0.028873054310679436, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.1.g": { "min": 0.2839457094669342, "max": 0.695040225982666, "mean": 0.4993869960308075, "std": 0.04653431475162506, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_q.weight": { "min": -0.27824723720550537, "max": 0.23382486402988434, "mean": -0.00011091126361861825, "std": 0.03875747323036194, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_q.bias": { "min": -0.1535824090242386, "max": 0.12643294036388397, "mean": -0.0022276602685451508, "std": 0.03332621976733208, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_k.weight": { "min": -0.4143897294998169, "max": 0.6594640016555786, "mean": -1.8512728274799883e-05, "std": 0.03909672051668167, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_k.bias": { "min": -4.237905979156494, "max": 4.722469329833984, "mean": -0.020456835627555847, "std": 1.0076903104782104, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_v.weight": { "min": -0.24504739046096802, "max": 0.2075919508934021, "mean": 4.4300948502495885e-05, "std": 0.033962640911340714, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_v.bias": { "min": -0.03446675091981888, "max": 0.04485952481627464, "mean": -2.2283929865807295e-05, "std": 0.01263953372836113, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_out.0.weight": { "min": -0.20111070573329926, "max": 0.2064419686794281, "mean": -2.9351647754083388e-05, "std": 0.031020889058709145, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_out.0.bias": { "min": -0.1998225450515747, "max": 0.11318594217300415, "mean": -0.002895027631893754, "std": 0.034535519778728485, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.3.g": { "min": 0.3667519986629486, "max": 1.0576496124267578, "mean": 0.6704938411712646, "std": 0.06640732288360596, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.4.ff.0.0.weight": { "min": -0.3984535038471222, "max": 0.5021195411682129, "mean": -3.873988316627219e-05, "std": 0.04113014414906502, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.4.4.ff.0.0.bias": { "min": -0.12865276634693146, "max": 0.02695303224027157, "mean": -0.0305329579859972, "std": 0.021882230415940285, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.4.4.ff.2.weight": { "min": -0.44948050379753113, "max": 0.43325671553611755, "mean": 7.534700125688687e-05, "std": 0.03489053621888161, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.4.4.ff.2.bias": { "min": -0.26749807596206665, "max": 0.07307979464530945, "mean": -0.0010903773363679647, "std": 0.023135719820857048, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.1.g": { "min": 0.28755003213882446, "max": 0.6852815747261047, "mean": 0.5245311260223389, "std": 0.047535065561532974, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_q.weight": { "min": -0.22266581654548645, "max": 0.22331343591213226, "mean": 1.5911335140117444e-05, "std": 0.038949206471443176, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_q.bias": { "min": -0.13634715974330902, "max": 0.10933983325958252, "mean": 0.00024775456404313445, "std": 0.02920820191502571, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_k.weight": { "min": -0.37493303418159485, "max": 0.43759024143218994, "mean": -9.405484888702631e-06, "std": 0.03928741440176964, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_k.bias": { "min": -3.8458516597747803, "max": 4.99931526184082, "mean": 0.0097417663782835, "std": 0.8452187180519104, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_v.weight": { "min": -0.22269685566425323, "max": 0.22029872238636017, "mean": -3.309251042082906e-07, "std": 0.03441028296947479, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_v.bias": { "min": -0.043786074966192245, "max": 0.03593028709292412, "mean": -0.0002595169935375452, "std": 0.012078601866960526, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_out.0.weight": { "min": -0.2127062827348709, "max": 0.18842767179012299, "mean": -1.7018646758515388e-05, "std": 0.03153670206665993, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_out.0.bias": { "min": -0.18093673884868622, "max": 0.12075397372245789, "mean": -0.0023954270873218775, "std": 0.0412798747420311, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.3.g": { "min": 0.4229053258895874, "max": 0.9417746663093567, "mean": 0.6626519560813904, "std": 0.05681704729795456, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.4.ff.0.0.weight": { "min": -0.3708723485469818, "max": 0.4765413999557495, "mean": -8.208492363337427e-05, "std": 0.040889330208301544, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.5.4.ff.0.0.bias": { "min": -0.2084851861000061, "max": 0.02737521566450596, "mean": -0.03023434244096279, "std": 0.021364057436585426, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.5.4.ff.2.weight": { "min": -0.3406664729118347, "max": 0.7341601848602295, "mean": 8.241336036007851e-05, "std": 0.03476617485284805, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.5.4.ff.2.bias": { "min": -0.24016013741493225, "max": 0.05046252906322479, "mean": -0.0011865145061165094, "std": 0.02045980468392372, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.1.g": { "min": 0.3058834671974182, "max": 0.6534616947174072, "mean": 0.5251225829124451, "std": 0.04612237960100174, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_q.weight": { "min": -0.3043194115161896, "max": 0.2172033190727234, "mean": 6.997850869083777e-05, "std": 0.039497096091508865, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_q.bias": { "min": -0.14911659061908722, "max": 0.1309829205274582, "mean": 0.00032657815609127283, "std": 0.030455630272626877, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_k.weight": { "min": -0.2569442689418793, "max": 0.2018917053937912, "mean": 3.1276180379791185e-05, "std": 0.039488255977630615, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_k.bias": { "min": -2.3362653255462646, "max": 2.3758890628814697, "mean": -0.026241008192300797, "std": 0.44977059960365295, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_v.weight": { "min": -0.18858182430267334, "max": 0.21028441190719604, "mean": 3.710644523380324e-05, "std": 0.034793708473443985, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_v.bias": { "min": -0.03167951852083206, "max": 0.03567720949649811, "mean": -0.0001978189975488931, "std": 0.012288851663470268, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_out.0.weight": { "min": -0.1882917732000351, "max": 0.1702534258365631, "mean": -6.83729158481583e-05, "std": 0.03217038884758949, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_out.0.bias": { "min": -0.13942022621631622, "max": 0.1372338831424713, "mean": -0.0025149777065962553, "std": 0.05129906162619591, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.3.g": { "min": 0.4670557677745819, "max": 0.9555894136428833, "mean": 0.668860912322998, "std": 0.052772559225559235, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.4.ff.0.0.weight": { "min": -0.32439276576042175, "max": 0.30925771594047546, "mean": -1.0448575267218985e-06, "std": 0.04094531387090683, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.6.4.ff.0.0.bias": { "min": -0.12486255913972855, "max": 0.025668619200587273, "mean": -0.030689772218465805, "std": 0.019822947680950165, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.6.4.ff.2.weight": { "min": -0.4394906163215637, "max": 0.4453367292881012, "mean": 9.582463098922744e-05, "std": 0.03511909395456314, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.6.4.ff.2.bias": { "min": -0.22461967170238495, "max": 0.051830437034368515, "mean": -0.0011815722100436687, "std": 0.018466372042894363, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.1.g": { "min": 0.3391477167606354, "max": 0.739862322807312, "mean": 0.558701753616333, "std": 0.04139617085456848, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_q.weight": { "min": -0.27299270033836365, "max": 0.27884820103645325, "mean": 2.0352346837171353e-05, "std": 0.04105763137340546, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_q.bias": { "min": -0.13679315149784088, "max": 0.13977941870689392, "mean": 0.0004920524079352617, "std": 0.026632016524672508, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_k.weight": { "min": -0.4905266761779785, "max": 0.35576674342155457, "mean": 8.910118776839226e-05, "std": 0.04069532826542854, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_k.bias": { "min": -2.297072649002075, "max": 1.7451610565185547, "mean": -0.02107967808842659, "std": 0.5001281499862671, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_v.weight": { "min": -0.21811611950397491, "max": 0.19743309915065765, "mean": -4.0164730307878926e-05, "std": 0.034233368933200836, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_v.bias": { "min": -0.04114250838756561, "max": 0.03886367008090019, "mean": -0.0001361201866529882, "std": 0.01288355328142643, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_out.0.weight": { "min": -0.17762865126132965, "max": 0.1828955113887787, "mean": 4.802473995368928e-05, "std": 0.031556740403175354, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_out.0.bias": { "min": -0.17992889881134033, "max": 0.18389376997947693, "mean": -0.002214584732428193, "std": 0.054829709231853485, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.3.g": { "min": 0.4741877317428589, "max": 1.025841474533081, "mean": 0.6452314257621765, "std": 0.050352681428194046, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.4.ff.0.0.weight": { "min": -0.27164527773857117, "max": 0.30913278460502625, "mean": 0.00011245411587879062, "std": 0.04068151116371155, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.7.4.ff.0.0.bias": { "min": -0.10526741296052933, "max": 0.0267398189753294, "mean": -0.029518909752368927, "std": 0.017934836447238922, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.7.4.ff.2.weight": { "min": -0.33933401107788086, "max": 0.3291725814342499, "mean": 5.2628944104071707e-05, "std": 0.034412726759910583, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.7.4.ff.2.bias": { "min": -0.18180128931999207, "max": 0.04250966012477875, "mean": -0.0010595148196443915, "std": 0.017209524288773537, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.1.g": { "min": 0.3251764476299286, "max": 0.686564564704895, "mean": 0.5111627578735352, "std": 0.03695236146450043, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_q.weight": { "min": -0.23392871022224426, "max": 0.22538572549819946, "mean": -3.6134006222710013e-05, "std": 0.03917535021901131, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_q.bias": { "min": -0.11511560529470444, "max": 0.13181880116462708, "mean": 0.0001504624669905752, "std": 0.029160819947719574, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_k.weight": { "min": -0.3522934317588806, "max": 0.28486883640289307, "mean": 6.553360890393378e-06, "std": 0.03924445062875748, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_k.bias": { "min": -4.132338523864746, "max": 3.5437686443328857, "mean": -0.011590493842959404, "std": 0.6826138496398926, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_v.weight": { "min": -0.21074621379375458, "max": 0.20937031507492065, "mean": 3.468795330263674e-05, "std": 0.03448443114757538, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_v.bias": { "min": -0.03586054965853691, "max": 0.04796382784843445, "mean": 0.0007884950027801096, "std": 0.012871338985860348, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_out.0.weight": { "min": -0.21027511358261108, "max": 0.1930612176656723, "mean": -9.818363650992978e-07, "std": 0.03169528394937515, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_out.0.bias": { "min": -0.18642070889472961, "max": 0.1772109568119049, "mean": -0.0028416060376912355, "std": 0.058615587651729584, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.3.g": { "min": 0.4746147096157074, "max": 1.0414643287658691, "mean": 0.6513273119926453, "std": 0.04965711012482643, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.4.ff.0.0.weight": { "min": -0.24834445118904114, "max": 0.3291000425815582, "mean": 0.00018075655680149794, "std": 0.04056985676288605, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.8.4.ff.0.0.bias": { "min": -0.12541179358959198, "max": 0.02496136911213398, "mean": -0.030498577281832695, "std": 0.017614111304283142, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.8.4.ff.2.weight": { "min": -0.42039719223976135, "max": 0.48143431544303894, "mean": 1.1528718459885567e-06, "std": 0.03539694473147392, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.8.4.ff.2.bias": { "min": -0.15133655071258545, "max": 0.04343574121594429, "mean": 4.278856431483291e-05, "std": 0.014885962940752506, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.1.g": { "min": 0.3155560791492462, "max": 0.6816220879554749, "mean": 0.5528930425643921, "std": 0.04069439694285393, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_q.weight": { "min": -0.20635411143302917, "max": 0.21984520554542542, "mean": 3.190069764968939e-05, "std": 0.038299400359392166, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_q.bias": { "min": -0.13771876692771912, "max": 0.1125807911157608, "mean": 2.632014366099611e-05, "std": 0.025809206068515778, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_k.weight": { "min": -0.4028305411338806, "max": 0.3708246946334839, "mean": 2.552652767917607e-05, "std": 0.03817948326468468, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_k.bias": { "min": -3.770878791809082, "max": 2.8686978816986084, "mean": 0.001155341975390911, "std": 0.5168278217315674, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_v.weight": { "min": -0.2037316858768463, "max": 0.1975933313369751, "mean": 2.9730301321251318e-05, "std": 0.03429727256298065, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_v.bias": { "min": -0.05053940787911415, "max": 0.039879124611616135, "mean": -0.00042120314901694655, "std": 0.013415130786597729, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_out.0.weight": { "min": -0.19603155553340912, "max": 0.20171792805194855, "mean": -1.2456664080673363e-05, "std": 0.0318053737282753, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_out.0.bias": { "min": -0.19293668866157532, "max": 0.19509124755859375, "mean": -0.0029669972136616707, "std": 0.06252549588680267, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.3.g": { "min": 0.348905086517334, "max": 1.0837733745574951, "mean": 0.6670998334884644, "std": 0.05524366348981857, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.4.ff.0.0.weight": { "min": -0.22561387717723846, "max": 0.25142621994018555, "mean": 0.00035854580346494913, "std": 0.04075940325856209, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.9.4.ff.0.0.bias": { "min": -0.09107953310012817, "max": 0.04363439604640007, "mean": -0.030079854652285576, "std": 0.017611680552363396, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.9.4.ff.2.weight": { "min": -0.353360116481781, "max": 0.30395275354385376, "mean": -4.4715885451296344e-05, "std": 0.03712251037359238, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.9.4.ff.2.bias": { "min": -0.16167744994163513, "max": 0.06346611678600311, "mean": -7.887817628215998e-05, "std": 0.019426995888352394, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.1.g": { "min": 0.3487050533294678, "max": 0.7219327092170715, "mean": 0.5423474907875061, "std": 0.0390637181699276, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_q.weight": { "min": -0.21929427981376648, "max": 0.22339415550231934, "mean": -1.152800177806057e-05, "std": 0.039230845868587494, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_q.bias": { "min": -0.11837491393089294, "max": 0.17054983973503113, "mean": 0.0002821336966007948, "std": 0.025116898119449615, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_k.weight": { "min": -0.24647969007492065, "max": 0.3006535768508911, "mean": -3.7006771890446544e-05, "std": 0.038930293172597885, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_k.bias": { "min": -3.505005121231079, "max": 3.7144510746002197, "mean": 0.01584703102707863, "std": 0.782384991645813, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_v.weight": { "min": -0.21911682188510895, "max": 0.237393319606781, "mean": -1.3131610103300773e-05, "std": 0.03630334511399269, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_v.bias": { "min": -0.04719853773713112, "max": 0.051371362060308456, "mean": 0.00048090319614857435, "std": 0.013523470610380173, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_out.0.weight": { "min": -0.21416644752025604, "max": 0.21722018718719482, "mean": 5.635957859340124e-05, "std": 0.033615801483392715, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_out.0.bias": { "min": -0.2113313376903534, "max": 0.2312089204788208, "mean": -0.005099226720631123, "std": 0.06185970827937126, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.3.g": { "min": 0.3619273602962494, "max": 1.1010714769363403, "mean": 0.699254035949707, "std": 0.053593844175338745, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.4.ff.0.0.weight": { "min": -0.23512133955955505, "max": 0.24475844204425812, "mean": 0.00046337698586285114, "std": 0.04126880317926407, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.10.4.ff.0.0.bias": { "min": -0.09808072447776794, "max": 0.06809643656015396, "mean": -0.03143021836876869, "std": 0.01812811754643917, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.10.4.ff.2.weight": { "min": -0.3017187714576721, "max": 0.3516466021537781, "mean": -8.262180926976725e-05, "std": 0.040274444967508316, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.10.4.ff.2.bias": { "min": -0.15225963294506073, "max": 0.149653360247612, "mean": 0.00026317729498259723, "std": 0.023038743063807487, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.1.g": { "min": 0.9992579817771912, "max": 1.0015391111373901, "mean": 1.0000743865966797, "std": 0.0006371568888425827, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_q.weight": { "min": -0.03125908225774765, "max": 0.0312553308904171, "mean": -1.9290733689558692e-05, "std": 0.01804095134139061, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_q.bias": { "min": -0.03122831881046295, "max": 0.030987922102212906, "mean": -0.001084161689504981, "std": 0.017950566485524178, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_k.weight": { "min": -0.031255997717380524, "max": 0.031259775161743164, "mean": 3.548155291355215e-06, "std": 0.01804135926067829, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_k.bias": { "min": -0.031154237687587738, "max": 0.03117459826171398, "mean": 0.0003339198010507971, "std": 0.018062766641378403, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_v.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_v.bias": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_out.0.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_out.0.bias": { "min": -0.000624487001914531, "max": 0.0007099520298652351, "mean": 4.385071406431962e-06, "std": 0.00018961619934998453, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.3.g": { "min": 0.99758380651474, "max": 1.0029877424240112, "mean": 0.9999918341636658, "std": 0.0008515770896337926, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.4.ff.0.0.weight": { "min": -0.0335661917924881, "max": 0.03370394930243492, "mean": -6.065281013434287e-06, "std": 0.018047738820314407, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.11.4.ff.0.0.bias": { "min": -0.03307846933603287, "max": 0.033399470150470734, "mean": -0.00018566125072538853, "std": 0.017954055219888687, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.11.4.ff.2.weight": { "min": -0.001481670537032187, "max": 0.001570003922097385, "mean": 1.885646042865119e-06, "std": 0.0002906274457927793, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.11.4.ff.2.bias": { "min": -0.0005839330260641873, "max": 0.0007720313151367009, "mean": 7.4740901254699565e-06, "std": 0.00017145519086625427, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.1.g": { "min": 0.3833076059818268, "max": 0.7191433310508728, "mean": 0.5806823968887329, "std": 0.03885458782315254, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_q.weight": { "min": -0.23893095552921295, "max": 0.19658136367797852, "mean": 2.6083449483849108e-05, "std": 0.03746617212891579, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_q.bias": { "min": -0.11882374435663223, "max": 0.16677531599998474, "mean": 0.0009812903590500355, "std": 0.027557166293263435, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_k.weight": { "min": -0.24655194580554962, "max": 0.49992480874061584, "mean": -5.045527359470725e-05, "std": 0.0376235656440258, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_k.bias": { "min": -3.941847801208496, "max": 3.7689895629882812, "mean": -0.0035720239393413067, "std": 0.6813404560089111, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_v.weight": { "min": -0.22746945917606354, "max": 0.25183355808258057, "mean": -1.1859048754558899e-05, "std": 0.037434790283441544, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_v.bias": { "min": -0.07158222794532776, "max": 0.08058217912912369, "mean": -0.0005094742518849671, "std": 0.01565464586019516, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_out.0.weight": { "min": -0.22813726961612701, "max": 0.2576807737350464, "mean": -2.8760241548297927e-05, "std": 0.03542162850499153, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_out.0.bias": { "min": -0.20052045583724976, "max": 0.21483510732650757, "mean": -0.005527016241103411, "std": 0.06832844763994217, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.3.g": { "min": 0.40501996874809265, "max": 1.1893715858459473, "mean": 0.7378885746002197, "std": 0.055228959769010544, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.4.ff.0.0.weight": { "min": -0.22087574005126953, "max": 0.2456100732088089, "mean": 0.0005211896495893598, "std": 0.04133577644824982, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.12.4.ff.0.0.bias": { "min": -0.1032254695892334, "max": 0.024186622351408005, "mean": -0.03266698122024536, "std": 0.018890688195824623, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.12.4.ff.2.weight": { "min": -0.44966718554496765, "max": 0.4224751591682434, "mean": -0.00043509487295523286, "std": 0.04689602553844452, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.12.4.ff.2.bias": { "min": -0.2515262961387634, "max": 0.47013524174690247, "mean": 0.0032045203261077404, "std": 0.04452691972255707, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.0.weight": { "min": -0.31688347458839417, "max": 0.33314356207847595, "mean": -2.516225868021138e-05, "std": 0.021287811920046806, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.13.1.g": { "min": 0.32447901368141174, "max": 0.6856404542922974, "mean": 0.5710100531578064, "std": 0.04470637068152428, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_q.weight": { "min": -0.1645602136850357, "max": 0.17448709905147552, "mean": -4.871720739174634e-05, "std": 0.033182382583618164, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_q.bias": { "min": -0.1869296431541443, "max": 0.14326152205467224, "mean": 3.4562835935503244e-05, "std": 0.029701465740799904, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_k.weight": { "min": -0.3810564875602722, "max": 0.24595260620117188, "mean": -9.857794793788344e-06, "std": 0.032763585448265076, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_k.bias": { "min": -3.6554818153381348, "max": 3.289768695831299, "mean": -0.014251366257667542, "std": 0.9850791096687317, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_v.weight": { "min": -0.2347586303949356, "max": 0.24735252559185028, "mean": -1.8151138647226617e-05, "std": 0.041698191314935684, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_v.bias": { "min": -0.07252755761146545, "max": 0.154456228017807, "mean": 0.0006656115292571485, "std": 0.025164911523461342, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_out.0.weight": { "min": -0.2663172781467438, "max": 0.24813731014728546, "mean": -1.5164550859481096e-05, "std": 0.04013926163315773, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_out.0.bias": { "min": -0.18959401547908783, "max": 0.19463232159614563, "mean": -0.0012374802026897669, "std": 0.06668464839458466, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.3.g": { "min": 0.32920119166374207, "max": 0.99962317943573, "mean": 0.7191556692123413, "std": 0.052332110702991486, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.4.ff.0.0.weight": { "min": -0.2317013144493103, "max": 0.24530917406082153, "mean": 0.00018264415848534554, "std": 0.04090017080307007, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.13.4.ff.0.0.bias": { "min": -0.11453195661306381, "max": 0.01904553547501564, "mean": -0.04247689247131348, "std": 0.01886470802128315, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.13.4.ff.2.weight": { "min": -0.38964197039604187, "max": 0.4074561595916748, "mean": -2.184425829909742e-05, "std": 0.048533279448747635, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.13.4.ff.2.bias": { "min": -0.692954957485199, "max": 0.41268306970596313, "mean": 0.0008480865508317947, "std": 0.060282234102487564, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.0.weight": { "min": -0.0014125935267657042, "max": 1.0007404088974, "mean": 0.00048819734365679324, "std": 0.02208949252963066, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.14.1.g": { "min": 0.9992830157279968, "max": 1.0015500783920288, "mean": 1.0000728368759155, "std": 0.0006243661628104746, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_q.weight": { "min": -0.03125380352139473, "max": 0.03125770390033722, "mean": -2.1020379790570587e-05, "std": 0.018032291904091835, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_q.bias": { "min": -0.031215310096740723, "max": 0.031232142820954323, "mean": -0.0006769997999072075, "std": 0.017826829105615616, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_k.weight": { "min": -0.03125686198472977, "max": 0.03126228600740433, "mean": -8.83147367858328e-06, "std": 0.018031319603323936, "sparsity": 9.5367431640625e-07, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_k.bias": { "min": -0.031232059001922607, "max": 0.031244752928614616, "mean": -0.0007297524134628475, "std": 0.017941756173968315, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_v.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_v.bias": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_out.0.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_out.0.bias": { "min": -0.0005121154244989157, "max": 0.000419745163526386, "mean": -3.856697276205523e-06, "std": 0.00015613996947649866, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.3.g": { "min": 0.9973074197769165, "max": 1.0023618936538696, "mean": 0.9995496869087219, "std": 0.0008333163568750024, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.4.ff.0.0.weight": { "min": -0.03326094523072243, "max": 0.03284362331032753, "mean": -2.9510356398532167e-06, "std": 0.018027810379862785, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.14.4.ff.0.0.bias": { "min": -0.03245115652680397, "max": 0.03129417076706886, "mean": -0.0005187825299799442, "std": 0.018035637214779854, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.14.4.ff.2.weight": { "min": -0.001710034441202879, "max": 0.001517186756245792, "mean": -1.1187451036676066e-06, "std": 0.00028821235173381865, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.14.4.ff.2.bias": { "min": -0.0004745775950141251, "max": 0.00038665023748762906, "mean": -3.4791635243891506e-06, "std": 0.00014281406765803695, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.0.weight": { "min": -0.23430979251861572, "max": 0.27249982953071594, "mean": 6.625029982387787e-06, "std": 0.018810328096151352, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.15.1.g": { "min": 0.32144901156425476, "max": 0.6939529180526733, "mean": 0.5816143751144409, "std": 0.04593788832426071, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_q.weight": { "min": -0.18192411959171295, "max": 0.19777271151542664, "mean": -1.1577552868402563e-05, "std": 0.03318414464592934, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_q.bias": { "min": -0.16048845648765564, "max": 0.12929441034793854, "mean": -0.0010730556678026915, "std": 0.03413493558764458, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_k.weight": { "min": -0.3323971629142761, "max": 0.31116729974746704, "mean": -1.0262037903885357e-05, "std": 0.032234691083431244, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_k.bias": { "min": -7.802551746368408, "max": 8.761726379394531, "mean": 0.0934542790055275, "std": 1.6194651126861572, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_v.weight": { "min": -0.23397472500801086, "max": 0.24182309210300446, "mean": 4.162585537414998e-05, "std": 0.040856119245290756, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_v.bias": { "min": -0.07595551013946533, "max": 0.06575819849967957, "mean": 0.00048204767517745495, "std": 0.019416553899645805, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_out.0.weight": { "min": -0.24591538310050964, "max": 0.23388886451721191, "mean": -3.2548523449804634e-06, "std": 0.039430882781744, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_out.0.bias": { "min": -0.16298307478427887, "max": 0.16088849306106567, "mean": 0.0016233095666393638, "std": 0.06529011577367783, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.3.g": { "min": 0.5571500658988953, "max": 0.9436134696006775, "mean": 0.7128155827522278, "std": 0.0401235930621624, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.4.ff.0.0.weight": { "min": -0.22800666093826294, "max": 0.2548002004623413, "mean": -4.557950160233304e-05, "std": 0.0405743233859539, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.15.4.ff.0.0.bias": { "min": -0.13472457230091095, "max": 0.022118322551250458, "mean": -0.04135219752788544, "std": 0.01838735118508339, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.15.4.ff.2.weight": { "min": -0.42162197828292847, "max": 0.39239510893821716, "mean": -4.3281570469844155e-06, "std": 0.0477834977209568, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.15.4.ff.2.bias": { "min": -0.6071848273277283, "max": 0.6512866020202637, "mean": 0.0015846553724259138, "std": 0.05683678016066551, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.0.weight": { "min": -0.25181835889816284, "max": 0.32083579897880554, "mean": -6.167530045786407e-06, "std": 0.01961352303624153, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.16.1.g": { "min": 0.35955187678337097, "max": 0.6821539998054504, "mean": 0.5706835389137268, "std": 0.04298859089612961, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_q.weight": { "min": -0.22016161680221558, "max": 0.17701253294944763, "mean": -3.445023321546614e-05, "std": 0.03429866582155228, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_q.bias": { "min": -0.16314493119716644, "max": 0.23276831209659576, "mean": 0.000363295606803149, "std": 0.032813575118780136, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_k.weight": { "min": -0.26391661167144775, "max": 0.23982854187488556, "mean": -5.2968603995395824e-05, "std": 0.03389734774827957, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_k.bias": { "min": -4.854297161102295, "max": 5.090524673461914, "mean": 0.04387897625565529, "std": 1.229095458984375, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_v.weight": { "min": -0.24643158912658691, "max": 0.2503342926502228, "mean": 7.21608375897631e-05, "std": 0.04398628696799278, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_v.bias": { "min": -0.06249094381928444, "max": 0.05441959202289581, "mean": 0.0006457456620410085, "std": 0.017188476398587227, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_out.0.weight": { "min": -0.28642886877059937, "max": 0.2721048593521118, "mean": -5.0093196477973834e-05, "std": 0.04298442229628563, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_out.0.bias": { "min": -0.16100700199604034, "max": 0.1703459769487381, "mean": -0.002886796835809946, "std": 0.05929969996213913, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.3.g": { "min": 0.5198073983192444, "max": 0.9330060482025146, "mean": 0.7133970260620117, "std": 0.03842265531420708, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.4.ff.0.0.weight": { "min": -0.2378769814968109, "max": 0.2487393021583557, "mean": 0.00046459035365842283, "std": 0.04045308753848076, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.16.4.ff.0.0.bias": { "min": -0.1450035721063614, "max": 0.0410858653485775, "mean": -0.03969570994377136, "std": 0.020541729405522346, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.16.4.ff.2.weight": { "min": -0.5323667526245117, "max": 0.5824663043022156, "mean": 5.913888344366569e-06, "std": 0.048858821392059326, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.16.4.ff.2.bias": { "min": -0.5189786553382874, "max": 0.49333813786506653, "mean": 0.0023667975328862667, "std": 0.0534440316259861, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.0.weight": { "min": -0.2737034261226654, "max": 0.31558021903038025, "mean": 1.935112777573522e-06, "std": 0.02005006931722164, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.17.1.g": { "min": 0.36589479446411133, "max": 0.7117040157318115, "mean": 0.5931321382522583, "std": 0.0459616482257843, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_q.weight": { "min": -0.21081827580928802, "max": 0.19904154539108276, "mean": 3.062835457967594e-05, "std": 0.03486720845103264, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_q.bias": { "min": -0.18713217973709106, "max": 0.20344023406505585, "mean": 0.000952105619944632, "std": 0.031497493386268616, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_k.weight": { "min": -0.28968340158462524, "max": 0.33981209993362427, "mean": -4.6875291445758194e-05, "std": 0.03458764776587486, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_k.bias": { "min": -3.876854181289673, "max": 3.3869495391845703, "mean": 0.014455719850957394, "std": 0.8583089709281921, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_v.weight": { "min": -0.22449138760566711, "max": 0.2498161643743515, "mean": -3.885651949531166e-06, "std": 0.04222925379872322, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_v.bias": { "min": -0.05526581034064293, "max": 0.04652895778417587, "mean": -2.1849831682629883e-05, "std": 0.015840303152799606, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_out.0.weight": { "min": -0.2932976484298706, "max": 0.29035061597824097, "mean": -7.6227315730648115e-06, "std": 0.041944343596696854, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_out.0.bias": { "min": -0.12483495473861694, "max": 0.2589971721172333, "mean": -0.003243764629587531, "std": 0.05317297205328941, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.3.g": { "min": 0.45624497532844543, "max": 0.8444257378578186, "mean": 0.705470621585846, "std": 0.03522758185863495, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.4.ff.0.0.weight": { "min": -0.5120490789413452, "max": 0.3481951355934143, "mean": 0.00034297475940547884, "std": 0.040198490023612976, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.17.4.ff.0.0.bias": { "min": -0.18573546409606934, "max": 0.03953690081834793, "mean": -0.03938683122396469, "std": 0.021360911428928375, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.17.4.ff.2.weight": { "min": -0.5439703464508057, "max": 0.5556368231773376, "mean": -7.127778371796012e-05, "std": 0.05073383450508118, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.17.4.ff.2.bias": { "min": -0.5116579532623291, "max": 0.6641839742660522, "mean": 0.0024420106783509254, "std": 0.04951965808868408, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.0.weight": { "min": -0.33250588178634644, "max": 0.2653454840183258, "mean": 3.314120021968847e-06, "std": 0.019387103617191315, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.18.1.g": { "min": 0.32199233770370483, "max": 0.7664577960968018, "mean": 0.6510406136512756, "std": 0.04532792791724205, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_q.weight": { "min": -0.24981507658958435, "max": 0.21987095475196838, "mean": -1.8786176951834932e-06, "std": 0.03650160878896713, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_q.bias": { "min": -0.32696181535720825, "max": 0.286738783121109, "mean": -0.0006850577774457633, "std": 0.038556959480047226, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_k.weight": { "min": -0.310026079416275, "max": 0.3700660765171051, "mean": 6.51663140160963e-05, "std": 0.03624221682548523, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_k.bias": { "min": -4.716763496398926, "max": 5.807004928588867, "mean": 0.03795414790511131, "std": 1.4130035638809204, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_v.weight": { "min": -0.22150921821594238, "max": 0.20585696399211884, "mean": -7.512117736041546e-05, "std": 0.0424848347902298, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_v.bias": { "min": -0.07765647768974304, "max": 0.05150295048952103, "mean": -0.0009257810888811946, "std": 0.01641261577606201, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_out.0.weight": { "min": -0.3305484354496002, "max": 0.3292558491230011, "mean": -4.674302545026876e-06, "std": 0.042791128158569336, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_out.0.bias": { "min": -0.2847578823566437, "max": 0.11202681809663773, "mean": -0.0012038333807140589, "std": 0.04701409116387367, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.3.g": { "min": 0.4860534965991974, "max": 0.8868187069892883, "mean": 0.7373650074005127, "std": 0.03824280574917793, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.4.ff.0.0.weight": { "min": -0.3623279929161072, "max": 0.2745623290538788, "mean": 5.109083213028498e-05, "std": 0.04064391553401947, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.18.4.ff.0.0.bias": { "min": -0.24753618240356445, "max": 0.046382758766412735, "mean": -0.039263010025024414, "std": 0.023289302363991737, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.18.4.ff.2.weight": { "min": -0.6261420249938965, "max": 0.5965140461921692, "mean": -5.986806354485452e-05, "std": 0.05311597138643265, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.18.4.ff.2.bias": { "min": -0.7094455361366272, "max": 0.2657928168773651, "mean": 0.0009170880075544119, "std": 0.05122483894228935, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.0.weight": { "min": -0.3433697819709778, "max": 0.30368947982788086, "mean": 2.3889015210443176e-07, "std": 0.019135670736432076, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.19.1.g": { "min": 0.3497507870197296, "max": 0.7829343676567078, "mean": 0.638809323310852, "std": 0.04924893379211426, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_q.weight": { "min": -0.20543725788593292, "max": 0.20679403841495514, "mean": -5.990585486870259e-05, "std": 0.037696100771427155, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_q.bias": { "min": -0.25862252712249756, "max": 0.26803287863731384, "mean": -0.00040157014154829085, "std": 0.04459596797823906, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_k.weight": { "min": -0.3540649712085724, "max": 0.32237085700035095, "mean": -6.968005436647218e-06, "std": 0.03720472380518913, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_k.bias": { "min": -5.260965824127197, "max": 4.203993797302246, "mean": -0.026412349194288254, "std": 1.006641149520874, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_v.weight": { "min": -0.23861756920814514, "max": 0.24335098266601562, "mean": -2.5078054022742435e-05, "std": 0.043209534138441086, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_v.bias": { "min": -0.06233251839876175, "max": 0.056672900915145874, "mean": 0.00034255694481544197, "std": 0.014151446521282196, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_out.0.weight": { "min": -0.4369187653064728, "max": 0.373432457447052, "mean": 1.4437458048632834e-05, "std": 0.044120825827121735, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_out.0.bias": { "min": -0.09643160551786423, "max": 0.1759035885334015, "mean": -0.0006591043202206492, "std": 0.035157084465026855, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.3.g": { "min": 0.4216560423374176, "max": 1.0694262981414795, "mean": 0.7483175992965698, "std": 0.04205932468175888, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.4.ff.0.0.weight": { "min": -0.2665790617465973, "max": 0.29692915081977844, "mean": -7.955127512104809e-05, "std": 0.04080403223633766, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.19.4.ff.0.0.bias": { "min": -0.18574701249599457, "max": 0.043912798166275024, "mean": -0.03681863471865654, "std": 0.025608953088521957, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.19.4.ff.2.weight": { "min": -0.45691967010498047, "max": 0.486579954624176, "mean": 4.3823405576404184e-05, "std": 0.05420882627367973, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.19.4.ff.2.bias": { "min": -0.28651049733161926, "max": 0.5512732267379761, "mean": -0.0008804658427834511, "std": 0.04782622680068016, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.0.weight": { "min": -0.2928602397441864, "max": 0.3227991461753845, "mean": 6.5394251578254625e-06, "std": 0.019969874992966652, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.20.1.g": { "min": 0.2909410297870636, "max": 0.7601505517959595, "mean": 0.6508233547210693, "std": 0.05213586986064911, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_q.weight": { "min": -0.2434667944908142, "max": 0.2616351246833801, "mean": -6.0445322560553905e-06, "std": 0.039612967520952225, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_q.bias": { "min": -0.2675444483757019, "max": 0.1998518854379654, "mean": -0.0008808361599221826, "std": 0.05175328254699707, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_k.weight": { "min": -0.2721408009529114, "max": 0.2537347078323364, "mean": 4.015575541416183e-06, "std": 0.03871006891131401, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_k.bias": { "min": -12.963685989379883, "max": 15.945606231689453, "mean": 0.033225029706954956, "std": 1.9889812469482422, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_v.weight": { "min": -0.20711851119995117, "max": 0.22583316266536713, "mean": -7.227503374451771e-05, "std": 0.04055361449718475, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_v.bias": { "min": -0.06934971362352371, "max": 0.06323137879371643, "mean": 0.00015275523765012622, "std": 0.014742234721779823, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_out.0.weight": { "min": -0.4650252163410187, "max": 0.3206908702850342, "mean": 1.950068872247357e-05, "std": 0.04058856889605522, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_out.0.bias": { "min": -0.06406640261411667, "max": 0.11521138995885849, "mean": 0.0011922243284061551, "std": 0.02470523677766323, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.3.g": { "min": 0.3746338486671448, "max": 0.9322722554206848, "mean": 0.7508488893508911, "std": 0.040187884122133255, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.4.ff.0.0.weight": { "min": -0.2793060839176178, "max": 0.2731705904006958, "mean": -0.00016857523587532341, "std": 0.04099458083510399, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.20.4.ff.0.0.bias": { "min": -0.198820099234581, "max": 0.05085344612598419, "mean": -0.03202417492866516, "std": 0.025111379101872444, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.20.4.ff.2.weight": { "min": -0.6573337912559509, "max": 0.5352881550788879, "mean": -4.8675712605472654e-05, "std": 0.05284544453024864, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.20.4.ff.2.bias": { "min": -0.19310522079467773, "max": 0.5820621848106384, "mean": -0.000515035935677588, "std": 0.04106917232275009, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.0.weight": { "min": -0.4177176356315613, "max": 0.37193918228149414, "mean": 6.035062597220531e-06, "std": 0.02162161096930504, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.21.1.g": { "min": 0.21426703035831451, "max": 0.7471129894256592, "mean": 0.649559497833252, "std": 0.05437251552939415, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_q.weight": { "min": -0.20954684913158417, "max": 0.19578267633914948, "mean": 4.0035050915321335e-05, "std": 0.03946496546268463, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_q.bias": { "min": -0.3292764723300934, "max": 0.2593560516834259, "mean": -0.0032243705354630947, "std": 0.056255340576171875, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_k.weight": { "min": -0.20562483370304108, "max": 0.2547135651111603, "mean": 5.434878767118789e-05, "std": 0.038567062467336655, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_k.bias": { "min": -6.242823600769043, "max": 6.931674957275391, "mean": 0.04833440110087395, "std": 1.384947657585144, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_v.weight": { "min": -0.20960542559623718, "max": 0.2301599383354187, "mean": -5.232992862147512e-06, "std": 0.04131288081407547, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_v.bias": { "min": -0.04387415945529938, "max": 0.03594405576586723, "mean": 4.847475793212652e-06, "std": 0.012800573371350765, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_out.0.weight": { "min": -0.3978384733200073, "max": 0.34482401609420776, "mean": -5.554188828682527e-05, "std": 0.04238930344581604, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_out.0.bias": { "min": -0.05505719780921936, "max": 0.06286165118217468, "mean": 0.00037010322557762265, "std": 0.018672354519367218, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.3.g": { "min": 0.3501102924346924, "max": 1.0451011657714844, "mean": 0.7893368601799011, "std": 0.04874463006854057, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.4.ff.0.0.weight": { "min": -0.33344367146492004, "max": 0.3858579397201538, "mean": -0.00016948734992183745, "std": 0.041480328887701035, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.21.4.ff.0.0.bias": { "min": -0.15724380314350128, "max": 0.05914618447422981, "mean": -0.03183374181389809, "std": 0.0251409150660038, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.21.4.ff.2.weight": { "min": -0.6964119076728821, "max": 0.4686836302280426, "mean": -9.159947512671351e-05, "std": 0.05179150402545929, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.21.4.ff.2.bias": { "min": -0.24826322495937347, "max": 0.3285461962223053, "mean": -0.00024742598179727793, "std": 0.0414327010512352, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.0.weight": { "min": -0.2872468829154968, "max": 0.35023656487464905, "mean": -2.1327541617210954e-06, "std": 0.024238986894488335, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.22.1.g": { "min": 0.19656415283679962, "max": 0.7792240381240845, "mean": 0.6702939867973328, "std": 0.05869279056787491, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_q.weight": { "min": -0.22861742973327637, "max": 0.2311892956495285, "mean": -1.9813087419606745e-05, "std": 0.04044097661972046, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_q.bias": { "min": -0.2196549028158188, "max": 0.24067850410938263, "mean": 0.0007784939371049404, "std": 0.055799830704927444, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_k.weight": { "min": -0.21562136709690094, "max": 0.22666974365711212, "mean": -7.154869672376662e-05, "std": 0.039377160370349884, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_k.bias": { "min": -8.90437126159668, "max": 9.067243576049805, "mean": -0.0012503080070018768, "std": 1.8481035232543945, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_v.weight": { "min": -0.2693117558956146, "max": 0.2589534521102905, "mean": 4.357095167506486e-05, "std": 0.038407646119594574, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_v.bias": { "min": -0.05761706829071045, "max": 0.05768207088112831, "mean": 0.0003497683210298419, "std": 0.01472416240721941, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_out.0.weight": { "min": -0.2650063633918762, "max": 0.2886802554130554, "mean": -6.175818271003664e-05, "std": 0.039074014872312546, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_out.0.bias": { "min": -0.04376920685172081, "max": 0.03731464967131615, "mean": -8.56523183756508e-05, "std": 0.013365812599658966, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.3.g": { "min": 0.33950644731521606, "max": 1.0926629304885864, "mean": 0.8637055158615112, "std": 0.06385361403226852, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.4.ff.0.0.weight": { "min": -0.42327165603637695, "max": 0.41919341683387756, "mean": 0.00031273282365873456, "std": 0.0435028038918972, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.22.4.ff.0.0.bias": { "min": -0.21477100253105164, "max": 0.17062197625637054, "mean": -0.02948208898305893, "std": 0.0319497250020504, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.22.4.ff.2.weight": { "min": -0.5996779799461365, "max": 0.5596659183502197, "mean": -0.00015256987535394728, "std": 0.05344602093100548, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.22.4.ff.2.bias": { "min": -0.17847254872322083, "max": 0.37667688727378845, "mean": 0.0013643621932715178, "std": 0.03730973228812218, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.0.weight": { "min": -0.3942721486091614, "max": 0.36895284056663513, "mean": 3.6433208151720464e-05, "std": 0.028621351346373558, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.23.1.g": { "min": 0.2903022766113281, "max": 0.826566219329834, "mean": 0.7055737376213074, "std": 0.06789274513721466, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_q.weight": { "min": -0.9261494278907776, "max": 1.0264488458633423, "mean": -2.5618217478040606e-05, "std": 0.04762551560997963, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_q.bias": { "min": -0.8783111572265625, "max": 0.81496262550354, "mean": -0.0003140262851957232, "std": 0.09553777426481247, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_k.weight": { "min": -0.269389808177948, "max": 0.24095474183559418, "mean": -2.2922709831618704e-05, "std": 0.0389564111828804, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_k.bias": { "min": -23.739809036254883, "max": 22.848268508911133, "mean": -0.0918719619512558, "std": 4.069859504699707, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_v.weight": { "min": -0.22777613997459412, "max": 0.24508334696292877, "mean": -2.581250009825453e-05, "std": 0.038639314472675323, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_v.bias": { "min": -0.06041834130883217, "max": 0.04605862498283386, "mean": -0.00014601447037421167, "std": 0.014698855578899384, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_out.0.weight": { "min": -0.3384549617767334, "max": 0.37450915575027466, "mean": 7.243736035889015e-06, "std": 0.04081535339355469, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_out.0.bias": { "min": -0.046464741230010986, "max": 0.19570393860340118, "mean": 0.0002726423554122448, "std": 0.013569480739533901, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.3.g": { "min": 0.37450751662254333, "max": 1.1300209760665894, "mean": 0.8900179862976074, "std": 0.06398562341928482, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.4.ff.0.0.weight": { "min": -0.4477962851524353, "max": 0.5424686074256897, "mean": 2.4588229280197993e-05, "std": 0.04556749016046524, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.23.4.ff.0.0.bias": { "min": -0.22407831251621246, "max": 0.08827000111341476, "mean": -0.032015662640333176, "std": 0.03776349499821663, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.23.4.ff.2.weight": { "min": -0.7253148555755615, "max": 0.6892704367637634, "mean": 3.4532837162259966e-05, "std": 0.051778074353933334, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.23.4.ff.2.bias": { "min": -0.174549400806427, "max": 0.21855904161930084, "mean": 3.998563624918461e-05, "std": 0.03177855163812637, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.0.weight": { "min": -0.34027042984962463, "max": 0.37425076961517334, "mean": 4.2934465454891324e-05, "std": 0.03414500877261162, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.24.1.g": { "min": 0.31756624579429626, "max": 1.2868921756744385, "mean": 0.6014678478240967, "std": 0.08346211910247803, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_q.weight": { "min": -0.2833472490310669, "max": 0.26022085547447205, "mean": -3.076446546401712e-06, "std": 0.03598489984869957, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_q.bias": { "min": -0.2355138659477234, "max": 0.2053714245557785, "mean": 0.0002318831393495202, "std": 0.05601060390472412, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_k.weight": { "min": -0.43542587757110596, "max": 0.32521018385887146, "mean": 2.451425461913459e-05, "std": 0.034135881811380386, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_k.bias": { "min": -5.544894218444824, "max": 7.312623977661133, "mean": -0.007366415113210678, "std": 0.6992328763008118, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_v.weight": { "min": -0.34384819865226746, "max": 0.3634955585002899, "mean": 0.00010338952415622771, "std": 0.047827959060668945, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_v.bias": { "min": -0.07375156134366989, "max": 0.06036222726106644, "mean": 0.0009326444123871624, "std": 0.014949453994631767, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_out.0.weight": { "min": -0.25554174184799194, "max": 0.28655222058296204, "mean": 4.425931365403812e-06, "std": 0.04155518114566803, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_out.0.bias": { "min": -0.05532686412334442, "max": 0.06282556056976318, "mean": 0.00014147879846859723, "std": 0.0071739982813596725, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.3.g": { "min": 0.49367740750312805, "max": 1.2208529710769653, "mean": 1.0134257078170776, "std": 0.11743961274623871, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.4.ff.0.0.weight": { "min": -1.0936145782470703, "max": 1.0469423532485962, "mean": -4.9777743697632104e-05, "std": 0.05241077393293381, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.24.4.ff.0.0.bias": { "min": -0.2236752212047577, "max": 0.1727852076292038, "mean": -0.027246128767728806, "std": 0.03635065257549286, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.24.4.ff.2.weight": { "min": -0.8845650553703308, "max": 0.9224934577941895, "mean": -0.00014609616482630372, "std": 0.05328214913606644, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.24.4.ff.2.bias": { "min": -0.17102202773094177, "max": 0.3799096643924713, "mean": 0.0033686563838273287, "std": 0.039898186922073364, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.0.weight": { "min": -0.7772517800331116, "max": 0.7235067486763, "mean": 1.9145372789353132e-05, "std": 0.04616532474756241, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.25.1.g": { "min": 0.3385593295097351, "max": 1.4277493953704834, "mean": 0.948319673538208, "std": 0.20673821866512299, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_q.weight": { "min": -1.7455862760543823, "max": 1.7045449018478394, "mean": 0.00022695529332850128, "std": 0.1586858183145523, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_q.bias": { "min": -1.1996264457702637, "max": 1.0995841026306152, "mean": -0.009535307995975018, "std": 0.20383313298225403, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_k.weight": { "min": -0.42129284143447876, "max": 0.42636537551879883, "mean": 6.450986256822944e-05, "std": 0.04801839217543602, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_k.bias": { "min": -19.74388885498047, "max": 19.53899383544922, "mean": -0.24829958379268646, "std": 4.776181221008301, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_v.weight": { "min": -0.32387763261795044, "max": 0.43839961290359497, "mean": -1.2020052054140251e-05, "std": 0.04616132006049156, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_v.bias": { "min": -0.03405960276722908, "max": 0.03712477907538414, "mean": 0.000642063794657588, "std": 0.012921381741762161, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_out.0.weight": { "min": -0.7034934759140015, "max": 0.6645202040672302, "mean": 4.349739174358547e-05, "std": 0.05788357928395271, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_out.0.bias": { "min": -0.07222776859998703, "max": 0.06750176101922989, "mean": -0.00013276952086016536, "std": 0.012919425964355469, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.3.g": { "min": 0.38018617033958435, "max": 1.3909327983856201, "mean": 1.0665558576583862, "std": 0.21971333026885986, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.4.ff.0.0.weight": { "min": -0.616411030292511, "max": 0.71701979637146, "mean": 0.00011130145139759406, "std": 0.05802119895815849, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.25.4.ff.0.0.bias": { "min": -0.2196073830127716, "max": 0.22519457340240479, "mean": 0.006242883857339621, "std": 0.049728427082300186, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.25.4.ff.2.weight": { "min": -0.6296560168266296, "max": 0.889208972454071, "mean": 1.1700575669237878e-05, "std": 0.023527782410383224, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.25.4.ff.2.bias": { "min": -0.5068318247795105, "max": 0.47398847341537476, "mean": -0.0030159649904817343, "std": 0.06930278241634369, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.norm_out.g": { "min": 0.5377185344696045, "max": 1.1807185411453247, "mean": 0.782741904258728, "std": 0.09885998070240021, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.proj_out.weight": { "min": -0.2669522166252136, "max": 0.2126760631799698, "mean": -0.00022303443984128535, "std": 0.053996436297893524, "sparsity": 0.0, "shape": [ 100, 1024 ] }, "transformer.proj_out.bias": { "min": -0.23790661990642548, "max": 0.01483356487005949, "mean": -0.043959345668554306, "std": 0.03433229774236679, "sparsity": 0.0, "shape": [ 100 ] } } }