{ "layer_types": { "transformer": 391 }, "parameter_counts": { "transformer.time_embed.time_mlp.0.weight": 262144, "transformer.time_embed.time_mlp.0.bias": 1024, "transformer.time_embed.time_mlp.2.weight": 1048576, "transformer.time_embed.time_mlp.2.bias": 1024, "transformer.text_embed.text_embed.weight": 254600, "transformer.input_embed.proj.weight": 307200, "transformer.input_embed.proj.bias": 1024, "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, "transformer.layers.0.1.g": 1024, "transformer.layers.0.2.to_q.weight": 1048576, "transformer.layers.0.2.to_q.bias": 1024, "transformer.layers.0.2.to_k.weight": 1048576, "transformer.layers.0.2.to_k.bias": 1024, "transformer.layers.0.2.to_v.weight": 1048576, "transformer.layers.0.2.to_v.bias": 1024, "transformer.layers.0.2.to_out.0.weight": 1048576, "transformer.layers.0.2.to_out.0.bias": 1024, "transformer.layers.0.3.g": 1024, "transformer.layers.0.4.ff.0.0.weight": 4194304, "transformer.layers.0.4.ff.0.0.bias": 4096, "transformer.layers.0.4.ff.2.weight": 4194304, "transformer.layers.0.4.ff.2.bias": 1024, "transformer.layers.1.1.g": 1024, "transformer.layers.1.2.to_q.weight": 1048576, "transformer.layers.1.2.to_q.bias": 1024, "transformer.layers.1.2.to_k.weight": 1048576, "transformer.layers.1.2.to_k.bias": 1024, "transformer.layers.1.2.to_v.weight": 1048576, "transformer.layers.1.2.to_v.bias": 1024, "transformer.layers.1.2.to_out.0.weight": 1048576, "transformer.layers.1.2.to_out.0.bias": 1024, "transformer.layers.1.3.g": 1024, "transformer.layers.1.4.ff.0.0.weight": 4194304, "transformer.layers.1.4.ff.0.0.bias": 4096, "transformer.layers.1.4.ff.2.weight": 4194304, "transformer.layers.1.4.ff.2.bias": 1024, "transformer.layers.2.1.g": 1024, "transformer.layers.2.2.to_q.weight": 1048576, "transformer.layers.2.2.to_q.bias": 1024, "transformer.layers.2.2.to_k.weight": 1048576, "transformer.layers.2.2.to_k.bias": 1024, "transformer.layers.2.2.to_v.weight": 1048576, "transformer.layers.2.2.to_v.bias": 1024, "transformer.layers.2.2.to_out.0.weight": 1048576, "transformer.layers.2.2.to_out.0.bias": 1024, "transformer.layers.2.3.g": 1024, "transformer.layers.2.4.ff.0.0.weight": 4194304, "transformer.layers.2.4.ff.0.0.bias": 4096, "transformer.layers.2.4.ff.2.weight": 4194304, "transformer.layers.2.4.ff.2.bias": 1024, "transformer.layers.3.1.g": 1024, "transformer.layers.3.2.to_q.weight": 1048576, "transformer.layers.3.2.to_q.bias": 1024, "transformer.layers.3.2.to_k.weight": 1048576, "transformer.layers.3.2.to_k.bias": 1024, "transformer.layers.3.2.to_v.weight": 1048576, "transformer.layers.3.2.to_v.bias": 1024, "transformer.layers.3.2.to_out.0.weight": 1048576, "transformer.layers.3.2.to_out.0.bias": 1024, "transformer.layers.3.3.g": 1024, "transformer.layers.3.4.ff.0.0.weight": 4194304, "transformer.layers.3.4.ff.0.0.bias": 4096, "transformer.layers.3.4.ff.2.weight": 4194304, "transformer.layers.3.4.ff.2.bias": 1024, "transformer.layers.4.1.g": 1024, "transformer.layers.4.2.to_q.weight": 1048576, "transformer.layers.4.2.to_q.bias": 1024, "transformer.layers.4.2.to_k.weight": 1048576, "transformer.layers.4.2.to_k.bias": 1024, "transformer.layers.4.2.to_v.weight": 1048576, "transformer.layers.4.2.to_v.bias": 1024, "transformer.layers.4.2.to_out.0.weight": 1048576, "transformer.layers.4.2.to_out.0.bias": 1024, "transformer.layers.4.3.g": 1024, "transformer.layers.4.4.ff.0.0.weight": 4194304, "transformer.layers.4.4.ff.0.0.bias": 4096, "transformer.layers.4.4.ff.2.weight": 4194304, "transformer.layers.4.4.ff.2.bias": 1024, "transformer.layers.5.1.g": 1024, "transformer.layers.5.2.to_q.weight": 1048576, "transformer.layers.5.2.to_q.bias": 1024, "transformer.layers.5.2.to_k.weight": 1048576, "transformer.layers.5.2.to_k.bias": 1024, "transformer.layers.5.2.to_v.weight": 1048576, "transformer.layers.5.2.to_v.bias": 1024, "transformer.layers.5.2.to_out.0.weight": 1048576, "transformer.layers.5.2.to_out.0.bias": 1024, "transformer.layers.5.3.g": 1024, "transformer.layers.5.4.ff.0.0.weight": 4194304, "transformer.layers.5.4.ff.0.0.bias": 4096, "transformer.layers.5.4.ff.2.weight": 4194304, "transformer.layers.5.4.ff.2.bias": 1024, "transformer.layers.6.1.g": 1024, "transformer.layers.6.2.to_q.weight": 1048576, "transformer.layers.6.2.to_q.bias": 1024, "transformer.layers.6.2.to_k.weight": 1048576, "transformer.layers.6.2.to_k.bias": 1024, "transformer.layers.6.2.to_v.weight": 1048576, "transformer.layers.6.2.to_v.bias": 1024, "transformer.layers.6.2.to_out.0.weight": 1048576, "transformer.layers.6.2.to_out.0.bias": 1024, "transformer.layers.6.3.g": 1024, "transformer.layers.6.4.ff.0.0.weight": 4194304, "transformer.layers.6.4.ff.0.0.bias": 4096, "transformer.layers.6.4.ff.2.weight": 4194304, "transformer.layers.6.4.ff.2.bias": 1024, "transformer.layers.7.1.g": 1024, "transformer.layers.7.2.to_q.weight": 1048576, "transformer.layers.7.2.to_q.bias": 1024, "transformer.layers.7.2.to_k.weight": 1048576, "transformer.layers.7.2.to_k.bias": 1024, "transformer.layers.7.2.to_v.weight": 1048576, "transformer.layers.7.2.to_v.bias": 1024, "transformer.layers.7.2.to_out.0.weight": 1048576, "transformer.layers.7.2.to_out.0.bias": 1024, "transformer.layers.7.3.g": 1024, "transformer.layers.7.4.ff.0.0.weight": 4194304, "transformer.layers.7.4.ff.0.0.bias": 4096, "transformer.layers.7.4.ff.2.weight": 4194304, "transformer.layers.7.4.ff.2.bias": 1024, "transformer.layers.8.1.g": 1024, "transformer.layers.8.2.to_q.weight": 1048576, "transformer.layers.8.2.to_q.bias": 1024, "transformer.layers.8.2.to_k.weight": 1048576, "transformer.layers.8.2.to_k.bias": 1024, "transformer.layers.8.2.to_v.weight": 1048576, "transformer.layers.8.2.to_v.bias": 1024, "transformer.layers.8.2.to_out.0.weight": 1048576, "transformer.layers.8.2.to_out.0.bias": 1024, "transformer.layers.8.3.g": 1024, "transformer.layers.8.4.ff.0.0.weight": 4194304, "transformer.layers.8.4.ff.0.0.bias": 4096, "transformer.layers.8.4.ff.2.weight": 4194304, "transformer.layers.8.4.ff.2.bias": 1024, "transformer.layers.9.1.g": 1024, "transformer.layers.9.2.to_q.weight": 1048576, "transformer.layers.9.2.to_q.bias": 1024, "transformer.layers.9.2.to_k.weight": 1048576, "transformer.layers.9.2.to_k.bias": 1024, "transformer.layers.9.2.to_v.weight": 1048576, "transformer.layers.9.2.to_v.bias": 1024, "transformer.layers.9.2.to_out.0.weight": 1048576, "transformer.layers.9.2.to_out.0.bias": 1024, "transformer.layers.9.3.g": 1024, "transformer.layers.9.4.ff.0.0.weight": 4194304, "transformer.layers.9.4.ff.0.0.bias": 4096, "transformer.layers.9.4.ff.2.weight": 4194304, "transformer.layers.9.4.ff.2.bias": 1024, "transformer.layers.10.1.g": 1024, "transformer.layers.10.2.to_q.weight": 1048576, "transformer.layers.10.2.to_q.bias": 1024, "transformer.layers.10.2.to_k.weight": 1048576, "transformer.layers.10.2.to_k.bias": 1024, "transformer.layers.10.2.to_v.weight": 1048576, "transformer.layers.10.2.to_v.bias": 1024, "transformer.layers.10.2.to_out.0.weight": 1048576, "transformer.layers.10.2.to_out.0.bias": 1024, "transformer.layers.10.3.g": 1024, "transformer.layers.10.4.ff.0.0.weight": 4194304, "transformer.layers.10.4.ff.0.0.bias": 4096, "transformer.layers.10.4.ff.2.weight": 4194304, "transformer.layers.10.4.ff.2.bias": 1024, "transformer.layers.11.1.g": 1024, "transformer.layers.11.2.to_q.weight": 1048576, "transformer.layers.11.2.to_q.bias": 1024, "transformer.layers.11.2.to_k.weight": 1048576, "transformer.layers.11.2.to_k.bias": 1024, "transformer.layers.11.2.to_v.weight": 1048576, "transformer.layers.11.2.to_v.bias": 1024, "transformer.layers.11.2.to_out.0.weight": 1048576, "transformer.layers.11.2.to_out.0.bias": 1024, "transformer.layers.11.3.g": 1024, "transformer.layers.11.4.ff.0.0.weight": 4194304, "transformer.layers.11.4.ff.0.0.bias": 4096, "transformer.layers.11.4.ff.2.weight": 4194304, "transformer.layers.11.4.ff.2.bias": 1024, "transformer.layers.12.1.g": 1024, "transformer.layers.12.2.to_q.weight": 1048576, "transformer.layers.12.2.to_q.bias": 1024, "transformer.layers.12.2.to_k.weight": 1048576, "transformer.layers.12.2.to_k.bias": 1024, "transformer.layers.12.2.to_v.weight": 1048576, "transformer.layers.12.2.to_v.bias": 1024, "transformer.layers.12.2.to_out.0.weight": 1048576, "transformer.layers.12.2.to_out.0.bias": 1024, "transformer.layers.12.3.g": 1024, "transformer.layers.12.4.ff.0.0.weight": 4194304, "transformer.layers.12.4.ff.0.0.bias": 4096, "transformer.layers.12.4.ff.2.weight": 4194304, "transformer.layers.12.4.ff.2.bias": 1024, "transformer.layers.13.0.weight": 2097152, "transformer.layers.13.1.g": 1024, "transformer.layers.13.2.to_q.weight": 1048576, "transformer.layers.13.2.to_q.bias": 1024, "transformer.layers.13.2.to_k.weight": 1048576, "transformer.layers.13.2.to_k.bias": 1024, "transformer.layers.13.2.to_v.weight": 1048576, "transformer.layers.13.2.to_v.bias": 1024, "transformer.layers.13.2.to_out.0.weight": 1048576, "transformer.layers.13.2.to_out.0.bias": 1024, "transformer.layers.13.3.g": 1024, "transformer.layers.13.4.ff.0.0.weight": 4194304, "transformer.layers.13.4.ff.0.0.bias": 4096, "transformer.layers.13.4.ff.2.weight": 4194304, "transformer.layers.13.4.ff.2.bias": 1024, "transformer.layers.14.0.weight": 2097152, "transformer.layers.14.1.g": 1024, "transformer.layers.14.2.to_q.weight": 1048576, "transformer.layers.14.2.to_q.bias": 1024, "transformer.layers.14.2.to_k.weight": 1048576, "transformer.layers.14.2.to_k.bias": 1024, "transformer.layers.14.2.to_v.weight": 1048576, "transformer.layers.14.2.to_v.bias": 1024, "transformer.layers.14.2.to_out.0.weight": 1048576, "transformer.layers.14.2.to_out.0.bias": 1024, "transformer.layers.14.3.g": 1024, "transformer.layers.14.4.ff.0.0.weight": 4194304, "transformer.layers.14.4.ff.0.0.bias": 4096, "transformer.layers.14.4.ff.2.weight": 4194304, "transformer.layers.14.4.ff.2.bias": 1024, "transformer.layers.15.0.weight": 2097152, "transformer.layers.15.1.g": 1024, "transformer.layers.15.2.to_q.weight": 1048576, "transformer.layers.15.2.to_q.bias": 1024, "transformer.layers.15.2.to_k.weight": 1048576, "transformer.layers.15.2.to_k.bias": 1024, "transformer.layers.15.2.to_v.weight": 1048576, "transformer.layers.15.2.to_v.bias": 1024, "transformer.layers.15.2.to_out.0.weight": 1048576, "transformer.layers.15.2.to_out.0.bias": 1024, "transformer.layers.15.3.g": 1024, "transformer.layers.15.4.ff.0.0.weight": 4194304, "transformer.layers.15.4.ff.0.0.bias": 4096, "transformer.layers.15.4.ff.2.weight": 4194304, "transformer.layers.15.4.ff.2.bias": 1024, "transformer.layers.16.0.weight": 2097152, "transformer.layers.16.1.g": 1024, "transformer.layers.16.2.to_q.weight": 1048576, "transformer.layers.16.2.to_q.bias": 1024, "transformer.layers.16.2.to_k.weight": 1048576, "transformer.layers.16.2.to_k.bias": 1024, "transformer.layers.16.2.to_v.weight": 1048576, "transformer.layers.16.2.to_v.bias": 1024, "transformer.layers.16.2.to_out.0.weight": 1048576, "transformer.layers.16.2.to_out.0.bias": 1024, "transformer.layers.16.3.g": 1024, "transformer.layers.16.4.ff.0.0.weight": 4194304, "transformer.layers.16.4.ff.0.0.bias": 4096, "transformer.layers.16.4.ff.2.weight": 4194304, "transformer.layers.16.4.ff.2.bias": 1024, "transformer.layers.17.0.weight": 2097152, "transformer.layers.17.1.g": 1024, "transformer.layers.17.2.to_q.weight": 1048576, "transformer.layers.17.2.to_q.bias": 1024, "transformer.layers.17.2.to_k.weight": 1048576, "transformer.layers.17.2.to_k.bias": 1024, "transformer.layers.17.2.to_v.weight": 1048576, "transformer.layers.17.2.to_v.bias": 1024, "transformer.layers.17.2.to_out.0.weight": 1048576, "transformer.layers.17.2.to_out.0.bias": 1024, "transformer.layers.17.3.g": 1024, "transformer.layers.17.4.ff.0.0.weight": 4194304, "transformer.layers.17.4.ff.0.0.bias": 4096, "transformer.layers.17.4.ff.2.weight": 4194304, "transformer.layers.17.4.ff.2.bias": 1024, "transformer.layers.18.0.weight": 2097152, "transformer.layers.18.1.g": 1024, "transformer.layers.18.2.to_q.weight": 1048576, "transformer.layers.18.2.to_q.bias": 1024, "transformer.layers.18.2.to_k.weight": 1048576, "transformer.layers.18.2.to_k.bias": 1024, "transformer.layers.18.2.to_v.weight": 1048576, "transformer.layers.18.2.to_v.bias": 1024, "transformer.layers.18.2.to_out.0.weight": 1048576, "transformer.layers.18.2.to_out.0.bias": 1024, "transformer.layers.18.3.g": 1024, "transformer.layers.18.4.ff.0.0.weight": 4194304, "transformer.layers.18.4.ff.0.0.bias": 4096, "transformer.layers.18.4.ff.2.weight": 4194304, "transformer.layers.18.4.ff.2.bias": 1024, "transformer.layers.19.0.weight": 2097152, "transformer.layers.19.1.g": 1024, "transformer.layers.19.2.to_q.weight": 1048576, "transformer.layers.19.2.to_q.bias": 1024, "transformer.layers.19.2.to_k.weight": 1048576, "transformer.layers.19.2.to_k.bias": 1024, "transformer.layers.19.2.to_v.weight": 1048576, "transformer.layers.19.2.to_v.bias": 1024, "transformer.layers.19.2.to_out.0.weight": 1048576, "transformer.layers.19.2.to_out.0.bias": 1024, "transformer.layers.19.3.g": 1024, "transformer.layers.19.4.ff.0.0.weight": 4194304, "transformer.layers.19.4.ff.0.0.bias": 4096, "transformer.layers.19.4.ff.2.weight": 4194304, "transformer.layers.19.4.ff.2.bias": 1024, "transformer.layers.20.0.weight": 2097152, "transformer.layers.20.1.g": 1024, "transformer.layers.20.2.to_q.weight": 1048576, "transformer.layers.20.2.to_q.bias": 1024, "transformer.layers.20.2.to_k.weight": 1048576, "transformer.layers.20.2.to_k.bias": 1024, "transformer.layers.20.2.to_v.weight": 1048576, "transformer.layers.20.2.to_v.bias": 1024, "transformer.layers.20.2.to_out.0.weight": 1048576, "transformer.layers.20.2.to_out.0.bias": 1024, "transformer.layers.20.3.g": 1024, "transformer.layers.20.4.ff.0.0.weight": 4194304, "transformer.layers.20.4.ff.0.0.bias": 4096, "transformer.layers.20.4.ff.2.weight": 4194304, "transformer.layers.20.4.ff.2.bias": 1024, "transformer.layers.21.0.weight": 2097152, "transformer.layers.21.1.g": 1024, "transformer.layers.21.2.to_q.weight": 1048576, "transformer.layers.21.2.to_q.bias": 1024, "transformer.layers.21.2.to_k.weight": 1048576, "transformer.layers.21.2.to_k.bias": 1024, "transformer.layers.21.2.to_v.weight": 1048576, "transformer.layers.21.2.to_v.bias": 1024, "transformer.layers.21.2.to_out.0.weight": 1048576, "transformer.layers.21.2.to_out.0.bias": 1024, "transformer.layers.21.3.g": 1024, "transformer.layers.21.4.ff.0.0.weight": 4194304, "transformer.layers.21.4.ff.0.0.bias": 4096, "transformer.layers.21.4.ff.2.weight": 4194304, "transformer.layers.21.4.ff.2.bias": 1024, "transformer.layers.22.0.weight": 2097152, "transformer.layers.22.1.g": 1024, "transformer.layers.22.2.to_q.weight": 1048576, "transformer.layers.22.2.to_q.bias": 1024, "transformer.layers.22.2.to_k.weight": 1048576, "transformer.layers.22.2.to_k.bias": 1024, "transformer.layers.22.2.to_v.weight": 1048576, "transformer.layers.22.2.to_v.bias": 1024, "transformer.layers.22.2.to_out.0.weight": 1048576, "transformer.layers.22.2.to_out.0.bias": 1024, "transformer.layers.22.3.g": 1024, "transformer.layers.22.4.ff.0.0.weight": 4194304, "transformer.layers.22.4.ff.0.0.bias": 4096, "transformer.layers.22.4.ff.2.weight": 4194304, "transformer.layers.22.4.ff.2.bias": 1024, "transformer.layers.23.0.weight": 2097152, "transformer.layers.23.1.g": 1024, "transformer.layers.23.2.to_q.weight": 1048576, "transformer.layers.23.2.to_q.bias": 1024, "transformer.layers.23.2.to_k.weight": 1048576, "transformer.layers.23.2.to_k.bias": 1024, "transformer.layers.23.2.to_v.weight": 1048576, "transformer.layers.23.2.to_v.bias": 1024, "transformer.layers.23.2.to_out.0.weight": 1048576, "transformer.layers.23.2.to_out.0.bias": 1024, "transformer.layers.23.3.g": 1024, "transformer.layers.23.4.ff.0.0.weight": 4194304, "transformer.layers.23.4.ff.0.0.bias": 4096, "transformer.layers.23.4.ff.2.weight": 4194304, "transformer.layers.23.4.ff.2.bias": 1024, "transformer.layers.24.0.weight": 2097152, "transformer.layers.24.1.g": 1024, "transformer.layers.24.2.to_q.weight": 1048576, "transformer.layers.24.2.to_q.bias": 1024, "transformer.layers.24.2.to_k.weight": 1048576, "transformer.layers.24.2.to_k.bias": 1024, "transformer.layers.24.2.to_v.weight": 1048576, "transformer.layers.24.2.to_v.bias": 1024, "transformer.layers.24.2.to_out.0.weight": 1048576, "transformer.layers.24.2.to_out.0.bias": 1024, "transformer.layers.24.3.g": 1024, "transformer.layers.24.4.ff.0.0.weight": 4194304, "transformer.layers.24.4.ff.0.0.bias": 4096, "transformer.layers.24.4.ff.2.weight": 4194304, "transformer.layers.24.4.ff.2.bias": 1024, "transformer.layers.25.0.weight": 2097152, "transformer.layers.25.1.g": 1024, "transformer.layers.25.2.to_q.weight": 1048576, "transformer.layers.25.2.to_q.bias": 1024, "transformer.layers.25.2.to_k.weight": 1048576, "transformer.layers.25.2.to_k.bias": 1024, "transformer.layers.25.2.to_v.weight": 1048576, "transformer.layers.25.2.to_v.bias": 1024, "transformer.layers.25.2.to_out.0.weight": 1048576, "transformer.layers.25.2.to_out.0.bias": 1024, "transformer.layers.25.3.g": 1024, "transformer.layers.25.4.ff.0.0.weight": 4194304, "transformer.layers.25.4.ff.0.0.bias": 4096, "transformer.layers.25.4.ff.2.weight": 4194304, "transformer.layers.25.4.ff.2.bias": 1024, "transformer.norm_out.g": 1024, "transformer.proj_out.weight": 102400, "transformer.proj_out.bias": 100 }, "important_layers": [ "transformer.time_embed.time_mlp.0.weight", "transformer.time_embed.time_mlp.2.weight", "transformer.text_embed.text_embed.weight", "transformer.input_embed.proj.weight", "transformer.input_embed.conv_pos_embed.conv1d.0.weight", "transformer.input_embed.conv_pos_embed.conv1d.2.weight", "transformer.layers.0.2.to_q.weight", "transformer.layers.0.2.to_k.weight", "transformer.layers.0.2.to_v.weight", "transformer.layers.0.2.to_out.0.weight", "transformer.layers.0.4.ff.0.0.weight", "transformer.layers.0.4.ff.2.weight", "transformer.layers.1.2.to_q.weight", "transformer.layers.1.2.to_k.weight", "transformer.layers.1.2.to_v.weight", "transformer.layers.1.2.to_out.0.weight", "transformer.layers.1.4.ff.0.0.weight", "transformer.layers.1.4.ff.2.weight", "transformer.layers.2.2.to_q.weight", "transformer.layers.2.2.to_k.weight", "transformer.layers.2.2.to_v.weight", "transformer.layers.2.2.to_out.0.weight", "transformer.layers.2.4.ff.0.0.weight", "transformer.layers.2.4.ff.2.weight", "transformer.layers.3.2.to_q.weight", "transformer.layers.3.2.to_k.weight", "transformer.layers.3.2.to_v.weight", "transformer.layers.3.2.to_out.0.weight", "transformer.layers.3.4.ff.0.0.weight", "transformer.layers.3.4.ff.2.weight", "transformer.layers.4.2.to_q.weight", "transformer.layers.4.2.to_k.weight", "transformer.layers.4.2.to_v.weight", "transformer.layers.4.2.to_out.0.weight", "transformer.layers.4.4.ff.0.0.weight", "transformer.layers.4.4.ff.2.weight", "transformer.layers.5.2.to_q.weight", "transformer.layers.5.2.to_k.weight", "transformer.layers.5.2.to_v.weight", "transformer.layers.5.2.to_out.0.weight", "transformer.layers.5.4.ff.0.0.weight", "transformer.layers.5.4.ff.2.weight", "transformer.layers.6.2.to_q.weight", "transformer.layers.6.2.to_k.weight", "transformer.layers.6.2.to_v.weight", "transformer.layers.6.2.to_out.0.weight", "transformer.layers.6.4.ff.0.0.weight", "transformer.layers.6.4.ff.2.weight", "transformer.layers.7.2.to_q.weight", "transformer.layers.7.2.to_k.weight", "transformer.layers.7.2.to_v.weight", "transformer.layers.7.2.to_out.0.weight", "transformer.layers.7.4.ff.0.0.weight", "transformer.layers.7.4.ff.2.weight", "transformer.layers.8.4.ff.0.0.weight", "transformer.layers.8.4.ff.2.weight", "transformer.layers.9.4.ff.0.0.weight", "transformer.layers.9.4.ff.2.weight", "transformer.layers.10.4.ff.0.0.weight", "transformer.layers.10.4.ff.2.weight", "transformer.layers.11.4.ff.0.0.weight", "transformer.layers.11.4.ff.2.weight", "transformer.layers.12.4.ff.0.0.weight", "transformer.layers.12.4.ff.2.weight", "transformer.layers.13.0.weight", "transformer.layers.13.4.ff.0.0.weight", "transformer.layers.13.4.ff.2.weight", "transformer.layers.14.0.weight", "transformer.layers.14.4.ff.0.0.weight", "transformer.layers.14.4.ff.2.weight", "transformer.layers.15.0.weight", "transformer.layers.15.4.ff.0.0.weight", "transformer.layers.15.4.ff.2.weight", "transformer.layers.16.4.ff.0.0.weight", "transformer.layers.16.4.ff.2.weight", "transformer.layers.17.4.ff.0.0.weight", "transformer.layers.17.4.ff.2.weight", "transformer.layers.18.4.ff.0.0.weight", "transformer.layers.18.4.ff.2.weight", "transformer.layers.19.4.ff.0.0.weight", "transformer.layers.19.4.ff.2.weight", "transformer.layers.20.4.ff.0.0.weight", "transformer.layers.20.4.ff.2.weight", "transformer.layers.21.4.ff.0.0.weight", "transformer.layers.21.4.ff.2.weight", "transformer.layers.22.4.ff.0.0.weight", "transformer.layers.22.4.ff.2.weight", "transformer.layers.23.4.ff.0.0.weight", "transformer.layers.23.4.ff.2.weight", "transformer.layers.24.4.ff.0.0.weight", "transformer.layers.24.4.ff.2.weight", "transformer.layers.25.4.ff.0.0.weight", "transformer.layers.25.4.ff.2.weight" ], "bottleneck_layers": [], "recommendations": { "focus_layers": [ "transformer.time_embed.time_mlp.0.weight", "transformer.time_embed.time_mlp.2.weight", "transformer.text_embed.text_embed.weight", "transformer.input_embed.proj.weight", "transformer.input_embed.conv_pos_embed.conv1d.0.weight", "transformer.input_embed.conv_pos_embed.conv1d.2.weight", "transformer.layers.0.2.to_q.weight", "transformer.layers.0.2.to_k.weight", "transformer.layers.0.2.to_v.weight", "transformer.layers.0.2.to_out.0.weight", "transformer.layers.0.4.ff.0.0.weight", "transformer.layers.0.4.ff.2.weight", "transformer.layers.1.2.to_q.weight", "transformer.layers.1.2.to_k.weight", "transformer.layers.1.2.to_v.weight", "transformer.layers.1.2.to_out.0.weight", "transformer.layers.1.4.ff.0.0.weight", "transformer.layers.1.4.ff.2.weight", "transformer.layers.2.2.to_q.weight", "transformer.layers.2.2.to_k.weight", "transformer.layers.2.2.to_v.weight", "transformer.layers.2.2.to_out.0.weight", "transformer.layers.2.4.ff.0.0.weight", "transformer.layers.2.4.ff.2.weight", "transformer.layers.3.2.to_q.weight", "transformer.layers.3.2.to_k.weight", "transformer.layers.3.2.to_v.weight", "transformer.layers.3.2.to_out.0.weight", "transformer.layers.3.4.ff.0.0.weight", "transformer.layers.3.4.ff.2.weight", "transformer.layers.4.2.to_q.weight", "transformer.layers.4.2.to_k.weight", "transformer.layers.4.2.to_v.weight", "transformer.layers.4.2.to_out.0.weight", "transformer.layers.4.4.ff.0.0.weight", "transformer.layers.4.4.ff.2.weight", "transformer.layers.5.2.to_q.weight", "transformer.layers.5.2.to_k.weight", "transformer.layers.5.2.to_v.weight", "transformer.layers.5.2.to_out.0.weight", "transformer.layers.5.4.ff.0.0.weight", "transformer.layers.5.4.ff.2.weight", "transformer.layers.6.2.to_q.weight", "transformer.layers.6.2.to_k.weight", "transformer.layers.6.2.to_v.weight", "transformer.layers.6.2.to_out.0.weight", "transformer.layers.6.4.ff.0.0.weight", "transformer.layers.6.4.ff.2.weight", "transformer.layers.7.2.to_q.weight", "transformer.layers.7.2.to_k.weight", "transformer.layers.7.2.to_v.weight", "transformer.layers.7.2.to_out.0.weight", "transformer.layers.7.4.ff.0.0.weight", "transformer.layers.7.4.ff.2.weight", "transformer.layers.8.4.ff.0.0.weight", "transformer.layers.8.4.ff.2.weight", "transformer.layers.9.4.ff.0.0.weight", "transformer.layers.9.4.ff.2.weight", "transformer.layers.10.4.ff.0.0.weight", "transformer.layers.10.4.ff.2.weight", "transformer.layers.11.4.ff.0.0.weight", "transformer.layers.11.4.ff.2.weight", "transformer.layers.12.4.ff.0.0.weight", "transformer.layers.12.4.ff.2.weight", "transformer.layers.13.0.weight", "transformer.layers.13.4.ff.0.0.weight", "transformer.layers.13.4.ff.2.weight", "transformer.layers.14.0.weight", "transformer.layers.14.4.ff.0.0.weight", "transformer.layers.14.4.ff.2.weight", "transformer.layers.15.0.weight", "transformer.layers.15.4.ff.0.0.weight", "transformer.layers.15.4.ff.2.weight", "transformer.layers.16.4.ff.0.0.weight", "transformer.layers.16.4.ff.2.weight", "transformer.layers.17.4.ff.0.0.weight", "transformer.layers.17.4.ff.2.weight", "transformer.layers.18.4.ff.0.0.weight", "transformer.layers.18.4.ff.2.weight", "transformer.layers.19.4.ff.0.0.weight", "transformer.layers.19.4.ff.2.weight", "transformer.layers.20.4.ff.0.0.weight", "transformer.layers.20.4.ff.2.weight", "transformer.layers.21.4.ff.0.0.weight", "transformer.layers.21.4.ff.2.weight", "transformer.layers.22.4.ff.0.0.weight", "transformer.layers.22.4.ff.2.weight", "transformer.layers.23.4.ff.0.0.weight", "transformer.layers.23.4.ff.2.weight", "transformer.layers.24.4.ff.0.0.weight", "transformer.layers.24.4.ff.2.weight", "transformer.layers.25.4.ff.0.0.weight", "transformer.layers.25.4.ff.2.weight" ] }, "total_parameters": 391, "total_elements": 360755948, "param_ranges": { "transformer.time_embed.time_mlp.0.weight": { "min": -0.43058744072914124, "max": 0.29903075098991394, "mean": -0.0025567002594470978, "std": 0.04255249723792076, "sparsity": 0.0, "shape": [ 1024, 256 ] }, "transformer.time_embed.time_mlp.0.bias": { "min": -0.06321248412132263, "max": 0.107655830681324, "mean": 0.0005928671453148127, "std": 0.03411800414323807, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.time_embed.time_mlp.2.weight": { "min": -0.4126332402229309, "max": 0.8362816572189331, "mean": -0.00021067322813905776, "std": 0.024107061326503754, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.time_embed.time_mlp.2.bias": { "min": -0.11544923484325409, "max": 0.3215144872665405, "mean": -0.0009406265453435481, "std": 0.01957659050822258, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.text_embed.text_embed.weight": { "min": -2.791715383529663, "max": 2.870434045791626, "mean": -0.0003647833364084363, "std": 0.6153609752655029, "sparsity": 0.0, "shape": [ 2546, 100 ] }, "transformer.input_embed.proj.weight": { "min": -0.27896371483802795, "max": 0.3819044828414917, "mean": 0.0004220041155349463, "std": 0.04275014251470566, "sparsity": 0.0, "shape": [ 1024, 300 ] }, "transformer.input_embed.proj.bias": { "min": -0.22224494814872742, "max": 0.20959755778312683, "mean": -0.004497884772717953, "std": 0.040913522243499756, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { "min": -0.4279385209083557, "max": 0.4752762019634247, "mean": 2.009033551075845e-06, "std": 0.024508582428097725, "sparsity": 0.0, "shape": [ 1024, 64, 31 ] }, "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { "min": -0.32550832629203796, "max": 0.1569339483976364, "mean": -0.046702392399311066, "std": 0.0515773706138134, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { "min": -0.4104919135570526, "max": 0.3544883131980896, "mean": -0.00012644486560020596, "std": 0.02360026352107525, "sparsity": 0.0, "shape": [ 1024, 64, 31 ] }, "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { "min": -0.229718416929245, "max": 0.26262396574020386, "mean": -0.02914787270128727, "std": 0.04934746399521828, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.1.g": { "min": 0.2545970380306244, "max": 0.8200467824935913, "mean": 0.5254305601119995, "std": 0.08080543577671051, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_q.weight": { "min": -0.29690292477607727, "max": 0.26533740758895874, "mean": -0.00042425302672199905, "std": 0.0321030355989933, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_q.bias": { "min": -0.09272623807191849, "max": 0.12487658858299255, "mean": 0.0006494724657386541, "std": 0.025737110525369644, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_k.weight": { "min": -0.29031693935394287, "max": 0.2813326120376587, "mean": -7.68666504882276e-05, "std": 0.03093528188765049, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_k.bias": { "min": -5.899355888366699, "max": 5.814132213592529, "mean": -0.00933213159441948, "std": 1.29543137550354, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_v.weight": { "min": -0.42477670311927795, "max": 0.3437301814556122, "mean": 9.746497380547225e-05, "std": 0.029952634125947952, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_v.bias": { "min": -0.028919341042637825, "max": 0.027677638456225395, "mean": -0.00031004834454506636, "std": 0.012572667561471462, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_out.0.weight": { "min": -0.4539007246494293, "max": 0.4487650692462921, "mean": 2.293557918164879e-05, "std": 0.023855043575167656, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_out.0.bias": { "min": -0.08868313580751419, "max": 0.09119853377342224, "mean": 0.0022740147542208433, "std": 0.019512386992573738, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.3.g": { "min": 0.2666647434234619, "max": 1.0563400983810425, "mean": 0.5311195850372314, "std": 0.10441721975803375, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.4.ff.0.0.weight": { "min": -0.5746223330497742, "max": 0.6085677742958069, "mean": -0.0004311846860218793, "std": 0.038594383746385574, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.0.4.ff.0.0.bias": { "min": -0.18177427351474762, "max": 0.04579279571771622, "mean": -0.029445737600326538, "std": 0.04258440434932709, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.0.4.ff.2.weight": { "min": -1.1666346788406372, "max": 1.6346005201339722, "mean": 0.0003186643880326301, "std": 0.027693353593349457, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.0.4.ff.2.bias": { "min": -0.16253599524497986, "max": 0.20575034618377686, "mean": -0.02111678197979927, "std": 0.027937985956668854, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.1.g": { "min": 0.22444167733192444, "max": 0.8436422944068909, "mean": 0.4875181317329407, "std": 0.07519698888063431, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_q.weight": { "min": -0.25531357526779175, "max": 0.3059065341949463, "mean": -9.770956239663064e-06, "std": 0.03346950560808182, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_q.bias": { "min": -0.0954207256436348, "max": 0.11047575622797012, "mean": 5.4158546845428646e-05, "std": 0.026984980329871178, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_k.weight": { "min": -0.2974885404109955, "max": 0.29604607820510864, "mean": 5.041498661739752e-05, "std": 0.03253797069191933, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_k.bias": { "min": -5.164185523986816, "max": 5.084409236907959, "mean": -0.014593909494578838, "std": 1.1573563814163208, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_v.weight": { "min": -0.34487831592559814, "max": 0.34348052740097046, "mean": 7.885653030825779e-05, "std": 0.030057402327656746, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_v.bias": { "min": -0.03615832328796387, "max": 0.03314381092786789, "mean": -0.00014287084923125803, "std": 0.01301794033497572, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_out.0.weight": { "min": -0.31527891755104065, "max": 0.3751768469810486, "mean": -2.1734818801633082e-05, "std": 0.02405463345348835, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_out.0.bias": { "min": -0.10528924316167831, "max": 0.12185486406087875, "mean": -0.0019566768314689398, "std": 0.028841182589530945, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.3.g": { "min": 0.3117589056491852, "max": 1.1208702325820923, "mean": 0.6662365198135376, "std": 0.09775208681821823, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.4.ff.0.0.weight": { "min": -0.872468888759613, "max": 0.6275586485862732, "mean": 0.0016758753918111324, "std": 0.047438040375709534, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.1.4.ff.0.0.bias": { "min": -0.2710355520248413, "max": 0.03406016156077385, "mean": -0.04659765958786011, "std": 0.04059656709432602, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.1.4.ff.2.weight": { "min": -0.9201626181602478, "max": 0.9643434882164001, "mean": 0.0010215931106358767, "std": 0.04070163145661354, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.1.4.ff.2.bias": { "min": -0.14462199807167053, "max": 0.07486966252326965, "mean": -0.009085646830499172, "std": 0.02570141665637493, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.1.g": { "min": 0.23963269591331482, "max": 0.7123461365699768, "mean": 0.4472006559371948, "std": 0.05932367965579033, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_q.weight": { "min": -0.2729354500770569, "max": 0.29745981097221375, "mean": 8.72666532814037e-06, "std": 0.03547453135251999, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_q.bias": { "min": -0.11902111023664474, "max": 0.1184910237789154, "mean": 0.0007516429759562016, "std": 0.02761562168598175, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_k.weight": { "min": -0.28102290630340576, "max": 0.27947571873664856, "mean": -7.658830872969702e-05, "std": 0.03510264679789543, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_k.bias": { "min": -2.509542465209961, "max": 2.521538496017456, "mean": 0.026744995266199112, "std": 0.5867680311203003, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_v.weight": { "min": -0.2209818959236145, "max": 0.2715614438056946, "mean": 2.5364215616718866e-06, "std": 0.0307310800999403, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_v.bias": { "min": -0.03315867856144905, "max": 0.0312359519302845, "mean": 0.00011449654994066805, "std": 0.012396099045872688, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_out.0.weight": { "min": -0.23535357415676117, "max": 0.23171932995319366, "mean": 5.724863876821473e-05, "std": 0.025697464123368263, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_out.0.bias": { "min": -0.13585864007472992, "max": 0.12803053855895996, "mean": -0.0054976665414869785, "std": 0.039962489157915115, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.3.g": { "min": 0.3546965718269348, "max": 1.1723699569702148, "mean": 0.7105212211608887, "std": 0.10377959161996841, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.4.ff.0.0.weight": { "min": -0.6174826622009277, "max": 0.5556296706199646, "mean": 0.001160400453954935, "std": 0.04611344262957573, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.2.4.ff.0.0.bias": { "min": -0.18955032527446747, "max": 0.024929288774728775, "mean": -0.03484814986586571, "std": 0.02862328663468361, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.2.4.ff.2.weight": { "min": -1.130905032157898, "max": 0.970402181148529, "mean": 0.00035809652763418853, "std": 0.04234178364276886, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.2.4.ff.2.bias": { "min": -0.5977792143821716, "max": 0.06286704540252686, "mean": -0.004878203850239515, "std": 0.028615841642022133, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.1.g": { "min": 0.3753129839897156, "max": 0.9404288530349731, "mean": 0.5924519896507263, "std": 0.06695062667131424, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_q.weight": { "min": -0.3918393850326538, "max": 0.3694100081920624, "mean": 7.003510108916089e-05, "std": 0.03718580678105354, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_q.bias": { "min": -0.11892382800579071, "max": 0.1364460289478302, "mean": 0.0009139248286373913, "std": 0.02918536402285099, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_k.weight": { "min": -0.6189467310905457, "max": 0.5086581707000732, "mean": 1.522459842817625e-05, "std": 0.036438774317502975, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_k.bias": { "min": -8.18658447265625, "max": 8.788694381713867, "mean": -0.10927355289459229, "std": 1.6988238096237183, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_v.weight": { "min": -0.27650272846221924, "max": 0.2397344559431076, "mean": 5.2208531997166574e-05, "std": 0.03261270374059677, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_v.bias": { "min": -0.051591187715530396, "max": 0.039499007165431976, "mean": 9.101108298636973e-05, "std": 0.01296647172421217, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_out.0.weight": { "min": -0.2308182418346405, "max": 0.23492185771465302, "mean": -2.198125366703607e-05, "std": 0.0293892789632082, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_out.0.bias": { "min": -0.20422494411468506, "max": 0.10520327836275101, "mean": -0.004020952619612217, "std": 0.032637566328048706, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.3.g": { "min": 0.3395068645477295, "max": 1.0124397277832031, "mean": 0.7006875872612, "std": 0.09675538539886475, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.4.ff.0.0.weight": { "min": -0.5645881295204163, "max": 0.8335761427879333, "mean": 0.00041510065784677863, "std": 0.04229363799095154, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.3.4.ff.0.0.bias": { "min": -0.2121758759021759, "max": 0.0300263874232769, "mean": -0.032174285501241684, "std": 0.026499440893530846, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.3.4.ff.2.weight": { "min": -0.7549118995666504, "max": 0.7191137671470642, "mean": -1.6272973880404606e-05, "std": 0.03683432564139366, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.3.4.ff.2.bias": { "min": -0.2633835971355438, "max": 0.10630631446838379, "mean": -0.00301279011182487, "std": 0.028871648013591766, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.1.g": { "min": 0.2839854061603546, "max": 0.695024311542511, "mean": 0.49937066435813904, "std": 0.04653334617614746, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_q.weight": { "min": -0.2781727910041809, "max": 0.23389220237731934, "mean": -0.00011100011033704504, "std": 0.0387568399310112, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_q.bias": { "min": -0.15358875691890717, "max": 0.12641564011573792, "mean": -0.0022295925300568342, "std": 0.03333538770675659, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_k.weight": { "min": -0.41443270444869995, "max": 0.6594027280807495, "mean": -1.858997711678967e-05, "std": 0.03909648209810257, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_k.bias": { "min": -4.237802028656006, "max": 4.722365379333496, "mean": -0.020456280559301376, "std": 1.0076717138290405, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_v.weight": { "min": -0.24511729180812836, "max": 0.20752397179603577, "mean": 4.432153218658641e-05, "std": 0.03396220877766609, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_v.bias": { "min": -0.03445148468017578, "max": 0.044871583580970764, "mean": -1.9065962987951934e-05, "std": 0.012637496925890446, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_out.0.weight": { "min": -0.20115934312343597, "max": 0.20639759302139282, "mean": -2.9241522497613914e-05, "std": 0.031020423397421837, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_out.0.bias": { "min": -0.19977232813835144, "max": 0.1132478341460228, "mean": -0.002891883021220565, "std": 0.03452973812818527, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.3.g": { "min": 0.3667006194591522, "max": 1.0575865507125854, "mean": 0.6704831123352051, "std": 0.06640235334634781, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.4.ff.0.0.weight": { "min": -0.39832764863967896, "max": 0.5020085573196411, "mean": -3.8792531995568424e-05, "std": 0.041129473596811295, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.4.4.ff.0.0.bias": { "min": -0.12865175306797028, "max": 0.02696564421057701, "mean": -0.030531559139490128, "std": 0.021883869543671608, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.4.4.ff.2.weight": { "min": -0.44955554604530334, "max": 0.4331819415092468, "mean": 7.46890582377091e-05, "std": 0.034889888018369675, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.4.4.ff.2.bias": { "min": -0.26744911074638367, "max": 0.07309805601835251, "mean": -0.0010887861717492342, "std": 0.023132896050810814, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.1.g": { "min": 0.28746652603149414, "max": 0.6852710843086243, "mean": 0.5245163440704346, "std": 0.04753531143069267, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_q.weight": { "min": -0.2225414365530014, "max": 0.2233862727880478, "mean": 1.5953022739267908e-05, "std": 0.038948602974414825, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_q.bias": { "min": -0.13633988797664642, "max": 0.10930000245571136, "mean": 0.00024919791030697525, "std": 0.029206812381744385, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_k.weight": { "min": -0.3749636113643646, "max": 0.43756094574928284, "mean": -9.44960629567504e-06, "std": 0.03928674757480621, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_k.bias": { "min": -3.845799684524536, "max": 4.999211311340332, "mean": 0.009741385467350483, "std": 0.8452029228210449, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_v.weight": { "min": -0.22279420495033264, "max": 0.22023756802082062, "mean": -3.8509870137204416e-07, "std": 0.03440963104367256, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_v.bias": { "min": -0.04381667822599411, "max": 0.03586551547050476, "mean": -0.0002609736402519047, "std": 0.012077639810740948, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_out.0.weight": { "min": -0.21273735165596008, "max": 0.18841038644313812, "mean": -1.714246354822535e-05, "std": 0.031536102294921875, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_out.0.bias": { "min": -0.18087971210479736, "max": 0.12077755481004715, "mean": -0.0023926026187837124, "std": 0.04127210006117821, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.3.g": { "min": 0.4229143261909485, "max": 0.941786527633667, "mean": 0.6626389026641846, "std": 0.056811243295669556, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.4.ff.0.0.weight": { "min": -0.37079188227653503, "max": 0.47652140259742737, "mean": -8.189280197257176e-05, "std": 0.040888600051403046, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.5.4.ff.0.0.bias": { "min": -0.20858491957187653, "max": 0.027342500165104866, "mean": -0.03023093193769455, "std": 0.021366029977798462, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.5.4.ff.2.weight": { "min": -0.3407646119594574, "max": 0.7343085408210754, "mean": 8.227993384934962e-05, "std": 0.03476560488343239, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.5.4.ff.2.bias": { "min": -0.2401275634765625, "max": 0.05064300820231438, "mean": -0.0011859382502734661, "std": 0.020460018888115883, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.1.g": { "min": 0.3059234321117401, "max": 0.6536071300506592, "mean": 0.5251041054725647, "std": 0.046117961406707764, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_q.weight": { "min": -0.30434539914131165, "max": 0.21718497574329376, "mean": 6.997769378358498e-05, "std": 0.03949679434299469, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_q.bias": { "min": -0.1491607427597046, "max": 0.1309996247291565, "mean": 0.00032534098136238754, "std": 0.030453510582447052, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_k.weight": { "min": -0.25696200132369995, "max": 0.20183700323104858, "mean": 3.1303323339670897e-05, "std": 0.0394880436360836, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_k.bias": { "min": -2.3362133502960205, "max": 2.3758370876312256, "mean": -0.026241015642881393, "std": 0.4497620761394501, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_v.weight": { "min": -0.1885133534669876, "max": 0.21026504039764404, "mean": 3.72500107914675e-05, "std": 0.03479313850402832, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_v.bias": { "min": -0.03166966885328293, "max": 0.035711731761693954, "mean": -0.00019632275507319719, "std": 0.012291603721678257, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_out.0.weight": { "min": -0.18826794624328613, "max": 0.17029285430908203, "mean": -6.840371497673914e-05, "std": 0.03216983750462532, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_out.0.bias": { "min": -0.13950176537036896, "max": 0.13710856437683105, "mean": -0.002513276878744364, "std": 0.05129357427358627, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.3.g": { "min": 0.46702930331230164, "max": 0.9555635452270508, "mean": 0.6688482761383057, "std": 0.05276886373758316, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.4.ff.0.0.weight": { "min": -0.3244642913341522, "max": 0.30925843119621277, "mean": -9.10853486857377e-07, "std": 0.04094461724162102, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.6.4.ff.0.0.bias": { "min": -0.12482384592294693, "max": 0.02569793164730072, "mean": -0.03068721666932106, "std": 0.019822420552372932, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.6.4.ff.2.weight": { "min": -0.43951860070228577, "max": 0.4452158510684967, "mean": 9.512923134025186e-05, "std": 0.03511851280927658, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.6.4.ff.2.bias": { "min": -0.22458022832870483, "max": 0.051897209137678146, "mean": -0.0011794487945735455, "std": 0.018467247486114502, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.1.g": { "min": 0.3391944468021393, "max": 0.7399035096168518, "mean": 0.558688759803772, "std": 0.04139659181237221, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_q.weight": { "min": -0.27298545837402344, "max": 0.2789517045021057, "mean": 2.041603875113651e-05, "std": 0.041056908667087555, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_q.bias": { "min": -0.13676847517490387, "max": 0.1398179680109024, "mean": 0.0004908779519610107, "std": 0.026629263535141945, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_k.weight": { "min": -0.49038437008857727, "max": 0.35562369227409363, "mean": 8.908439485821873e-05, "std": 0.04069468006491661, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_k.bias": { "min": -2.297020673751831, "max": 1.7451350688934326, "mean": -0.02108073979616165, "std": 0.5001184940338135, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_v.weight": { "min": -0.2181541919708252, "max": 0.19748014211654663, "mean": -4.031343632959761e-05, "std": 0.034232787787914276, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_v.bias": { "min": -0.0411330908536911, "max": 0.03885316848754883, "mean": -0.00013403715274762362, "std": 0.012882057577371597, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_out.0.weight": { "min": -0.17773869633674622, "max": 0.18285222351551056, "mean": 4.8017449444159865e-05, "std": 0.03155619651079178, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_out.0.bias": { "min": -0.18002314865589142, "max": 0.18396146595478058, "mean": -0.0022139688953757286, "std": 0.05483314022421837, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.3.g": { "min": 0.474223792552948, "max": 1.025842308998108, "mean": 0.6452140212059021, "std": 0.05035461485385895, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.4.ff.0.0.weight": { "min": -0.2715917229652405, "max": 0.30928391218185425, "mean": 0.00011250950046814978, "std": 0.04068081080913544, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.7.4.ff.0.0.bias": { "min": -0.10539427399635315, "max": 0.026698507368564606, "mean": -0.02951802872121334, "std": 0.017934730276465416, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.7.4.ff.2.weight": { "min": -0.3393958806991577, "max": 0.3293214440345764, "mean": 5.262523700366728e-05, "std": 0.03441222757101059, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.7.4.ff.2.bias": { "min": -0.18173128366470337, "max": 0.04261557012796402, "mean": -0.001059417612850666, "std": 0.017207711935043335, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.1.g": { "min": 0.32517319917678833, "max": 0.6865989565849304, "mean": 0.5111718773841858, "std": 0.03694766014814377, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_q.weight": { "min": -0.2340274453163147, "max": 0.22541004419326782, "mean": -3.624596502049826e-05, "std": 0.039175089448690414, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_q.bias": { "min": -0.11520740389823914, "max": 0.1319286823272705, "mean": 0.00015029977657832205, "std": 0.029165174812078476, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_k.weight": { "min": -0.3522850573062897, "max": 0.28482842445373535, "mean": 6.6099587456847075e-06, "std": 0.03924406319856644, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_k.bias": { "min": -4.132234573364258, "max": 3.5437166690826416, "mean": -0.011590443551540375, "std": 0.6826013326644897, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_v.weight": { "min": -0.21073928475379944, "max": 0.20945559442043304, "mean": 3.4624928957782686e-05, "std": 0.03448405861854553, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_v.bias": { "min": -0.035892292857170105, "max": 0.0479779876768589, "mean": 0.0007904525264166296, "std": 0.012872384861111641, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_out.0.weight": { "min": -0.21030081808567047, "max": 0.19305069744586945, "mean": -9.318873708252795e-07, "std": 0.03169514983892441, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_out.0.bias": { "min": -0.18656854331493378, "max": 0.17726241052150726, "mean": -0.002840438624843955, "std": 0.0586128756403923, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.3.g": { "min": 0.4746079444885254, "max": 1.041317105293274, "mean": 0.6513123512268066, "std": 0.04965612292289734, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.4.ff.0.0.weight": { "min": -0.24824990332126617, "max": 0.32916077971458435, "mean": 0.0001809034583857283, "std": 0.04056909307837486, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.8.4.ff.0.0.bias": { "min": -0.1252717822790146, "max": 0.024853328242897987, "mean": -0.03049679473042488, "std": 0.01761467382311821, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.8.4.ff.2.weight": { "min": -0.4204847514629364, "max": 0.4814334511756897, "mean": 1.0858502719202079e-06, "std": 0.03539634868502617, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.8.4.ff.2.bias": { "min": -0.1512894481420517, "max": 0.0435330905020237, "mean": 4.2967651097569615e-05, "std": 0.014878639951348305, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.1.g": { "min": 0.31564587354660034, "max": 0.6816184520721436, "mean": 0.5528937578201294, "std": 0.04068783298134804, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_q.weight": { "min": -0.20636627078056335, "max": 0.2197655737400055, "mean": 3.1909676181385294e-05, "std": 0.038298994302749634, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_q.bias": { "min": -0.13777659833431244, "max": 0.11261031776666641, "mean": 2.2643122065346688e-05, "std": 0.025812044739723206, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_k.weight": { "min": -0.40279680490493774, "max": 0.3708725571632385, "mean": 2.5475083020864986e-05, "std": 0.03817913681268692, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_k.bias": { "min": -3.770826816558838, "max": 2.8686459064483643, "mean": 0.001154756173491478, "std": 0.5168185234069824, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_v.weight": { "min": -0.20366711914539337, "max": 0.1976872831583023, "mean": 2.9746484869974665e-05, "std": 0.03429698571562767, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_v.bias": { "min": -0.050587497651576996, "max": 0.039878759533166885, "mean": -0.00042467116145417094, "std": 0.013416356407105923, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_out.0.weight": { "min": -0.19594806432724, "max": 0.20180270075798035, "mean": -1.2511954992078245e-05, "std": 0.031805265694856644, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_out.0.bias": { "min": -0.1929892897605896, "max": 0.19512949883937836, "mean": -0.002963980659842491, "std": 0.06252874433994293, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.3.g": { "min": 0.3488827645778656, "max": 1.0837209224700928, "mean": 0.6670882701873779, "std": 0.05524449050426483, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.4.ff.0.0.weight": { "min": -0.22563330829143524, "max": 0.25133612751960754, "mean": 0.00035861917422153056, "std": 0.040758710354566574, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.9.4.ff.0.0.bias": { "min": -0.09100860357284546, "max": 0.04368036612868309, "mean": -0.03007863275706768, "std": 0.01761433854699135, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.9.4.ff.2.weight": { "min": -0.35325002670288086, "max": 0.3038857877254486, "mean": -4.542069655144587e-05, "std": 0.037121765315532684, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.9.4.ff.2.bias": { "min": -0.16173334419727325, "max": 0.06341976672410965, "mean": -7.59128452045843e-05, "std": 0.019423963502049446, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.1.g": { "min": 0.348746657371521, "max": 0.7219499945640564, "mean": 0.5423322916030884, "std": 0.03906194120645523, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_q.weight": { "min": -0.21932680904865265, "max": 0.22335435450077057, "mean": -1.1452927537902724e-05, "std": 0.03923005238175392, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_q.bias": { "min": -0.11840008199214935, "max": 0.1704910695552826, "mean": 0.00028676993679255247, "std": 0.025109266862273216, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_k.weight": { "min": -0.24656711518764496, "max": 0.30068346858024597, "mean": -3.68916334991809e-05, "std": 0.03892939165234566, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_k.bias": { "min": -3.504953145980835, "max": 3.7143990993499756, "mean": 0.015847017988562584, "std": 0.7823704481124878, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_v.weight": { "min": -0.21910522878170013, "max": 0.23737633228302002, "mean": -1.3034959920332767e-05, "std": 0.036302801221609116, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_v.bias": { "min": -0.04721483215689659, "max": 0.051370855420827866, "mean": 0.00048040057299658656, "std": 0.013522167690098286, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_out.0.weight": { "min": -0.2142011672258377, "max": 0.21717870235443115, "mean": 5.644252087222412e-05, "std": 0.03361529856920242, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_out.0.bias": { "min": -0.21134759485721588, "max": 0.23112934827804565, "mean": -0.005099965259432793, "std": 0.061861325055360794, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.3.g": { "min": 0.361937016248703, "max": 1.1009857654571533, "mean": 0.6992422342300415, "std": 0.053594909608364105, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.4.ff.0.0.weight": { "min": -0.2350708544254303, "max": 0.24471336603164673, "mean": 0.00046341665438376367, "std": 0.041268061846494675, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.10.4.ff.0.0.bias": { "min": -0.0980960875749588, "max": 0.06807035952806473, "mean": -0.03142966330051422, "std": 0.018127702176570892, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.10.4.ff.2.weight": { "min": -0.30174583196640015, "max": 0.3516803979873657, "mean": -8.28510383144021e-05, "std": 0.04027377441525459, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.10.4.ff.2.bias": { "min": -0.1523003727197647, "max": 0.1496732383966446, "mean": 0.00026386568788439035, "std": 0.023037536069750786, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.1.g": { "min": 0.9992449879646301, "max": 1.001513123512268, "mean": 1.0000585317611694, "std": 0.0006324834539555013, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_q.weight": { "min": -0.031258270144462585, "max": 0.031254518777132034, "mean": -1.929036807268858e-05, "std": 0.018040649592876434, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_q.bias": { "min": -0.03122791275382042, "max": 0.030987516045570374, "mean": -0.0010841463226824999, "std": 0.01795026659965515, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_k.weight": { "min": -0.03125518560409546, "max": 0.0312589630484581, "mean": 3.5481098166201264e-06, "std": 0.018041057512164116, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_k.bias": { "min": -0.031153831630945206, "max": 0.03117419220507145, "mean": 0.00033391290344297886, "std": 0.018062464892864227, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_v.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_v.bias": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_out.0.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_out.0.bias": { "min": -0.0006552772247232497, "max": 0.0007129037985578179, "mean": 5.131376383360475e-06, "std": 0.0001946619595400989, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.3.g": { "min": 0.997419536113739, "max": 1.0028407573699951, "mean": 0.9999656081199646, "std": 0.000851841235999018, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.4.ff.0.0.weight": { "min": -0.03356073051691055, "max": 0.03384723141789436, "mean": -5.6891162785177585e-06, "std": 0.018047483637928963, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.11.4.ff.0.0.bias": { "min": -0.03327289596199989, "max": 0.03337877616286278, "mean": -0.00020134463557042181, "std": 0.017954064533114433, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.11.4.ff.2.weight": { "min": -0.001495416508987546, "max": 0.0016743302112445235, "mean": 2.175480403820984e-06, "std": 0.00029829132836312056, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.11.4.ff.2.bias": { "min": -0.0005666155484504998, "max": 0.0007540585356764495, "mean": 8.17788895801641e-06, "std": 0.00017612945521250367, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.1.g": { "min": 0.3832930624485016, "max": 0.7191212773323059, "mean": 0.5806662440299988, "std": 0.03885548189282417, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_q.weight": { "min": -0.239033043384552, "max": 0.19648200273513794, "mean": 2.5991641450673342e-05, "std": 0.03746527060866356, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_q.bias": { "min": -0.11883819848299026, "max": 0.1667412370443344, "mean": 0.0009821474086493254, "std": 0.02755241096019745, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_k.weight": { "min": -0.24662744998931885, "max": 0.4999285340309143, "mean": -5.0414026190992445e-05, "std": 0.037622544914484024, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_k.bias": { "min": -3.941795825958252, "max": 3.768937587738037, "mean": -0.0035722628235816956, "std": 0.681327760219574, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_v.weight": { "min": -0.22736430168151855, "max": 0.25185492634773254, "mean": -1.1772199286497198e-05, "std": 0.037433888763189316, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_v.bias": { "min": -0.07156982272863388, "max": 0.08060310035943985, "mean": -0.0005125089664943516, "std": 0.01565583609044552, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_out.0.weight": { "min": -0.22800227999687195, "max": 0.25769373774528503, "mean": -2.863763802452013e-05, "std": 0.035420775413513184, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_out.0.bias": { "min": -0.20050473511219025, "max": 0.2148960828781128, "mean": -0.005524474661797285, "std": 0.06832842528820038, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.3.g": { "min": 0.405087411403656, "max": 1.1892733573913574, "mean": 0.7378814816474915, "std": 0.05523177236318588, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.4.ff.0.0.weight": { "min": -0.2209046483039856, "max": 0.24561487138271332, "mean": 0.000521098030731082, "std": 0.041335128247737885, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.12.4.ff.0.0.bias": { "min": -0.1032090112566948, "max": 0.02416798658668995, "mean": -0.032665450125932693, "std": 0.018891815096139908, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.12.4.ff.2.weight": { "min": -0.4496724605560303, "max": 0.4224262237548828, "mean": -0.0004358820151537657, "std": 0.04689519852399826, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.12.4.ff.2.bias": { "min": -0.2515088617801666, "max": 0.47011902928352356, "mean": 0.003207466099411249, "std": 0.044524550437927246, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.0.weight": { "min": -0.3168761134147644, "max": 0.3331414461135864, "mean": -2.506819146219641e-05, "std": 0.02128741703927517, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.13.1.g": { "min": 0.3245299160480499, "max": 0.6855776906013489, "mean": 0.5709930658340454, "std": 0.04470643773674965, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_q.weight": { "min": -0.1645486205816269, "max": 0.1745065301656723, "mean": -4.8789879656396806e-05, "std": 0.03318168222904205, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_q.bias": { "min": -0.18692335486412048, "max": 0.14329002797603607, "mean": 3.758035018108785e-05, "std": 0.029700448736548424, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_k.weight": { "min": -0.3810470402240753, "max": 0.24586895108222961, "mean": -9.737135769682936e-06, "std": 0.03276293724775314, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_k.bias": { "min": -3.6554298400878906, "max": 3.2897167205810547, "mean": -0.014251163229346275, "std": 0.9850608110427856, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_v.weight": { "min": -0.23475222289562225, "max": 0.2473384439945221, "mean": -1.814275310607627e-05, "std": 0.041697416454553604, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_v.bias": { "min": -0.0725652277469635, "max": 0.15448249876499176, "mean": 0.0006658083875663579, "std": 0.02517012506723404, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_out.0.weight": { "min": -0.2663499712944031, "max": 0.2480984330177307, "mean": -1.5296925994334742e-05, "std": 0.04013863205909729, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_out.0.bias": { "min": -0.18960432708263397, "max": 0.194618359208107, "mean": -0.0012379353865981102, "std": 0.06668508052825928, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.3.g": { "min": 0.32916781306266785, "max": 0.9996783137321472, "mean": 0.7191422581672668, "std": 0.0523388646543026, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.4.ff.0.0.weight": { "min": -0.23172040283679962, "max": 0.2451343685388565, "mean": 0.00018265256949234754, "std": 0.04089942201972008, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.13.4.ff.0.0.bias": { "min": -0.11451541632413864, "max": 0.01910208724439144, "mean": -0.04247751086950302, "std": 0.0188636165112257, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.13.4.ff.2.weight": { "min": -0.38971978425979614, "max": 0.40751317143440247, "mean": -2.1620868210447952e-05, "std": 0.04853251948952675, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.13.4.ff.2.bias": { "min": -0.6930332779884338, "max": 0.4125932455062866, "mean": 0.0008482532575726509, "std": 0.06028350815176964, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.0.weight": { "min": -0.0015386008890345693, "max": 1.0007996559143066, "mean": 0.00048813552712090313, "std": 0.022089246660470963, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.14.1.g": { "min": 0.9992700219154358, "max": 1.0015240907669067, "mean": 1.0000568628311157, "std": 0.000619773636572063, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_q.weight": { "min": -0.031252991408109665, "max": 0.031256891787052155, "mean": -2.1020092390244827e-05, "std": 0.01803199015557766, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_q.bias": { "min": -0.03121490404009819, "max": 0.03123173676431179, "mean": -0.0006769870524294674, "std": 0.01782653108239174, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_k.weight": { "min": -0.0312560498714447, "max": 0.03126147389411926, "mean": -8.831357263261452e-06, "std": 0.01803101785480976, "sparsity": 9.5367431640625e-07, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_k.bias": { "min": -0.031231652945280075, "max": 0.031244346871972084, "mean": -0.0007297407719306648, "std": 0.01794145628809929, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_v.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_v.bias": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_out.0.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_out.0.bias": { "min": -0.0005350728752091527, "max": 0.0004281355068087578, "mean": -3.930799721274525e-06, "std": 0.00015574153803754598, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.3.g": { "min": 0.9972792267799377, "max": 1.0023835897445679, "mean": 0.9995018243789673, "std": 0.0008350047282874584, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.4.ff.0.0.weight": { "min": -0.03338531777262688, "max": 0.03282884135842323, "mean": -2.971738467749674e-06, "std": 0.018026772886514664, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.14.4.ff.0.0.bias": { "min": -0.03250397369265556, "max": 0.031224608421325684, "mean": -0.0005561817670240998, "std": 0.01803283393383026, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.14.4.ff.2.weight": { "min": -0.001761053572408855, "max": 0.0016201753169298172, "mean": -9.977067065847223e-07, "std": 0.00029509843443520367, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.14.4.ff.2.bias": { "min": -0.0005179685540497303, "max": 0.00046010586083866656, "mean": -3.1889690035313834e-06, "std": 0.00014008936705067754, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.0.weight": { "min": -0.23426799476146698, "max": 0.2724316120147705, "mean": 6.618206498387735e-06, "std": 0.01881008967757225, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.15.1.g": { "min": 0.32140958309173584, "max": 0.6938180923461914, "mean": 0.58160400390625, "std": 0.045936692506074905, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_q.weight": { "min": -0.18184486031532288, "max": 0.19783763587474823, "mean": -1.1537180398590863e-05, "std": 0.03318366780877113, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_q.bias": { "min": -0.16044476628303528, "max": 0.12933249771595, "mean": -0.001071967650204897, "std": 0.03413407504558563, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_k.weight": { "min": -0.33228737115859985, "max": 0.31113728880882263, "mean": -1.0175894203712232e-05, "std": 0.03223416581749916, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_k.bias": { "min": -7.80244779586792, "max": 8.761518478393555, "mean": 0.093451589345932, "std": 1.619434118270874, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_v.weight": { "min": -0.23388099670410156, "max": 0.2418091893196106, "mean": 4.1715411498444155e-05, "std": 0.04085543006658554, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_v.bias": { "min": -0.07592413574457169, "max": 0.06573085486888885, "mean": 0.00048532572691328824, "std": 0.019415952265262604, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_out.0.weight": { "min": -0.2459113746881485, "max": 0.23399382829666138, "mean": -3.2584175642114133e-06, "std": 0.039430178701877594, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_out.0.bias": { "min": -0.1629519760608673, "max": 0.16087952256202698, "mean": 0.0016248535830527544, "std": 0.06528551876544952, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.3.g": { "min": 0.5571001172065735, "max": 0.9435561299324036, "mean": 0.712803840637207, "std": 0.040119532495737076, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.4.ff.0.0.weight": { "min": -0.2279409021139145, "max": 0.25474709272384644, "mean": -4.549993900582194e-05, "std": 0.040573619306087494, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.15.4.ff.0.0.bias": { "min": -0.13481706380844116, "max": 0.02219359762966633, "mean": -0.041350673884153366, "std": 0.018385522067546844, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.15.4.ff.2.weight": { "min": -0.42158395051956177, "max": 0.3924521505832672, "mean": -4.16895818489138e-06, "std": 0.047782838344573975, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.15.4.ff.2.bias": { "min": -0.607164204120636, "max": 0.6512984037399292, "mean": 0.0015855339588597417, "std": 0.056834105402231216, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.0.weight": { "min": -0.25181475281715393, "max": 0.32078737020492554, "mean": -6.139540346339345e-06, "std": 0.019613103941082954, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.16.1.g": { "min": 0.3595266342163086, "max": 0.6821960806846619, "mean": 0.5706722140312195, "std": 0.042985353618860245, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_q.weight": { "min": -0.2202295958995819, "max": 0.177076518535614, "mean": -3.443878813413903e-05, "std": 0.03429801017045975, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_q.bias": { "min": -0.16317804157733917, "max": 0.23287786543369293, "mean": 0.00035837513860315084, "std": 0.03280922770500183, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_k.weight": { "min": -0.2639525532722473, "max": 0.23980671167373657, "mean": -5.297175084706396e-05, "std": 0.03389657661318779, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_k.bias": { "min": -4.854193210601807, "max": 5.090420722961426, "mean": 0.043878111988306046, "std": 1.2290726900100708, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_v.weight": { "min": -0.24640515446662903, "max": 0.250241219997406, "mean": 7.21166143193841e-05, "std": 0.043985553085803986, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_v.bias": { "min": -0.06247914582490921, "max": 0.054487086832523346, "mean": 0.0006464287871494889, "std": 0.017190182581543922, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_out.0.weight": { "min": -0.2863953709602356, "max": 0.27215418219566345, "mean": -5.014354974264279e-05, "std": 0.0429837629199028, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_out.0.bias": { "min": -0.16105736792087555, "max": 0.17032958567142487, "mean": -0.0028887835796922445, "std": 0.05930224433541298, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.3.g": { "min": 0.5198467373847961, "max": 0.9329147338867188, "mean": 0.7133820652961731, "std": 0.03842068091034889, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.4.ff.0.0.weight": { "min": -0.23785468935966492, "max": 0.2487422525882721, "mean": 0.00046461093006655574, "std": 0.04045235738158226, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.16.4.ff.0.0.bias": { "min": -0.14500072598457336, "max": 0.04102769121527672, "mean": -0.039694253355264664, "std": 0.020542506128549576, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.16.4.ff.2.weight": { "min": -0.532442569732666, "max": 0.5823614597320557, "mean": 6.013309757690877e-06, "std": 0.04885788634419441, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.16.4.ff.2.bias": { "min": -0.5189021229743958, "max": 0.4934021234512329, "mean": 0.0023652694653719664, "std": 0.05344180017709732, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.0.weight": { "min": -0.2737113833427429, "max": 0.3155929148197174, "mean": 1.988332769542467e-06, "std": 0.020049693062901497, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.17.1.g": { "min": 0.3658766746520996, "max": 0.7116788029670715, "mean": 0.5931248664855957, "std": 0.04595986381173134, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_q.weight": { "min": -0.21088893711566925, "max": 0.19901061058044434, "mean": 3.061449388042092e-05, "std": 0.0348670557141304, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_q.bias": { "min": -0.18707768619060516, "max": 0.20344795286655426, "mean": 0.0009536991128697991, "std": 0.03149910271167755, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_k.weight": { "min": -0.2897132933139801, "max": 0.3398728668689728, "mean": -4.695481766248122e-05, "std": 0.034587565809488297, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_k.bias": { "min": -3.8768022060394287, "max": 3.386897563934326, "mean": 0.014455738477408886, "std": 0.8582935929298401, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_v.weight": { "min": -0.22446562349796295, "max": 0.24974551796913147, "mean": -3.865096914523747e-06, "std": 0.042228855192661285, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_v.bias": { "min": -0.055283673107624054, "max": 0.046579472720623016, "mean": -2.0229621441103518e-05, "std": 0.015845011919736862, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_out.0.weight": { "min": -0.2932588756084442, "max": 0.29019662737846375, "mean": -7.67192614148371e-06, "std": 0.04194393754005432, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_out.0.bias": { "min": -0.12487518787384033, "max": 0.2589555084705353, "mean": -0.0032450095750391483, "std": 0.053175244480371475, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.3.g": { "min": 0.45627039670944214, "max": 0.8444806933403015, "mean": 0.7054478526115417, "std": 0.03522774204611778, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.4.ff.0.0.weight": { "min": -0.512130856513977, "max": 0.34817978739738464, "mean": 0.00034297071397304535, "std": 0.040197573602199554, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.17.4.ff.0.0.bias": { "min": -0.18561507761478424, "max": 0.039553456008434296, "mean": -0.039388205856084824, "std": 0.02135956473648548, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.17.4.ff.2.weight": { "min": -0.5439714193344116, "max": 0.5556594729423523, "mean": -7.099103095242754e-05, "std": 0.050732966512441635, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.17.4.ff.2.bias": { "min": -0.5116639137268066, "max": 0.6642246842384338, "mean": 0.002442360855638981, "std": 0.04952433332800865, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.0.weight": { "min": -0.33249908685684204, "max": 0.2653781771659851, "mean": 3.2569464565312956e-06, "std": 0.019386788830161095, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.18.1.g": { "min": 0.3219698965549469, "max": 0.766376256942749, "mean": 0.651033878326416, "std": 0.04532676190137863, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_q.weight": { "min": -0.2498074471950531, "max": 0.21987499296665192, "mean": -1.9507724573486485e-06, "std": 0.036501552909612656, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_q.bias": { "min": -0.3268783390522003, "max": 0.2866748869419098, "mean": -0.0006870508659631014, "std": 0.03855406492948532, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_k.weight": { "min": -0.3101723790168762, "max": 0.37016358971595764, "mean": 6.504941848106682e-05, "std": 0.03624220937490463, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_k.bias": { "min": -4.7166595458984375, "max": 5.806900978088379, "mean": 0.03795350342988968, "std": 1.4129759073257446, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_v.weight": { "min": -0.22155693173408508, "max": 0.2057628631591797, "mean": -7.524936518166214e-05, "std": 0.042484089732170105, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_v.bias": { "min": -0.07764487713575363, "max": 0.051462698727846146, "mean": -0.000925063737668097, "std": 0.0164109468460083, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_out.0.weight": { "min": -0.33050650358200073, "max": 0.329324871301651, "mean": -4.5611386667587794e-06, "std": 0.042790405452251434, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_out.0.bias": { "min": -0.2847575545310974, "max": 0.11197607964277267, "mean": -0.0012040773872286081, "std": 0.04701252654194832, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.3.g": { "min": 0.48601120710372925, "max": 0.8868346214294434, "mean": 0.7373513579368591, "std": 0.038241803646087646, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.4.ff.0.0.weight": { "min": -0.3624517619609833, "max": 0.27458682656288147, "mean": 5.118873013998382e-05, "std": 0.040643129497766495, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.18.4.ff.0.0.bias": { "min": -0.24757687747478485, "max": 0.046393755823373795, "mean": -0.039262838661670685, "std": 0.023290209472179413, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.18.4.ff.2.weight": { "min": -0.626139223575592, "max": 0.5965114235877991, "mean": -6.056673373677768e-05, "std": 0.0531148836016655, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.18.4.ff.2.bias": { "min": -0.7093748450279236, "max": 0.2657814621925354, "mean": 0.0009187416289933026, "std": 0.05122179910540581, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.0.weight": { "min": -0.3433896005153656, "max": 0.3037145733833313, "mean": 3.0547948881576303e-07, "std": 0.019135164096951485, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.19.1.g": { "min": 0.34973248839378357, "max": 0.7829060554504395, "mean": 0.6387954354286194, "std": 0.049250222742557526, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_q.weight": { "min": -0.20535039901733398, "max": 0.20685911178588867, "mean": -5.973261431790888e-05, "std": 0.03769532963633537, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_q.bias": { "min": -0.25850412249565125, "max": 0.2679128050804138, "mean": -0.00040441699093207717, "std": 0.044591374695301056, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_k.weight": { "min": -0.354056179523468, "max": 0.3223519027233124, "mean": -6.86804014549125e-06, "std": 0.03720388934016228, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_k.bias": { "min": -5.260861873626709, "max": 4.203889846801758, "mean": -0.02641155757009983, "std": 1.0066218376159668, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_v.weight": { "min": -0.23860004544258118, "max": 0.24336647987365723, "mean": -2.503740142856259e-05, "std": 0.043208908289670944, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_v.bias": { "min": -0.06237001344561577, "max": 0.05677289888262749, "mean": 0.0003429377684369683, "std": 0.014151404611766338, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_out.0.weight": { "min": -0.43683916330337524, "max": 0.37347522377967834, "mean": 1.453105596738169e-05, "std": 0.04412021487951279, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_out.0.bias": { "min": -0.096480593085289, "max": 0.17590999603271484, "mean": -0.0006604294758290052, "std": 0.03515587002038956, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.3.g": { "min": 0.4216778874397278, "max": 1.0693583488464355, "mean": 0.7482997179031372, "std": 0.04205985367298126, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.4.ff.0.0.weight": { "min": -0.2665577530860901, "max": 0.2968434989452362, "mean": -7.962346717249602e-05, "std": 0.040803126990795135, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.19.4.ff.0.0.bias": { "min": -0.18574897944927216, "max": 0.04386778548359871, "mean": -0.036819178611040115, "std": 0.02561137080192566, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.19.4.ff.2.weight": { "min": -0.45699048042297363, "max": 0.4864794611930847, "mean": 4.341273597674444e-05, "std": 0.05420761927962303, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.19.4.ff.2.bias": { "min": -0.28645777702331543, "max": 0.5512458086013794, "mean": -0.0008799894712865353, "std": 0.04782594367861748, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.0.weight": { "min": -0.29278504848480225, "max": 0.32276028394699097, "mean": 6.534221029141918e-06, "std": 0.019969386979937553, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.20.1.g": { "min": 0.29091978073120117, "max": 0.760124921798706, "mean": 0.6508240699768066, "std": 0.05213485658168793, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_q.weight": { "min": -0.24355527758598328, "max": 0.2617471516132355, "mean": -6.045864211046137e-06, "std": 0.03961271047592163, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_q.bias": { "min": -0.2675487995147705, "max": 0.19986717402935028, "mean": -0.0008803302189335227, "std": 0.051758527755737305, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_k.weight": { "min": -0.2720382511615753, "max": 0.25365304946899414, "mean": 3.97135409002658e-06, "std": 0.03870992362499237, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_k.bias": { "min": -12.963478088378906, "max": 15.945467948913574, "mean": 0.03322439640760422, "std": 1.988944411277771, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_v.weight": { "min": -0.20726989209651947, "max": 0.2258823961019516, "mean": -7.221873966045678e-05, "std": 0.04055318236351013, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_v.bias": { "min": -0.06934336572885513, "max": 0.06329023838043213, "mean": 0.00015188338875304908, "std": 0.014744000509381294, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_out.0.weight": { "min": -0.46502813696861267, "max": 0.3207668662071228, "mean": 1.9557133782655e-05, "std": 0.04058815911412239, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_out.0.bias": { "min": -0.06403840333223343, "max": 0.11518330872058868, "mean": 0.001191072165966034, "std": 0.02470429427921772, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.3.g": { "min": 0.3746289610862732, "max": 0.9322671294212341, "mean": 0.7508296370506287, "std": 0.040182456374168396, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.4.ff.0.0.weight": { "min": -0.2793700397014618, "max": 0.27312716841697693, "mean": -0.00016854800924193114, "std": 0.040993720293045044, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.20.4.ff.0.0.bias": { "min": -0.19878964126110077, "max": 0.050874363631010056, "mean": -0.03202495723962784, "std": 0.02511216513812542, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.20.4.ff.2.weight": { "min": -0.6572921276092529, "max": 0.5353701114654541, "mean": -4.860567787545733e-05, "std": 0.052844274789094925, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.20.4.ff.2.bias": { "min": -0.19308353960514069, "max": 0.5820099115371704, "mean": -0.0005148603231646121, "std": 0.04106666147708893, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.0.weight": { "min": -0.41772764921188354, "max": 0.3719545602798462, "mean": 6.02346335654147e-06, "std": 0.021620826795697212, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.21.1.g": { "min": 0.21424666047096252, "max": 0.7470943331718445, "mean": 0.6495506763458252, "std": 0.05437405779957771, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_q.weight": { "min": -0.2095523476600647, "max": 0.19568544626235962, "mean": 4.010393604403362e-05, "std": 0.03946491330862045, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_q.bias": { "min": -0.32928818464279175, "max": 0.2594093382358551, "mean": -0.0032241325825452805, "std": 0.05625630542635918, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_k.weight": { "min": -0.2056845873594284, "max": 0.254710853099823, "mean": 5.4258445743471384e-05, "std": 0.038567040115594864, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_k.bias": { "min": -6.242719650268555, "max": 6.931571006774902, "mean": 0.04833323508501053, "std": 1.384921908378601, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_v.weight": { "min": -0.20961354672908783, "max": 0.2300715446472168, "mean": -5.3330231821746565e-06, "std": 0.04131212830543518, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_v.bias": { "min": -0.04391402378678322, "max": 0.03599291667342186, "mean": 3.6780984373763204e-06, "std": 0.012800832279026508, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_out.0.weight": { "min": -0.39794921875, "max": 0.34475040435791016, "mean": -5.557174881687388e-05, "std": 0.0423884317278862, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_out.0.bias": { "min": -0.055058449506759644, "max": 0.06288675218820572, "mean": 0.0003690638695843518, "std": 0.018671618774533272, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.3.g": { "min": 0.3500124216079712, "max": 1.0451101064682007, "mean": 0.789310097694397, "std": 0.048743680119514465, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.4.ff.0.0.weight": { "min": -0.33340734243392944, "max": 0.3858667314052582, "mean": -0.00016963679809123278, "std": 0.04147941246628761, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.21.4.ff.0.0.bias": { "min": -0.15730711817741394, "max": 0.05913476645946503, "mean": -0.031834498047828674, "std": 0.025142161175608635, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.21.4.ff.2.weight": { "min": -0.6963925361633301, "max": 0.46865832805633545, "mean": -9.133096318691969e-05, "std": 0.05179010331630707, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.21.4.ff.2.bias": { "min": -0.248288094997406, "max": 0.3285192847251892, "mean": -0.0002480646944604814, "std": 0.04143183305859566, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.0.weight": { "min": -0.2872416079044342, "max": 0.35022279620170593, "mean": -2.109378556269803e-06, "std": 0.024238325655460358, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.22.1.g": { "min": 0.19658105075359344, "max": 0.7791422605514526, "mean": 0.6702942848205566, "std": 0.0586935319006443, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_q.weight": { "min": -0.22860872745513916, "max": 0.2311849147081375, "mean": -1.9817682186840102e-05, "std": 0.04044090211391449, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_q.bias": { "min": -0.21965830028057098, "max": 0.2406904250383377, "mean": 0.0007772702374495566, "std": 0.05579812079668045, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_k.weight": { "min": -0.21554625034332275, "max": 0.2266112118959427, "mean": -7.155907223932445e-05, "std": 0.03937710076570511, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_k.bias": { "min": -8.904163360595703, "max": 9.067035675048828, "mean": -0.001250317320227623, "std": 1.848069429397583, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_v.weight": { "min": -0.26928046345710754, "max": 0.2589084208011627, "mean": 4.358497244538739e-05, "std": 0.03840699419379234, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_v.bias": { "min": -0.05760843679308891, "max": 0.057633914053440094, "mean": 0.0003498811274766922, "std": 0.014721624553203583, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_out.0.weight": { "min": -0.265085905790329, "max": 0.2886793613433838, "mean": -6.175917224027216e-05, "std": 0.03907330706715584, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_out.0.bias": { "min": -0.043753523379564285, "max": 0.03726416453719139, "mean": -8.701729530002922e-05, "std": 0.013365592807531357, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.3.g": { "min": 0.3394947946071625, "max": 1.092633843421936, "mean": 0.8636797666549683, "std": 0.06384899467229843, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.4.ff.0.0.weight": { "min": -0.42328590154647827, "max": 0.4191039204597473, "mean": 0.0003126378287561238, "std": 0.043501876294612885, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.22.4.ff.0.0.bias": { "min": -0.2147369235754013, "max": 0.17059248685836792, "mean": -0.029485618695616722, "std": 0.03195330873131752, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.22.4.ff.2.weight": { "min": -0.5996397733688354, "max": 0.5595637559890747, "mean": -0.00015250420256052166, "std": 0.05344444885849953, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.22.4.ff.2.bias": { "min": -0.17845340073108673, "max": 0.37662389874458313, "mean": 0.0013645882718265057, "std": 0.037309858947992325, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.0.weight": { "min": -0.3942789137363434, "max": 0.36899739503860474, "mean": 3.645062679424882e-05, "std": 0.028621336445212364, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.23.1.g": { "min": 0.2902868390083313, "max": 0.8265326619148254, "mean": 0.7055679559707642, "std": 0.0678958147764206, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_q.weight": { "min": -0.926041305065155, "max": 1.026432991027832, "mean": -2.5475666916463524e-05, "std": 0.0476241335272789, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_q.bias": { "min": -0.87814861536026, "max": 0.8150070905685425, "mean": -0.00031320619746111333, "std": 0.09553563594818115, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_k.weight": { "min": -0.2693868577480316, "max": 0.24089287221431732, "mean": -2.29374309128616e-05, "std": 0.03895637392997742, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_k.bias": { "min": -23.73939323425293, "max": 22.84785270690918, "mean": -0.0918712168931961, "std": 4.0697784423828125, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_v.weight": { "min": -0.22775250673294067, "max": 0.24510256946086884, "mean": -2.5825131160672754e-05, "std": 0.03863884136080742, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_v.bias": { "min": -0.06045493483543396, "max": 0.04607832431793213, "mean": -0.00014694462879560888, "std": 0.01469829585403204, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_out.0.weight": { "min": -0.33846479654312134, "max": 0.37447792291641235, "mean": 7.293592716450803e-06, "std": 0.04081470146775246, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_out.0.bias": { "min": -0.04649795591831207, "max": 0.19573213160037994, "mean": 0.00027208085521124303, "std": 0.013573010452091694, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.3.g": { "min": 0.37458330392837524, "max": 1.1300410032272339, "mean": 0.8900002241134644, "std": 0.06398438662290573, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.4.ff.0.0.weight": { "min": -0.4478272497653961, "max": 0.5424814224243164, "mean": 2.45622759393882e-05, "std": 0.045566376298666, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.23.4.ff.0.0.bias": { "min": -0.22404029965400696, "max": 0.08835332095623016, "mean": -0.032017190009355545, "std": 0.03776315227150917, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.23.4.ff.2.weight": { "min": -0.7251995801925659, "max": 0.6892821788787842, "mean": 3.438512794673443e-05, "std": 0.05177679285407066, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.23.4.ff.2.bias": { "min": -0.1745474934577942, "max": 0.2185421884059906, "mean": 4.038875340484083e-05, "std": 0.03178102895617485, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.0.weight": { "min": -0.3403666019439697, "max": 0.3743104040622711, "mean": 4.2970114009222016e-05, "std": 0.03414527699351311, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.24.1.g": { "min": 0.31756705045700073, "max": 1.2868698835372925, "mean": 0.6014533042907715, "std": 0.08345934003591537, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_q.weight": { "min": -0.28337857127189636, "max": 0.26026472449302673, "mean": -3.1064557788340608e-06, "std": 0.03598480299115181, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_q.bias": { "min": -0.23555569350719452, "max": 0.2053573727607727, "mean": 0.0002324726083315909, "std": 0.05600997060537338, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_k.weight": { "min": -0.4354943335056305, "max": 0.3252315819263458, "mean": 2.4552073227823712e-05, "std": 0.03413620963692665, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_k.bias": { "min": -5.544710159301758, "max": 7.31260871887207, "mean": -0.007366638630628586, "std": 0.6992178559303284, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_v.weight": { "min": -0.34383082389831543, "max": 0.3635445833206177, "mean": 0.00010339185246266425, "std": 0.04782695323228836, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_v.bias": { "min": -0.07375096529722214, "max": 0.06034737080335617, "mean": 0.000933139817789197, "std": 0.014950517565011978, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_out.0.weight": { "min": -0.2554619610309601, "max": 0.28651097416877747, "mean": 4.460267518879846e-06, "std": 0.04155408963561058, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_out.0.bias": { "min": -0.055337581783533096, "max": 0.06284268200397491, "mean": 0.00014179576828610152, "std": 0.007177725899964571, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.3.g": { "min": 0.4937240481376648, "max": 1.2209070920944214, "mean": 1.01340913772583, "std": 0.11743401736021042, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.4.ff.0.0.weight": { "min": -1.0935479402542114, "max": 1.0468977689743042, "mean": -4.9845290050143376e-05, "std": 0.05240994319319725, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.24.4.ff.0.0.bias": { "min": -0.22365206480026245, "max": 0.17271095514297485, "mean": -0.027249177917838097, "std": 0.03635435923933983, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.24.4.ff.2.weight": { "min": -0.8846310973167419, "max": 0.9225372672080994, "mean": -0.00014597778499592096, "std": 0.053280774503946304, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.24.4.ff.2.bias": { "min": -0.17102883756160736, "max": 0.3799268901348114, "mean": 0.0033686391543596983, "std": 0.039900682866573334, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.0.weight": { "min": -0.7772161960601807, "max": 0.7236161828041077, "mean": 1.9240971596445888e-05, "std": 0.04616595432162285, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.25.1.g": { "min": 0.33854806423187256, "max": 1.4277222156524658, "mean": 0.9483012557029724, "std": 0.20673148334026337, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_q.weight": { "min": -1.7455986738204956, "max": 1.7045377492904663, "mean": 0.00022702554997522384, "std": 0.15868352353572845, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_q.bias": { "min": -1.199636459350586, "max": 1.0996308326721191, "mean": -0.009536425583064556, "std": 0.20382796227931976, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_k.weight": { "min": -0.4213047921657562, "max": 0.4262976348400116, "mean": 6.459288124460727e-05, "std": 0.04801792651414871, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_k.bias": { "min": -19.743492126464844, "max": 19.538597106933594, "mean": -0.24829509854316711, "std": 4.776083946228027, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_v.weight": { "min": -0.3239092528820038, "max": 0.43836328387260437, "mean": -1.204050931846723e-05, "std": 0.046160612255334854, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_v.bias": { "min": -0.0340789370238781, "max": 0.03713114559650421, "mean": 0.0006417044205591083, "std": 0.012921737506985664, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_out.0.weight": { "min": -0.7034957408905029, "max": 0.664257287979126, "mean": 4.352344694780186e-05, "std": 0.05788278207182884, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_out.0.bias": { "min": -0.07222186028957367, "max": 0.06749024242162704, "mean": -0.00013264152221381664, "std": 0.012920759618282318, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.3.g": { "min": 0.38012510538101196, "max": 1.3909755945205688, "mean": 1.0665355920791626, "std": 0.21970459818840027, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.4.ff.0.0.weight": { "min": -0.6164048314094543, "max": 0.7170195579528809, "mean": 0.00011136491957586259, "std": 0.05802035331726074, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.25.4.ff.0.0.bias": { "min": -0.21974356472492218, "max": 0.22506725788116455, "mean": 0.006242978852242231, "std": 0.04973088204860687, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.25.4.ff.2.weight": { "min": -0.6296619176864624, "max": 0.8891851902008057, "mean": 1.1489293683553115e-05, "std": 0.023526353761553764, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.25.4.ff.2.bias": { "min": -0.5068330764770508, "max": 0.4739985764026642, "mean": -0.0030159270390868187, "std": 0.06930534541606903, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.norm_out.g": { "min": 0.5377116799354553, "max": 1.180783748626709, "mean": 0.7827296257019043, "std": 0.09886873513460159, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.proj_out.weight": { "min": -0.2669491767883301, "max": 0.21265925467014313, "mean": -0.00022343886666931212, "std": 0.05399514362215996, "sparsity": 0.0, "shape": [ 100, 1024 ] }, "transformer.proj_out.bias": { "min": -0.23786094784736633, "max": 0.014840648509562016, "mean": -0.04396260902285576, "std": 0.034334905445575714, "sparsity": 0.0, "shape": [ 100 ] } } }