{ "layer_types": { "transformer": 391 }, "parameter_counts": { "transformer.time_embed.time_mlp.0.weight": 262144, "transformer.time_embed.time_mlp.0.bias": 1024, "transformer.time_embed.time_mlp.2.weight": 1048576, "transformer.time_embed.time_mlp.2.bias": 1024, "transformer.text_embed.text_embed.weight": 254600, "transformer.input_embed.proj.weight": 307200, "transformer.input_embed.proj.bias": 1024, "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, "transformer.layers.0.1.g": 1024, "transformer.layers.0.2.to_q.weight": 1048576, "transformer.layers.0.2.to_q.bias": 1024, "transformer.layers.0.2.to_k.weight": 1048576, "transformer.layers.0.2.to_k.bias": 1024, "transformer.layers.0.2.to_v.weight": 1048576, "transformer.layers.0.2.to_v.bias": 1024, "transformer.layers.0.2.to_out.0.weight": 1048576, "transformer.layers.0.2.to_out.0.bias": 1024, "transformer.layers.0.3.g": 1024, "transformer.layers.0.4.ff.0.0.weight": 4194304, "transformer.layers.0.4.ff.0.0.bias": 4096, "transformer.layers.0.4.ff.2.weight": 4194304, "transformer.layers.0.4.ff.2.bias": 1024, "transformer.layers.1.1.g": 1024, "transformer.layers.1.2.to_q.weight": 1048576, "transformer.layers.1.2.to_q.bias": 1024, "transformer.layers.1.2.to_k.weight": 1048576, "transformer.layers.1.2.to_k.bias": 1024, "transformer.layers.1.2.to_v.weight": 1048576, "transformer.layers.1.2.to_v.bias": 1024, "transformer.layers.1.2.to_out.0.weight": 1048576, "transformer.layers.1.2.to_out.0.bias": 1024, "transformer.layers.1.3.g": 1024, "transformer.layers.1.4.ff.0.0.weight": 4194304, "transformer.layers.1.4.ff.0.0.bias": 4096, "transformer.layers.1.4.ff.2.weight": 4194304, "transformer.layers.1.4.ff.2.bias": 1024, "transformer.layers.2.1.g": 1024, "transformer.layers.2.2.to_q.weight": 1048576, "transformer.layers.2.2.to_q.bias": 1024, "transformer.layers.2.2.to_k.weight": 1048576, "transformer.layers.2.2.to_k.bias": 1024, "transformer.layers.2.2.to_v.weight": 1048576, "transformer.layers.2.2.to_v.bias": 1024, "transformer.layers.2.2.to_out.0.weight": 1048576, "transformer.layers.2.2.to_out.0.bias": 1024, "transformer.layers.2.3.g": 1024, "transformer.layers.2.4.ff.0.0.weight": 4194304, "transformer.layers.2.4.ff.0.0.bias": 4096, "transformer.layers.2.4.ff.2.weight": 4194304, "transformer.layers.2.4.ff.2.bias": 1024, "transformer.layers.3.1.g": 1024, "transformer.layers.3.2.to_q.weight": 1048576, "transformer.layers.3.2.to_q.bias": 1024, "transformer.layers.3.2.to_k.weight": 1048576, "transformer.layers.3.2.to_k.bias": 1024, "transformer.layers.3.2.to_v.weight": 1048576, "transformer.layers.3.2.to_v.bias": 1024, "transformer.layers.3.2.to_out.0.weight": 1048576, "transformer.layers.3.2.to_out.0.bias": 1024, "transformer.layers.3.3.g": 1024, "transformer.layers.3.4.ff.0.0.weight": 4194304, "transformer.layers.3.4.ff.0.0.bias": 4096, "transformer.layers.3.4.ff.2.weight": 4194304, "transformer.layers.3.4.ff.2.bias": 1024, "transformer.layers.4.1.g": 1024, "transformer.layers.4.2.to_q.weight": 1048576, "transformer.layers.4.2.to_q.bias": 1024, "transformer.layers.4.2.to_k.weight": 1048576, "transformer.layers.4.2.to_k.bias": 1024, "transformer.layers.4.2.to_v.weight": 1048576, "transformer.layers.4.2.to_v.bias": 1024, "transformer.layers.4.2.to_out.0.weight": 1048576, "transformer.layers.4.2.to_out.0.bias": 1024, "transformer.layers.4.3.g": 1024, "transformer.layers.4.4.ff.0.0.weight": 4194304, "transformer.layers.4.4.ff.0.0.bias": 4096, "transformer.layers.4.4.ff.2.weight": 4194304, "transformer.layers.4.4.ff.2.bias": 1024, "transformer.layers.5.1.g": 1024, "transformer.layers.5.2.to_q.weight": 1048576, "transformer.layers.5.2.to_q.bias": 1024, "transformer.layers.5.2.to_k.weight": 1048576, "transformer.layers.5.2.to_k.bias": 1024, "transformer.layers.5.2.to_v.weight": 1048576, "transformer.layers.5.2.to_v.bias": 1024, "transformer.layers.5.2.to_out.0.weight": 1048576, "transformer.layers.5.2.to_out.0.bias": 1024, "transformer.layers.5.3.g": 1024, "transformer.layers.5.4.ff.0.0.weight": 4194304, "transformer.layers.5.4.ff.0.0.bias": 4096, "transformer.layers.5.4.ff.2.weight": 4194304, "transformer.layers.5.4.ff.2.bias": 1024, "transformer.layers.6.1.g": 1024, "transformer.layers.6.2.to_q.weight": 1048576, "transformer.layers.6.2.to_q.bias": 1024, "transformer.layers.6.2.to_k.weight": 1048576, "transformer.layers.6.2.to_k.bias": 1024, "transformer.layers.6.2.to_v.weight": 1048576, "transformer.layers.6.2.to_v.bias": 1024, "transformer.layers.6.2.to_out.0.weight": 1048576, "transformer.layers.6.2.to_out.0.bias": 1024, "transformer.layers.6.3.g": 1024, "transformer.layers.6.4.ff.0.0.weight": 4194304, "transformer.layers.6.4.ff.0.0.bias": 4096, "transformer.layers.6.4.ff.2.weight": 4194304, "transformer.layers.6.4.ff.2.bias": 1024, "transformer.layers.7.1.g": 1024, "transformer.layers.7.2.to_q.weight": 1048576, "transformer.layers.7.2.to_q.bias": 1024, "transformer.layers.7.2.to_k.weight": 1048576, "transformer.layers.7.2.to_k.bias": 1024, "transformer.layers.7.2.to_v.weight": 1048576, "transformer.layers.7.2.to_v.bias": 1024, "transformer.layers.7.2.to_out.0.weight": 1048576, "transformer.layers.7.2.to_out.0.bias": 1024, "transformer.layers.7.3.g": 1024, "transformer.layers.7.4.ff.0.0.weight": 4194304, "transformer.layers.7.4.ff.0.0.bias": 4096, "transformer.layers.7.4.ff.2.weight": 4194304, "transformer.layers.7.4.ff.2.bias": 1024, "transformer.layers.8.1.g": 1024, "transformer.layers.8.2.to_q.weight": 1048576, "transformer.layers.8.2.to_q.bias": 1024, "transformer.layers.8.2.to_k.weight": 1048576, "transformer.layers.8.2.to_k.bias": 1024, "transformer.layers.8.2.to_v.weight": 1048576, "transformer.layers.8.2.to_v.bias": 1024, "transformer.layers.8.2.to_out.0.weight": 1048576, "transformer.layers.8.2.to_out.0.bias": 1024, "transformer.layers.8.3.g": 1024, "transformer.layers.8.4.ff.0.0.weight": 4194304, "transformer.layers.8.4.ff.0.0.bias": 4096, "transformer.layers.8.4.ff.2.weight": 4194304, "transformer.layers.8.4.ff.2.bias": 1024, "transformer.layers.9.1.g": 1024, "transformer.layers.9.2.to_q.weight": 1048576, "transformer.layers.9.2.to_q.bias": 1024, "transformer.layers.9.2.to_k.weight": 1048576, "transformer.layers.9.2.to_k.bias": 1024, "transformer.layers.9.2.to_v.weight": 1048576, "transformer.layers.9.2.to_v.bias": 1024, "transformer.layers.9.2.to_out.0.weight": 1048576, "transformer.layers.9.2.to_out.0.bias": 1024, "transformer.layers.9.3.g": 1024, "transformer.layers.9.4.ff.0.0.weight": 4194304, "transformer.layers.9.4.ff.0.0.bias": 4096, "transformer.layers.9.4.ff.2.weight": 4194304, "transformer.layers.9.4.ff.2.bias": 1024, "transformer.layers.10.1.g": 1024, "transformer.layers.10.2.to_q.weight": 1048576, "transformer.layers.10.2.to_q.bias": 1024, "transformer.layers.10.2.to_k.weight": 1048576, "transformer.layers.10.2.to_k.bias": 1024, "transformer.layers.10.2.to_v.weight": 1048576, "transformer.layers.10.2.to_v.bias": 1024, "transformer.layers.10.2.to_out.0.weight": 1048576, "transformer.layers.10.2.to_out.0.bias": 1024, "transformer.layers.10.3.g": 1024, "transformer.layers.10.4.ff.0.0.weight": 4194304, "transformer.layers.10.4.ff.0.0.bias": 4096, "transformer.layers.10.4.ff.2.weight": 4194304, "transformer.layers.10.4.ff.2.bias": 1024, "transformer.layers.11.1.g": 1024, "transformer.layers.11.2.to_q.weight": 1048576, "transformer.layers.11.2.to_q.bias": 1024, "transformer.layers.11.2.to_k.weight": 1048576, "transformer.layers.11.2.to_k.bias": 1024, "transformer.layers.11.2.to_v.weight": 1048576, "transformer.layers.11.2.to_v.bias": 1024, "transformer.layers.11.2.to_out.0.weight": 1048576, "transformer.layers.11.2.to_out.0.bias": 1024, "transformer.layers.11.3.g": 1024, "transformer.layers.11.4.ff.0.0.weight": 4194304, "transformer.layers.11.4.ff.0.0.bias": 4096, "transformer.layers.11.4.ff.2.weight": 4194304, "transformer.layers.11.4.ff.2.bias": 1024, "transformer.layers.12.1.g": 1024, "transformer.layers.12.2.to_q.weight": 1048576, "transformer.layers.12.2.to_q.bias": 1024, "transformer.layers.12.2.to_k.weight": 1048576, "transformer.layers.12.2.to_k.bias": 1024, "transformer.layers.12.2.to_v.weight": 1048576, "transformer.layers.12.2.to_v.bias": 1024, "transformer.layers.12.2.to_out.0.weight": 1048576, "transformer.layers.12.2.to_out.0.bias": 1024, "transformer.layers.12.3.g": 1024, "transformer.layers.12.4.ff.0.0.weight": 4194304, "transformer.layers.12.4.ff.0.0.bias": 4096, "transformer.layers.12.4.ff.2.weight": 4194304, "transformer.layers.12.4.ff.2.bias": 1024, "transformer.layers.13.0.weight": 2097152, "transformer.layers.13.1.g": 1024, "transformer.layers.13.2.to_q.weight": 1048576, "transformer.layers.13.2.to_q.bias": 1024, "transformer.layers.13.2.to_k.weight": 1048576, "transformer.layers.13.2.to_k.bias": 1024, "transformer.layers.13.2.to_v.weight": 1048576, "transformer.layers.13.2.to_v.bias": 1024, "transformer.layers.13.2.to_out.0.weight": 1048576, "transformer.layers.13.2.to_out.0.bias": 1024, "transformer.layers.13.3.g": 1024, "transformer.layers.13.4.ff.0.0.weight": 4194304, "transformer.layers.13.4.ff.0.0.bias": 4096, "transformer.layers.13.4.ff.2.weight": 4194304, "transformer.layers.13.4.ff.2.bias": 1024, "transformer.layers.14.0.weight": 2097152, "transformer.layers.14.1.g": 1024, "transformer.layers.14.2.to_q.weight": 1048576, "transformer.layers.14.2.to_q.bias": 1024, "transformer.layers.14.2.to_k.weight": 1048576, "transformer.layers.14.2.to_k.bias": 1024, "transformer.layers.14.2.to_v.weight": 1048576, "transformer.layers.14.2.to_v.bias": 1024, "transformer.layers.14.2.to_out.0.weight": 1048576, "transformer.layers.14.2.to_out.0.bias": 1024, "transformer.layers.14.3.g": 1024, "transformer.layers.14.4.ff.0.0.weight": 4194304, "transformer.layers.14.4.ff.0.0.bias": 4096, "transformer.layers.14.4.ff.2.weight": 4194304, "transformer.layers.14.4.ff.2.bias": 1024, "transformer.layers.15.0.weight": 2097152, "transformer.layers.15.1.g": 1024, "transformer.layers.15.2.to_q.weight": 1048576, "transformer.layers.15.2.to_q.bias": 1024, "transformer.layers.15.2.to_k.weight": 1048576, "transformer.layers.15.2.to_k.bias": 1024, "transformer.layers.15.2.to_v.weight": 1048576, "transformer.layers.15.2.to_v.bias": 1024, "transformer.layers.15.2.to_out.0.weight": 1048576, "transformer.layers.15.2.to_out.0.bias": 1024, "transformer.layers.15.3.g": 1024, "transformer.layers.15.4.ff.0.0.weight": 4194304, "transformer.layers.15.4.ff.0.0.bias": 4096, "transformer.layers.15.4.ff.2.weight": 4194304, "transformer.layers.15.4.ff.2.bias": 1024, "transformer.layers.16.0.weight": 2097152, "transformer.layers.16.1.g": 1024, "transformer.layers.16.2.to_q.weight": 1048576, "transformer.layers.16.2.to_q.bias": 1024, "transformer.layers.16.2.to_k.weight": 1048576, "transformer.layers.16.2.to_k.bias": 1024, "transformer.layers.16.2.to_v.weight": 1048576, "transformer.layers.16.2.to_v.bias": 1024, "transformer.layers.16.2.to_out.0.weight": 1048576, "transformer.layers.16.2.to_out.0.bias": 1024, "transformer.layers.16.3.g": 1024, "transformer.layers.16.4.ff.0.0.weight": 4194304, "transformer.layers.16.4.ff.0.0.bias": 4096, "transformer.layers.16.4.ff.2.weight": 4194304, "transformer.layers.16.4.ff.2.bias": 1024, "transformer.layers.17.0.weight": 2097152, "transformer.layers.17.1.g": 1024, "transformer.layers.17.2.to_q.weight": 1048576, "transformer.layers.17.2.to_q.bias": 1024, "transformer.layers.17.2.to_k.weight": 1048576, "transformer.layers.17.2.to_k.bias": 1024, "transformer.layers.17.2.to_v.weight": 1048576, "transformer.layers.17.2.to_v.bias": 1024, "transformer.layers.17.2.to_out.0.weight": 1048576, "transformer.layers.17.2.to_out.0.bias": 1024, "transformer.layers.17.3.g": 1024, "transformer.layers.17.4.ff.0.0.weight": 4194304, "transformer.layers.17.4.ff.0.0.bias": 4096, "transformer.layers.17.4.ff.2.weight": 4194304, "transformer.layers.17.4.ff.2.bias": 1024, "transformer.layers.18.0.weight": 2097152, "transformer.layers.18.1.g": 1024, "transformer.layers.18.2.to_q.weight": 1048576, "transformer.layers.18.2.to_q.bias": 1024, "transformer.layers.18.2.to_k.weight": 1048576, "transformer.layers.18.2.to_k.bias": 1024, "transformer.layers.18.2.to_v.weight": 1048576, "transformer.layers.18.2.to_v.bias": 1024, "transformer.layers.18.2.to_out.0.weight": 1048576, "transformer.layers.18.2.to_out.0.bias": 1024, "transformer.layers.18.3.g": 1024, "transformer.layers.18.4.ff.0.0.weight": 4194304, "transformer.layers.18.4.ff.0.0.bias": 4096, "transformer.layers.18.4.ff.2.weight": 4194304, "transformer.layers.18.4.ff.2.bias": 1024, "transformer.layers.19.0.weight": 2097152, "transformer.layers.19.1.g": 1024, "transformer.layers.19.2.to_q.weight": 1048576, "transformer.layers.19.2.to_q.bias": 1024, "transformer.layers.19.2.to_k.weight": 1048576, "transformer.layers.19.2.to_k.bias": 1024, "transformer.layers.19.2.to_v.weight": 1048576, "transformer.layers.19.2.to_v.bias": 1024, "transformer.layers.19.2.to_out.0.weight": 1048576, "transformer.layers.19.2.to_out.0.bias": 1024, "transformer.layers.19.3.g": 1024, "transformer.layers.19.4.ff.0.0.weight": 4194304, "transformer.layers.19.4.ff.0.0.bias": 4096, "transformer.layers.19.4.ff.2.weight": 4194304, "transformer.layers.19.4.ff.2.bias": 1024, "transformer.layers.20.0.weight": 2097152, "transformer.layers.20.1.g": 1024, "transformer.layers.20.2.to_q.weight": 1048576, "transformer.layers.20.2.to_q.bias": 1024, "transformer.layers.20.2.to_k.weight": 1048576, "transformer.layers.20.2.to_k.bias": 1024, "transformer.layers.20.2.to_v.weight": 1048576, "transformer.layers.20.2.to_v.bias": 1024, "transformer.layers.20.2.to_out.0.weight": 1048576, "transformer.layers.20.2.to_out.0.bias": 1024, "transformer.layers.20.3.g": 1024, "transformer.layers.20.4.ff.0.0.weight": 4194304, "transformer.layers.20.4.ff.0.0.bias": 4096, "transformer.layers.20.4.ff.2.weight": 4194304, "transformer.layers.20.4.ff.2.bias": 1024, "transformer.layers.21.0.weight": 2097152, "transformer.layers.21.1.g": 1024, "transformer.layers.21.2.to_q.weight": 1048576, "transformer.layers.21.2.to_q.bias": 1024, "transformer.layers.21.2.to_k.weight": 1048576, "transformer.layers.21.2.to_k.bias": 1024, "transformer.layers.21.2.to_v.weight": 1048576, "transformer.layers.21.2.to_v.bias": 1024, "transformer.layers.21.2.to_out.0.weight": 1048576, "transformer.layers.21.2.to_out.0.bias": 1024, "transformer.layers.21.3.g": 1024, "transformer.layers.21.4.ff.0.0.weight": 4194304, "transformer.layers.21.4.ff.0.0.bias": 4096, "transformer.layers.21.4.ff.2.weight": 4194304, "transformer.layers.21.4.ff.2.bias": 1024, "transformer.layers.22.0.weight": 2097152, "transformer.layers.22.1.g": 1024, "transformer.layers.22.2.to_q.weight": 1048576, "transformer.layers.22.2.to_q.bias": 1024, "transformer.layers.22.2.to_k.weight": 1048576, "transformer.layers.22.2.to_k.bias": 1024, "transformer.layers.22.2.to_v.weight": 1048576, "transformer.layers.22.2.to_v.bias": 1024, "transformer.layers.22.2.to_out.0.weight": 1048576, "transformer.layers.22.2.to_out.0.bias": 1024, "transformer.layers.22.3.g": 1024, "transformer.layers.22.4.ff.0.0.weight": 4194304, "transformer.layers.22.4.ff.0.0.bias": 4096, "transformer.layers.22.4.ff.2.weight": 4194304, "transformer.layers.22.4.ff.2.bias": 1024, "transformer.layers.23.0.weight": 2097152, "transformer.layers.23.1.g": 1024, "transformer.layers.23.2.to_q.weight": 1048576, "transformer.layers.23.2.to_q.bias": 1024, "transformer.layers.23.2.to_k.weight": 1048576, "transformer.layers.23.2.to_k.bias": 1024, "transformer.layers.23.2.to_v.weight": 1048576, "transformer.layers.23.2.to_v.bias": 1024, "transformer.layers.23.2.to_out.0.weight": 1048576, "transformer.layers.23.2.to_out.0.bias": 1024, "transformer.layers.23.3.g": 1024, "transformer.layers.23.4.ff.0.0.weight": 4194304, "transformer.layers.23.4.ff.0.0.bias": 4096, "transformer.layers.23.4.ff.2.weight": 4194304, "transformer.layers.23.4.ff.2.bias": 1024, "transformer.layers.24.0.weight": 2097152, "transformer.layers.24.1.g": 1024, "transformer.layers.24.2.to_q.weight": 1048576, "transformer.layers.24.2.to_q.bias": 1024, "transformer.layers.24.2.to_k.weight": 1048576, "transformer.layers.24.2.to_k.bias": 1024, "transformer.layers.24.2.to_v.weight": 1048576, "transformer.layers.24.2.to_v.bias": 1024, "transformer.layers.24.2.to_out.0.weight": 1048576, "transformer.layers.24.2.to_out.0.bias": 1024, "transformer.layers.24.3.g": 1024, "transformer.layers.24.4.ff.0.0.weight": 4194304, "transformer.layers.24.4.ff.0.0.bias": 4096, "transformer.layers.24.4.ff.2.weight": 4194304, "transformer.layers.24.4.ff.2.bias": 1024, "transformer.layers.25.0.weight": 2097152, "transformer.layers.25.1.g": 1024, "transformer.layers.25.2.to_q.weight": 1048576, "transformer.layers.25.2.to_q.bias": 1024, "transformer.layers.25.2.to_k.weight": 1048576, "transformer.layers.25.2.to_k.bias": 1024, "transformer.layers.25.2.to_v.weight": 1048576, "transformer.layers.25.2.to_v.bias": 1024, "transformer.layers.25.2.to_out.0.weight": 1048576, "transformer.layers.25.2.to_out.0.bias": 1024, "transformer.layers.25.3.g": 1024, "transformer.layers.25.4.ff.0.0.weight": 4194304, "transformer.layers.25.4.ff.0.0.bias": 4096, "transformer.layers.25.4.ff.2.weight": 4194304, "transformer.layers.25.4.ff.2.bias": 1024, "transformer.norm_out.g": 1024, "transformer.proj_out.weight": 102400, "transformer.proj_out.bias": 100 }, "important_layers": [ "transformer.time_embed.time_mlp.0.weight", "transformer.time_embed.time_mlp.2.weight", "transformer.text_embed.text_embed.weight", "transformer.input_embed.proj.weight", "transformer.input_embed.conv_pos_embed.conv1d.0.weight", "transformer.input_embed.conv_pos_embed.conv1d.2.weight", "transformer.layers.0.2.to_q.weight", "transformer.layers.0.2.to_k.weight", "transformer.layers.0.2.to_v.weight", "transformer.layers.0.2.to_out.0.weight", "transformer.layers.0.4.ff.0.0.weight", "transformer.layers.0.4.ff.2.weight", "transformer.layers.1.2.to_q.weight", "transformer.layers.1.2.to_k.weight", "transformer.layers.1.2.to_v.weight", "transformer.layers.1.2.to_out.0.weight", "transformer.layers.1.4.ff.0.0.weight", "transformer.layers.1.4.ff.2.weight", "transformer.layers.2.2.to_q.weight", "transformer.layers.2.2.to_k.weight", "transformer.layers.2.2.to_v.weight", "transformer.layers.2.2.to_out.0.weight", "transformer.layers.2.4.ff.0.0.weight", "transformer.layers.2.4.ff.2.weight", "transformer.layers.3.2.to_q.weight", "transformer.layers.3.2.to_k.weight", "transformer.layers.3.2.to_v.weight", "transformer.layers.3.2.to_out.0.weight", "transformer.layers.3.4.ff.0.0.weight", "transformer.layers.3.4.ff.2.weight", "transformer.layers.4.2.to_q.weight", "transformer.layers.4.2.to_k.weight", "transformer.layers.4.2.to_v.weight", "transformer.layers.4.2.to_out.0.weight", "transformer.layers.4.4.ff.0.0.weight", "transformer.layers.4.4.ff.2.weight", "transformer.layers.5.2.to_q.weight", "transformer.layers.5.2.to_k.weight", "transformer.layers.5.2.to_v.weight", "transformer.layers.5.2.to_out.0.weight", "transformer.layers.5.4.ff.0.0.weight", "transformer.layers.5.4.ff.2.weight", "transformer.layers.6.2.to_q.weight", "transformer.layers.6.2.to_k.weight", "transformer.layers.6.2.to_v.weight", "transformer.layers.6.2.to_out.0.weight", "transformer.layers.6.4.ff.0.0.weight", "transformer.layers.6.4.ff.2.weight", "transformer.layers.7.2.to_q.weight", "transformer.layers.7.2.to_k.weight", "transformer.layers.7.2.to_v.weight", "transformer.layers.7.2.to_out.0.weight", "transformer.layers.7.4.ff.0.0.weight", "transformer.layers.7.4.ff.2.weight", "transformer.layers.8.4.ff.0.0.weight", "transformer.layers.8.4.ff.2.weight", "transformer.layers.9.4.ff.0.0.weight", "transformer.layers.9.4.ff.2.weight", "transformer.layers.10.4.ff.0.0.weight", "transformer.layers.10.4.ff.2.weight", "transformer.layers.11.4.ff.0.0.weight", "transformer.layers.11.4.ff.2.weight", "transformer.layers.12.4.ff.0.0.weight", "transformer.layers.12.4.ff.2.weight", "transformer.layers.13.0.weight", "transformer.layers.13.4.ff.0.0.weight", "transformer.layers.13.4.ff.2.weight", "transformer.layers.14.0.weight", "transformer.layers.14.4.ff.0.0.weight", "transformer.layers.14.4.ff.2.weight", "transformer.layers.15.0.weight", "transformer.layers.15.4.ff.0.0.weight", "transformer.layers.15.4.ff.2.weight", "transformer.layers.16.4.ff.0.0.weight", "transformer.layers.16.4.ff.2.weight", "transformer.layers.17.4.ff.0.0.weight", "transformer.layers.17.4.ff.2.weight", "transformer.layers.18.4.ff.0.0.weight", "transformer.layers.18.4.ff.2.weight", "transformer.layers.19.4.ff.0.0.weight", "transformer.layers.19.4.ff.2.weight", "transformer.layers.20.4.ff.0.0.weight", "transformer.layers.20.4.ff.2.weight", "transformer.layers.21.4.ff.0.0.weight", "transformer.layers.21.4.ff.2.weight", "transformer.layers.22.4.ff.0.0.weight", "transformer.layers.22.4.ff.2.weight", "transformer.layers.23.4.ff.0.0.weight", "transformer.layers.23.4.ff.2.weight", "transformer.layers.24.4.ff.0.0.weight", "transformer.layers.24.4.ff.2.weight", "transformer.layers.25.4.ff.0.0.weight", "transformer.layers.25.4.ff.2.weight" ], "bottleneck_layers": [], "recommendations": { "focus_layers": [ "transformer.time_embed.time_mlp.0.weight", "transformer.time_embed.time_mlp.2.weight", "transformer.text_embed.text_embed.weight", "transformer.input_embed.proj.weight", "transformer.input_embed.conv_pos_embed.conv1d.0.weight", "transformer.input_embed.conv_pos_embed.conv1d.2.weight", "transformer.layers.0.2.to_q.weight", "transformer.layers.0.2.to_k.weight", "transformer.layers.0.2.to_v.weight", "transformer.layers.0.2.to_out.0.weight", "transformer.layers.0.4.ff.0.0.weight", "transformer.layers.0.4.ff.2.weight", "transformer.layers.1.2.to_q.weight", "transformer.layers.1.2.to_k.weight", "transformer.layers.1.2.to_v.weight", "transformer.layers.1.2.to_out.0.weight", "transformer.layers.1.4.ff.0.0.weight", "transformer.layers.1.4.ff.2.weight", "transformer.layers.2.2.to_q.weight", "transformer.layers.2.2.to_k.weight", "transformer.layers.2.2.to_v.weight", "transformer.layers.2.2.to_out.0.weight", "transformer.layers.2.4.ff.0.0.weight", "transformer.layers.2.4.ff.2.weight", "transformer.layers.3.2.to_q.weight", "transformer.layers.3.2.to_k.weight", "transformer.layers.3.2.to_v.weight", "transformer.layers.3.2.to_out.0.weight", "transformer.layers.3.4.ff.0.0.weight", "transformer.layers.3.4.ff.2.weight", "transformer.layers.4.2.to_q.weight", "transformer.layers.4.2.to_k.weight", "transformer.layers.4.2.to_v.weight", "transformer.layers.4.2.to_out.0.weight", "transformer.layers.4.4.ff.0.0.weight", "transformer.layers.4.4.ff.2.weight", "transformer.layers.5.2.to_q.weight", "transformer.layers.5.2.to_k.weight", "transformer.layers.5.2.to_v.weight", "transformer.layers.5.2.to_out.0.weight", "transformer.layers.5.4.ff.0.0.weight", "transformer.layers.5.4.ff.2.weight", "transformer.layers.6.2.to_q.weight", "transformer.layers.6.2.to_k.weight", "transformer.layers.6.2.to_v.weight", "transformer.layers.6.2.to_out.0.weight", "transformer.layers.6.4.ff.0.0.weight", "transformer.layers.6.4.ff.2.weight", "transformer.layers.7.2.to_q.weight", "transformer.layers.7.2.to_k.weight", "transformer.layers.7.2.to_v.weight", "transformer.layers.7.2.to_out.0.weight", "transformer.layers.7.4.ff.0.0.weight", "transformer.layers.7.4.ff.2.weight", "transformer.layers.8.4.ff.0.0.weight", "transformer.layers.8.4.ff.2.weight", "transformer.layers.9.4.ff.0.0.weight", "transformer.layers.9.4.ff.2.weight", "transformer.layers.10.4.ff.0.0.weight", "transformer.layers.10.4.ff.2.weight", "transformer.layers.11.4.ff.0.0.weight", "transformer.layers.11.4.ff.2.weight", "transformer.layers.12.4.ff.0.0.weight", "transformer.layers.12.4.ff.2.weight", "transformer.layers.13.0.weight", "transformer.layers.13.4.ff.0.0.weight", "transformer.layers.13.4.ff.2.weight", "transformer.layers.14.0.weight", "transformer.layers.14.4.ff.0.0.weight", "transformer.layers.14.4.ff.2.weight", "transformer.layers.15.0.weight", "transformer.layers.15.4.ff.0.0.weight", "transformer.layers.15.4.ff.2.weight", "transformer.layers.16.4.ff.0.0.weight", "transformer.layers.16.4.ff.2.weight", "transformer.layers.17.4.ff.0.0.weight", "transformer.layers.17.4.ff.2.weight", "transformer.layers.18.4.ff.0.0.weight", "transformer.layers.18.4.ff.2.weight", "transformer.layers.19.4.ff.0.0.weight", "transformer.layers.19.4.ff.2.weight", "transformer.layers.20.4.ff.0.0.weight", "transformer.layers.20.4.ff.2.weight", "transformer.layers.21.4.ff.0.0.weight", "transformer.layers.21.4.ff.2.weight", "transformer.layers.22.4.ff.0.0.weight", "transformer.layers.22.4.ff.2.weight", "transformer.layers.23.4.ff.0.0.weight", "transformer.layers.23.4.ff.2.weight", "transformer.layers.24.4.ff.0.0.weight", "transformer.layers.24.4.ff.2.weight", "transformer.layers.25.4.ff.0.0.weight", "transformer.layers.25.4.ff.2.weight" ] }, "total_parameters": 391, "total_elements": 360755948, "param_ranges": { "transformer.time_embed.time_mlp.0.weight": { "min": -0.43111443519592285, "max": 0.2988463342189789, "mean": -0.0025462331250309944, "std": 0.04255734384059906, "sparsity": 0.0, "shape": [ 1024, 256 ] }, "transformer.time_embed.time_mlp.0.bias": { "min": -0.06311740726232529, "max": 0.10821832716464996, "mean": 0.0006233985768631101, "std": 0.03409506380558014, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.time_embed.time_mlp.2.weight": { "min": -0.41270628571510315, "max": 0.8365904092788696, "mean": -0.0002062078274320811, "std": 0.024108584970235825, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.time_embed.time_mlp.2.bias": { "min": -0.11594842374324799, "max": 0.323304146528244, "mean": -0.0009396584937348962, "std": 0.019620178267359734, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.text_embed.text_embed.weight": { "min": -2.8046321868896484, "max": 2.8845088481903076, "mean": -0.00036305765388533473, "std": 0.615403413772583, "sparsity": 0.0, "shape": [ 2546, 100 ] }, "transformer.input_embed.proj.weight": { "min": -0.2803097069263458, "max": 0.3821697235107422, "mean": 0.0004250165948178619, "std": 0.042748384177684784, "sparsity": 0.0, "shape": [ 1024, 300 ] }, "transformer.input_embed.proj.bias": { "min": -0.22351907193660736, "max": 0.21069680154323578, "mean": -0.004498748108744621, "std": 0.04097301885485649, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { "min": -0.4281409978866577, "max": 0.47565823793411255, "mean": 3.041478066734271e-06, "std": 0.024508286267518997, "sparsity": 0.0, "shape": [ 1024, 64, 31 ] }, "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { "min": -0.32690364122390747, "max": 0.15677706897258759, "mean": -0.04671286791563034, "std": 0.05161474645137787, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { "min": -0.41106897592544556, "max": 0.3550392687320709, "mean": -0.00012950549717061222, "std": 0.023600473999977112, "sparsity": 0.0, "shape": [ 1024, 64, 31 ] }, "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { "min": -0.23076964914798737, "max": 0.2638300061225891, "mean": -0.029151970520615578, "std": 0.049401458352804184, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.1.g": { "min": 0.25456827878952026, "max": 0.8219638466835022, "mean": 0.525442898273468, "std": 0.08086482435464859, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_q.weight": { "min": -0.2974269390106201, "max": 0.26618602871894836, "mean": -0.0004250289057381451, "std": 0.0321008674800396, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_q.bias": { "min": -0.09282378107309341, "max": 0.12510952353477478, "mean": 0.0006503364420495927, "std": 0.025732681155204773, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_k.weight": { "min": -0.29088306427001953, "max": 0.28188201785087585, "mean": -7.563710096292198e-05, "std": 0.030931729823350906, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_k.bias": { "min": -5.909866809844971, "max": 5.824496746063232, "mean": -0.009385589510202408, "std": 1.2966406345367432, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_v.weight": { "min": -0.4253852665424347, "max": 0.34430131316185, "mean": 9.75119328359142e-05, "std": 0.02995217591524124, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_v.bias": { "min": -0.028903231024742126, "max": 0.027659673243761063, "mean": -0.00031527443206869066, "std": 0.012571859173476696, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_out.0.weight": { "min": -0.45454347133636475, "max": 0.44891107082366943, "mean": 2.3480326490243897e-05, "std": 0.023853568360209465, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_out.0.bias": { "min": -0.08878406882286072, "max": 0.09124661237001419, "mean": 0.002279076725244522, "std": 0.019516194239258766, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.3.g": { "min": 0.2667350471019745, "max": 1.0590577125549316, "mean": 0.5311722159385681, "std": 0.10455667227506638, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.4.ff.0.0.weight": { "min": -0.5753205418586731, "max": 0.6092038154602051, "mean": -0.0004317538405302912, "std": 0.038596246391534805, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.0.4.ff.0.0.bias": { "min": -0.18242540955543518, "max": 0.04575135558843613, "mean": -0.02945941686630249, "std": 0.04261056333780289, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.0.4.ff.2.weight": { "min": -1.167878270149231, "max": 1.6351370811462402, "mean": 0.00032057490898296237, "std": 0.02769383229315281, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.0.4.ff.2.bias": { "min": -0.1625949591398239, "max": 0.2059435099363327, "mean": -0.02112039364874363, "std": 0.027941575273871422, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.1.g": { "min": 0.22422762215137482, "max": 0.8458681702613831, "mean": 0.4875890910625458, "std": 0.07528901100158691, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_q.weight": { "min": -0.2560153305530548, "max": 0.3063727021217346, "mean": -8.626433555036783e-06, "std": 0.033470120280981064, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_q.bias": { "min": -0.09546571969985962, "max": 0.11066073924303055, "mean": 5.8840945712290704e-05, "std": 0.026972563937306404, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_k.weight": { "min": -0.2978975474834442, "max": 0.29693126678466797, "mean": 5.199259248911403e-05, "std": 0.03254008665680885, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_k.bias": { "min": -5.169106960296631, "max": 5.089260578155518, "mean": -0.014622640796005726, "std": 1.1580101251602173, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_v.weight": { "min": -0.3452591896057129, "max": 0.3437287509441376, "mean": 7.87251628935337e-05, "std": 0.030058259144425392, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_v.bias": { "min": -0.03609376400709152, "max": 0.03314271569252014, "mean": -0.00014089577598497272, "std": 0.013021372258663177, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_out.0.weight": { "min": -0.3159167468547821, "max": 0.37570273876190186, "mean": -2.126370236510411e-05, "std": 0.024055330082774162, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_out.0.bias": { "min": -0.10549593716859818, "max": 0.1221165731549263, "mean": -0.0019639446400105953, "std": 0.028849009424448013, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.3.g": { "min": 0.3116210103034973, "max": 1.1235315799713135, "mean": 0.6662613153457642, "std": 0.09780054539442062, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.4.ff.0.0.weight": { "min": -0.872847855091095, "max": 0.6278241872787476, "mean": 0.0016755674732849002, "std": 0.047437313944101334, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.1.4.ff.0.0.bias": { "min": -0.2716394066810608, "max": 0.03413696587085724, "mean": -0.0466003455221653, "std": 0.04061445966362953, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.1.4.ff.2.weight": { "min": -0.9222021102905273, "max": 0.9650114178657532, "mean": 0.0010224997531622648, "std": 0.04070303216576576, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.1.4.ff.2.bias": { "min": -0.14480018615722656, "max": 0.07504245638847351, "mean": -0.00909046083688736, "std": 0.025704393163323402, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.1.g": { "min": 0.23979389667510986, "max": 0.7145018577575684, "mean": 0.4472465217113495, "std": 0.059433478862047195, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_q.weight": { "min": -0.2733098268508911, "max": 0.2983761131763458, "mean": 9.066419806913473e-06, "std": 0.03547072410583496, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_q.bias": { "min": -0.11928554624319077, "max": 0.11867407709360123, "mean": 0.0007565614068880677, "std": 0.02763325348496437, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_k.weight": { "min": -0.28173530101776123, "max": 0.2804112136363983, "mean": -7.68975296523422e-05, "std": 0.03510041534900665, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_k.bias": { "min": -2.51193904876709, "max": 2.5239455699920654, "mean": 0.026779357343912125, "std": 0.5869050621986389, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_v.weight": { "min": -0.2215055674314499, "max": 0.2721182703971863, "mean": 2.8998874768149108e-06, "std": 0.030730824917554855, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_v.bias": { "min": -0.03334304690361023, "max": 0.031320393085479736, "mean": 0.00011074724898207933, "std": 0.012403324246406555, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_out.0.weight": { "min": -0.23567309975624084, "max": 0.2320062220096588, "mean": 5.707715899916366e-05, "std": 0.025695981457829475, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_out.0.bias": { "min": -0.13582320511341095, "max": 0.1279149055480957, "mean": -0.005496869329363108, "std": 0.03996486961841583, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.3.g": { "min": 0.3545507788658142, "max": 1.1755321025848389, "mean": 0.7105286121368408, "std": 0.10380106419324875, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.4.ff.0.0.weight": { "min": -0.618323802947998, "max": 0.5557036995887756, "mean": 0.0011603902094066143, "std": 0.046115029603242874, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.2.4.ff.0.0.bias": { "min": -0.18935386836528778, "max": 0.024935415014624596, "mean": -0.03484790399670601, "std": 0.028624996542930603, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.2.4.ff.2.weight": { "min": -1.1329621076583862, "max": 0.9724080562591553, "mean": 0.00035803488572128117, "std": 0.042342979460954666, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.2.4.ff.2.bias": { "min": -0.5985916256904602, "max": 0.06294681131839752, "mean": -0.0048767137341201305, "std": 0.028625035658478737, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.1.g": { "min": 0.37523797154426575, "max": 0.9426477551460266, "mean": 0.5925332903862, "std": 0.06714636832475662, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_q.weight": { "min": -0.3922964930534363, "max": 0.37001147866249084, "mean": 7.055637979647145e-05, "std": 0.03718561306595802, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_q.bias": { "min": -0.11894690245389938, "max": 0.13649211823940277, "mean": 0.0009205802925862372, "std": 0.029216548427939415, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_k.weight": { "min": -0.6203529834747314, "max": 0.509852409362793, "mean": 1.5258530766004696e-05, "std": 0.03643907234072685, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_k.bias": { "min": -8.195601463317871, "max": 8.798324584960938, "mean": -0.10935366153717041, "std": 1.6999714374542236, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_v.weight": { "min": -0.27709993720054626, "max": 0.24029740691184998, "mean": 5.252830669633113e-05, "std": 0.032612841576337814, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_v.bias": { "min": -0.05198528617620468, "max": 0.03960206359624863, "mean": 8.789013372734189e-05, "std": 0.012959298677742481, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_out.0.weight": { "min": -0.23129259049892426, "max": 0.23536467552185059, "mean": -2.1845989977009594e-05, "std": 0.029389241710305214, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_out.0.bias": { "min": -0.2045416533946991, "max": 0.10547658056020737, "mean": -0.004024041350930929, "std": 0.03263028338551521, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.3.g": { "min": 0.33950191736221313, "max": 1.0151382684707642, "mean": 0.7007080316543579, "std": 0.09671688079833984, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.4.ff.0.0.weight": { "min": -0.5657932162284851, "max": 0.8349727988243103, "mean": 0.00041512559982948005, "std": 0.04229608178138733, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.3.4.ff.0.0.bias": { "min": -0.21222105622291565, "max": 0.030380746349692345, "mean": -0.03218400478363037, "std": 0.026512378826737404, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.3.4.ff.2.weight": { "min": -0.7566999793052673, "max": 0.7205860018730164, "mean": -1.3569264410762116e-05, "std": 0.036836523562669754, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.3.4.ff.2.bias": { "min": -0.2636493444442749, "max": 0.10622138530015945, "mean": -0.0030191433615982533, "std": 0.0288657546043396, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.1.g": { "min": 0.284244179725647, "max": 0.6968931555747986, "mean": 0.49943026900291443, "std": 0.046561453491449356, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_q.weight": { "min": -0.27927035093307495, "max": 0.23469851911067963, "mean": -0.00011116769746877253, "std": 0.038758207112550735, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_q.bias": { "min": -0.15420791506767273, "max": 0.12671181559562683, "mean": -0.002232905477285385, "std": 0.03338504582643509, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_k.weight": { "min": -0.41528424620628357, "max": 0.6604220271110535, "mean": -1.9215509382775053e-05, "std": 0.03909698873758316, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_k.bias": { "min": -4.243428707122803, "max": 4.728596210479736, "mean": -0.020457647740840912, "std": 1.0080652236938477, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_v.weight": { "min": -0.24574802815914154, "max": 0.20800377428531647, "mean": 4.4111799070378765e-05, "std": 0.0339629240334034, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_v.bias": { "min": -0.03446226194500923, "max": 0.04489393159747124, "mean": -1.5458615962415934e-05, "std": 0.012629742734134197, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_out.0.weight": { "min": -0.2015937864780426, "max": 0.20673099160194397, "mean": -2.9244030884001404e-05, "std": 0.03102072887122631, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_out.0.bias": { "min": -0.20010024309158325, "max": 0.11358015239238739, "mean": -0.0029013892635703087, "std": 0.03451463207602501, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.3.g": { "min": 0.36685705184936523, "max": 1.0600172281265259, "mean": 0.6705178022384644, "std": 0.06640052795410156, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.4.ff.0.0.weight": { "min": -0.39914920926094055, "max": 0.5031230449676514, "mean": -3.865663893520832e-05, "std": 0.04113178327679634, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.4.4.ff.0.0.bias": { "min": -0.12865233421325684, "max": 0.026885882019996643, "mean": -0.030540671199560165, "std": 0.02188955619931221, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.4.4.ff.2.weight": { "min": -0.4503399133682251, "max": 0.4341718554496765, "mean": 7.837524026399478e-05, "std": 0.03489154577255249, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.4.4.ff.2.bias": { "min": -0.2677534520626068, "max": 0.07295451313257217, "mean": -0.0010977284982800484, "std": 0.023126663640141487, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.1.g": { "min": 0.28732216358184814, "max": 0.687613844871521, "mean": 0.5245327353477478, "std": 0.047577910125255585, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_q.weight": { "min": -0.22290916740894318, "max": 0.22416770458221436, "mean": 1.5896670447546057e-05, "std": 0.03894934430718422, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_q.bias": { "min": -0.13659609854221344, "max": 0.10938586294651031, "mean": 0.0002443990088067949, "std": 0.029240434989333153, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_k.weight": { "min": -0.37579256296157837, "max": 0.43812817335128784, "mean": -9.537441655993462e-06, "std": 0.03928641602396965, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_k.bias": { "min": -3.8499395847320557, "max": 5.004647254943848, "mean": 0.009758757427334785, "std": 0.8455180525779724, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_v.weight": { "min": -0.2236318439245224, "max": 0.22071507573127747, "mean": -4.0232407627627254e-07, "std": 0.034410055726766586, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_v.bias": { "min": -0.04383794590830803, "max": 0.03584868088364601, "mean": -0.00026072480250149965, "std": 0.012076611630618572, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_out.0.weight": { "min": -0.21360361576080322, "max": 0.1891404688358307, "mean": -1.7133981600636616e-05, "std": 0.03153670206665993, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_out.0.bias": { "min": -0.18102218210697174, "max": 0.12101027369499207, "mean": -0.002398766577243805, "std": 0.04126044735312462, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.3.g": { "min": 0.422617107629776, "max": 0.9454182982444763, "mean": 0.6626853942871094, "std": 0.05683305859565735, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.4.ff.0.0.weight": { "min": -0.3716322183609009, "max": 0.47696027159690857, "mean": -8.185259503079578e-05, "std": 0.040890805423259735, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.5.4.ff.0.0.bias": { "min": -0.2088262289762497, "max": 0.027207661420106888, "mean": -0.03023664839565754, "std": 0.021368583664298058, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.5.4.ff.2.weight": { "min": -0.3415319621562958, "max": 0.735925555229187, "mean": 8.314158185385168e-05, "std": 0.034767184406518936, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.5.4.ff.2.bias": { "min": -0.24044273793697357, "max": 0.05069386586546898, "mean": -0.0011902841506525874, "std": 0.020465629175305367, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.1.g": { "min": 0.30604928731918335, "max": 0.6555026769638062, "mean": 0.5250788331031799, "std": 0.04609908536076546, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_q.weight": { "min": -0.3050762414932251, "max": 0.21783104538917542, "mean": 6.997165473876521e-05, "std": 0.039496470242738724, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_q.bias": { "min": -0.14947636425495148, "max": 0.13131970167160034, "mean": 0.00033609665115363896, "std": 0.03047223575413227, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_k.weight": { "min": -0.2578710615634918, "max": 0.20255950093269348, "mean": 3.1238341762218624e-05, "std": 0.03948673978447914, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_k.bias": { "min": -2.339573621749878, "max": 2.379251480102539, "mean": -0.02625335566699505, "std": 0.4500052034854889, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_v.weight": { "min": -0.1892782300710678, "max": 0.21099112927913666, "mean": 3.7314141081878915e-05, "std": 0.03479423746466637, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_v.bias": { "min": -0.03169188275933266, "max": 0.03571836277842522, "mean": -0.00019686334417201579, "std": 0.012292133644223213, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_out.0.weight": { "min": -0.1888986974954605, "max": 0.17091436684131622, "mean": -6.82127574691549e-05, "std": 0.032170820981264114, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_out.0.bias": { "min": -0.13952063024044037, "max": 0.13709284365177155, "mean": -0.0025128263514488935, "std": 0.0512898713350296, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.3.g": { "min": 0.4670536518096924, "max": 0.9585899710655212, "mean": 0.6689007878303528, "std": 0.05285040661692619, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.4.ff.0.0.weight": { "min": -0.3248884379863739, "max": 0.3098326325416565, "mean": -1.0356043276260607e-06, "std": 0.04094681516289711, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.6.4.ff.0.0.bias": { "min": -0.12497521936893463, "max": 0.02554607018828392, "mean": -0.030699055641889572, "std": 0.019824611023068428, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.6.4.ff.2.weight": { "min": -0.4409962594509125, "max": 0.44632241129875183, "mean": 9.430450154468417e-05, "std": 0.03512001410126686, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.6.4.ff.2.bias": { "min": -0.22476668655872345, "max": 0.051897041499614716, "mean": -0.0011790284188464284, "std": 0.018472088500857353, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.1.g": { "min": 0.3393557369709015, "max": 0.7416696548461914, "mean": 0.5586937069892883, "std": 0.04142747446894646, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_q.weight": { "min": -0.2734062075614929, "max": 0.2793632745742798, "mean": 2.0294006390031427e-05, "std": 0.04105808213353157, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_q.bias": { "min": -0.13707204163074493, "max": 0.14009879529476166, "mean": 0.0004904167726635933, "std": 0.02664206363260746, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_k.weight": { "min": -0.49139103293418884, "max": 0.35644298791885376, "mean": 8.893347694538534e-05, "std": 0.04069600626826286, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_k.bias": { "min": -2.2994801998138428, "max": 1.7469841241836548, "mean": -0.021084124222397804, "std": 0.500186562538147, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_v.weight": { "min": -0.2184700220823288, "max": 0.1981830596923828, "mean": -4.060107676195912e-05, "std": 0.03423382714390755, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_v.bias": { "min": -0.04127173125743866, "max": 0.03881501033902168, "mean": -0.00013771075464319438, "std": 0.012880227528512478, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_out.0.weight": { "min": -0.17825232446193695, "max": 0.18374156951904297, "mean": 4.785084456671029e-05, "std": 0.031557004898786545, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_out.0.bias": { "min": -0.18023589253425598, "max": 0.18417657911777496, "mean": -0.002215688582509756, "std": 0.05483615770936012, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.3.g": { "min": 0.4742925763130188, "max": 1.0284452438354492, "mean": 0.6453101634979248, "std": 0.05053440108895302, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.4.ff.0.0.weight": { "min": -0.27223968505859375, "max": 0.30990350246429443, "mean": 0.00011251836258452386, "std": 0.04068317264318466, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.7.4.ff.0.0.bias": { "min": -0.10583628714084625, "max": 0.02672600746154785, "mean": -0.02951621636748314, "std": 0.01793462224304676, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.7.4.ff.2.weight": { "min": -0.339975506067276, "max": 0.3303821086883545, "mean": 5.460641114041209e-05, "std": 0.034413956105709076, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.7.4.ff.2.bias": { "min": -0.1819038987159729, "max": 0.0424266941845417, "mean": -0.0010654201032593846, "std": 0.01721329055726528, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.1.g": { "min": 0.3252944052219391, "max": 0.688383936882019, "mean": 0.5112100839614868, "std": 0.036942265927791595, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_q.weight": { "min": -0.2345394641160965, "max": 0.22607795894145966, "mean": -3.624632518040016e-05, "std": 0.039177343249320984, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_q.bias": { "min": -0.11556069552898407, "max": 0.13209758698940277, "mean": 0.00015118884039111435, "std": 0.029196659103035927, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_k.weight": { "min": -0.3532617390155792, "max": 0.2856779992580414, "mean": 7.000558980507776e-06, "std": 0.0392458438873291, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_k.bias": { "min": -4.136237621307373, "max": 3.547076940536499, "mean": -0.011597944423556328, "std": 0.6828959584236145, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_v.weight": { "min": -0.21137045323848724, "max": 0.20969942212104797, "mean": 3.464317342150025e-05, "std": 0.03448577970266342, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_v.bias": { "min": -0.03584721311926842, "max": 0.048106979578733444, "mean": 0.0007941541844047606, "std": 0.012865344993770123, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_out.0.weight": { "min": -0.2109234631061554, "max": 0.19350647926330566, "mean": -1.076167109204107e-06, "std": 0.03169678896665573, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_out.0.bias": { "min": -0.18694967031478882, "max": 0.17746947705745697, "mean": -0.002843617694452405, "std": 0.0586174838244915, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.3.g": { "min": 0.474641889333725, "max": 1.0443058013916016, "mean": 0.6514294147491455, "std": 0.0498916432261467, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.4.ff.0.0.weight": { "min": -0.24857543408870697, "max": 0.3296365737915039, "mean": 0.00018093036487698555, "std": 0.040571410208940506, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.8.4.ff.0.0.bias": { "min": -0.12483743578195572, "max": 0.024654541164636612, "mean": -0.030496058985590935, "std": 0.01760769635438919, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.8.4.ff.2.weight": { "min": -0.4221171438694, "max": 0.4831203818321228, "mean": 1.3900153135182336e-06, "std": 0.03539836406707764, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.8.4.ff.2.bias": { "min": -0.15169401466846466, "max": 0.043601393699645996, "mean": 4.186587466392666e-05, "std": 0.014870981685817242, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.1.g": { "min": 0.31570297479629517, "max": 0.6836181879043579, "mean": 0.5528991222381592, "std": 0.04067207872867584, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_q.weight": { "min": -0.20685237646102905, "max": 0.22020350396633148, "mean": 3.1496565497945994e-05, "std": 0.038300175219774246, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_q.bias": { "min": -0.13801881670951843, "max": 0.1128397211432457, "mean": 1.9543484086170793e-05, "std": 0.02582789771258831, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_k.weight": { "min": -0.4035792350769043, "max": 0.37189632654190063, "mean": 2.57877072726842e-05, "std": 0.03818116337060928, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_k.bias": { "min": -3.776683807373047, "max": 2.873103380203247, "mean": 0.0011591403745114803, "std": 0.5172097086906433, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_v.weight": { "min": -0.20364898443222046, "max": 0.19804270565509796, "mean": 2.963895894936286e-05, "std": 0.03429786115884781, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_v.bias": { "min": -0.05086854100227356, "max": 0.03999151289463043, "mean": -0.00042562291491776705, "std": 0.01342119462788105, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_out.0.weight": { "min": -0.19656670093536377, "max": 0.20230703055858612, "mean": -1.2472472008084878e-05, "std": 0.031806014478206635, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_out.0.bias": { "min": -0.19329077005386353, "max": 0.1953459531068802, "mean": -0.002963340375572443, "std": 0.06254669278860092, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.3.g": { "min": 0.3491152226924896, "max": 1.0867162942886353, "mean": 0.6672079563140869, "std": 0.055482182651758194, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.4.ff.0.0.weight": { "min": -0.22604526579380035, "max": 0.25199154019355774, "mean": 0.00035888003185391426, "std": 0.04076085984706879, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.9.4.ff.0.0.bias": { "min": -0.09107685089111328, "max": 0.043750207871198654, "mean": -0.030080880969762802, "std": 0.017612501978874207, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.9.4.ff.2.weight": { "min": -0.354022353887558, "max": 0.3047710955142975, "mean": -4.505186007008888e-05, "std": 0.03712347894906998, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.9.4.ff.2.bias": { "min": -0.16208632290363312, "max": 0.06347470730543137, "mean": -7.683466537855566e-05, "std": 0.01941368170082569, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.1.g": { "min": 0.34881117939949036, "max": 0.7244766354560852, "mean": 0.5423683524131775, "std": 0.039119552820920944, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_q.weight": { "min": -0.21985284984111786, "max": 0.22366879880428314, "mean": -1.1181864465470426e-05, "std": 0.03923165425658226, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_q.bias": { "min": -0.11856226623058319, "max": 0.17077098786830902, "mean": 0.0002904185967054218, "std": 0.025113951414823532, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_k.weight": { "min": -0.24732813239097595, "max": 0.30149152874946594, "mean": -3.663568713818677e-05, "std": 0.03893101587891579, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_k.bias": { "min": -3.509943962097168, "max": 3.719674825668335, "mean": 0.015853645280003548, "std": 0.7831405401229858, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_v.weight": { "min": -0.21940433979034424, "max": 0.2380109429359436, "mean": -1.3181561371311545e-05, "std": 0.036304209381341934, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_v.bias": { "min": -0.04728918895125389, "max": 0.05147355794906616, "mean": 0.00047950932639651, "std": 0.01351844146847725, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_out.0.weight": { "min": -0.21457946300506592, "max": 0.21772831678390503, "mean": 5.6543191021773964e-05, "std": 0.03361648693680763, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_out.0.bias": { "min": -0.21175915002822876, "max": 0.2316361367702484, "mean": -0.005104508716613054, "std": 0.06187352165579796, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.3.g": { "min": 0.36198312044143677, "max": 1.1043850183486938, "mean": 0.6993494629859924, "std": 0.0538649819791317, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.4.ff.0.0.weight": { "min": -0.23541490733623505, "max": 0.24545514583587646, "mean": 0.0004635048389900476, "std": 0.0412699356675148, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.10.4.ff.0.0.bias": { "min": -0.09819761663675308, "max": 0.06812109053134918, "mean": -0.03143283352255821, "std": 0.018124457448720932, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.10.4.ff.2.weight": { "min": -0.302616149187088, "max": 0.3526079058647156, "mean": -8.239349699579179e-05, "std": 0.04027572274208069, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.10.4.ff.2.bias": { "min": -0.1525425761938095, "max": 0.14988082647323608, "mean": 0.00025950101553462446, "std": 0.02303888648748398, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.1.g": { "min": 0.9994731545448303, "max": 1.0051331520080566, "mean": 1.0006828308105469, "std": 0.0018997839652001858, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_q.weight": { "min": -0.031253598630428314, "max": 0.03125074878334999, "mean": -1.9291795979370363e-05, "std": 0.018041806295514107, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_q.bias": { "min": -0.031226053833961487, "max": 0.030990969389677048, "mean": -0.0010842140763998032, "std": 0.01795150525867939, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_k.weight": { "min": -0.03125230595469475, "max": 0.031255852431058884, "mean": 3.5468428905005567e-06, "std": 0.01804220862686634, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_k.bias": { "min": -0.031155752018094063, "max": 0.031177222728729248, "mean": 0.0003338717215228826, "std": 0.018063681200146675, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_v.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_v.bias": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_out.0.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_out.0.bias": { "min": -0.00039401825051754713, "max": 0.00042413949267938733, "mean": 2.811485501297284e-06, "std": 0.00013175072672311217, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.3.g": { "min": 0.9984285831451416, "max": 1.0057381391525269, "mean": 1.0001252889633179, "std": 0.0012227989500388503, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.4.ff.0.0.weight": { "min": -0.03248310461640358, "max": 0.03276699408888817, "mean": -6.534818567160983e-06, "std": 0.01804283820092678, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.11.4.ff.0.0.bias": { "min": -0.03245115652680397, "max": 0.032321732491254807, "mean": -6.833355291746557e-05, "std": 0.017962154000997543, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.11.4.ff.2.weight": { "min": -0.001046429155394435, "max": 0.001021245145238936, "mean": 1.2730889693557401e-06, "std": 0.00019014839199371636, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.11.4.ff.2.bias": { "min": -0.00038878852501511574, "max": 0.0004429140826687217, "mean": 4.41432621300919e-06, "std": 0.00012222054647281766, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.1.g": { "min": 0.3831113874912262, "max": 0.7217056155204773, "mean": 0.5806930065155029, "std": 0.03891616314649582, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_q.weight": { "min": -0.23930218815803528, "max": 0.19694408774375916, "mean": 2.6163981601712294e-05, "std": 0.03746587410569191, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_q.bias": { "min": -0.11892960965633392, "max": 0.16658687591552734, "mean": 0.0009876482654362917, "std": 0.027559131383895874, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_k.weight": { "min": -0.2469177097082138, "max": 0.5011630058288574, "mean": -5.039005191065371e-05, "std": 0.037623330950737, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_k.bias": { "min": -3.9455182552337646, "max": 3.7725064754486084, "mean": -0.003572634421288967, "std": 0.6815741658210754, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_v.weight": { "min": -0.2276747226715088, "max": 0.25224873423576355, "mean": -1.156590678874636e-05, "std": 0.03743501380085945, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_v.bias": { "min": -0.0717209130525589, "max": 0.08072538673877716, "mean": -0.0005185012123547494, "std": 0.01566058024764061, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_out.0.weight": { "min": -0.2281697541475296, "max": 0.25840428471565247, "mean": -2.8510152333183214e-05, "std": 0.03542180359363556, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_out.0.bias": { "min": -0.2006748765707016, "max": 0.21532072126865387, "mean": -0.005526356864720583, "std": 0.06832510232925415, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.3.g": { "min": 0.4052578806877136, "max": 1.1931043863296509, "mean": 0.7380141019821167, "std": 0.05553331598639488, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.4.ff.0.0.weight": { "min": -0.2216469943523407, "max": 0.24624952673912048, "mean": 0.0005209938390180469, "std": 0.04133738949894905, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.12.4.ff.0.0.bias": { "min": -0.10345429182052612, "max": 0.024157993495464325, "mean": -0.03266732394695282, "std": 0.018895410001277924, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.12.4.ff.2.weight": { "min": -0.4506717622280121, "max": 0.4234609603881836, "mean": -0.00043505526264198124, "std": 0.04689793288707733, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.12.4.ff.2.bias": { "min": -0.2517058551311493, "max": 0.4705328345298767, "mean": 0.0032054544426500797, "std": 0.044538334012031555, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.0.weight": { "min": -0.31723225116729736, "max": 0.3334876596927643, "mean": -2.5067403839784674e-05, "std": 0.021288011223077774, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.13.1.g": { "min": 0.32461482286453247, "max": 0.6871254444122314, "mean": 0.5709946155548096, "std": 0.044712185859680176, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_q.weight": { "min": -0.16488447785377502, "max": 0.174674391746521, "mean": -4.878301842836663e-05, "std": 0.033181823790073395, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_q.bias": { "min": -0.18708285689353943, "max": 0.14329394698143005, "mean": 4.1025952668860555e-05, "std": 0.02970319241285324, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_k.weight": { "min": -0.3814561367034912, "max": 0.2463892698287964, "mean": -9.789278919924982e-06, "std": 0.03276311233639717, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_k.bias": { "min": -3.6606388092041016, "max": 3.2944271564483643, "mean": -0.01427321694791317, "std": 0.9851539731025696, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_v.weight": { "min": -0.23539957404136658, "max": 0.2480521947145462, "mean": -1.7979342374019325e-05, "std": 0.04169878736138344, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_v.bias": { "min": -0.07279200851917267, "max": 0.15470217168331146, "mean": 0.0006656068144366145, "std": 0.02517576329410076, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_out.0.weight": { "min": -0.2668735086917877, "max": 0.2486240267753601, "mean": -1.5421055650222115e-05, "std": 0.04013972356915474, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_out.0.bias": { "min": -0.18993628025054932, "max": 0.19500213861465454, "mean": -0.0012349991593509912, "std": 0.06668674200773239, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.3.g": { "min": 0.32912659645080566, "max": 1.003253698348999, "mean": 0.7192496061325073, "std": 0.052594345062971115, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.4.ff.0.0.weight": { "min": -0.2322535365819931, "max": 0.24589639902114868, "mean": 0.00018273374007549137, "std": 0.0409013107419014, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.13.4.ff.0.0.bias": { "min": -0.11447025835514069, "max": 0.018959810957312584, "mean": -0.04247897118330002, "std": 0.018857870250940323, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.13.4.ff.2.weight": { "min": -0.39094480872154236, "max": 0.4085846245288849, "mean": -2.156081063731108e-05, "std": 0.0485350526869297, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.13.4.ff.2.bias": { "min": -0.6941088438034058, "max": 0.413074254989624, "mean": 0.0008494330104440451, "std": 0.060315798968076706, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.0.weight": { "min": -0.0010608690790832043, "max": 1.0004838705062866, "mean": 0.0004881545901298523, "std": 0.0220896415412426, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.14.1.g": { "min": 0.9995023608207703, "max": 1.004894495010376, "mean": 1.0006191730499268, "std": 0.0017806595424190164, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_q.weight": { "min": -0.031253229826688766, "max": 0.0312533862888813, "mean": -2.1022129658376798e-05, "std": 0.018033137544989586, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_q.bias": { "min": -0.03121466003358364, "max": 0.031230736523866653, "mean": -0.0006770135369151831, "std": 0.017827749252319336, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_k.weight": { "min": -0.031253378838300705, "max": 0.03125477209687233, "mean": -8.833090760163032e-06, "std": 0.018032172694802284, "sparsity": 9.5367431640625e-07, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_k.bias": { "min": -0.031231535598635674, "max": 0.031244806945323944, "mean": -0.0007297678967006505, "std": 0.01794254779815674, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_v.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_v.bias": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_out.0.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_out.0.bias": { "min": -0.00039897009264677763, "max": 0.00031239030067808926, "mean": -2.7656624297378585e-06, "std": 0.00010500323696760461, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.3.g": { "min": 0.9984675645828247, "max": 1.005997896194458, "mean": 0.9998568296432495, "std": 0.0012546924408525229, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.4.ff.0.0.weight": { "min": -0.032396964728832245, "max": 0.032092805951833725, "mean": -3.513969204504974e-08, "std": 0.018030446022748947, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.14.4.ff.0.0.bias": { "min": -0.03191046044230461, "max": 0.03107621893286705, "mean": -0.00026303951744921505, "std": 0.018048185855150223, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.14.4.ff.2.weight": { "min": -0.0011175514664500952, "max": 0.0010112477466464043, "mean": -6.1762216319039e-07, "std": 0.0001866686943685636, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.14.4.ff.2.bias": { "min": -0.0003427659103181213, "max": 0.00032113981433212757, "mean": -2.040310619122465e-06, "std": 9.538298763800412e-05, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.0.weight": { "min": -0.23462186753749847, "max": 0.27271148562431335, "mean": 6.776777354389196e-06, "std": 0.018810205161571503, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.15.1.g": { "min": 0.32134121656417847, "max": 0.696171224117279, "mean": 0.5816354155540466, "std": 0.045965857803821564, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_q.weight": { "min": -0.18210144340991974, "max": 0.19822537899017334, "mean": -1.1569689377211034e-05, "std": 0.03318428248167038, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_q.bias": { "min": -0.16075287759304047, "max": 0.1296185702085495, "mean": -0.0010708055924624205, "std": 0.03414905443787575, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_k.weight": { "min": -0.33257541060447693, "max": 0.31164395809173584, "mean": -1.0188834494329058e-05, "std": 0.03223486989736557, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_k.bias": { "min": -7.815314769744873, "max": 8.776156425476074, "mean": 0.09355179965496063, "std": 1.6212124824523926, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_v.weight": { "min": -0.2341691255569458, "max": 0.2423291653394699, "mean": 4.1637467802502215e-05, "std": 0.040857378393411636, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_v.bias": { "min": -0.0760289877653122, "max": 0.065830759704113, "mean": 0.00048469315515831113, "std": 0.019415758550167084, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_out.0.weight": { "min": -0.24639879167079926, "max": 0.23466575145721436, "mean": -3.0853516364004463e-06, "std": 0.03943203389644623, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_out.0.bias": { "min": -0.16285879909992218, "max": 0.16076169908046722, "mean": 0.0016295814421027899, "std": 0.0652732104063034, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.3.g": { "min": 0.5568758845329285, "max": 0.9466937184333801, "mean": 0.7129064202308655, "std": 0.0403011329472065, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.4.ff.0.0.weight": { "min": -0.22882379591464996, "max": 0.25551655888557434, "mean": -4.5426822907757014e-05, "std": 0.0405760332942009, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.15.4.ff.0.0.bias": { "min": -0.1351136714220047, "max": 0.022313008084893227, "mean": -0.04135293886065483, "std": 0.01838735118508339, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.15.4.ff.2.weight": { "min": -0.4227588474750519, "max": 0.3930455446243286, "mean": -4.085732143721543e-06, "std": 0.047785546630620956, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.15.4.ff.2.bias": { "min": -0.6080650687217712, "max": 0.6521760821342468, "mean": 0.0015855736564844847, "std": 0.05685455724596977, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.0.weight": { "min": -0.2519088387489319, "max": 0.3208920359611511, "mean": -6.068687071092427e-06, "std": 0.01961320824921131, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.16.1.g": { "min": 0.3596932888031006, "max": 0.6842364072799683, "mean": 0.5706857442855835, "std": 0.042946916073560715, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_q.weight": { "min": -0.22081606090068817, "max": 0.1773088276386261, "mean": -3.454893158050254e-05, "std": 0.03429890051484108, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_q.bias": { "min": -0.1636391431093216, "max": 0.23335042595863342, "mean": 0.00035607549943961203, "std": 0.032843589782714844, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_k.weight": { "min": -0.26433637738227844, "max": 0.24021653831005096, "mean": -5.268204404274002e-05, "std": 0.033897630870342255, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_k.bias": { "min": -4.859472751617432, "max": 5.095940113067627, "mean": 0.043871667236089706, "std": 1.2294032573699951, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_v.weight": { "min": -0.24689450860023499, "max": 0.2507416307926178, "mean": 7.20950702088885e-05, "std": 0.04398806765675545, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_v.bias": { "min": -0.062653087079525, "max": 0.05465509742498398, "mean": 0.0006480686133727431, "std": 0.01719220168888569, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_out.0.weight": { "min": -0.287101686000824, "max": 0.27245277166366577, "mean": -5.0120852392865345e-05, "std": 0.04298638179898262, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_out.0.bias": { "min": -0.16084662079811096, "max": 0.17058779299259186, "mean": -0.002887619426473975, "std": 0.05928964540362358, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.3.g": { "min": 0.5198022723197937, "max": 0.9352366328239441, "mean": 0.7134757041931152, "std": 0.03851567581295967, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.4.ff.0.0.weight": { "min": -0.23833467066287994, "max": 0.24947485327720642, "mean": 0.0004647623864002526, "std": 0.040455412119627, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.16.4.ff.0.0.bias": { "min": -0.1449345052242279, "max": 0.041161470115184784, "mean": -0.039693716913461685, "std": 0.020549351349473, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.16.4.ff.2.weight": { "min": -0.5341992378234863, "max": 0.584149181842804, "mean": 5.933919965173118e-06, "std": 0.048861313611269, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.16.4.ff.2.bias": { "min": -0.5195870399475098, "max": 0.4941606819629669, "mean": 0.0023631826043128967, "std": 0.05346201732754707, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.0.weight": { "min": -0.27384015917778015, "max": 0.3156191408634186, "mean": 1.960434929060284e-06, "std": 0.020050089806318283, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.17.1.g": { "min": 0.3661290407180786, "max": 0.7137707471847534, "mean": 0.5931426286697388, "std": 0.045923035591840744, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_q.weight": { "min": -0.21142390370368958, "max": 0.1996057629585266, "mean": 3.067640500376001e-05, "std": 0.034866977483034134, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_q.bias": { "min": -0.18729116022586823, "max": 0.20393171906471252, "mean": 0.0009568152017891407, "std": 0.031525619328022, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_k.weight": { "min": -0.29027533531188965, "max": 0.34051838517189026, "mean": -4.7230056225089356e-05, "std": 0.03458789736032486, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_k.bias": { "min": -3.881865978240967, "max": 3.3913497924804688, "mean": 0.014454022981226444, "std": 0.8585575819015503, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_v.weight": { "min": -0.22494949400424957, "max": 0.25041675567626953, "mean": -3.845839273708407e-06, "std": 0.0422312431037426, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_v.bias": { "min": -0.055274393409490585, "max": 0.04683299362659454, "mean": -1.701708242762834e-05, "std": 0.015851490199565887, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_out.0.weight": { "min": -0.29334571957588196, "max": 0.2907007336616516, "mean": -7.57977295506862e-06, "std": 0.04194618761539459, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_out.0.bias": { "min": -0.1247822642326355, "max": 0.2594626247882843, "mean": -0.0032404293306171894, "std": 0.0531664676964283, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.3.g": { "min": 0.4562881588935852, "max": 0.8474717736244202, "mean": 0.7055672407150269, "std": 0.035394009202718735, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.4.ff.0.0.weight": { "min": -0.5121109485626221, "max": 0.34823864698410034, "mean": 0.0003428200143389404, "std": 0.04020027443766594, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.17.4.ff.0.0.bias": { "min": -0.1863405406475067, "max": 0.039554521441459656, "mean": -0.03938986361026764, "std": 0.02135385014116764, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.17.4.ff.2.weight": { "min": -0.5456476807594299, "max": 0.5576444864273071, "mean": -7.10671374690719e-05, "std": 0.050736188888549805, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.17.4.ff.2.bias": { "min": -0.5122882723808289, "max": 0.6650155782699585, "mean": 0.0024437594693154097, "std": 0.049542441964149475, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.0.weight": { "min": -0.3326261341571808, "max": 0.26606664061546326, "mean": 3.3996070669672918e-06, "std": 0.01938733644783497, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.18.1.g": { "min": 0.32209691405296326, "max": 0.7689979672431946, "mean": 0.651018500328064, "std": 0.045278150588274, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_q.weight": { "min": -0.25021034479141235, "max": 0.22022569179534912, "mean": -2.263453097839374e-06, "std": 0.0365014486014843, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_q.bias": { "min": -0.32728204131126404, "max": 0.28722772002220154, "mean": -0.0006871280493214726, "std": 0.038576990365982056, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_k.weight": { "min": -0.3110663890838623, "max": 0.37101635336875916, "mean": 6.483237666543573e-05, "std": 0.03624214604496956, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_k.bias": { "min": -4.721696376800537, "max": 5.813023090362549, "mean": 0.037980761379003525, "std": 1.4134187698364258, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_v.weight": { "min": -0.22233453392982483, "max": 0.20630262792110443, "mean": -7.52985361032188e-05, "std": 0.0424862764775753, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_v.bias": { "min": -0.07775042951107025, "max": 0.051466166973114014, "mean": -0.0009254277683794498, "std": 0.0164100993424654, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_out.0.weight": { "min": -0.3309888541698456, "max": 0.3296257257461548, "mean": -4.630289367923979e-06, "std": 0.04279271885752678, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_out.0.bias": { "min": -0.2851186692714691, "max": 0.11168244481086731, "mean": -0.0012053586542606354, "std": 0.04700839892029762, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.3.g": { "min": 0.4862569272518158, "max": 0.8893836140632629, "mean": 0.7374457716941833, "std": 0.03831757605075836, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.4.ff.0.0.weight": { "min": -0.3624440133571625, "max": 0.27509352564811707, "mean": 5.130700083100237e-05, "std": 0.040646348148584366, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.18.4.ff.0.0.bias": { "min": -0.24782374501228333, "max": 0.04648653045296669, "mean": -0.0392659492790699, "std": 0.023277943953871727, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.18.4.ff.2.weight": { "min": -0.6279041171073914, "max": 0.5983599424362183, "mean": -6.208260310813785e-05, "std": 0.05311836674809456, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.18.4.ff.2.bias": { "min": -0.7105586528778076, "max": 0.266210675239563, "mean": 0.0009207880357280374, "std": 0.05124485120177269, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.0.weight": { "min": -0.3435235619544983, "max": 0.30372199416160583, "mean": 2.971426056319615e-07, "std": 0.019135644659399986, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.19.1.g": { "min": 0.34978553652763367, "max": 0.7852374911308289, "mean": 0.6388005018234253, "std": 0.04921075701713562, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_q.weight": { "min": -0.20607401430606842, "max": 0.20750851929187775, "mean": -5.96779900661204e-05, "std": 0.037695422768592834, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_q.bias": { "min": -0.2588743567466736, "max": 0.2684256136417389, "mean": -0.00040556711610406637, "std": 0.04462844133377075, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_k.weight": { "min": -0.3547278344631195, "max": 0.32300710678100586, "mean": -6.988519089645706e-06, "std": 0.03720381483435631, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_k.bias": { "min": -5.265876293182373, "max": 4.207967281341553, "mean": -0.026429325342178345, "std": 1.0068732500076294, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_v.weight": { "min": -0.2394271194934845, "max": 0.24428503215312958, "mean": -2.5281191483372822e-05, "std": 0.04321092739701271, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_v.bias": { "min": -0.06252460181713104, "max": 0.056893154978752136, "mean": 0.000347302237059921, "std": 0.014152363874018192, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_out.0.weight": { "min": -0.4372415244579315, "max": 0.3737826347351074, "mean": 1.467342644900782e-05, "std": 0.04412253573536873, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_out.0.bias": { "min": -0.09628994017839432, "max": 0.17628277838230133, "mean": -0.0006604281952604651, "std": 0.03514600917696953, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.3.g": { "min": 0.4217767119407654, "max": 1.0722668170928955, "mean": 0.7484005689620972, "std": 0.04209807515144348, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.4.ff.0.0.weight": { "min": -0.2667092978954315, "max": 0.2975556254386902, "mean": -7.937644113553688e-05, "std": 0.04080634191632271, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.19.4.ff.0.0.bias": { "min": -0.1854698657989502, "max": 0.04349794238805771, "mean": -0.03681644797325134, "std": 0.02560725063085556, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.19.4.ff.2.weight": { "min": -0.4579220414161682, "max": 0.48784998059272766, "mean": 4.282052395865321e-05, "std": 0.05421200394630432, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.19.4.ff.2.bias": { "min": -0.2866349518299103, "max": 0.5520289540290833, "mean": -0.0008793525630608201, "std": 0.04783879220485687, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.0.weight": { "min": -0.29281285405158997, "max": 0.32289794087409973, "mean": 6.245412805583328e-06, "std": 0.019969133660197258, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.20.1.g": { "min": 0.29108351469039917, "max": 0.7621498107910156, "mean": 0.6508013010025024, "std": 0.05207887664437294, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_q.weight": { "min": -0.2440386265516281, "max": 0.2621654272079468, "mean": -5.880815479031298e-06, "std": 0.03961231932044029, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_q.bias": { "min": -0.2678271532058716, "max": 0.2002498358488083, "mean": -0.0008784097735770047, "std": 0.05178229510784149, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_k.weight": { "min": -0.27257686853408813, "max": 0.2541964650154114, "mean": 4.526807060756255e-06, "std": 0.038709431886672974, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_k.bias": { "min": -12.982023239135742, "max": 15.968067169189453, "mean": 0.03324813023209572, "std": 1.9908379316329956, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_v.weight": { "min": -0.2077104151248932, "max": 0.22651426494121552, "mean": -7.221860869321972e-05, "std": 0.040554750710725784, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_v.bias": { "min": -0.06946562975645065, "max": 0.06337178498506546, "mean": 0.00015520014858338982, "std": 0.01475033164024353, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_out.0.weight": { "min": -0.46565988659858704, "max": 0.3208334743976593, "mean": 1.9561422959668562e-05, "std": 0.040589939802885056, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_out.0.bias": { "min": -0.064049631357193, "max": 0.11550958454608917, "mean": 0.0011937393574044108, "std": 0.02470548450946808, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.3.g": { "min": 0.3747756779193878, "max": 0.9347750544548035, "mean": 0.7509442567825317, "std": 0.04021797329187393, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.4.ff.0.0.weight": { "min": -0.2801269292831421, "max": 0.27387121319770813, "mean": -0.00016841593605931848, "std": 0.040997058153152466, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.20.4.ff.0.0.bias": { "min": -0.19878797233104706, "max": 0.05111948773264885, "mean": -0.032027605921030045, "std": 0.025102604180574417, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.20.4.ff.2.weight": { "min": -0.6596145033836365, "max": 0.537032425403595, "mean": -4.937778794555925e-05, "std": 0.05284846946597099, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.20.4.ff.2.bias": { "min": -0.1930496245622635, "max": 0.5826522707939148, "mean": -0.0005124770104885101, "std": 0.04108353331685066, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.0.weight": { "min": -0.41787075996398926, "max": 0.37214192748069763, "mean": 6.244237738428637e-06, "std": 0.021621638908982277, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.21.1.g": { "min": 0.21441777050495148, "max": 0.7472008466720581, "mean": 0.6494799852371216, "std": 0.05431411787867546, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_q.weight": { "min": -0.20989972352981567, "max": 0.19592680037021637, "mean": 4.0151899156626314e-05, "std": 0.039461154490709305, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_q.bias": { "min": -0.329771488904953, "max": 0.25982508063316345, "mean": -0.003228080226108432, "std": 0.056280527263879776, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_k.weight": { "min": -0.2062487006187439, "max": 0.2551846504211426, "mean": 5.400779264164157e-05, "std": 0.038563843816518784, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_k.bias": { "min": -6.2493767738342285, "max": 6.938913345336914, "mean": 0.04840244725346565, "std": 1.3855851888656616, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_v.weight": { "min": -0.21009960770606995, "max": 0.23065192997455597, "mean": -5.2159043661959e-06, "std": 0.041313353925943375, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_v.bias": { "min": -0.0439465157687664, "max": 0.03601067140698433, "mean": -2.0584266167134047e-06, "std": 0.012799846939742565, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_out.0.weight": { "min": -0.39804428815841675, "max": 0.34499886631965637, "mean": -5.5499749578302726e-05, "std": 0.04238968715071678, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_out.0.bias": { "min": -0.055174216628074646, "max": 0.06293413788080215, "mean": 0.00036305427784100175, "std": 0.01867016963660717, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.3.g": { "min": 0.3503042459487915, "max": 1.0480320453643799, "mean": 0.7894532084465027, "std": 0.048786185681819916, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.4.ff.0.0.weight": { "min": -0.3337661623954773, "max": 0.3864375650882721, "mean": -0.00016956219042185694, "std": 0.04148184508085251, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.21.4.ff.0.0.bias": { "min": -0.15768638253211975, "max": 0.05907022953033447, "mean": -0.031832221895456314, "std": 0.0251291636377573, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.21.4.ff.2.weight": { "min": -0.6973653435707092, "max": 0.47017383575439453, "mean": -8.81649466464296e-05, "std": 0.051795393228530884, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.21.4.ff.2.bias": { "min": -0.24848268926143646, "max": 0.32916560769081116, "mean": -0.0002544308081269264, "std": 0.041454534977674484, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.0.weight": { "min": -0.2872900664806366, "max": 0.3505076766014099, "mean": -2.3586867428093683e-06, "std": 0.024236176162958145, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.22.1.g": { "min": 0.19670914113521576, "max": 0.7788708806037903, "mean": 0.6702359914779663, "std": 0.05864134803414345, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_q.weight": { "min": -0.2293103188276291, "max": 0.23172836005687714, "mean": -2.0263662008801475e-05, "std": 0.04043755307793617, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_q.bias": { "min": -0.2201755940914154, "max": 0.2412194311618805, "mean": 0.0007778588915243745, "std": 0.05583813413977623, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_k.weight": { "min": -0.21645531058311462, "max": 0.2269156575202942, "mean": -7.186527363955975e-05, "std": 0.03937343880534172, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_k.bias": { "min": -8.91368579864502, "max": 9.076720237731934, "mean": -0.0012592850252985954, "std": 1.8490537405014038, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_v.weight": { "min": -0.2699006199836731, "max": 0.2594479024410248, "mean": 4.3596926843747497e-05, "std": 0.03840681165456772, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_v.bias": { "min": -0.05783012881875038, "max": 0.057821568101644516, "mean": 0.0003521823091432452, "std": 0.014716818928718567, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_out.0.weight": { "min": -0.26518943905830383, "max": 0.2887333035469055, "mean": -6.169862172100693e-05, "std": 0.03907295688986778, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_out.0.bias": { "min": -0.04396004229784012, "max": 0.037220947444438934, "mean": -9.395174856763333e-05, "std": 0.013354334980249405, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.3.g": { "min": 0.33940210938453674, "max": 1.0958820581436157, "mean": 0.8637964129447937, "std": 0.06389264762401581, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.4.ff.0.0.weight": { "min": -0.4235352873802185, "max": 0.41927266120910645, "mean": 0.000313018070301041, "std": 0.04350249841809273, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.22.4.ff.0.0.bias": { "min": -0.21509824693202972, "max": 0.17092689871788025, "mean": -0.0294746495783329, "std": 0.03193298354744911, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.22.4.ff.2.weight": { "min": -0.6005915999412537, "max": 0.5609812140464783, "mean": -0.00015016092220321298, "std": 0.05344870314002037, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.22.4.ff.2.bias": { "min": -0.17891772091388702, "max": 0.3774968683719635, "mean": 0.0013590974267572165, "std": 0.03732309862971306, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.0.weight": { "min": -0.39461401104927063, "max": 0.36924391984939575, "mean": 3.7040204915683717e-05, "std": 0.028616365045309067, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.23.1.g": { "min": 0.29045382142066956, "max": 0.8264784812927246, "mean": 0.7055213451385498, "std": 0.0678410679101944, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_q.weight": { "min": -0.9263020753860474, "max": 1.0267603397369385, "mean": -2.6431953301653266e-05, "std": 0.04762791842222214, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_q.bias": { "min": -0.8796241879463196, "max": 0.8164305686950684, "mean": -0.0003041320014744997, "std": 0.0956113338470459, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_k.weight": { "min": -0.27020347118377686, "max": 0.241440087556839, "mean": -2.271639823447913e-05, "std": 0.038950297981500626, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_k.bias": { "min": -23.76431655883789, "max": 22.871889114379883, "mean": -0.09189724177122116, "std": 4.073054313659668, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_v.weight": { "min": -0.22821645438671112, "max": 0.24578580260276794, "mean": -2.5681954866740853e-05, "std": 0.03863786533474922, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_v.bias": { "min": -0.06044214218854904, "max": 0.04586166515946388, "mean": -0.00014234766422305256, "std": 0.014693022705614567, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_out.0.weight": { "min": -0.3386403024196625, "max": 0.3753957748413086, "mean": 7.493808880099095e-06, "std": 0.04081406444311142, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_out.0.bias": { "min": -0.04647579416632652, "max": 0.19592434167861938, "mean": 0.00027245082310400903, "std": 0.01356989610940218, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.3.g": { "min": 0.3743247389793396, "max": 1.133009910583496, "mean": 0.8900730609893799, "std": 0.06399820744991302, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.4.ff.0.0.weight": { "min": -0.44806551933288574, "max": 0.5433648824691772, "mean": 2.4754037440288812e-05, "std": 0.04556819051504135, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.23.4.ff.0.0.bias": { "min": -0.22422385215759277, "max": 0.08793910592794418, "mean": -0.03202162683010101, "std": 0.03776844963431358, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.23.4.ff.2.weight": { "min": -0.7274155616760254, "max": 0.6907259225845337, "mean": 3.4943295759148896e-05, "std": 0.05178087204694748, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.23.4.ff.2.bias": { "min": -0.17463494837284088, "max": 0.2185920923948288, "mean": 3.897436545230448e-05, "std": 0.031783733516931534, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.0.weight": { "min": -0.34052687883377075, "max": 0.37423866987228394, "mean": 4.304847971070558e-05, "std": 0.034138280898332596, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.24.1.g": { "min": 0.3175727128982544, "max": 1.290410041809082, "mean": 0.6015003323554993, "std": 0.08363870531320572, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_q.weight": { "min": -0.28354600071907043, "max": 0.260841429233551, "mean": -3.130652658001054e-06, "std": 0.035979557782411575, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_q.bias": { "min": -0.23592722415924072, "max": 0.2057497352361679, "mean": 0.00023727506049908698, "std": 0.056021153926849365, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_k.weight": { "min": -0.43595167994499207, "max": 0.32549113035202026, "mean": 2.434775342408102e-05, "std": 0.034129101783037186, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_k.bias": { "min": -5.553627967834473, "max": 7.324089527130127, "mean": -0.007399275898933411, "std": 0.7001854181289673, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_v.weight": { "min": -0.34464672207832336, "max": 0.3639456331729889, "mean": 0.0001033150329021737, "std": 0.047829318791627884, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_v.bias": { "min": -0.0738968476653099, "max": 0.060446880757808685, "mean": 0.0009350795298814774, "std": 0.014948361553251743, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_out.0.weight": { "min": -0.2562582790851593, "max": 0.28724488615989685, "mean": 4.657229510485195e-06, "std": 0.0415559858083725, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_out.0.bias": { "min": -0.05538095533847809, "max": 0.06288731843233109, "mean": 0.00013551797019317746, "std": 0.007167231757193804, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.3.g": { "min": 0.4939861297607422, "max": 1.2202398777008057, "mean": 1.013412356376648, "std": 0.1173911765217781, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.4.ff.0.0.weight": { "min": -1.0939209461212158, "max": 1.0473735332489014, "mean": -4.927456029690802e-05, "std": 0.05241009593009949, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.24.4.ff.0.0.bias": { "min": -0.22382217645645142, "max": 0.1730560064315796, "mean": -0.027248641476035118, "std": 0.03636055067181587, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.24.4.ff.2.weight": { "min": -0.8865154385566711, "max": 0.9247081279754639, "mean": -0.00014585975441150367, "std": 0.0532848984003067, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.24.4.ff.2.bias": { "min": -0.17122139036655426, "max": 0.38014623522758484, "mean": 0.0033699313644319773, "std": 0.03990361467003822, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.0.weight": { "min": -0.7786033749580383, "max": 0.7243013381958008, "mean": 1.8795288269757293e-05, "std": 0.046159159392118454, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.25.1.g": { "min": 0.3385763168334961, "max": 1.4310884475708008, "mean": 0.9482859969139099, "std": 0.20665791630744934, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_q.weight": { "min": -1.745840311050415, "max": 1.7046537399291992, "mean": 0.00022703518334310502, "std": 0.15869012475013733, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_q.bias": { "min": -1.2008079290390015, "max": 1.1013628244400024, "mean": -0.009554527699947357, "std": 0.20401149988174438, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_k.weight": { "min": -0.4215790033340454, "max": 0.427647203207016, "mean": 6.439993012463674e-05, "std": 0.048017047345638275, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_k.bias": { "min": -19.76506996154785, "max": 19.559972763061523, "mean": -0.24841785430908203, "std": 4.7801384925842285, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_v.weight": { "min": -0.32463034987449646, "max": 0.4392913281917572, "mean": -1.1934026588278357e-05, "std": 0.046162351965904236, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_v.bias": { "min": -0.03394031897187233, "max": 0.03703805059194565, "mean": 0.0006406006286852062, "std": 0.012916130013763905, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_out.0.weight": { "min": -0.7043119668960571, "max": 0.6668245792388916, "mean": 4.3251380702713504e-05, "std": 0.05788382515311241, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_out.0.bias": { "min": -0.07238046824932098, "max": 0.06770296394824982, "mean": -0.00013378039875533432, "std": 0.012917297892272472, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.3.g": { "min": 0.38019153475761414, "max": 1.391236424446106, "mean": 1.0665456056594849, "std": 0.21965359151363373, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.4.ff.0.0.weight": { "min": -0.6170499324798584, "max": 0.718601405620575, "mean": 0.00011217871360713616, "std": 0.058021701872348785, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.25.4.ff.0.0.bias": { "min": -0.21975933015346527, "max": 0.22518815100193024, "mean": 0.006216429639607668, "std": 0.049728311598300934, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.25.4.ff.2.weight": { "min": -0.6300503015518188, "max": 0.8897712826728821, "mean": 1.1653193723759614e-05, "std": 0.023531364277005196, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.25.4.ff.2.bias": { "min": -0.5075116753578186, "max": 0.47451627254486084, "mean": -0.0030209918040782213, "std": 0.06935632228851318, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.norm_out.g": { "min": 0.5379416942596436, "max": 1.1812505722045898, "mean": 0.7826943397521973, "std": 0.0987553521990776, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.proj_out.weight": { "min": -0.26785895228385925, "max": 0.21342454850673676, "mean": -0.0002236703730886802, "std": 0.05399824678897858, "sparsity": 0.0, "shape": [ 100, 1024 ] }, "transformer.proj_out.bias": { "min": -0.23829060792922974, "max": 0.014859253540635109, "mean": -0.043948449194431305, "std": 0.034328024834394455, "sparsity": 0.0, "shape": [ 100 ] } } }