{ "layer_types": { "transformer": 391 }, "parameter_counts": { "transformer.time_embed.time_mlp.0.weight": 262144, "transformer.time_embed.time_mlp.0.bias": 1024, "transformer.time_embed.time_mlp.2.weight": 1048576, "transformer.time_embed.time_mlp.2.bias": 1024, "transformer.text_embed.text_embed.weight": 254600, "transformer.input_embed.proj.weight": 307200, "transformer.input_embed.proj.bias": 1024, "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, "transformer.layers.0.1.g": 1024, "transformer.layers.0.2.to_q.weight": 1048576, "transformer.layers.0.2.to_q.bias": 1024, "transformer.layers.0.2.to_k.weight": 1048576, "transformer.layers.0.2.to_k.bias": 1024, "transformer.layers.0.2.to_v.weight": 1048576, "transformer.layers.0.2.to_v.bias": 1024, "transformer.layers.0.2.to_out.0.weight": 1048576, "transformer.layers.0.2.to_out.0.bias": 1024, "transformer.layers.0.3.g": 1024, "transformer.layers.0.4.ff.0.0.weight": 4194304, "transformer.layers.0.4.ff.0.0.bias": 4096, "transformer.layers.0.4.ff.2.weight": 4194304, "transformer.layers.0.4.ff.2.bias": 1024, "transformer.layers.1.1.g": 1024, "transformer.layers.1.2.to_q.weight": 1048576, "transformer.layers.1.2.to_q.bias": 1024, "transformer.layers.1.2.to_k.weight": 1048576, "transformer.layers.1.2.to_k.bias": 1024, "transformer.layers.1.2.to_v.weight": 1048576, "transformer.layers.1.2.to_v.bias": 1024, "transformer.layers.1.2.to_out.0.weight": 1048576, "transformer.layers.1.2.to_out.0.bias": 1024, "transformer.layers.1.3.g": 1024, "transformer.layers.1.4.ff.0.0.weight": 4194304, "transformer.layers.1.4.ff.0.0.bias": 4096, "transformer.layers.1.4.ff.2.weight": 4194304, "transformer.layers.1.4.ff.2.bias": 1024, "transformer.layers.2.1.g": 1024, "transformer.layers.2.2.to_q.weight": 1048576, "transformer.layers.2.2.to_q.bias": 1024, "transformer.layers.2.2.to_k.weight": 1048576, "transformer.layers.2.2.to_k.bias": 1024, "transformer.layers.2.2.to_v.weight": 1048576, "transformer.layers.2.2.to_v.bias": 1024, "transformer.layers.2.2.to_out.0.weight": 1048576, "transformer.layers.2.2.to_out.0.bias": 1024, "transformer.layers.2.3.g": 1024, "transformer.layers.2.4.ff.0.0.weight": 4194304, "transformer.layers.2.4.ff.0.0.bias": 4096, "transformer.layers.2.4.ff.2.weight": 4194304, "transformer.layers.2.4.ff.2.bias": 1024, "transformer.layers.3.1.g": 1024, "transformer.layers.3.2.to_q.weight": 1048576, "transformer.layers.3.2.to_q.bias": 1024, "transformer.layers.3.2.to_k.weight": 1048576, "transformer.layers.3.2.to_k.bias": 1024, "transformer.layers.3.2.to_v.weight": 1048576, "transformer.layers.3.2.to_v.bias": 1024, "transformer.layers.3.2.to_out.0.weight": 1048576, "transformer.layers.3.2.to_out.0.bias": 1024, "transformer.layers.3.3.g": 1024, "transformer.layers.3.4.ff.0.0.weight": 4194304, "transformer.layers.3.4.ff.0.0.bias": 4096, "transformer.layers.3.4.ff.2.weight": 4194304, "transformer.layers.3.4.ff.2.bias": 1024, "transformer.layers.4.1.g": 1024, "transformer.layers.4.2.to_q.weight": 1048576, "transformer.layers.4.2.to_q.bias": 1024, "transformer.layers.4.2.to_k.weight": 1048576, "transformer.layers.4.2.to_k.bias": 1024, "transformer.layers.4.2.to_v.weight": 1048576, "transformer.layers.4.2.to_v.bias": 1024, "transformer.layers.4.2.to_out.0.weight": 1048576, "transformer.layers.4.2.to_out.0.bias": 1024, "transformer.layers.4.3.g": 1024, "transformer.layers.4.4.ff.0.0.weight": 4194304, "transformer.layers.4.4.ff.0.0.bias": 4096, "transformer.layers.4.4.ff.2.weight": 4194304, "transformer.layers.4.4.ff.2.bias": 1024, "transformer.layers.5.1.g": 1024, "transformer.layers.5.2.to_q.weight": 1048576, "transformer.layers.5.2.to_q.bias": 1024, "transformer.layers.5.2.to_k.weight": 1048576, "transformer.layers.5.2.to_k.bias": 1024, "transformer.layers.5.2.to_v.weight": 1048576, "transformer.layers.5.2.to_v.bias": 1024, "transformer.layers.5.2.to_out.0.weight": 1048576, "transformer.layers.5.2.to_out.0.bias": 1024, "transformer.layers.5.3.g": 1024, "transformer.layers.5.4.ff.0.0.weight": 4194304, "transformer.layers.5.4.ff.0.0.bias": 4096, "transformer.layers.5.4.ff.2.weight": 4194304, "transformer.layers.5.4.ff.2.bias": 1024, "transformer.layers.6.1.g": 1024, "transformer.layers.6.2.to_q.weight": 1048576, "transformer.layers.6.2.to_q.bias": 1024, "transformer.layers.6.2.to_k.weight": 1048576, "transformer.layers.6.2.to_k.bias": 1024, "transformer.layers.6.2.to_v.weight": 1048576, "transformer.layers.6.2.to_v.bias": 1024, "transformer.layers.6.2.to_out.0.weight": 1048576, "transformer.layers.6.2.to_out.0.bias": 1024, "transformer.layers.6.3.g": 1024, "transformer.layers.6.4.ff.0.0.weight": 4194304, "transformer.layers.6.4.ff.0.0.bias": 4096, "transformer.layers.6.4.ff.2.weight": 4194304, "transformer.layers.6.4.ff.2.bias": 1024, "transformer.layers.7.1.g": 1024, "transformer.layers.7.2.to_q.weight": 1048576, "transformer.layers.7.2.to_q.bias": 1024, "transformer.layers.7.2.to_k.weight": 1048576, "transformer.layers.7.2.to_k.bias": 1024, "transformer.layers.7.2.to_v.weight": 1048576, "transformer.layers.7.2.to_v.bias": 1024, "transformer.layers.7.2.to_out.0.weight": 1048576, "transformer.layers.7.2.to_out.0.bias": 1024, "transformer.layers.7.3.g": 1024, "transformer.layers.7.4.ff.0.0.weight": 4194304, "transformer.layers.7.4.ff.0.0.bias": 4096, "transformer.layers.7.4.ff.2.weight": 4194304, "transformer.layers.7.4.ff.2.bias": 1024, "transformer.layers.8.1.g": 1024, "transformer.layers.8.2.to_q.weight": 1048576, "transformer.layers.8.2.to_q.bias": 1024, "transformer.layers.8.2.to_k.weight": 1048576, "transformer.layers.8.2.to_k.bias": 1024, "transformer.layers.8.2.to_v.weight": 1048576, "transformer.layers.8.2.to_v.bias": 1024, "transformer.layers.8.2.to_out.0.weight": 1048576, "transformer.layers.8.2.to_out.0.bias": 1024, "transformer.layers.8.3.g": 1024, "transformer.layers.8.4.ff.0.0.weight": 4194304, "transformer.layers.8.4.ff.0.0.bias": 4096, "transformer.layers.8.4.ff.2.weight": 4194304, "transformer.layers.8.4.ff.2.bias": 1024, "transformer.layers.9.1.g": 1024, "transformer.layers.9.2.to_q.weight": 1048576, "transformer.layers.9.2.to_q.bias": 1024, "transformer.layers.9.2.to_k.weight": 1048576, "transformer.layers.9.2.to_k.bias": 1024, "transformer.layers.9.2.to_v.weight": 1048576, "transformer.layers.9.2.to_v.bias": 1024, "transformer.layers.9.2.to_out.0.weight": 1048576, "transformer.layers.9.2.to_out.0.bias": 1024, "transformer.layers.9.3.g": 1024, "transformer.layers.9.4.ff.0.0.weight": 4194304, "transformer.layers.9.4.ff.0.0.bias": 4096, "transformer.layers.9.4.ff.2.weight": 4194304, "transformer.layers.9.4.ff.2.bias": 1024, "transformer.layers.10.1.g": 1024, "transformer.layers.10.2.to_q.weight": 1048576, "transformer.layers.10.2.to_q.bias": 1024, "transformer.layers.10.2.to_k.weight": 1048576, "transformer.layers.10.2.to_k.bias": 1024, "transformer.layers.10.2.to_v.weight": 1048576, "transformer.layers.10.2.to_v.bias": 1024, "transformer.layers.10.2.to_out.0.weight": 1048576, "transformer.layers.10.2.to_out.0.bias": 1024, "transformer.layers.10.3.g": 1024, "transformer.layers.10.4.ff.0.0.weight": 4194304, "transformer.layers.10.4.ff.0.0.bias": 4096, "transformer.layers.10.4.ff.2.weight": 4194304, "transformer.layers.10.4.ff.2.bias": 1024, "transformer.layers.11.1.g": 1024, "transformer.layers.11.2.to_q.weight": 1048576, "transformer.layers.11.2.to_q.bias": 1024, "transformer.layers.11.2.to_k.weight": 1048576, "transformer.layers.11.2.to_k.bias": 1024, "transformer.layers.11.2.to_v.weight": 1048576, "transformer.layers.11.2.to_v.bias": 1024, "transformer.layers.11.2.to_out.0.weight": 1048576, "transformer.layers.11.2.to_out.0.bias": 1024, "transformer.layers.11.3.g": 1024, "transformer.layers.11.4.ff.0.0.weight": 4194304, "transformer.layers.11.4.ff.0.0.bias": 4096, "transformer.layers.11.4.ff.2.weight": 4194304, "transformer.layers.11.4.ff.2.bias": 1024, "transformer.layers.12.1.g": 1024, "transformer.layers.12.2.to_q.weight": 1048576, "transformer.layers.12.2.to_q.bias": 1024, "transformer.layers.12.2.to_k.weight": 1048576, "transformer.layers.12.2.to_k.bias": 1024, "transformer.layers.12.2.to_v.weight": 1048576, "transformer.layers.12.2.to_v.bias": 1024, "transformer.layers.12.2.to_out.0.weight": 1048576, "transformer.layers.12.2.to_out.0.bias": 1024, "transformer.layers.12.3.g": 1024, "transformer.layers.12.4.ff.0.0.weight": 4194304, "transformer.layers.12.4.ff.0.0.bias": 4096, "transformer.layers.12.4.ff.2.weight": 4194304, "transformer.layers.12.4.ff.2.bias": 1024, "transformer.layers.13.0.weight": 2097152, "transformer.layers.13.1.g": 1024, "transformer.layers.13.2.to_q.weight": 1048576, "transformer.layers.13.2.to_q.bias": 1024, "transformer.layers.13.2.to_k.weight": 1048576, "transformer.layers.13.2.to_k.bias": 1024, "transformer.layers.13.2.to_v.weight": 1048576, "transformer.layers.13.2.to_v.bias": 1024, "transformer.layers.13.2.to_out.0.weight": 1048576, "transformer.layers.13.2.to_out.0.bias": 1024, "transformer.layers.13.3.g": 1024, "transformer.layers.13.4.ff.0.0.weight": 4194304, "transformer.layers.13.4.ff.0.0.bias": 4096, "transformer.layers.13.4.ff.2.weight": 4194304, "transformer.layers.13.4.ff.2.bias": 1024, "transformer.layers.14.0.weight": 2097152, "transformer.layers.14.1.g": 1024, "transformer.layers.14.2.to_q.weight": 1048576, "transformer.layers.14.2.to_q.bias": 1024, "transformer.layers.14.2.to_k.weight": 1048576, "transformer.layers.14.2.to_k.bias": 1024, "transformer.layers.14.2.to_v.weight": 1048576, "transformer.layers.14.2.to_v.bias": 1024, "transformer.layers.14.2.to_out.0.weight": 1048576, "transformer.layers.14.2.to_out.0.bias": 1024, "transformer.layers.14.3.g": 1024, "transformer.layers.14.4.ff.0.0.weight": 4194304, "transformer.layers.14.4.ff.0.0.bias": 4096, "transformer.layers.14.4.ff.2.weight": 4194304, "transformer.layers.14.4.ff.2.bias": 1024, "transformer.layers.15.0.weight": 2097152, "transformer.layers.15.1.g": 1024, "transformer.layers.15.2.to_q.weight": 1048576, "transformer.layers.15.2.to_q.bias": 1024, "transformer.layers.15.2.to_k.weight": 1048576, "transformer.layers.15.2.to_k.bias": 1024, "transformer.layers.15.2.to_v.weight": 1048576, "transformer.layers.15.2.to_v.bias": 1024, "transformer.layers.15.2.to_out.0.weight": 1048576, "transformer.layers.15.2.to_out.0.bias": 1024, "transformer.layers.15.3.g": 1024, "transformer.layers.15.4.ff.0.0.weight": 4194304, "transformer.layers.15.4.ff.0.0.bias": 4096, "transformer.layers.15.4.ff.2.weight": 4194304, "transformer.layers.15.4.ff.2.bias": 1024, "transformer.layers.16.0.weight": 2097152, "transformer.layers.16.1.g": 1024, "transformer.layers.16.2.to_q.weight": 1048576, "transformer.layers.16.2.to_q.bias": 1024, "transformer.layers.16.2.to_k.weight": 1048576, "transformer.layers.16.2.to_k.bias": 1024, "transformer.layers.16.2.to_v.weight": 1048576, "transformer.layers.16.2.to_v.bias": 1024, "transformer.layers.16.2.to_out.0.weight": 1048576, "transformer.layers.16.2.to_out.0.bias": 1024, "transformer.layers.16.3.g": 1024, "transformer.layers.16.4.ff.0.0.weight": 4194304, "transformer.layers.16.4.ff.0.0.bias": 4096, "transformer.layers.16.4.ff.2.weight": 4194304, "transformer.layers.16.4.ff.2.bias": 1024, "transformer.layers.17.0.weight": 2097152, "transformer.layers.17.1.g": 1024, "transformer.layers.17.2.to_q.weight": 1048576, "transformer.layers.17.2.to_q.bias": 1024, "transformer.layers.17.2.to_k.weight": 1048576, "transformer.layers.17.2.to_k.bias": 1024, "transformer.layers.17.2.to_v.weight": 1048576, "transformer.layers.17.2.to_v.bias": 1024, "transformer.layers.17.2.to_out.0.weight": 1048576, "transformer.layers.17.2.to_out.0.bias": 1024, "transformer.layers.17.3.g": 1024, "transformer.layers.17.4.ff.0.0.weight": 4194304, "transformer.layers.17.4.ff.0.0.bias": 4096, "transformer.layers.17.4.ff.2.weight": 4194304, "transformer.layers.17.4.ff.2.bias": 1024, "transformer.layers.18.0.weight": 2097152, "transformer.layers.18.1.g": 1024, "transformer.layers.18.2.to_q.weight": 1048576, "transformer.layers.18.2.to_q.bias": 1024, "transformer.layers.18.2.to_k.weight": 1048576, "transformer.layers.18.2.to_k.bias": 1024, "transformer.layers.18.2.to_v.weight": 1048576, "transformer.layers.18.2.to_v.bias": 1024, "transformer.layers.18.2.to_out.0.weight": 1048576, "transformer.layers.18.2.to_out.0.bias": 1024, "transformer.layers.18.3.g": 1024, "transformer.layers.18.4.ff.0.0.weight": 4194304, "transformer.layers.18.4.ff.0.0.bias": 4096, "transformer.layers.18.4.ff.2.weight": 4194304, "transformer.layers.18.4.ff.2.bias": 1024, "transformer.layers.19.0.weight": 2097152, "transformer.layers.19.1.g": 1024, "transformer.layers.19.2.to_q.weight": 1048576, "transformer.layers.19.2.to_q.bias": 1024, "transformer.layers.19.2.to_k.weight": 1048576, "transformer.layers.19.2.to_k.bias": 1024, "transformer.layers.19.2.to_v.weight": 1048576, "transformer.layers.19.2.to_v.bias": 1024, "transformer.layers.19.2.to_out.0.weight": 1048576, "transformer.layers.19.2.to_out.0.bias": 1024, "transformer.layers.19.3.g": 1024, "transformer.layers.19.4.ff.0.0.weight": 4194304, "transformer.layers.19.4.ff.0.0.bias": 4096, "transformer.layers.19.4.ff.2.weight": 4194304, "transformer.layers.19.4.ff.2.bias": 1024, "transformer.layers.20.0.weight": 2097152, "transformer.layers.20.1.g": 1024, "transformer.layers.20.2.to_q.weight": 1048576, "transformer.layers.20.2.to_q.bias": 1024, "transformer.layers.20.2.to_k.weight": 1048576, "transformer.layers.20.2.to_k.bias": 1024, "transformer.layers.20.2.to_v.weight": 1048576, "transformer.layers.20.2.to_v.bias": 1024, "transformer.layers.20.2.to_out.0.weight": 1048576, "transformer.layers.20.2.to_out.0.bias": 1024, "transformer.layers.20.3.g": 1024, "transformer.layers.20.4.ff.0.0.weight": 4194304, "transformer.layers.20.4.ff.0.0.bias": 4096, "transformer.layers.20.4.ff.2.weight": 4194304, "transformer.layers.20.4.ff.2.bias": 1024, "transformer.layers.21.0.weight": 2097152, "transformer.layers.21.1.g": 1024, "transformer.layers.21.2.to_q.weight": 1048576, "transformer.layers.21.2.to_q.bias": 1024, "transformer.layers.21.2.to_k.weight": 1048576, "transformer.layers.21.2.to_k.bias": 1024, "transformer.layers.21.2.to_v.weight": 1048576, "transformer.layers.21.2.to_v.bias": 1024, "transformer.layers.21.2.to_out.0.weight": 1048576, "transformer.layers.21.2.to_out.0.bias": 1024, "transformer.layers.21.3.g": 1024, "transformer.layers.21.4.ff.0.0.weight": 4194304, "transformer.layers.21.4.ff.0.0.bias": 4096, "transformer.layers.21.4.ff.2.weight": 4194304, "transformer.layers.21.4.ff.2.bias": 1024, "transformer.layers.22.0.weight": 2097152, "transformer.layers.22.1.g": 1024, "transformer.layers.22.2.to_q.weight": 1048576, "transformer.layers.22.2.to_q.bias": 1024, "transformer.layers.22.2.to_k.weight": 1048576, "transformer.layers.22.2.to_k.bias": 1024, "transformer.layers.22.2.to_v.weight": 1048576, "transformer.layers.22.2.to_v.bias": 1024, "transformer.layers.22.2.to_out.0.weight": 1048576, "transformer.layers.22.2.to_out.0.bias": 1024, "transformer.layers.22.3.g": 1024, "transformer.layers.22.4.ff.0.0.weight": 4194304, "transformer.layers.22.4.ff.0.0.bias": 4096, "transformer.layers.22.4.ff.2.weight": 4194304, "transformer.layers.22.4.ff.2.bias": 1024, "transformer.layers.23.0.weight": 2097152, "transformer.layers.23.1.g": 1024, "transformer.layers.23.2.to_q.weight": 1048576, "transformer.layers.23.2.to_q.bias": 1024, "transformer.layers.23.2.to_k.weight": 1048576, "transformer.layers.23.2.to_k.bias": 1024, "transformer.layers.23.2.to_v.weight": 1048576, "transformer.layers.23.2.to_v.bias": 1024, "transformer.layers.23.2.to_out.0.weight": 1048576, "transformer.layers.23.2.to_out.0.bias": 1024, "transformer.layers.23.3.g": 1024, "transformer.layers.23.4.ff.0.0.weight": 4194304, "transformer.layers.23.4.ff.0.0.bias": 4096, "transformer.layers.23.4.ff.2.weight": 4194304, "transformer.layers.23.4.ff.2.bias": 1024, "transformer.layers.24.0.weight": 2097152, "transformer.layers.24.1.g": 1024, "transformer.layers.24.2.to_q.weight": 1048576, "transformer.layers.24.2.to_q.bias": 1024, "transformer.layers.24.2.to_k.weight": 1048576, "transformer.layers.24.2.to_k.bias": 1024, "transformer.layers.24.2.to_v.weight": 1048576, "transformer.layers.24.2.to_v.bias": 1024, "transformer.layers.24.2.to_out.0.weight": 1048576, "transformer.layers.24.2.to_out.0.bias": 1024, "transformer.layers.24.3.g": 1024, "transformer.layers.24.4.ff.0.0.weight": 4194304, "transformer.layers.24.4.ff.0.0.bias": 4096, "transformer.layers.24.4.ff.2.weight": 4194304, "transformer.layers.24.4.ff.2.bias": 1024, "transformer.layers.25.0.weight": 2097152, "transformer.layers.25.1.g": 1024, "transformer.layers.25.2.to_q.weight": 1048576, "transformer.layers.25.2.to_q.bias": 1024, "transformer.layers.25.2.to_k.weight": 1048576, "transformer.layers.25.2.to_k.bias": 1024, "transformer.layers.25.2.to_v.weight": 1048576, "transformer.layers.25.2.to_v.bias": 1024, "transformer.layers.25.2.to_out.0.weight": 1048576, "transformer.layers.25.2.to_out.0.bias": 1024, "transformer.layers.25.3.g": 1024, "transformer.layers.25.4.ff.0.0.weight": 4194304, "transformer.layers.25.4.ff.0.0.bias": 4096, "transformer.layers.25.4.ff.2.weight": 4194304, "transformer.layers.25.4.ff.2.bias": 1024, "transformer.norm_out.g": 1024, "transformer.proj_out.weight": 102400, "transformer.proj_out.bias": 100 }, "important_layers": [ "transformer.time_embed.time_mlp.0.weight", "transformer.time_embed.time_mlp.2.weight", "transformer.text_embed.text_embed.weight", "transformer.input_embed.proj.weight", "transformer.input_embed.conv_pos_embed.conv1d.0.weight", "transformer.input_embed.conv_pos_embed.conv1d.2.weight", "transformer.layers.0.2.to_q.weight", "transformer.layers.0.2.to_k.weight", "transformer.layers.0.2.to_v.weight", "transformer.layers.0.2.to_out.0.weight", "transformer.layers.0.4.ff.0.0.weight", "transformer.layers.0.4.ff.2.weight", "transformer.layers.1.2.to_q.weight", "transformer.layers.1.2.to_k.weight", "transformer.layers.1.2.to_v.weight", "transformer.layers.1.2.to_out.0.weight", "transformer.layers.1.4.ff.0.0.weight", "transformer.layers.1.4.ff.2.weight", "transformer.layers.2.2.to_q.weight", "transformer.layers.2.2.to_k.weight", "transformer.layers.2.2.to_v.weight", "transformer.layers.2.2.to_out.0.weight", "transformer.layers.2.4.ff.0.0.weight", "transformer.layers.2.4.ff.2.weight", "transformer.layers.3.2.to_q.weight", "transformer.layers.3.2.to_k.weight", "transformer.layers.3.2.to_v.weight", "transformer.layers.3.2.to_out.0.weight", "transformer.layers.3.4.ff.0.0.weight", "transformer.layers.3.4.ff.2.weight", "transformer.layers.4.2.to_q.weight", "transformer.layers.4.2.to_k.weight", "transformer.layers.4.2.to_v.weight", "transformer.layers.4.2.to_out.0.weight", "transformer.layers.4.4.ff.0.0.weight", "transformer.layers.4.4.ff.2.weight", "transformer.layers.5.2.to_q.weight", "transformer.layers.5.2.to_k.weight", "transformer.layers.5.2.to_v.weight", "transformer.layers.5.2.to_out.0.weight", "transformer.layers.5.4.ff.0.0.weight", "transformer.layers.5.4.ff.2.weight", "transformer.layers.6.2.to_q.weight", "transformer.layers.6.2.to_k.weight", "transformer.layers.6.2.to_v.weight", "transformer.layers.6.2.to_out.0.weight", "transformer.layers.6.4.ff.0.0.weight", "transformer.layers.6.4.ff.2.weight", "transformer.layers.7.2.to_q.weight", "transformer.layers.7.2.to_k.weight", "transformer.layers.7.2.to_v.weight", "transformer.layers.7.2.to_out.0.weight", "transformer.layers.7.4.ff.0.0.weight", "transformer.layers.7.4.ff.2.weight", "transformer.layers.8.4.ff.0.0.weight", "transformer.layers.8.4.ff.2.weight", "transformer.layers.9.4.ff.0.0.weight", "transformer.layers.9.4.ff.2.weight", "transformer.layers.10.4.ff.0.0.weight", "transformer.layers.10.4.ff.2.weight", "transformer.layers.11.4.ff.0.0.weight", "transformer.layers.11.4.ff.2.weight", "transformer.layers.12.4.ff.0.0.weight", "transformer.layers.12.4.ff.2.weight", "transformer.layers.13.0.weight", "transformer.layers.13.4.ff.0.0.weight", "transformer.layers.13.4.ff.2.weight", "transformer.layers.14.0.weight", "transformer.layers.14.4.ff.0.0.weight", "transformer.layers.14.4.ff.2.weight", "transformer.layers.15.0.weight", "transformer.layers.15.4.ff.0.0.weight", "transformer.layers.15.4.ff.2.weight", "transformer.layers.16.4.ff.0.0.weight", "transformer.layers.16.4.ff.2.weight", "transformer.layers.17.4.ff.0.0.weight", "transformer.layers.17.4.ff.2.weight", "transformer.layers.18.4.ff.0.0.weight", "transformer.layers.18.4.ff.2.weight", "transformer.layers.19.4.ff.0.0.weight", "transformer.layers.19.4.ff.2.weight", "transformer.layers.20.4.ff.0.0.weight", "transformer.layers.20.4.ff.2.weight", "transformer.layers.21.4.ff.0.0.weight", "transformer.layers.21.4.ff.2.weight", "transformer.layers.22.4.ff.0.0.weight", "transformer.layers.22.4.ff.2.weight", "transformer.layers.23.4.ff.0.0.weight", "transformer.layers.23.4.ff.2.weight", "transformer.layers.24.4.ff.0.0.weight", "transformer.layers.24.4.ff.2.weight", "transformer.layers.25.4.ff.0.0.weight", "transformer.layers.25.4.ff.2.weight" ], "bottleneck_layers": [], "recommendations": { "focus_layers": [ "transformer.time_embed.time_mlp.0.weight", "transformer.time_embed.time_mlp.2.weight", "transformer.text_embed.text_embed.weight", "transformer.input_embed.proj.weight", "transformer.input_embed.conv_pos_embed.conv1d.0.weight", "transformer.input_embed.conv_pos_embed.conv1d.2.weight", "transformer.layers.0.2.to_q.weight", "transformer.layers.0.2.to_k.weight", "transformer.layers.0.2.to_v.weight", "transformer.layers.0.2.to_out.0.weight", "transformer.layers.0.4.ff.0.0.weight", "transformer.layers.0.4.ff.2.weight", "transformer.layers.1.2.to_q.weight", "transformer.layers.1.2.to_k.weight", "transformer.layers.1.2.to_v.weight", "transformer.layers.1.2.to_out.0.weight", "transformer.layers.1.4.ff.0.0.weight", "transformer.layers.1.4.ff.2.weight", "transformer.layers.2.2.to_q.weight", "transformer.layers.2.2.to_k.weight", "transformer.layers.2.2.to_v.weight", "transformer.layers.2.2.to_out.0.weight", "transformer.layers.2.4.ff.0.0.weight", "transformer.layers.2.4.ff.2.weight", "transformer.layers.3.2.to_q.weight", "transformer.layers.3.2.to_k.weight", "transformer.layers.3.2.to_v.weight", "transformer.layers.3.2.to_out.0.weight", "transformer.layers.3.4.ff.0.0.weight", "transformer.layers.3.4.ff.2.weight", "transformer.layers.4.2.to_q.weight", "transformer.layers.4.2.to_k.weight", "transformer.layers.4.2.to_v.weight", "transformer.layers.4.2.to_out.0.weight", "transformer.layers.4.4.ff.0.0.weight", "transformer.layers.4.4.ff.2.weight", "transformer.layers.5.2.to_q.weight", "transformer.layers.5.2.to_k.weight", "transformer.layers.5.2.to_v.weight", "transformer.layers.5.2.to_out.0.weight", "transformer.layers.5.4.ff.0.0.weight", "transformer.layers.5.4.ff.2.weight", "transformer.layers.6.2.to_q.weight", "transformer.layers.6.2.to_k.weight", "transformer.layers.6.2.to_v.weight", "transformer.layers.6.2.to_out.0.weight", "transformer.layers.6.4.ff.0.0.weight", "transformer.layers.6.4.ff.2.weight", "transformer.layers.7.2.to_q.weight", "transformer.layers.7.2.to_k.weight", "transformer.layers.7.2.to_v.weight", "transformer.layers.7.2.to_out.0.weight", "transformer.layers.7.4.ff.0.0.weight", "transformer.layers.7.4.ff.2.weight", "transformer.layers.8.4.ff.0.0.weight", "transformer.layers.8.4.ff.2.weight", "transformer.layers.9.4.ff.0.0.weight", "transformer.layers.9.4.ff.2.weight", "transformer.layers.10.4.ff.0.0.weight", "transformer.layers.10.4.ff.2.weight", "transformer.layers.11.4.ff.0.0.weight", "transformer.layers.11.4.ff.2.weight", "transformer.layers.12.4.ff.0.0.weight", "transformer.layers.12.4.ff.2.weight", "transformer.layers.13.0.weight", "transformer.layers.13.4.ff.0.0.weight", "transformer.layers.13.4.ff.2.weight", "transformer.layers.14.0.weight", "transformer.layers.14.4.ff.0.0.weight", "transformer.layers.14.4.ff.2.weight", "transformer.layers.15.0.weight", "transformer.layers.15.4.ff.0.0.weight", "transformer.layers.15.4.ff.2.weight", "transformer.layers.16.4.ff.0.0.weight", "transformer.layers.16.4.ff.2.weight", "transformer.layers.17.4.ff.0.0.weight", "transformer.layers.17.4.ff.2.weight", "transformer.layers.18.4.ff.0.0.weight", "transformer.layers.18.4.ff.2.weight", "transformer.layers.19.4.ff.0.0.weight", "transformer.layers.19.4.ff.2.weight", "transformer.layers.20.4.ff.0.0.weight", "transformer.layers.20.4.ff.2.weight", "transformer.layers.21.4.ff.0.0.weight", "transformer.layers.21.4.ff.2.weight", "transformer.layers.22.4.ff.0.0.weight", "transformer.layers.22.4.ff.2.weight", "transformer.layers.23.4.ff.0.0.weight", "transformer.layers.23.4.ff.2.weight", "transformer.layers.24.4.ff.0.0.weight", "transformer.layers.24.4.ff.2.weight", "transformer.layers.25.4.ff.0.0.weight", "transformer.layers.25.4.ff.2.weight" ] }, "total_parameters": 391, "total_elements": 360755948, "param_ranges": { "transformer.time_embed.time_mlp.0.weight": { "min": -0.43031466007232666, "max": 0.298143208026886, "mean": -0.0025431362446397543, "std": 0.042562514543533325, "sparsity": 0.0, "shape": [ 1024, 256 ] }, "transformer.time_embed.time_mlp.0.bias": { "min": -0.0631568506360054, "max": 0.10771193355321884, "mean": 0.0006426331819966435, "std": 0.03407834470272064, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.time_embed.time_mlp.2.weight": { "min": -0.4127056896686554, "max": 0.8369137644767761, "mean": -0.00020141302957199514, "std": 0.024111632257699966, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.time_embed.time_mlp.2.bias": { "min": -0.11548846960067749, "max": 0.3221578299999237, "mean": -0.0009410656057298183, "std": 0.019580261781811714, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.text_embed.text_embed.weight": { "min": -2.7946255207061768, "max": 2.873370885848999, "mean": -0.0003634353051893413, "std": 0.6154844164848328, "sparsity": 0.0, "shape": [ 2546, 100 ] }, "transformer.input_embed.proj.weight": { "min": -0.2794482707977295, "max": 0.38173243403434753, "mean": 0.0004242636787239462, "std": 0.042748358100652695, "sparsity": 0.0, "shape": [ 1024, 300 ] }, "transformer.input_embed.proj.bias": { "min": -0.22289495170116425, "max": 0.21001911163330078, "mean": -0.004489608108997345, "std": 0.040950216352939606, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { "min": -0.4283224046230316, "max": 0.4761110544204712, "mean": 3.962942628277233e-06, "std": 0.02451062761247158, "sparsity": 0.0, "shape": [ 1024, 64, 31 ] }, "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { "min": -0.32575708627700806, "max": 0.1571168750524521, "mean": -0.04673216491937637, "std": 0.051645807921886444, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { "min": -0.4105567932128906, "max": 0.3547790050506592, "mean": -0.0001310346560785547, "std": 0.02360442653298378, "sparsity": 0.0, "shape": [ 1024, 64, 31 ] }, "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { "min": -0.23018451035022736, "max": 0.2630932033061981, "mean": -0.029156308621168137, "std": 0.04940544068813324, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.1.g": { "min": 0.2545531988143921, "max": 0.8213090300559998, "mean": 0.5256362557411194, "std": 0.08106369525194168, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_q.weight": { "min": -0.2971626818180084, "max": 0.26604607701301575, "mean": -0.0004256928223185241, "std": 0.03210251033306122, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_q.bias": { "min": -0.09291917830705643, "max": 0.1250312328338623, "mean": 0.0006477435817942023, "std": 0.025753259658813477, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_k.weight": { "min": -0.29085373878479004, "max": 0.28159603476524353, "mean": -7.506589463446289e-05, "std": 0.030931703746318817, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_k.bias": { "min": -5.906967639923096, "max": 5.821649074554443, "mean": -0.009350163862109184, "std": 1.296647071838379, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_v.weight": { "min": -0.42530331015586853, "max": 0.3440260589122772, "mean": 9.807322931010276e-05, "std": 0.02995346300303936, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_v.bias": { "min": -0.029081525281071663, "max": 0.02767445333302021, "mean": -0.00032374687725678086, "std": 0.012576405890285969, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.2.to_out.0.weight": { "min": -0.45424115657806396, "max": 0.4482896625995636, "mean": 2.3885608243290335e-05, "std": 0.02385384775698185, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.0.2.to_out.0.bias": { "min": -0.08883396536111832, "max": 0.09114022552967072, "mean": 0.00228882092051208, "std": 0.01952745020389557, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.3.g": { "min": 0.26677191257476807, "max": 1.0577468872070312, "mean": 0.53135746717453, "std": 0.10473316162824631, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.0.4.ff.0.0.weight": { "min": -0.5746102333068848, "max": 0.6084363460540771, "mean": -0.00043127068784087896, "std": 0.03860073536634445, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.0.4.ff.0.0.bias": { "min": -0.18297578394412994, "max": 0.0456179715692997, "mean": -0.029477983713150024, "std": 0.042657021433115005, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.0.4.ff.2.weight": { "min": -1.1673263311386108, "max": 1.6341116428375244, "mean": 0.00032315164571627975, "std": 0.02769668586552143, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.0.4.ff.2.bias": { "min": -0.16250400245189667, "max": 0.20589958131313324, "mean": -0.02113456465303898, "std": 0.027959568426012993, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.1.g": { "min": 0.22410069406032562, "max": 0.8451111912727356, "mean": 0.48777928948402405, "std": 0.07542530447244644, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_q.weight": { "min": -0.25582441687583923, "max": 0.30595168471336365, "mean": -6.705071427859366e-06, "std": 0.03347504884004593, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_q.bias": { "min": -0.09550327807664871, "max": 0.11064136773347855, "mean": 6.668796413578093e-05, "std": 0.026976482942700386, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_k.weight": { "min": -0.2973037660121918, "max": 0.29644775390625, "mean": 5.341449286788702e-05, "std": 0.032546162605285645, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_k.bias": { "min": -5.17097806930542, "max": 5.091113090515137, "mean": -0.01462231483310461, "std": 1.1586002111434937, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_v.weight": { "min": -0.34501704573631287, "max": 0.34340131282806396, "mean": 7.8546792792622e-05, "std": 0.030061908066272736, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_v.bias": { "min": -0.036109186708927155, "max": 0.03340720757842064, "mean": -0.00014173206000123173, "std": 0.013041709549725056, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.2.to_out.0.weight": { "min": -0.3156168460845947, "max": 0.3752053380012512, "mean": -2.0681722162407823e-05, "std": 0.02405940182507038, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.1.2.to_out.0.bias": { "min": -0.10555326193571091, "max": 0.12231862545013428, "mean": -0.0019678983371704817, "std": 0.028872456401586533, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.3.g": { "min": 0.3113996386528015, "max": 1.1224051713943481, "mean": 0.6664633750915527, "std": 0.0980152115225792, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.1.4.ff.0.0.weight": { "min": -0.8727887272834778, "max": 0.6275914907455444, "mean": 0.0016750607173889875, "std": 0.047438763082027435, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.1.4.ff.0.0.bias": { "min": -0.27183517813682556, "max": 0.034259725362062454, "mean": -0.046628981828689575, "std": 0.04063701629638672, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.1.4.ff.2.weight": { "min": -0.9230329394340515, "max": 0.9648618102073669, "mean": 0.0010213888017460704, "std": 0.04070665314793587, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.1.4.ff.2.bias": { "min": -0.14468412101268768, "max": 0.07505139708518982, "mean": -0.009096229448914528, "std": 0.025706371292471886, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.1.g": { "min": 0.24036771059036255, "max": 0.7140315771102905, "mean": 0.4473647475242615, "std": 0.05951203405857086, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_q.weight": { "min": -0.27264565229415894, "max": 0.29809534549713135, "mean": 9.332510671811178e-06, "std": 0.03546958044171333, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_q.bias": { "min": -0.11950661987066269, "max": 0.11869802325963974, "mean": 0.0007616454968228936, "std": 0.02764517441391945, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_k.weight": { "min": -0.2813079059123993, "max": 0.28023794293403625, "mean": -7.719700079178438e-05, "std": 0.0350990891456604, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_k.bias": { "min": -2.5128581523895264, "max": 2.524867296218872, "mean": 0.026786239817738533, "std": 0.5873143672943115, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_v.weight": { "min": -0.2213059961795807, "max": 0.2717853784561157, "mean": 2.9610819183290005e-06, "std": 0.030732687562704086, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_v.bias": { "min": -0.03361261636018753, "max": 0.03129349276423454, "mean": 0.00011305588122922927, "std": 0.012413612566888332, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.2.to_out.0.weight": { "min": -0.23544403910636902, "max": 0.23186075687408447, "mean": 5.69116891711019e-05, "std": 0.025696195662021637, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.2.2.to_out.0.bias": { "min": -0.13601461052894592, "max": 0.12754406034946442, "mean": -0.005499254446476698, "std": 0.03998684883117676, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.3.g": { "min": 0.35436785221099854, "max": 1.1737076044082642, "mean": 0.7108283638954163, "std": 0.10403098911046982, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.2.4.ff.0.0.weight": { "min": -0.6176053881645203, "max": 0.5545136332511902, "mean": 0.0011602240847423673, "std": 0.04611964151263237, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.2.4.ff.0.0.bias": { "min": -0.18876661360263824, "max": 0.024967461824417114, "mean": -0.03485583886504173, "std": 0.028641268610954285, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.2.4.ff.2.weight": { "min": -1.1323436498641968, "max": 0.9720706939697266, "mean": 0.00035946519346907735, "std": 0.042347442358732224, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.2.4.ff.2.bias": { "min": -0.598772406578064, "max": 0.06287988275289536, "mean": -0.004880873020738363, "std": 0.028635544702410698, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.1.g": { "min": 0.3750710189342499, "max": 0.9418790340423584, "mean": 0.5926927328109741, "std": 0.06721659004688263, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_q.weight": { "min": -0.3915771543979645, "max": 0.3692559599876404, "mean": 7.123942486941814e-05, "std": 0.03718866407871246, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_q.bias": { "min": -0.11907870322465897, "max": 0.13665802776813507, "mean": 0.0009319179225713015, "std": 0.02926611341536045, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_k.weight": { "min": -0.619708240032196, "max": 0.5092929005622864, "mean": 1.5245183021761477e-05, "std": 0.03644217178225517, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_k.bias": { "min": -8.197783470153809, "max": 8.800565719604492, "mean": -0.10938873887062073, "std": 1.7007076740264893, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_v.weight": { "min": -0.2768491804599762, "max": 0.2400088757276535, "mean": 5.314283407642506e-05, "std": 0.032615404576063156, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_v.bias": { "min": -0.05213421210646629, "max": 0.03957239165902138, "mean": 9.133941057370976e-05, "std": 0.012963276356458664, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.2.to_out.0.weight": { "min": -0.23089444637298584, "max": 0.2348451018333435, "mean": -2.176157067879103e-05, "std": 0.029391760006546974, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.3.2.to_out.0.bias": { "min": -0.20456741750240326, "max": 0.10572919249534607, "mean": -0.00402758177369833, "std": 0.03263704851269722, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.3.g": { "min": 0.3400026261806488, "max": 1.0141218900680542, "mean": 0.7010252475738525, "std": 0.09696138650178909, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.3.4.ff.0.0.weight": { "min": -0.5654259324073792, "max": 0.8335409760475159, "mean": 0.0004151407047174871, "std": 0.04230234771966934, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.3.4.ff.0.0.bias": { "min": -0.2119237780570984, "max": 0.030580509454011917, "mean": -0.03220224380493164, "std": 0.026535935699939728, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.3.4.ff.2.weight": { "min": -0.7552511096000671, "max": 0.7191816568374634, "mean": -9.422379662282765e-06, "std": 0.036842163652181625, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.3.4.ff.2.bias": { "min": -0.26383838057518005, "max": 0.10599514842033386, "mean": -0.0030335707124322653, "std": 0.028880203142762184, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.1.g": { "min": 0.28429752588272095, "max": 0.6961002945899963, "mean": 0.49966490268707275, "std": 0.046708256006240845, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_q.weight": { "min": -0.27950623631477356, "max": 0.23444026708602905, "mean": -0.0001112212921725586, "std": 0.03876311331987381, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_q.bias": { "min": -0.15468573570251465, "max": 0.12698474526405334, "mean": -0.0022345406468957663, "std": 0.033433251082897186, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_k.weight": { "min": -0.41459208726882935, "max": 0.6603645086288452, "mean": -1.977803731278982e-05, "std": 0.03910015523433685, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_k.bias": { "min": -4.243562698364258, "max": 4.728666305541992, "mean": -0.020446542650461197, "std": 1.0085786581039429, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_v.weight": { "min": -0.24519944190979004, "max": 0.2077825665473938, "mean": 4.388581874081865e-05, "std": 0.033966176211833954, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_v.bias": { "min": -0.034593358635902405, "max": 0.04485077038407326, "mean": -1.7529440810903907e-05, "std": 0.012629235163331032, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.2.to_out.0.weight": { "min": -0.20095357298851013, "max": 0.20613527297973633, "mean": -2.959615085273981e-05, "std": 0.03102371282875538, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.4.2.to_out.0.bias": { "min": -0.20019184052944183, "max": 0.11357004940509796, "mean": -0.0029205437749624252, "std": 0.034529101103544235, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.3.g": { "min": 0.36704930663108826, "max": 1.058448076248169, "mean": 0.6707465052604675, "std": 0.0665469765663147, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.4.4.ff.0.0.weight": { "min": -0.3986629843711853, "max": 0.5028019547462463, "mean": -3.858951822621748e-05, "std": 0.04113718494772911, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.4.4.ff.0.0.bias": { "min": -0.12819068133831024, "max": 0.026764869689941406, "mean": -0.03055746480822563, "std": 0.021891731768846512, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.4.4.ff.2.weight": { "min": -0.44944334030151367, "max": 0.43338072299957275, "mean": 8.373618766199797e-05, "std": 0.03489609435200691, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.4.4.ff.2.bias": { "min": -0.2679402530193329, "max": 0.07267966121435165, "mean": -0.0011121004354208708, "std": 0.023136794567108154, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.1.g": { "min": 0.2874027192592621, "max": 0.6862822771072388, "mean": 0.5247019529342651, "std": 0.047706179320812225, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_q.weight": { "min": -0.22247114777565002, "max": 0.2237931489944458, "mean": 1.5673409507144243e-05, "std": 0.03895280137658119, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_q.bias": { "min": -0.13664273917675018, "max": 0.10935632139444351, "mean": 0.00023680762387812138, "std": 0.029263831675052643, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_k.weight": { "min": -0.37552782893180847, "max": 0.43765556812286377, "mean": -9.529509043204598e-06, "std": 0.0392889641225338, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_k.bias": { "min": -3.8507211208343506, "max": 5.005820274353027, "mean": 0.00975782610476017, "std": 0.8459950685501099, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_v.weight": { "min": -0.2234737128019333, "max": 0.22026528418064117, "mean": -2.2568747226614505e-07, "std": 0.03441343083977699, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_v.bias": { "min": -0.043700210750103, "max": 0.0358847938477993, "mean": -0.0002585579641163349, "std": 0.012083812616765499, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.2.to_out.0.weight": { "min": -0.21352721750736237, "max": 0.1891147494316101, "mean": -1.673133192525711e-05, "std": 0.031540192663669586, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.5.2.to_out.0.bias": { "min": -0.18098995089530945, "max": 0.12096531689167023, "mean": -0.0024120290763676167, "std": 0.04128490760922432, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.3.g": { "min": 0.4226054847240448, "max": 0.9433368444442749, "mean": 0.6629081964492798, "std": 0.056974004954099655, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.5.4.ff.0.0.weight": { "min": -0.37151336669921875, "max": 0.4759024977684021, "mean": -8.223902113968506e-05, "std": 0.040896181017160416, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.5.4.ff.0.0.bias": { "min": -0.20840628445148468, "max": 0.02712824009358883, "mean": -0.030254749581217766, "std": 0.02136547490954399, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.5.4.ff.2.weight": { "min": -0.3405216336250305, "max": 0.7342746257781982, "mean": 8.478653035126626e-05, "std": 0.03477146103978157, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.5.4.ff.2.bias": { "min": -0.2405085265636444, "max": 0.05050582066178322, "mean": -0.0011980931740254164, "std": 0.02047325111925602, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.1.g": { "min": 0.305998831987381, "max": 0.6545577049255371, "mean": 0.525275707244873, "std": 0.0462840236723423, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_q.weight": { "min": -0.30443698167800903, "max": 0.2175063043832779, "mean": 6.991640839260072e-05, "std": 0.03949848935008049, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_q.bias": { "min": -0.1496177613735199, "max": 0.1315852701663971, "mean": 0.00034793667146004736, "std": 0.030498284846544266, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_k.weight": { "min": -0.25779959559440613, "max": 0.2024526447057724, "mean": 3.095036663580686e-05, "std": 0.039487626403570175, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_k.bias": { "min": -2.3393359184265137, "max": 2.3790037631988525, "mean": -0.02626325562596321, "std": 0.4501512348651886, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_v.weight": { "min": -0.189274862408638, "max": 0.2107497602701187, "mean": 3.7229168810881674e-05, "std": 0.03479816019535065, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_v.bias": { "min": -0.03176194056868553, "max": 0.035539623349905014, "mean": -0.00020054224296472967, "std": 0.012292396277189255, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.2.to_out.0.weight": { "min": -0.18866902589797974, "max": 0.17066700756549835, "mean": -6.797373498557135e-05, "std": 0.032174721360206604, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.6.2.to_out.0.bias": { "min": -0.13956007361412048, "max": 0.13746821880340576, "mean": -0.0025175614282488823, "std": 0.0513296015560627, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.3.g": { "min": 0.4674248695373535, "max": 0.957923948764801, "mean": 0.6691091656684875, "std": 0.052978649735450745, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.6.4.ff.0.0.weight": { "min": -0.32444727420806885, "max": 0.3098219633102417, "mean": -1.5040723155834712e-06, "std": 0.040952056646347046, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.6.4.ff.0.0.bias": { "min": -0.12495888024568558, "max": 0.025304077193140984, "mean": -0.03072468377649784, "std": 0.019833404570817947, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.6.4.ff.2.weight": { "min": -0.44051459431648254, "max": 0.44567734003067017, "mean": 9.530649549560621e-05, "std": 0.03512415289878845, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.6.4.ff.2.bias": { "min": -0.2248658984899521, "max": 0.05171418562531471, "mean": -0.0011846581473946571, "std": 0.018478091806173325, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.1.g": { "min": 0.33937862515449524, "max": 0.7403524518013, "mean": 0.5588580369949341, "std": 0.041548021137714386, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_q.weight": { "min": -0.27266961336135864, "max": 0.2785436511039734, "mean": 1.9886707377736457e-05, "std": 0.041062381118535995, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_q.bias": { "min": -0.13720278441905975, "max": 0.1400555521249771, "mean": 0.0004891848657280207, "std": 0.026654429733753204, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_k.weight": { "min": -0.4912598729133606, "max": 0.3564285337924957, "mean": 8.880282985046506e-05, "std": 0.040700383484363556, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_k.bias": { "min": -2.3000996112823486, "max": 1.7473976612091064, "mean": -0.021102074533700943, "std": 0.5005303025245667, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_v.weight": { "min": -0.21771195530891418, "max": 0.19800876080989838, "mean": -4.054907913086936e-05, "std": 0.03423738107085228, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_v.bias": { "min": -0.04137791320681572, "max": 0.03871942684054375, "mean": -0.00014505225408356637, "std": 0.012883453629910946, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.2.to_out.0.weight": { "min": -0.1777161806821823, "max": 0.1839223951101303, "mean": 4.761077434523031e-05, "std": 0.03156030550599098, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.7.2.to_out.0.bias": { "min": -0.1801164597272873, "max": 0.18409180641174316, "mean": -0.002218745881691575, "std": 0.05486130341887474, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.3.g": { "min": 0.4742484390735626, "max": 1.027018666267395, "mean": 0.6454694271087646, "std": 0.050571199506521225, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.7.4.ff.0.0.weight": { "min": -0.27197960019111633, "max": 0.3094431757926941, "mean": 0.00011241070023970678, "std": 0.0406884104013443, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.7.4.ff.0.0.bias": { "min": -0.10593951493501663, "max": 0.026867138221859932, "mean": -0.02952626720070839, "std": 0.0179454255849123, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.7.4.ff.2.weight": { "min": -0.339232474565506, "max": 0.32961946725845337, "mean": 5.7173179811798036e-05, "std": 0.03441809490323067, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.7.4.ff.2.bias": { "min": -0.1818968504667282, "max": 0.04209613800048828, "mean": -0.001073765684850514, "std": 0.017224203795194626, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.1.g": { "min": 0.3253825902938843, "max": 0.6876205801963806, "mean": 0.5113766193389893, "std": 0.03712678700685501, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_q.weight": { "min": -0.2340041846036911, "max": 0.22588428854942322, "mean": -3.603727600420825e-05, "std": 0.03918161243200302, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_q.bias": { "min": -0.11547420918941498, "max": 0.13177312910556793, "mean": 0.00015100545715540648, "std": 0.029211556538939476, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_k.weight": { "min": -0.353280246257782, "max": 0.28580334782600403, "mean": 7.311312401725445e-06, "std": 0.03925010561943054, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_k.bias": { "min": -4.137877941131592, "max": 3.5483016967773438, "mean": -0.011621923185884953, "std": 0.6833143830299377, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_v.weight": { "min": -0.21149367094039917, "max": 0.20919673144817352, "mean": 3.474394543445669e-05, "std": 0.034489404410123825, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_v.bias": { "min": -0.0357508510351181, "max": 0.048132169991731644, "mean": 0.0007945147808641195, "std": 0.012859269045293331, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.2.to_out.0.weight": { "min": -0.21085014939308167, "max": 0.19338075816631317, "mean": -1.279619482374983e-06, "std": 0.03169989585876465, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.8.2.to_out.0.bias": { "min": -0.18688012659549713, "max": 0.17741110920906067, "mean": -0.0028487846720963717, "std": 0.05866115912795067, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.3.g": { "min": 0.4747392237186432, "max": 1.0433117151260376, "mean": 0.6515810489654541, "std": 0.04988763853907585, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.8.4.ff.0.0.weight": { "min": -0.2485654354095459, "max": 0.32921651005744934, "mean": 0.00018060754518955946, "std": 0.04057681933045387, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.8.4.ff.0.0.bias": { "min": -0.12461096793413162, "max": 0.024597609415650368, "mean": -0.030512426048517227, "std": 0.017616724595427513, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.8.4.ff.2.weight": { "min": -0.42169103026390076, "max": 0.4825250208377838, "mean": 2.1487815047294134e-06, "std": 0.03540307283401489, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.8.4.ff.2.bias": { "min": -0.15202857553958893, "max": 0.04342101141810417, "mean": 3.956547880079597e-05, "std": 0.014885293319821358, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.1.g": { "min": 0.315530002117157, "max": 0.6829717755317688, "mean": 0.5530707240104675, "std": 0.04085434973239899, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_q.weight": { "min": -0.20659124851226807, "max": 0.2201390564441681, "mean": 3.096506407018751e-05, "std": 0.03830333426594734, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_q.bias": { "min": -0.1380155086517334, "max": 0.11290067434310913, "mean": 2.059592225123197e-05, "std": 0.025836361572146416, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_k.weight": { "min": -0.40320274233818054, "max": 0.37160059809684753, "mean": 2.6222376618534327e-05, "std": 0.03818517550826073, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_k.bias": { "min": -3.775665044784546, "max": 2.872361421585083, "mean": 0.0011700298637151718, "std": 0.5173272490501404, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_v.weight": { "min": -0.2030554711818695, "max": 0.19753621518611908, "mean": 2.9474727853084914e-05, "std": 0.03430046886205673, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_v.bias": { "min": -0.05103779584169388, "max": 0.04008523374795914, "mean": -0.000419780844822526, "std": 0.013429902493953705, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.2.to_out.0.weight": { "min": -0.19672255218029022, "max": 0.20196260511875153, "mean": -1.2339524801063817e-05, "std": 0.03180818632245064, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.9.2.to_out.0.bias": { "min": -0.19336175918579102, "max": 0.19535411894321442, "mean": -0.0029691390227526426, "std": 0.06259549409151077, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.3.g": { "min": 0.34919390082359314, "max": 1.0855821371078491, "mean": 0.6673611998558044, "std": 0.055458005517721176, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.9.4.ff.0.0.weight": { "min": -0.22532346844673157, "max": 0.2517567276954651, "mean": 0.0003590356500353664, "std": 0.04076584428548813, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.9.4.ff.0.0.bias": { "min": -0.09113647788763046, "max": 0.04372163116931915, "mean": -0.030099857598543167, "std": 0.01762346550822258, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.9.4.ff.2.weight": { "min": -0.3537713587284088, "max": 0.3043927252292633, "mean": -4.351784446043894e-05, "std": 0.03712814301252365, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.9.4.ff.2.bias": { "min": -0.1622427999973297, "max": 0.0636076033115387, "mean": -8.386171248275787e-05, "std": 0.019415445625782013, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.1.g": { "min": 0.34875378012657166, "max": 0.7230772972106934, "mean": 0.542546272277832, "std": 0.03922481834888458, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_q.weight": { "min": -0.21956898272037506, "max": 0.22326983511447906, "mean": -1.1109572369605303e-05, "std": 0.03923607990145683, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_q.bias": { "min": -0.11858610808849335, "max": 0.1710456758737564, "mean": 0.00028452256810851395, "std": 0.025138530880212784, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_k.weight": { "min": -0.24716253578662872, "max": 0.30147185921669006, "mean": -3.647191624622792e-05, "std": 0.03893563523888588, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_k.bias": { "min": -3.5094945430755615, "max": 3.7191741466522217, "mean": 0.015858110040426254, "std": 0.7832505702972412, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_v.weight": { "min": -0.21879123151302338, "max": 0.2377484291791916, "mean": -1.353577317786403e-05, "std": 0.03630785644054413, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_v.bias": { "min": -0.04725177586078644, "max": 0.05147033557295799, "mean": 0.00048084836453199387, "std": 0.01352026965469122, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.2.to_out.0.weight": { "min": -0.21421915292739868, "max": 0.21782870590686798, "mean": 5.651723040500656e-05, "std": 0.03361982852220535, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.10.2.to_out.0.bias": { "min": -0.2116560935974121, "max": 0.23178474605083466, "mean": -0.005108034238219261, "std": 0.06190710514783859, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.3.g": { "min": 0.3619433343410492, "max": 1.1028457880020142, "mean": 0.6994728446006775, "std": 0.05383099243044853, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.10.4.ff.0.0.weight": { "min": -0.2347707897424698, "max": 0.24507476389408112, "mean": 0.00046346502494998276, "std": 0.041274722665548325, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.10.4.ff.0.0.bias": { "min": -0.098201684653759, "max": 0.06837960332632065, "mean": -0.031449105590581894, "std": 0.01813678629696369, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.10.4.ff.2.weight": { "min": -0.3019881546497345, "max": 0.351855993270874, "mean": -8.162805897882208e-05, "std": 0.040280550718307495, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.10.4.ff.2.bias": { "min": -0.1525154411792755, "max": 0.14985136687755585, "mean": 0.0002546610194258392, "std": 0.02304759994149208, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.1.g": { "min": 0.9988279342651367, "max": 1.0030174255371094, "mean": 1.0003814697265625, "std": 0.0010646688751876354, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_q.weight": { "min": -0.03128192201256752, "max": 0.031278640031814575, "mean": -1.9287415852886625e-05, "std": 0.01804400235414505, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_q.bias": { "min": -0.031218387186527252, "max": 0.03101835958659649, "mean": -0.0010843591298907995, "std": 0.01795342192053795, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_k.weight": { "min": -0.031292207539081573, "max": 0.03128044679760933, "mean": 3.544726496329531e-06, "std": 0.018044408410787582, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_k.bias": { "min": -0.031148849055171013, "max": 0.031187163665890694, "mean": 0.000333936681272462, "std": 0.01806570589542389, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_v.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_v.bias": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024 ] }, "transformer.layers.11.2.to_out.0.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.11.2.to_out.0.bias": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024 ] }, "transformer.layers.11.3.g": { "min": 0.9988681674003601, "max": 1.0030490159988403, "mean": 1.0004115104675293, "std": 0.0010549556463956833, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.11.4.ff.0.0.weight": { "min": -0.031293854117393494, "max": 0.03129155561327934, "mean": -8.391638402827084e-06, "std": 0.018043123185634613, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.11.4.ff.0.0.bias": { "min": -0.031248562037944794, "max": 0.03123636171221733, "mean": 0.00015367052401416004, "std": 0.017994463443756104, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.11.4.ff.2.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 4096 ] }, "transformer.layers.11.4.ff.2.bias": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024 ] }, "transformer.layers.12.1.g": { "min": 0.382835328578949, "max": 0.7205657362937927, "mean": 0.5808628797531128, "std": 0.03902854025363922, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_q.weight": { "min": -0.23823925852775574, "max": 0.1967414915561676, "mean": 2.6552535928203724e-05, "std": 0.03746962919831276, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_q.bias": { "min": -0.11881034076213837, "max": 0.16626670956611633, "mean": 0.000991516513749957, "std": 0.027575215324759483, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_k.weight": { "min": -0.24632981419563293, "max": 0.5012024641036987, "mean": -5.04429881402757e-05, "std": 0.03762752190232277, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_k.bias": { "min": -3.946824312210083, "max": 3.773773670196533, "mean": -0.0035694693215191364, "std": 0.6819667816162109, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_v.weight": { "min": -0.22745896875858307, "max": 0.2515793740749359, "mean": -1.1545061170181725e-05, "std": 0.03743903711438179, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_v.bias": { "min": -0.07179750502109528, "max": 0.0807880237698555, "mean": -0.0005204002372920513, "std": 0.015668606385588646, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.2.to_out.0.weight": { "min": -0.22822564840316772, "max": 0.25826144218444824, "mean": -2.862494147848338e-05, "std": 0.03542570024728775, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.12.2.to_out.0.bias": { "min": -0.2006409764289856, "max": 0.21548894047737122, "mean": -0.005540885496884584, "std": 0.06836719810962677, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.3.g": { "min": 0.40525123476982117, "max": 1.1910948753356934, "mean": 0.7381879091262817, "std": 0.05550322309136391, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.12.4.ff.0.0.weight": { "min": -0.2213674634695053, "max": 0.2461645007133484, "mean": 0.0005210727686062455, "std": 0.04134247452020645, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.12.4.ff.0.0.bias": { "min": -0.10357673466205597, "max": 0.02419574372470379, "mean": -0.03268023580312729, "std": 0.01890200562775135, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.12.4.ff.2.weight": { "min": -0.44974827766418457, "max": 0.42273736000061035, "mean": -0.00043248123256489635, "std": 0.046903859823942184, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.12.4.ff.2.bias": { "min": -0.2517695128917694, "max": 0.4706769287586212, "mean": 0.003199656493961811, "std": 0.04457153007388115, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.0.weight": { "min": -0.3170992434024811, "max": 0.333298921585083, "mean": -2.5289473342127167e-05, "std": 0.021290816366672516, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.13.1.g": { "min": 0.32478415966033936, "max": 0.6863877177238464, "mean": 0.5711605548858643, "std": 0.04484730586409569, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_q.weight": { "min": -0.1647796630859375, "max": 0.17416934669017792, "mean": -4.8634105041855946e-05, "std": 0.03318461403250694, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_q.bias": { "min": -0.1870798021554947, "max": 0.14308109879493713, "mean": 3.898901923093945e-05, "std": 0.02971462905406952, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_k.weight": { "min": -0.38088855147361755, "max": 0.2463647872209549, "mean": -9.938010407495312e-06, "std": 0.03276585787534714, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_k.bias": { "min": -3.6601390838623047, "max": 3.2940189838409424, "mean": -0.01424746960401535, "std": 0.9857901930809021, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_v.weight": { "min": -0.2351982444524765, "max": 0.24773260951042175, "mean": -1.7793041479308158e-05, "std": 0.04170281067490578, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_v.bias": { "min": -0.07287801802158356, "max": 0.15471716225147247, "mean": 0.0006660239887423813, "std": 0.025180837139487267, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.2.to_out.0.weight": { "min": -0.26665613055229187, "max": 0.24858269095420837, "mean": -1.5366244042525068e-05, "std": 0.04014318436384201, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.13.2.to_out.0.bias": { "min": -0.18983444571495056, "max": 0.1949683576822281, "mean": -0.0012304731644690037, "std": 0.06671547889709473, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.3.g": { "min": 0.32925084233283997, "max": 1.0009599924087524, "mean": 0.7193903923034668, "std": 0.052590519189834595, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.13.4.ff.0.0.weight": { "min": -0.23175209760665894, "max": 0.24594298005104065, "mean": 0.00018278483184985816, "std": 0.04090619832277298, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.13.4.ff.0.0.bias": { "min": -0.11433617770671844, "max": 0.018662281334400177, "mean": -0.04249466210603714, "std": 0.01887579821050167, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.13.4.ff.2.weight": { "min": -0.3903564512729645, "max": 0.4076610803604126, "mean": -2.190250415878836e-05, "std": 0.04854064807295799, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.13.4.ff.2.bias": { "min": -0.694047212600708, "max": 0.413125216960907, "mean": 0.000851891003549099, "std": 0.06033211946487427, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.0.weight": { "min": 0.0, "max": 0.9999971389770508, "mean": 0.0004882798530161381, "std": 0.022091632708907127, "sparsity": 0.99951171875, "shape": [ 1024, 2048 ] }, "transformer.layers.14.1.g": { "min": 0.9987401366233826, "max": 1.0030049085617065, "mean": 1.0003970861434937, "std": 0.0010890224948525429, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_q.weight": { "min": -0.03128720819950104, "max": 0.03127748519182205, "mean": -2.1021871361881495e-05, "std": 0.018035341054201126, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_q.bias": { "min": -0.031208951026201248, "max": 0.0312366746366024, "mean": -0.0006772055057808757, "std": 0.01782999187707901, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_k.weight": { "min": -0.03128505125641823, "max": 0.0312827005982399, "mean": -8.840423106448725e-06, "std": 0.01803436689078808, "sparsity": 9.5367431640625e-07, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_k.bias": { "min": -0.031223762780427933, "max": 0.031257808208465576, "mean": -0.0007298105047084391, "std": 0.017944179475307465, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_v.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_v.bias": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024 ] }, "transformer.layers.14.2.to_out.0.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ] }, "transformer.layers.14.2.to_out.0.bias": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024 ] }, "transformer.layers.14.3.g": { "min": 0.9988026022911072, "max": 1.0031852722167969, "mean": 1.0003986358642578, "std": 0.0010702211875468493, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.14.4.ff.0.0.weight": { "min": -0.03128661960363388, "max": 0.03128815069794655, "mean": 3.5941102396463975e-06, "std": 0.01804072968661785, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.14.4.ff.0.0.bias": { "min": -0.03123682737350464, "max": 0.03124977834522724, "mean": 0.00019563926616683602, "std": 0.018076641485095024, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.14.4.ff.2.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024, 4096 ] }, "transformer.layers.14.4.ff.2.bias": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "sparsity": 1.0, "shape": [ 1024 ] }, "transformer.layers.15.0.weight": { "min": -0.2346186488866806, "max": 0.27259576320648193, "mean": 6.985836080275476e-06, "std": 0.01881217770278454, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.15.1.g": { "min": 0.3213435411453247, "max": 0.6945998072624207, "mean": 0.5817909240722656, "std": 0.04608319699764252, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_q.weight": { "min": -0.18191689252853394, "max": 0.19781433045864105, "mean": -1.1746024938474875e-05, "std": 0.03318719565868378, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_q.bias": { "min": -0.16086804866790771, "max": 0.1296302229166031, "mean": -0.0010684699518606067, "std": 0.034163739532232285, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_k.weight": { "min": -0.33239439129829407, "max": 0.31163647770881653, "mean": -1.0337707863072865e-05, "std": 0.03223792091012001, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_k.bias": { "min": -7.812414169311523, "max": 8.773359298706055, "mean": 0.09355923533439636, "std": 1.6210812330245972, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_v.weight": { "min": -0.23393671214580536, "max": 0.24211150407791138, "mean": 4.141662793699652e-05, "std": 0.04086197167634964, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_v.bias": { "min": -0.07609452307224274, "max": 0.06586258113384247, "mean": 0.00047865102533251047, "std": 0.01942458190023899, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.2.to_out.0.weight": { "min": -0.24614335596561432, "max": 0.23432280123233795, "mean": -2.907749149017036e-06, "std": 0.03943663462996483, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.15.2.to_out.0.bias": { "min": -0.16305704414844513, "max": 0.1610053926706314, "mean": 0.0016310829669237137, "std": 0.06529799103736877, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.3.g": { "min": 0.5568323135375977, "max": 0.9453117847442627, "mean": 0.7130987644195557, "std": 0.040391918271780014, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.15.4.ff.0.0.weight": { "min": -0.2288832664489746, "max": 0.25533148646354675, "mean": -4.5479209802579135e-05, "std": 0.04058132693171501, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.15.4.ff.0.0.bias": { "min": -0.13495926558971405, "max": 0.022289777174592018, "mean": -0.0413689985871315, "std": 0.018403179943561554, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.15.4.ff.2.weight": { "min": -0.4220907390117645, "max": 0.3925161063671112, "mean": -4.4413791329134256e-06, "std": 0.04779106378555298, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.15.4.ff.2.bias": { "min": -0.6081769466400146, "max": 0.652148425579071, "mean": 0.001585810212418437, "std": 0.05687166377902031, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.0.weight": { "min": -0.2517467141151428, "max": 0.32074928283691406, "mean": -6.074779776099604e-06, "std": 0.019615592435002327, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.16.1.g": { "min": 0.36013174057006836, "max": 0.6833459138870239, "mean": 0.570884644985199, "std": 0.04308824613690376, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_q.weight": { "min": -0.22070643305778503, "max": 0.17717598378658295, "mean": -3.468842260190286e-05, "std": 0.03430233895778656, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_q.bias": { "min": -0.16383720934391022, "max": 0.23332805931568146, "mean": 0.0003637468325905502, "std": 0.032890770584344864, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_k.weight": { "min": -0.26396337151527405, "max": 0.2400342971086502, "mean": -5.2375002269400284e-05, "std": 0.03390149027109146, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_k.bias": { "min": -4.860640525817871, "max": 5.097131252288818, "mean": 0.04391013830900192, "std": 1.2302772998809814, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_v.weight": { "min": -0.24682015180587769, "max": 0.25062263011932373, "mean": 7.221732084872201e-05, "std": 0.043993160128593445, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_v.bias": { "min": -0.06271186470985413, "max": 0.05459222570061684, "mean": 0.0006507715443149209, "std": 0.017198268324136734, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.2.to_out.0.weight": { "min": -0.2868800759315491, "max": 0.271938681602478, "mean": -4.989939043298364e-05, "std": 0.04299154132604599, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.16.2.to_out.0.bias": { "min": -0.16084273159503937, "max": 0.1707206517457962, "mean": -0.002884692046791315, "std": 0.059305742383003235, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.3.g": { "min": 0.51964271068573, "max": 0.9341827630996704, "mean": 0.7137263417243958, "std": 0.038649603724479675, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.16.4.ff.0.0.weight": { "min": -0.23825131356716156, "max": 0.24959467351436615, "mean": 0.00046492042019963264, "std": 0.04046143591403961, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.16.4.ff.0.0.bias": { "min": -0.14443093538284302, "max": 0.04144603759050369, "mean": -0.039705902338027954, "std": 0.020563002675771713, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.16.4.ff.2.weight": { "min": -0.5333583354949951, "max": 0.5836927890777588, "mean": 5.9018666433985345e-06, "std": 0.048868328332901, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.16.4.ff.2.bias": { "min": -0.5197700262069702, "max": 0.4940829873085022, "mean": 0.0023609776981174946, "std": 0.05347929149866104, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.0.weight": { "min": -0.27364596724510193, "max": 0.3152502179145813, "mean": 1.8441196516505443e-06, "std": 0.02005275897681713, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.17.1.g": { "min": 0.36628827452659607, "max": 0.7126691937446594, "mean": 0.5933467149734497, "std": 0.046086061745882034, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_q.weight": { "min": -0.21118636429309845, "max": 0.19975997507572174, "mean": 3.079167436226271e-05, "std": 0.0348685048520565, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_q.bias": { "min": -0.18748052418231964, "max": 0.2042539119720459, "mean": 0.000956728239543736, "std": 0.03154991194605827, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_k.weight": { "min": -0.28994736075401306, "max": 0.3401152789592743, "mean": -4.7362642362713814e-05, "std": 0.03458964452147484, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_k.bias": { "min": -3.881758451461792, "max": 3.3913075923919678, "mean": 0.014463461004197598, "std": 0.8590267896652222, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_v.weight": { "min": -0.22456875443458557, "max": 0.2500464916229248, "mean": -3.998348802269902e-06, "std": 0.042235810309648514, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_v.bias": { "min": -0.05513551086187363, "max": 0.046896424144506454, "mean": -1.89729908015579e-05, "std": 0.01585385575890541, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.2.to_out.0.weight": { "min": -0.2930184602737427, "max": 0.2910744249820709, "mean": -7.35160028852988e-06, "std": 0.041950810700654984, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.17.2.to_out.0.bias": { "min": -0.12501806020736694, "max": 0.2597162425518036, "mean": -0.003234931267797947, "std": 0.05317143350839615, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.3.g": { "min": 0.4562249779701233, "max": 0.8457176685333252, "mean": 0.705817699432373, "std": 0.035453151911497116, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.17.4.ff.0.0.weight": { "min": -0.5114459991455078, "max": 0.3485345244407654, "mean": 0.0003425978356972337, "std": 0.04020640254020691, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.17.4.ff.0.0.bias": { "min": -0.1872977465391159, "max": 0.039509162306785583, "mean": -0.03940243646502495, "std": 0.02136845327913761, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.17.4.ff.2.weight": { "min": -0.5449100136756897, "max": 0.5570695400238037, "mean": -7.181215914897621e-05, "std": 0.05074289068579674, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.17.4.ff.2.bias": { "min": -0.5124268531799316, "max": 0.6651233434677124, "mean": 0.002447479637339711, "std": 0.04955451935529709, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.0.weight": { "min": -0.33246591687202454, "max": 0.2658751308917999, "mean": 3.69829467672389e-06, "std": 0.019390346482396126, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.18.1.g": { "min": 0.3222673833370209, "max": 0.7674033641815186, "mean": 0.6512042284011841, "std": 0.04545491561293602, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_q.weight": { "min": -0.2496984899044037, "max": 0.21969059109687805, "mean": -2.5450863176956773e-06, "std": 0.03650245815515518, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_q.bias": { "min": -0.32755619287490845, "max": 0.28763604164123535, "mean": -0.0006797901587560773, "std": 0.03858839347958565, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_k.weight": { "min": -0.3103632628917694, "max": 0.3702820837497711, "mean": 6.481494347099215e-05, "std": 0.03624306991696358, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_k.bias": { "min": -4.7229533195495605, "max": 5.8144097328186035, "mean": 0.03798435255885124, "std": 1.4144145250320435, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_v.weight": { "min": -0.2220195233821869, "max": 0.20613467693328857, "mean": -7.503894448745996e-05, "std": 0.04249141365289688, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_v.bias": { "min": -0.07768063247203827, "max": 0.051408518105745316, "mean": -0.0009253580356016755, "std": 0.01641588658094406, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.2.to_out.0.weight": { "min": -0.3309858441352844, "max": 0.3291884660720825, "mean": -4.9612558541412e-06, "std": 0.04279816150665283, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.18.2.to_out.0.bias": { "min": -0.2853319048881531, "max": 0.11173354089260101, "mean": -0.001206716988235712, "std": 0.04702756926417351, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.3.g": { "min": 0.48654904961586, "max": 0.88804692029953, "mean": 0.7376827001571655, "std": 0.03842971473932266, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.18.4.ff.0.0.weight": { "min": -0.3613007962703705, "max": 0.27439025044441223, "mean": 5.118318586028181e-05, "std": 0.04065314307808876, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.18.4.ff.0.0.bias": { "min": -0.2479037493467331, "max": 0.046517688781023026, "mean": -0.039281267672777176, "std": 0.023276478052139282, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.18.4.ff.2.weight": { "min": -0.6269151568412781, "max": 0.5976049900054932, "mean": -6.191668217070401e-05, "std": 0.053125977516174316, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.18.4.ff.2.bias": { "min": -0.7105785608291626, "max": 0.26612961292266846, "mean": 0.0009194647427648306, "std": 0.051263753324747086, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.0.weight": { "min": -0.3433726131916046, "max": 0.3034554719924927, "mean": 2.0521497390291188e-07, "std": 0.019139625132083893, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.19.1.g": { "min": 0.3501395285129547, "max": 0.783959686756134, "mean": 0.6390355825424194, "std": 0.049371764063835144, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_q.weight": { "min": -0.20602361857891083, "max": 0.20698852837085724, "mean": -5.9928101109107956e-05, "std": 0.037698496133089066, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_q.bias": { "min": -0.25897642970085144, "max": 0.268706738948822, "mean": -0.00040520128095522523, "std": 0.044660814106464386, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_k.weight": { "min": -0.35453060269355774, "max": 0.3229123651981354, "mean": -7.312092748179566e-06, "std": 0.03720676898956299, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_k.bias": { "min": -5.267762184143066, "max": 4.20961332321167, "mean": -0.026448804885149002, "std": 1.0076419115066528, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_v.weight": { "min": -0.23904970288276672, "max": 0.24397821724414825, "mean": -2.552817386458628e-05, "std": 0.04321575164794922, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_v.bias": { "min": -0.06249221786856651, "max": 0.05668818950653076, "mean": 0.0003517880686558783, "std": 0.01415390707552433, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.2.to_out.0.weight": { "min": -0.43751028180122375, "max": 0.3737626075744629, "mean": 1.4619375178881455e-05, "std": 0.04412780702114105, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.19.2.to_out.0.bias": { "min": -0.0962304174900055, "max": 0.1764947772026062, "mean": -0.0006597821484319866, "std": 0.03515012562274933, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.3.g": { "min": 0.4218544661998749, "max": 1.0707522630691528, "mean": 0.7486886978149414, "std": 0.04222184792160988, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.19.4.ff.0.0.weight": { "min": -0.2660444378852844, "max": 0.2971097230911255, "mean": -7.88940378697589e-05, "std": 0.04081380367279053, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.19.4.ff.0.0.bias": { "min": -0.18505463004112244, "max": 0.04312760382890701, "mean": -0.03682396560907364, "std": 0.025607850402593613, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.19.4.ff.2.weight": { "min": -0.4577805697917938, "max": 0.48729538917541504, "mean": 4.396865551825613e-05, "std": 0.05422099307179451, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.19.4.ff.2.bias": { "min": -0.2866191267967224, "max": 0.5523927807807922, "mean": -0.0008822673698887229, "std": 0.04786074161529541, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.0.weight": { "min": -0.29266098141670227, "max": 0.3227379322052002, "mean": 6.034013495082036e-06, "std": 0.01997271552681923, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.20.1.g": { "min": 0.2912082076072693, "max": 0.7611724734306335, "mean": 0.6509549617767334, "std": 0.05223819240927696, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_q.weight": { "min": -0.2437622845172882, "max": 0.2617740035057068, "mean": -5.626710844808258e-06, "std": 0.03961407393217087, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_q.bias": { "min": -0.2678508758544922, "max": 0.20037643611431122, "mean": -0.0008778825285844505, "std": 0.051807109266519547, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_k.weight": { "min": -0.2725064158439636, "max": 0.2540656328201294, "mean": 5.306316325004445e-06, "std": 0.03871078044176102, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_k.bias": { "min": -12.980466842651367, "max": 15.965588569641113, "mean": 0.03327019512653351, "std": 1.9910999536514282, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_v.weight": { "min": -0.20688198506832123, "max": 0.22597242891788483, "mean": -7.254729280248284e-05, "std": 0.04055875167250633, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_v.bias": { "min": -0.069511778652668, "max": 0.06321422755718231, "mean": 0.00015925483603496104, "std": 0.01475309394299984, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.2.to_out.0.weight": { "min": -0.46553534269332886, "max": 0.32018300890922546, "mean": 1.9559764041332528e-05, "std": 0.040594302117824554, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.20.2.to_out.0.bias": { "min": -0.06415701657533646, "max": 0.11569144576787949, "mean": 0.0011994449887424707, "std": 0.024716829881072044, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.3.g": { "min": 0.37491846084594727, "max": 0.9332267045974731, "mean": 0.7511833310127258, "std": 0.04030444473028183, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.20.4.ff.0.0.weight": { "min": -0.2793797552585602, "max": 0.2735174894332886, "mean": -0.00016838237934280187, "std": 0.04100488871335983, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.20.4.ff.0.0.bias": { "min": -0.19866259396076202, "max": 0.05138175189495087, "mean": -0.03203893452882767, "std": 0.025100193917751312, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.20.4.ff.2.weight": { "min": -0.6591871976852417, "max": 0.5361859798431396, "mean": -5.0474118324927986e-05, "std": 0.0528571642935276, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.20.4.ff.2.bias": { "min": -0.19288860261440277, "max": 0.582888662815094, "mean": -0.0005087298923172057, "std": 0.0411086231470108, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.0.weight": { "min": -0.41760918498039246, "max": 0.3719828724861145, "mean": 6.52037670079153e-06, "std": 0.02162792719900608, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.21.1.g": { "min": 0.21464084088802338, "max": 0.7477675080299377, "mean": 0.6495819687843323, "std": 0.054441265761852264, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_q.weight": { "min": -0.20966503024101257, "max": 0.1956944614648819, "mean": 4.008584801340476e-05, "std": 0.039459552615880966, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_q.bias": { "min": -0.32997503876686096, "max": 0.25995907187461853, "mean": -0.0032368863467127085, "std": 0.05632346495985985, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_k.weight": { "min": -0.20606832206249237, "max": 0.2548881471157074, "mean": 5.397828499553725e-05, "std": 0.03856222704052925, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_k.bias": { "min": -6.250948905944824, "max": 6.940567493438721, "mean": 0.048394568264484406, "std": 1.3862435817718506, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_v.weight": { "min": -0.20990008115768433, "max": 0.23062950372695923, "mean": -4.797322617378086e-06, "std": 0.04131775721907616, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_v.bias": { "min": -0.043879762291908264, "max": 0.03602854162454605, "mean": -6.735368515364826e-06, "std": 0.012802576646208763, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.2.to_out.0.weight": { "min": -0.3975800573825836, "max": 0.3450191617012024, "mean": -5.543587758438662e-05, "std": 0.04239463433623314, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.21.2.to_out.0.bias": { "min": -0.055230122059583664, "max": 0.06288789957761765, "mean": 0.00035758066223934293, "std": 0.018682915717363358, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.3.g": { "min": 0.35092663764953613, "max": 1.0465692281723022, "mean": 0.7897400856018066, "std": 0.04884057492017746, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.21.4.ff.0.0.weight": { "min": -0.33373889327049255, "max": 0.3863142132759094, "mean": -0.00016909500118345022, "std": 0.04149040952324867, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.21.4.ff.0.0.bias": { "min": -0.15769430994987488, "max": 0.059132885187864304, "mean": -0.03183465823531151, "std": 0.025120330974459648, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.21.4.ff.2.weight": { "min": -0.6965411305427551, "max": 0.46967917680740356, "mean": -8.504216384608299e-05, "std": 0.05180637910962105, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.21.4.ff.2.bias": { "min": -0.24813847243785858, "max": 0.3292423188686371, "mean": -0.00026213712408207357, "std": 0.041475165635347366, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.0.weight": { "min": -0.2870347499847412, "max": 0.3504159152507782, "mean": -2.7635057904262794e-06, "std": 0.024241114035248756, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.22.1.g": { "min": 0.1968143880367279, "max": 0.7801634073257446, "mean": 0.67032390832901, "std": 0.058765437453985214, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_q.weight": { "min": -0.22936369478702545, "max": 0.23155838251113892, "mean": -2.0868072169832885e-05, "std": 0.0404399111866951, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_q.bias": { "min": -0.22028712928295135, "max": 0.2412400096654892, "mean": 0.0007798401638865471, "std": 0.05588255077600479, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_k.weight": { "min": -0.21693190932273865, "max": 0.2265695184469223, "mean": -7.217879465315491e-05, "std": 0.039374105632305145, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_k.bias": { "min": -8.916163444519043, "max": 9.079217910766602, "mean": -0.0012825923040509224, "std": 1.8500556945800781, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_v.weight": { "min": -0.269673228263855, "max": 0.2592774033546448, "mean": 4.366856592241675e-05, "std": 0.038410674780607224, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_v.bias": { "min": -0.05804191157221794, "max": 0.05804998800158501, "mean": 0.0003545111685525626, "std": 0.014721807092428207, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.2.to_out.0.weight": { "min": -0.2641296982765198, "max": 0.2882002294063568, "mean": -6.158516043797135e-05, "std": 0.039077457040548325, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.22.2.to_out.0.bias": { "min": -0.044157613068819046, "max": 0.03739722818136215, "mean": -9.842761210165918e-05, "std": 0.013352800160646439, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.3.g": { "min": 0.3394981026649475, "max": 1.0940546989440918, "mean": 0.8640274405479431, "std": 0.06395779550075531, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.22.4.ff.0.0.weight": { "min": -0.42318135499954224, "max": 0.41912782192230225, "mean": 0.0003136250888928771, "std": 0.04351290315389633, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.22.4.ff.0.0.bias": { "min": -0.215034618973732, "max": 0.17091527581214905, "mean": -0.02945549227297306, "std": 0.031898606568574905, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.22.4.ff.2.weight": { "min": -0.5991082191467285, "max": 0.5603575706481934, "mean": -0.0001479926722822711, "std": 0.05346138775348663, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.22.4.ff.2.bias": { "min": -0.17912201583385468, "max": 0.3778008818626404, "mean": 0.0013520645443350077, "std": 0.037332892417907715, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.0.weight": { "min": -0.3943796157836914, "max": 0.3688676655292511, "mean": 3.761224070331082e-05, "std": 0.028617393225431442, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.23.1.g": { "min": 0.2908812463283539, "max": 0.8286238312721252, "mean": 0.7055914402008057, "std": 0.06791043281555176, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_q.weight": { "min": -0.9266071319580078, "max": 1.0270264148712158, "mean": -2.7955527912126854e-05, "std": 0.0476437471807003, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_q.bias": { "min": -0.8803294897079468, "max": 0.8167775273323059, "mean": -0.0002962773141916841, "std": 0.09563106298446655, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_k.weight": { "min": -0.27031898498535156, "max": 0.24110636115074158, "mean": -2.252469494123943e-05, "std": 0.03894982486963272, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_k.bias": { "min": -23.770000457763672, "max": 22.87746810913086, "mean": -0.09194529056549072, "std": 4.074869632720947, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_v.weight": { "min": -0.22796331346035004, "max": 0.2458551675081253, "mean": -2.5422079488635063e-05, "std": 0.038641415536403656, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_v.bias": { "min": -0.060239437967538834, "max": 0.045478228479623795, "mean": -0.00013640533143188804, "std": 0.01469514612108469, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.2.to_out.0.weight": { "min": -0.33809611201286316, "max": 0.3752952516078949, "mean": 7.530758921348024e-06, "std": 0.040820345282554626, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.23.2.to_out.0.bias": { "min": -0.04625128582119942, "max": 0.1955953687429428, "mean": 0.0002734389272518456, "std": 0.013558450154960155, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.3.g": { "min": 0.37381020188331604, "max": 1.1318634748458862, "mean": 0.8903213143348694, "std": 0.0641312375664711, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.23.4.ff.0.0.weight": { "min": -0.447549432516098, "max": 0.5427570939064026, "mean": 2.5110648493864574e-05, "std": 0.04558061435818672, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.23.4.ff.0.0.bias": { "min": -0.22403661906719208, "max": 0.08747347444295883, "mean": -0.03202786669135094, "std": 0.037772756069898605, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.23.4.ff.2.weight": { "min": -0.7269205451011658, "max": 0.6894555687904358, "mean": 3.6393928894540295e-05, "std": 0.05179436132311821, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.23.4.ff.2.bias": { "min": -0.1745767593383789, "max": 0.21847710013389587, "mean": 3.5673321690410376e-05, "std": 0.03179144486784935, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.0.weight": { "min": -0.339706152677536, "max": 0.37326323986053467, "mean": 4.3032145185861737e-05, "std": 0.03413531556725502, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.24.1.g": { "min": 0.3174583911895752, "max": 1.2890191078186035, "mean": 0.601619303226471, "std": 0.08366930484771729, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_q.weight": { "min": -0.2832256853580475, "max": 0.26046571135520935, "mean": -2.993364205394755e-06, "std": 0.03598063439130783, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_q.bias": { "min": -0.2360483556985855, "max": 0.20603413879871368, "mean": 0.00023948654416017234, "std": 0.05606625974178314, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_k.weight": { "min": -0.4355963468551636, "max": 0.32496193051338196, "mean": 2.4223818400059827e-05, "std": 0.034124087542295456, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_k.bias": { "min": -5.552776336669922, "max": 7.322168350219727, "mean": -0.00738462433218956, "std": 0.7001185417175293, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_v.weight": { "min": -0.34443002939224243, "max": 0.3632832467556, "mean": 0.00010313428356312215, "std": 0.047836337238550186, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_v.bias": { "min": -0.07385826855897903, "max": 0.06043381989002228, "mean": 0.0009369200561195612, "std": 0.014941117726266384, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.2.to_out.0.weight": { "min": -0.2565152943134308, "max": 0.28712597489356995, "mean": 4.846529918722808e-06, "std": 0.041564520448446274, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.24.2.to_out.0.bias": { "min": -0.05538159981369972, "max": 0.06288077682256699, "mean": 0.00012733059702441096, "std": 0.007154808379709721, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.3.g": { "min": 0.49408578872680664, "max": 1.2223646640777588, "mean": 1.013702154159546, "std": 0.11764581501483917, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.24.4.ff.0.0.weight": { "min": -1.0940581560134888, "max": 1.0475841760635376, "mean": -4.863579306402244e-05, "std": 0.0524178184568882, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.24.4.ff.0.0.bias": { "min": -0.22388966381549835, "max": 0.1732550710439682, "mean": -0.027240199968218803, "std": 0.03634064644575119, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.24.4.ff.2.weight": { "min": -0.8849446177482605, "max": 0.9234321713447571, "mean": -0.0001459874474676326, "std": 0.05329861491918564, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.24.4.ff.2.bias": { "min": -0.17124590277671814, "max": 0.38005468249320984, "mean": 0.0033688729163259268, "std": 0.03990017995238304, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.0.weight": { "min": -0.7780460119247437, "max": 0.722984254360199, "mean": 1.8001555872615427e-05, "std": 0.046154171228408813, "sparsity": 0.0, "shape": [ 1024, 2048 ] }, "transformer.layers.25.1.g": { "min": 0.33841073513031006, "max": 1.4301798343658447, "mean": 0.9487167596817017, "std": 0.20710234344005585, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_q.weight": { "min": -1.7458388805389404, "max": 1.704530119895935, "mean": 0.000226972799282521, "std": 0.15870548784732819, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_q.bias": { "min": -1.2008311748504639, "max": 1.1021909713745117, "mean": -0.009556617587804794, "std": 0.20411409437656403, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_k.weight": { "min": -0.4210456311702728, "max": 0.4282980263233185, "mean": 6.39081554254517e-05, "std": 0.04802015796303749, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_k.bias": { "min": -19.769929885864258, "max": 19.564817428588867, "mean": -0.24858255684375763, "std": 4.782279968261719, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_v.weight": { "min": -0.3241115212440491, "max": 0.43888670206069946, "mean": -1.1728005119948648e-05, "std": 0.04616701602935791, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_v.bias": { "min": -0.03380877524614334, "max": 0.036888398230075836, "mean": 0.0006396375247277319, "std": 0.012913818471133709, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.2.to_out.0.weight": { "min": -0.7038182020187378, "max": 0.6691953539848328, "mean": 4.2681567720137537e-05, "std": 0.05789203941822052, "sparsity": 0.0, "shape": [ 1024, 1024 ] }, "transformer.layers.25.2.to_out.0.bias": { "min": -0.07242082059383392, "max": 0.06784311681985855, "mean": -0.000134931382490322, "std": 0.01290101557970047, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.3.g": { "min": 0.3802341818809509, "max": 1.39493727684021, "mean": 1.0668972730636597, "std": 0.21994373202323914, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.layers.25.4.ff.0.0.weight": { "min": -0.6166523694992065, "max": 0.7187345623970032, "mean": 0.0001129009760916233, "std": 0.0580277256667614, "sparsity": 0.0, "shape": [ 4096, 1024 ] }, "transformer.layers.25.4.ff.0.0.bias": { "min": -0.21905651688575745, "max": 0.22523820400238037, "mean": 0.006192180328071117, "std": 0.049731798470020294, "sparsity": 0.0, "shape": [ 4096 ] }, "transformer.layers.25.4.ff.2.weight": { "min": -0.6298801898956299, "max": 0.8897404074668884, "mean": 1.237633296113927e-05, "std": 0.023545268923044205, "sparsity": 0.0, "shape": [ 1024, 4096 ] }, "transformer.layers.25.4.ff.2.bias": { "min": -0.5074089765548706, "max": 0.4742584228515625, "mean": -0.0030243899673223495, "std": 0.06931118667125702, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.norm_out.g": { "min": 0.5381640791893005, "max": 1.182090163230896, "mean": 0.7830706238746643, "std": 0.09912356734275818, "sparsity": 0.0, "shape": [ 1024 ] }, "transformer.proj_out.weight": { "min": -0.2673421800136566, "max": 0.21319416165351868, "mean": -0.0002236190193798393, "std": 0.05400572717189789, "sparsity": 0.0, "shape": [ 100, 1024 ] }, "transformer.proj_out.bias": { "min": -0.23863200843334198, "max": 0.014863962307572365, "mean": -0.04393288493156433, "std": 0.03432033956050873, "sparsity": 0.0, "shape": [ 100 ] } } }