diff --git "a/model_analysis.json" "b/model_analysis.json" new file mode 100644--- /dev/null +++ "b/model_analysis.json" @@ -0,0 +1,4683 @@ +{ + "layer_types": { + "transformer": 391 + }, + "parameter_counts": { + "transformer.time_embed.time_mlp.0.weight": 262144, + "transformer.time_embed.time_mlp.0.bias": 1024, + "transformer.time_embed.time_mlp.2.weight": 1048576, + "transformer.time_embed.time_mlp.2.bias": 1024, + "transformer.text_embed.text_embed.weight": 254600, + "transformer.input_embed.proj.weight": 307200, + "transformer.input_embed.proj.bias": 1024, + "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, + "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, + "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, + "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, + "transformer.layers.0.1.g": 1024, + "transformer.layers.0.2.to_q.weight": 1048576, + "transformer.layers.0.2.to_q.bias": 1024, + "transformer.layers.0.2.to_k.weight": 1048576, + "transformer.layers.0.2.to_k.bias": 1024, + "transformer.layers.0.2.to_v.weight": 1048576, + "transformer.layers.0.2.to_v.bias": 1024, + "transformer.layers.0.2.to_out.0.weight": 1048576, + "transformer.layers.0.2.to_out.0.bias": 1024, + "transformer.layers.0.3.g": 1024, + "transformer.layers.0.4.ff.0.0.weight": 4194304, + "transformer.layers.0.4.ff.0.0.bias": 4096, + "transformer.layers.0.4.ff.2.weight": 4194304, + "transformer.layers.0.4.ff.2.bias": 1024, + "transformer.layers.1.1.g": 1024, + "transformer.layers.1.2.to_q.weight": 1048576, + "transformer.layers.1.2.to_q.bias": 1024, + "transformer.layers.1.2.to_k.weight": 1048576, + "transformer.layers.1.2.to_k.bias": 1024, + "transformer.layers.1.2.to_v.weight": 1048576, + "transformer.layers.1.2.to_v.bias": 1024, + "transformer.layers.1.2.to_out.0.weight": 1048576, + "transformer.layers.1.2.to_out.0.bias": 1024, + "transformer.layers.1.3.g": 1024, + "transformer.layers.1.4.ff.0.0.weight": 4194304, + "transformer.layers.1.4.ff.0.0.bias": 4096, + "transformer.layers.1.4.ff.2.weight": 4194304, + "transformer.layers.1.4.ff.2.bias": 1024, + "transformer.layers.2.1.g": 1024, + "transformer.layers.2.2.to_q.weight": 1048576, + "transformer.layers.2.2.to_q.bias": 1024, + "transformer.layers.2.2.to_k.weight": 1048576, + "transformer.layers.2.2.to_k.bias": 1024, + "transformer.layers.2.2.to_v.weight": 1048576, + "transformer.layers.2.2.to_v.bias": 1024, + "transformer.layers.2.2.to_out.0.weight": 1048576, + "transformer.layers.2.2.to_out.0.bias": 1024, + "transformer.layers.2.3.g": 1024, + "transformer.layers.2.4.ff.0.0.weight": 4194304, + "transformer.layers.2.4.ff.0.0.bias": 4096, + "transformer.layers.2.4.ff.2.weight": 4194304, + "transformer.layers.2.4.ff.2.bias": 1024, + "transformer.layers.3.1.g": 1024, + "transformer.layers.3.2.to_q.weight": 1048576, + "transformer.layers.3.2.to_q.bias": 1024, + "transformer.layers.3.2.to_k.weight": 1048576, + "transformer.layers.3.2.to_k.bias": 1024, + "transformer.layers.3.2.to_v.weight": 1048576, + "transformer.layers.3.2.to_v.bias": 1024, + "transformer.layers.3.2.to_out.0.weight": 1048576, + "transformer.layers.3.2.to_out.0.bias": 1024, + "transformer.layers.3.3.g": 1024, + "transformer.layers.3.4.ff.0.0.weight": 4194304, + "transformer.layers.3.4.ff.0.0.bias": 4096, + "transformer.layers.3.4.ff.2.weight": 4194304, + "transformer.layers.3.4.ff.2.bias": 1024, + "transformer.layers.4.1.g": 1024, + "transformer.layers.4.2.to_q.weight": 1048576, + "transformer.layers.4.2.to_q.bias": 1024, + "transformer.layers.4.2.to_k.weight": 1048576, + "transformer.layers.4.2.to_k.bias": 1024, + "transformer.layers.4.2.to_v.weight": 1048576, + "transformer.layers.4.2.to_v.bias": 1024, + "transformer.layers.4.2.to_out.0.weight": 1048576, + "transformer.layers.4.2.to_out.0.bias": 1024, + "transformer.layers.4.3.g": 1024, + "transformer.layers.4.4.ff.0.0.weight": 4194304, + "transformer.layers.4.4.ff.0.0.bias": 4096, + "transformer.layers.4.4.ff.2.weight": 4194304, + "transformer.layers.4.4.ff.2.bias": 1024, + "transformer.layers.5.1.g": 1024, + "transformer.layers.5.2.to_q.weight": 1048576, + "transformer.layers.5.2.to_q.bias": 1024, + "transformer.layers.5.2.to_k.weight": 1048576, + "transformer.layers.5.2.to_k.bias": 1024, + "transformer.layers.5.2.to_v.weight": 1048576, + "transformer.layers.5.2.to_v.bias": 1024, + "transformer.layers.5.2.to_out.0.weight": 1048576, + "transformer.layers.5.2.to_out.0.bias": 1024, + "transformer.layers.5.3.g": 1024, + "transformer.layers.5.4.ff.0.0.weight": 4194304, + "transformer.layers.5.4.ff.0.0.bias": 4096, + "transformer.layers.5.4.ff.2.weight": 4194304, + "transformer.layers.5.4.ff.2.bias": 1024, + "transformer.layers.6.1.g": 1024, + "transformer.layers.6.2.to_q.weight": 1048576, + "transformer.layers.6.2.to_q.bias": 1024, + "transformer.layers.6.2.to_k.weight": 1048576, + "transformer.layers.6.2.to_k.bias": 1024, + "transformer.layers.6.2.to_v.weight": 1048576, + "transformer.layers.6.2.to_v.bias": 1024, + "transformer.layers.6.2.to_out.0.weight": 1048576, + "transformer.layers.6.2.to_out.0.bias": 1024, + "transformer.layers.6.3.g": 1024, + "transformer.layers.6.4.ff.0.0.weight": 4194304, + "transformer.layers.6.4.ff.0.0.bias": 4096, + "transformer.layers.6.4.ff.2.weight": 4194304, + "transformer.layers.6.4.ff.2.bias": 1024, + "transformer.layers.7.1.g": 1024, + "transformer.layers.7.2.to_q.weight": 1048576, + "transformer.layers.7.2.to_q.bias": 1024, + "transformer.layers.7.2.to_k.weight": 1048576, + "transformer.layers.7.2.to_k.bias": 1024, + "transformer.layers.7.2.to_v.weight": 1048576, + "transformer.layers.7.2.to_v.bias": 1024, + "transformer.layers.7.2.to_out.0.weight": 1048576, + "transformer.layers.7.2.to_out.0.bias": 1024, + "transformer.layers.7.3.g": 1024, + "transformer.layers.7.4.ff.0.0.weight": 4194304, + "transformer.layers.7.4.ff.0.0.bias": 4096, + "transformer.layers.7.4.ff.2.weight": 4194304, + "transformer.layers.7.4.ff.2.bias": 1024, + "transformer.layers.8.1.g": 1024, + "transformer.layers.8.2.to_q.weight": 1048576, + "transformer.layers.8.2.to_q.bias": 1024, + "transformer.layers.8.2.to_k.weight": 1048576, + "transformer.layers.8.2.to_k.bias": 1024, + "transformer.layers.8.2.to_v.weight": 1048576, + "transformer.layers.8.2.to_v.bias": 1024, + "transformer.layers.8.2.to_out.0.weight": 1048576, + "transformer.layers.8.2.to_out.0.bias": 1024, + "transformer.layers.8.3.g": 1024, + "transformer.layers.8.4.ff.0.0.weight": 4194304, + "transformer.layers.8.4.ff.0.0.bias": 4096, + "transformer.layers.8.4.ff.2.weight": 4194304, + "transformer.layers.8.4.ff.2.bias": 1024, + "transformer.layers.9.1.g": 1024, + "transformer.layers.9.2.to_q.weight": 1048576, + "transformer.layers.9.2.to_q.bias": 1024, + "transformer.layers.9.2.to_k.weight": 1048576, + "transformer.layers.9.2.to_k.bias": 1024, + "transformer.layers.9.2.to_v.weight": 1048576, + "transformer.layers.9.2.to_v.bias": 1024, + "transformer.layers.9.2.to_out.0.weight": 1048576, + "transformer.layers.9.2.to_out.0.bias": 1024, + "transformer.layers.9.3.g": 1024, + "transformer.layers.9.4.ff.0.0.weight": 4194304, + "transformer.layers.9.4.ff.0.0.bias": 4096, + "transformer.layers.9.4.ff.2.weight": 4194304, + "transformer.layers.9.4.ff.2.bias": 1024, + "transformer.layers.10.1.g": 1024, + "transformer.layers.10.2.to_q.weight": 1048576, + "transformer.layers.10.2.to_q.bias": 1024, + "transformer.layers.10.2.to_k.weight": 1048576, + "transformer.layers.10.2.to_k.bias": 1024, + "transformer.layers.10.2.to_v.weight": 1048576, + "transformer.layers.10.2.to_v.bias": 1024, + "transformer.layers.10.2.to_out.0.weight": 1048576, + "transformer.layers.10.2.to_out.0.bias": 1024, + "transformer.layers.10.3.g": 1024, + "transformer.layers.10.4.ff.0.0.weight": 4194304, + "transformer.layers.10.4.ff.0.0.bias": 4096, + "transformer.layers.10.4.ff.2.weight": 4194304, + "transformer.layers.10.4.ff.2.bias": 1024, + "transformer.layers.11.1.g": 1024, + "transformer.layers.11.2.to_q.weight": 1048576, + "transformer.layers.11.2.to_q.bias": 1024, + "transformer.layers.11.2.to_k.weight": 1048576, + "transformer.layers.11.2.to_k.bias": 1024, + "transformer.layers.11.2.to_v.weight": 1048576, + "transformer.layers.11.2.to_v.bias": 1024, + "transformer.layers.11.2.to_out.0.weight": 1048576, + "transformer.layers.11.2.to_out.0.bias": 1024, + "transformer.layers.11.3.g": 1024, + "transformer.layers.11.4.ff.0.0.weight": 4194304, + "transformer.layers.11.4.ff.0.0.bias": 4096, + "transformer.layers.11.4.ff.2.weight": 4194304, + "transformer.layers.11.4.ff.2.bias": 1024, + "transformer.layers.12.1.g": 1024, + "transformer.layers.12.2.to_q.weight": 1048576, + "transformer.layers.12.2.to_q.bias": 1024, + "transformer.layers.12.2.to_k.weight": 1048576, + "transformer.layers.12.2.to_k.bias": 1024, + "transformer.layers.12.2.to_v.weight": 1048576, + "transformer.layers.12.2.to_v.bias": 1024, + "transformer.layers.12.2.to_out.0.weight": 1048576, + "transformer.layers.12.2.to_out.0.bias": 1024, + "transformer.layers.12.3.g": 1024, + "transformer.layers.12.4.ff.0.0.weight": 4194304, + "transformer.layers.12.4.ff.0.0.bias": 4096, + "transformer.layers.12.4.ff.2.weight": 4194304, + "transformer.layers.12.4.ff.2.bias": 1024, + "transformer.layers.13.0.weight": 2097152, + "transformer.layers.13.1.g": 1024, + "transformer.layers.13.2.to_q.weight": 1048576, + "transformer.layers.13.2.to_q.bias": 1024, + "transformer.layers.13.2.to_k.weight": 1048576, + "transformer.layers.13.2.to_k.bias": 1024, + "transformer.layers.13.2.to_v.weight": 1048576, + "transformer.layers.13.2.to_v.bias": 1024, + "transformer.layers.13.2.to_out.0.weight": 1048576, + "transformer.layers.13.2.to_out.0.bias": 1024, + "transformer.layers.13.3.g": 1024, + "transformer.layers.13.4.ff.0.0.weight": 4194304, + "transformer.layers.13.4.ff.0.0.bias": 4096, + "transformer.layers.13.4.ff.2.weight": 4194304, + "transformer.layers.13.4.ff.2.bias": 1024, + "transformer.layers.14.0.weight": 2097152, + "transformer.layers.14.1.g": 1024, + "transformer.layers.14.2.to_q.weight": 1048576, + "transformer.layers.14.2.to_q.bias": 1024, + "transformer.layers.14.2.to_k.weight": 1048576, + "transformer.layers.14.2.to_k.bias": 1024, + "transformer.layers.14.2.to_v.weight": 1048576, + "transformer.layers.14.2.to_v.bias": 1024, + "transformer.layers.14.2.to_out.0.weight": 1048576, + "transformer.layers.14.2.to_out.0.bias": 1024, + "transformer.layers.14.3.g": 1024, + "transformer.layers.14.4.ff.0.0.weight": 4194304, + "transformer.layers.14.4.ff.0.0.bias": 4096, + "transformer.layers.14.4.ff.2.weight": 4194304, + "transformer.layers.14.4.ff.2.bias": 1024, + "transformer.layers.15.0.weight": 2097152, + "transformer.layers.15.1.g": 1024, + "transformer.layers.15.2.to_q.weight": 1048576, + "transformer.layers.15.2.to_q.bias": 1024, + "transformer.layers.15.2.to_k.weight": 1048576, + "transformer.layers.15.2.to_k.bias": 1024, + "transformer.layers.15.2.to_v.weight": 1048576, + "transformer.layers.15.2.to_v.bias": 1024, + "transformer.layers.15.2.to_out.0.weight": 1048576, + "transformer.layers.15.2.to_out.0.bias": 1024, + "transformer.layers.15.3.g": 1024, + "transformer.layers.15.4.ff.0.0.weight": 4194304, + "transformer.layers.15.4.ff.0.0.bias": 4096, + "transformer.layers.15.4.ff.2.weight": 4194304, + "transformer.layers.15.4.ff.2.bias": 1024, + "transformer.layers.16.0.weight": 2097152, + "transformer.layers.16.1.g": 1024, + "transformer.layers.16.2.to_q.weight": 1048576, + "transformer.layers.16.2.to_q.bias": 1024, + "transformer.layers.16.2.to_k.weight": 1048576, + "transformer.layers.16.2.to_k.bias": 1024, + "transformer.layers.16.2.to_v.weight": 1048576, + "transformer.layers.16.2.to_v.bias": 1024, + "transformer.layers.16.2.to_out.0.weight": 1048576, + "transformer.layers.16.2.to_out.0.bias": 1024, + "transformer.layers.16.3.g": 1024, + "transformer.layers.16.4.ff.0.0.weight": 4194304, + "transformer.layers.16.4.ff.0.0.bias": 4096, + "transformer.layers.16.4.ff.2.weight": 4194304, + "transformer.layers.16.4.ff.2.bias": 1024, + "transformer.layers.17.0.weight": 2097152, + "transformer.layers.17.1.g": 1024, + "transformer.layers.17.2.to_q.weight": 1048576, + "transformer.layers.17.2.to_q.bias": 1024, + "transformer.layers.17.2.to_k.weight": 1048576, + "transformer.layers.17.2.to_k.bias": 1024, + "transformer.layers.17.2.to_v.weight": 1048576, + "transformer.layers.17.2.to_v.bias": 1024, + "transformer.layers.17.2.to_out.0.weight": 1048576, + "transformer.layers.17.2.to_out.0.bias": 1024, + "transformer.layers.17.3.g": 1024, + "transformer.layers.17.4.ff.0.0.weight": 4194304, + "transformer.layers.17.4.ff.0.0.bias": 4096, + "transformer.layers.17.4.ff.2.weight": 4194304, + "transformer.layers.17.4.ff.2.bias": 1024, + "transformer.layers.18.0.weight": 2097152, + "transformer.layers.18.1.g": 1024, + "transformer.layers.18.2.to_q.weight": 1048576, + "transformer.layers.18.2.to_q.bias": 1024, + "transformer.layers.18.2.to_k.weight": 1048576, + "transformer.layers.18.2.to_k.bias": 1024, + "transformer.layers.18.2.to_v.weight": 1048576, + "transformer.layers.18.2.to_v.bias": 1024, + "transformer.layers.18.2.to_out.0.weight": 1048576, + "transformer.layers.18.2.to_out.0.bias": 1024, + "transformer.layers.18.3.g": 1024, + "transformer.layers.18.4.ff.0.0.weight": 4194304, + "transformer.layers.18.4.ff.0.0.bias": 4096, + "transformer.layers.18.4.ff.2.weight": 4194304, + "transformer.layers.18.4.ff.2.bias": 1024, + "transformer.layers.19.0.weight": 2097152, + "transformer.layers.19.1.g": 1024, + "transformer.layers.19.2.to_q.weight": 1048576, + "transformer.layers.19.2.to_q.bias": 1024, + "transformer.layers.19.2.to_k.weight": 1048576, + "transformer.layers.19.2.to_k.bias": 1024, + "transformer.layers.19.2.to_v.weight": 1048576, + "transformer.layers.19.2.to_v.bias": 1024, + "transformer.layers.19.2.to_out.0.weight": 1048576, + "transformer.layers.19.2.to_out.0.bias": 1024, + "transformer.layers.19.3.g": 1024, + "transformer.layers.19.4.ff.0.0.weight": 4194304, + "transformer.layers.19.4.ff.0.0.bias": 4096, + "transformer.layers.19.4.ff.2.weight": 4194304, + "transformer.layers.19.4.ff.2.bias": 1024, + "transformer.layers.20.0.weight": 2097152, + "transformer.layers.20.1.g": 1024, + "transformer.layers.20.2.to_q.weight": 1048576, + "transformer.layers.20.2.to_q.bias": 1024, + "transformer.layers.20.2.to_k.weight": 1048576, + "transformer.layers.20.2.to_k.bias": 1024, + "transformer.layers.20.2.to_v.weight": 1048576, + "transformer.layers.20.2.to_v.bias": 1024, + "transformer.layers.20.2.to_out.0.weight": 1048576, + "transformer.layers.20.2.to_out.0.bias": 1024, + "transformer.layers.20.3.g": 1024, + "transformer.layers.20.4.ff.0.0.weight": 4194304, + "transformer.layers.20.4.ff.0.0.bias": 4096, + "transformer.layers.20.4.ff.2.weight": 4194304, + "transformer.layers.20.4.ff.2.bias": 1024, + "transformer.layers.21.0.weight": 2097152, + "transformer.layers.21.1.g": 1024, + "transformer.layers.21.2.to_q.weight": 1048576, + "transformer.layers.21.2.to_q.bias": 1024, + "transformer.layers.21.2.to_k.weight": 1048576, + "transformer.layers.21.2.to_k.bias": 1024, + "transformer.layers.21.2.to_v.weight": 1048576, + "transformer.layers.21.2.to_v.bias": 1024, + "transformer.layers.21.2.to_out.0.weight": 1048576, + "transformer.layers.21.2.to_out.0.bias": 1024, + "transformer.layers.21.3.g": 1024, + "transformer.layers.21.4.ff.0.0.weight": 4194304, + "transformer.layers.21.4.ff.0.0.bias": 4096, + "transformer.layers.21.4.ff.2.weight": 4194304, + "transformer.layers.21.4.ff.2.bias": 1024, + "transformer.layers.22.0.weight": 2097152, + "transformer.layers.22.1.g": 1024, + "transformer.layers.22.2.to_q.weight": 1048576, + "transformer.layers.22.2.to_q.bias": 1024, + "transformer.layers.22.2.to_k.weight": 1048576, + "transformer.layers.22.2.to_k.bias": 1024, + "transformer.layers.22.2.to_v.weight": 1048576, + "transformer.layers.22.2.to_v.bias": 1024, + "transformer.layers.22.2.to_out.0.weight": 1048576, + "transformer.layers.22.2.to_out.0.bias": 1024, + "transformer.layers.22.3.g": 1024, + "transformer.layers.22.4.ff.0.0.weight": 4194304, + "transformer.layers.22.4.ff.0.0.bias": 4096, + "transformer.layers.22.4.ff.2.weight": 4194304, + "transformer.layers.22.4.ff.2.bias": 1024, + "transformer.layers.23.0.weight": 2097152, + "transformer.layers.23.1.g": 1024, + "transformer.layers.23.2.to_q.weight": 1048576, + "transformer.layers.23.2.to_q.bias": 1024, + "transformer.layers.23.2.to_k.weight": 1048576, + "transformer.layers.23.2.to_k.bias": 1024, + "transformer.layers.23.2.to_v.weight": 1048576, + "transformer.layers.23.2.to_v.bias": 1024, + "transformer.layers.23.2.to_out.0.weight": 1048576, + "transformer.layers.23.2.to_out.0.bias": 1024, + "transformer.layers.23.3.g": 1024, + "transformer.layers.23.4.ff.0.0.weight": 4194304, + "transformer.layers.23.4.ff.0.0.bias": 4096, + "transformer.layers.23.4.ff.2.weight": 4194304, + "transformer.layers.23.4.ff.2.bias": 1024, + "transformer.layers.24.0.weight": 2097152, + "transformer.layers.24.1.g": 1024, + "transformer.layers.24.2.to_q.weight": 1048576, + "transformer.layers.24.2.to_q.bias": 1024, + "transformer.layers.24.2.to_k.weight": 1048576, + "transformer.layers.24.2.to_k.bias": 1024, + "transformer.layers.24.2.to_v.weight": 1048576, + "transformer.layers.24.2.to_v.bias": 1024, + "transformer.layers.24.2.to_out.0.weight": 1048576, + "transformer.layers.24.2.to_out.0.bias": 1024, + "transformer.layers.24.3.g": 1024, + "transformer.layers.24.4.ff.0.0.weight": 4194304, + "transformer.layers.24.4.ff.0.0.bias": 4096, + "transformer.layers.24.4.ff.2.weight": 4194304, + "transformer.layers.24.4.ff.2.bias": 1024, + "transformer.layers.25.0.weight": 2097152, + "transformer.layers.25.1.g": 1024, + "transformer.layers.25.2.to_q.weight": 1048576, + "transformer.layers.25.2.to_q.bias": 1024, + "transformer.layers.25.2.to_k.weight": 1048576, + "transformer.layers.25.2.to_k.bias": 1024, + "transformer.layers.25.2.to_v.weight": 1048576, + "transformer.layers.25.2.to_v.bias": 1024, + "transformer.layers.25.2.to_out.0.weight": 1048576, + "transformer.layers.25.2.to_out.0.bias": 1024, + "transformer.layers.25.3.g": 1024, + "transformer.layers.25.4.ff.0.0.weight": 4194304, + "transformer.layers.25.4.ff.0.0.bias": 4096, + "transformer.layers.25.4.ff.2.weight": 4194304, + "transformer.layers.25.4.ff.2.bias": 1024, + "transformer.norm_out.g": 1024, + "transformer.proj_out.weight": 102400, + "transformer.proj_out.bias": 100 + }, + "important_layers": [ + "transformer.time_embed.time_mlp.0.weight", + "transformer.time_embed.time_mlp.2.weight", + "transformer.text_embed.text_embed.weight", + "transformer.input_embed.proj.weight", + "transformer.input_embed.conv_pos_embed.conv1d.0.weight", + "transformer.input_embed.conv_pos_embed.conv1d.2.weight", + "transformer.layers.0.2.to_q.weight", + "transformer.layers.0.2.to_k.weight", + "transformer.layers.0.2.to_v.weight", + "transformer.layers.0.2.to_out.0.weight", + "transformer.layers.0.4.ff.0.0.weight", + "transformer.layers.0.4.ff.2.weight", + "transformer.layers.1.2.to_q.weight", + "transformer.layers.1.2.to_k.weight", + "transformer.layers.1.2.to_v.weight", + "transformer.layers.1.2.to_out.0.weight", + "transformer.layers.1.4.ff.0.0.weight", + "transformer.layers.1.4.ff.2.weight", + "transformer.layers.2.2.to_q.weight", + "transformer.layers.2.2.to_k.weight", + "transformer.layers.2.2.to_v.weight", + "transformer.layers.2.2.to_out.0.weight", + "transformer.layers.2.4.ff.0.0.weight", + "transformer.layers.2.4.ff.2.weight", + "transformer.layers.3.2.to_q.weight", + "transformer.layers.3.2.to_k.weight", + "transformer.layers.3.2.to_v.weight", + "transformer.layers.3.2.to_out.0.weight", + "transformer.layers.3.4.ff.0.0.weight", + "transformer.layers.3.4.ff.2.weight", + "transformer.layers.4.2.to_q.weight", + "transformer.layers.4.2.to_k.weight", + "transformer.layers.4.2.to_v.weight", + "transformer.layers.4.2.to_out.0.weight", + "transformer.layers.4.4.ff.0.0.weight", + "transformer.layers.4.4.ff.2.weight", + "transformer.layers.5.2.to_q.weight", + "transformer.layers.5.2.to_k.weight", + "transformer.layers.5.2.to_v.weight", + "transformer.layers.5.2.to_out.0.weight", + "transformer.layers.5.4.ff.0.0.weight", + "transformer.layers.5.4.ff.2.weight", + "transformer.layers.6.2.to_q.weight", + "transformer.layers.6.2.to_k.weight", + "transformer.layers.6.2.to_v.weight", + "transformer.layers.6.2.to_out.0.weight", + "transformer.layers.6.4.ff.0.0.weight", + "transformer.layers.6.4.ff.2.weight", + "transformer.layers.7.2.to_q.weight", + "transformer.layers.7.2.to_k.weight", + "transformer.layers.7.2.to_v.weight", + "transformer.layers.7.2.to_out.0.weight", + "transformer.layers.7.4.ff.0.0.weight", + "transformer.layers.7.4.ff.2.weight", + "transformer.layers.8.4.ff.0.0.weight", + "transformer.layers.8.4.ff.2.weight", + "transformer.layers.9.4.ff.0.0.weight", + "transformer.layers.9.4.ff.2.weight", + "transformer.layers.10.4.ff.0.0.weight", + "transformer.layers.10.4.ff.2.weight", + "transformer.layers.11.4.ff.0.0.weight", + "transformer.layers.11.4.ff.2.weight", + "transformer.layers.12.4.ff.0.0.weight", + "transformer.layers.12.4.ff.2.weight", + "transformer.layers.13.0.weight", + "transformer.layers.13.4.ff.0.0.weight", + "transformer.layers.13.4.ff.2.weight", + "transformer.layers.14.0.weight", + "transformer.layers.14.4.ff.0.0.weight", + "transformer.layers.14.4.ff.2.weight", + "transformer.layers.15.0.weight", + "transformer.layers.15.4.ff.0.0.weight", + "transformer.layers.15.4.ff.2.weight", + "transformer.layers.16.4.ff.0.0.weight", + "transformer.layers.16.4.ff.2.weight", + "transformer.layers.17.4.ff.0.0.weight", + "transformer.layers.17.4.ff.2.weight", + "transformer.layers.18.4.ff.0.0.weight", + "transformer.layers.18.4.ff.2.weight", + "transformer.layers.19.4.ff.0.0.weight", + "transformer.layers.19.4.ff.2.weight", + "transformer.layers.20.4.ff.0.0.weight", + "transformer.layers.20.4.ff.2.weight", + "transformer.layers.21.4.ff.0.0.weight", + "transformer.layers.21.4.ff.2.weight", + "transformer.layers.22.4.ff.0.0.weight", + "transformer.layers.22.4.ff.2.weight", + "transformer.layers.23.4.ff.0.0.weight", + "transformer.layers.23.4.ff.2.weight", + "transformer.layers.24.4.ff.0.0.weight", + "transformer.layers.24.4.ff.2.weight", + "transformer.layers.25.4.ff.0.0.weight", + "transformer.layers.25.4.ff.2.weight" + ], + "bottleneck_layers": [], + "recommendations": { + "focus_layers": [ + "transformer.time_embed.time_mlp.0.weight", + "transformer.time_embed.time_mlp.2.weight", + "transformer.text_embed.text_embed.weight", + "transformer.input_embed.proj.weight", + "transformer.input_embed.conv_pos_embed.conv1d.0.weight", + "transformer.input_embed.conv_pos_embed.conv1d.2.weight", + "transformer.layers.0.2.to_q.weight", + "transformer.layers.0.2.to_k.weight", + "transformer.layers.0.2.to_v.weight", + "transformer.layers.0.2.to_out.0.weight", + "transformer.layers.0.4.ff.0.0.weight", + "transformer.layers.0.4.ff.2.weight", + "transformer.layers.1.2.to_q.weight", + "transformer.layers.1.2.to_k.weight", + "transformer.layers.1.2.to_v.weight", + "transformer.layers.1.2.to_out.0.weight", + "transformer.layers.1.4.ff.0.0.weight", + "transformer.layers.1.4.ff.2.weight", + "transformer.layers.2.2.to_q.weight", + "transformer.layers.2.2.to_k.weight", + "transformer.layers.2.2.to_v.weight", + "transformer.layers.2.2.to_out.0.weight", + "transformer.layers.2.4.ff.0.0.weight", + "transformer.layers.2.4.ff.2.weight", + "transformer.layers.3.2.to_q.weight", + "transformer.layers.3.2.to_k.weight", + "transformer.layers.3.2.to_v.weight", + "transformer.layers.3.2.to_out.0.weight", + "transformer.layers.3.4.ff.0.0.weight", + "transformer.layers.3.4.ff.2.weight", + "transformer.layers.4.2.to_q.weight", + "transformer.layers.4.2.to_k.weight", + "transformer.layers.4.2.to_v.weight", + "transformer.layers.4.2.to_out.0.weight", + "transformer.layers.4.4.ff.0.0.weight", + "transformer.layers.4.4.ff.2.weight", + "transformer.layers.5.2.to_q.weight", + "transformer.layers.5.2.to_k.weight", + "transformer.layers.5.2.to_v.weight", + "transformer.layers.5.2.to_out.0.weight", + "transformer.layers.5.4.ff.0.0.weight", + "transformer.layers.5.4.ff.2.weight", + "transformer.layers.6.2.to_q.weight", + "transformer.layers.6.2.to_k.weight", + "transformer.layers.6.2.to_v.weight", + "transformer.layers.6.2.to_out.0.weight", + "transformer.layers.6.4.ff.0.0.weight", + "transformer.layers.6.4.ff.2.weight", + "transformer.layers.7.2.to_q.weight", + "transformer.layers.7.2.to_k.weight", + "transformer.layers.7.2.to_v.weight", + "transformer.layers.7.2.to_out.0.weight", + "transformer.layers.7.4.ff.0.0.weight", + "transformer.layers.7.4.ff.2.weight", + "transformer.layers.8.4.ff.0.0.weight", + "transformer.layers.8.4.ff.2.weight", + "transformer.layers.9.4.ff.0.0.weight", + "transformer.layers.9.4.ff.2.weight", + "transformer.layers.10.4.ff.0.0.weight", + "transformer.layers.10.4.ff.2.weight", + "transformer.layers.11.4.ff.0.0.weight", + "transformer.layers.11.4.ff.2.weight", + "transformer.layers.12.4.ff.0.0.weight", + "transformer.layers.12.4.ff.2.weight", + "transformer.layers.13.0.weight", + "transformer.layers.13.4.ff.0.0.weight", + "transformer.layers.13.4.ff.2.weight", + "transformer.layers.14.0.weight", + "transformer.layers.14.4.ff.0.0.weight", + "transformer.layers.14.4.ff.2.weight", + "transformer.layers.15.0.weight", + "transformer.layers.15.4.ff.0.0.weight", + "transformer.layers.15.4.ff.2.weight", + "transformer.layers.16.4.ff.0.0.weight", + "transformer.layers.16.4.ff.2.weight", + "transformer.layers.17.4.ff.0.0.weight", + "transformer.layers.17.4.ff.2.weight", + "transformer.layers.18.4.ff.0.0.weight", + "transformer.layers.18.4.ff.2.weight", + "transformer.layers.19.4.ff.0.0.weight", + "transformer.layers.19.4.ff.2.weight", + "transformer.layers.20.4.ff.0.0.weight", + "transformer.layers.20.4.ff.2.weight", + "transformer.layers.21.4.ff.0.0.weight", + "transformer.layers.21.4.ff.2.weight", + "transformer.layers.22.4.ff.0.0.weight", + "transformer.layers.22.4.ff.2.weight", + "transformer.layers.23.4.ff.0.0.weight", + "transformer.layers.23.4.ff.2.weight", + "transformer.layers.24.4.ff.0.0.weight", + "transformer.layers.24.4.ff.2.weight", + "transformer.layers.25.4.ff.0.0.weight", + "transformer.layers.25.4.ff.2.weight" + ] + }, + "total_parameters": 391, + "total_elements": 360755948, + "param_ranges": { + "transformer.time_embed.time_mlp.0.weight": { + "min": -0.4310249388217926, + "max": 0.29892200231552124, + "mean": -0.0025504794903099537, + "std": 0.0425548329949379, + "sparsity": 0.0, + "shape": [ + 1024, + 256 + ] + }, + "transformer.time_embed.time_mlp.0.bias": { + "min": -0.06312082707881927, + "max": 0.10854886472225189, + "mean": 0.000634247378911823, + "std": 0.03414047509431839, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.time_embed.time_mlp.2.weight": { + "min": -0.4126858711242676, + "max": 0.8365619778633118, + "mean": -0.00020620696886908263, + "std": 0.02410798706114292, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.time_embed.time_mlp.2.bias": { + "min": -0.1163593977689743, + "max": 0.32443463802337646, + "mean": -0.0009363778517581522, + "std": 0.019653797149658203, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.text_embed.text_embed.weight": { + "min": -2.8154137134552, + "max": 2.8935482501983643, + "mean": -0.0003568639513105154, + "std": 0.6153793334960938, + "sparsity": 0.0, + "shape": [ + 2546, + 100 + ] + }, + "transformer.input_embed.proj.weight": { + "min": -0.2813769578933716, + "max": 0.38245514035224915, + "mean": 0.00042411635513417423, + "std": 0.04274803400039673, + "sparsity": 0.0, + "shape": [ + 1024, + 300 + ] + }, + "transformer.input_embed.proj.bias": { + "min": -0.22421328723430634, + "max": 0.21138469874858856, + "mean": -0.004506870172917843, + "std": 0.04105628281831741, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { + "min": -0.4279993176460266, + "max": 0.47548574209213257, + "mean": 4.261187768861419e-06, + "std": 0.02450713701546192, + "sparsity": 0.0, + "shape": [ + 1024, + 64, + 31 + ] + }, + "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { + "min": -0.327997088432312, + "max": 0.15884317457675934, + "mean": -0.04679153859615326, + "std": 0.05176762491464615, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { + "min": -0.4111199676990509, + "max": 0.35511136054992676, + "mean": -0.00012967045768164098, + "std": 0.02359858900308609, + "sparsity": 0.0, + "shape": [ + 1024, + 64, + 31 + ] + }, + "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { + "min": -0.23166728019714355, + "max": 0.26478779315948486, + "mean": -0.029217107221484184, + "std": 0.0495423898100853, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.1.g": { + "min": 0.2546941041946411, + "max": 0.8268164992332458, + "mean": 0.5258853435516357, + "std": 0.08176200091838837, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.2.to_q.weight": { + "min": -0.29768767952919006, + "max": 0.26705101132392883, + "mean": -0.00042415110510773957, + "std": 0.03210066258907318, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.0.2.to_q.bias": { + "min": -0.09323342144489288, + "max": 0.12589719891548157, + "mean": 0.0006516888970509171, + "std": 0.02578314207494259, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.2.to_k.weight": { + "min": -0.2915492653846741, + "max": 0.2830723226070404, + "mean": -7.510973955504596e-05, + "std": 0.03093201108276844, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.0.2.to_k.bias": { + "min": -5.933852195739746, + "max": 5.848132610321045, + "mean": -0.009441309608519077, + "std": 1.2997525930404663, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.2.to_v.weight": { + "min": -0.4259975850582123, + "max": 0.34512922167778015, + "mean": 9.808027243707329e-05, + "std": 0.029951922595500946, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.0.2.to_v.bias": { + "min": -0.028870832175016403, + "max": 0.027608035132288933, + "mean": -0.0003159761254210025, + "std": 0.012566526420414448, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.2.to_out.0.weight": { + "min": -0.4554309844970703, + "max": 0.44925424456596375, + "mean": 2.2834456103737466e-05, + "std": 0.023853331804275513, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.0.2.to_out.0.bias": { + "min": -0.08927308022975922, + "max": 0.09165928512811661, + "mean": 0.002274596830829978, + "std": 0.019546369090676308, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.3.g": { + "min": 0.26676347851753235, + "max": 1.06475031375885, + "mean": 0.5317091345787048, + "std": 0.1056147962808609, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.0.4.ff.0.0.weight": { + "min": -0.5758013129234314, + "max": 0.60973060131073, + "mean": -0.00043392262887209654, + "std": 0.03859521821141243, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.0.4.ff.0.0.bias": { + "min": -0.18311595916748047, + "max": 0.045692577958106995, + "mean": -0.02953081764280796, + "std": 0.04277201369404793, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.0.4.ff.2.weight": { + "min": -1.169153094291687, + "max": 1.6363517045974731, + "mean": 0.00031960621709004045, + "std": 0.027692886069417, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.0.4.ff.2.bias": { + "min": -0.16331635415554047, + "max": 0.20692557096481323, + "mean": -0.02113202027976513, + "std": 0.0279996357858181, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.1.g": { + "min": 0.22424264252185822, + "max": 0.8506074547767639, + "mean": 0.487909197807312, + "std": 0.0759621262550354, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.2.to_q.weight": { + "min": -0.25719332695007324, + "max": 0.3069766163825989, + "mean": -8.219409210141748e-06, + "std": 0.033469025045633316, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.1.2.to_q.bias": { + "min": -0.0958663746714592, + "max": 0.1111140251159668, + "mean": 6.868487980682403e-05, + "std": 0.02699616365134716, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.2.to_k.weight": { + "min": -0.2987782061100006, + "max": 0.2982846796512604, + "mean": 5.100301495986059e-05, + "std": 0.03253886476159096, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.1.2.to_k.bias": { + "min": -5.194380760192871, + "max": 5.11414098739624, + "mean": -0.01477175671607256, + "std": 1.1622190475463867, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.2.to_v.weight": { + "min": -0.3454170525074005, + "max": 0.3440503478050232, + "mean": 7.885548257036135e-05, + "std": 0.03005816601216793, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.1.2.to_v.bias": { + "min": -0.036366600543260574, + "max": 0.033365145325660706, + "mean": -0.00014353547885548323, + "std": 0.013023492880165577, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.2.to_out.0.weight": { + "min": -0.3166007697582245, + "max": 0.37669771909713745, + "mean": -2.1011579519836232e-05, + "std": 0.024054987356066704, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.1.2.to_out.0.bias": { + "min": -0.10603390634059906, + "max": 0.12274863570928574, + "mean": -0.0019654321949929, + "std": 0.028894905000925064, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.3.g": { + "min": 0.311918169260025, + "max": 1.1306103467941284, + "mean": 0.666860818862915, + "std": 0.0989983081817627, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.1.4.ff.0.0.weight": { + "min": -0.8729648590087891, + "max": 0.6280122995376587, + "mean": 0.0016747020417824388, + "std": 0.047436561435461044, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.1.4.ff.0.0.bias": { + "min": -0.27260690927505493, + "max": 0.03427213430404663, + "mean": -0.04665624350309372, + "std": 0.04072800651192665, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.1.4.ff.2.weight": { + "min": -0.9236066937446594, + "max": 0.9658545255661011, + "mean": 0.0010218569077551365, + "std": 0.04070160537958145, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.1.4.ff.2.bias": { + "min": -0.14540822803974152, + "max": 0.07539817690849304, + "mean": -0.009104669094085693, + "std": 0.025749636813998222, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.1.g": { + "min": 0.23975443840026855, + "max": 0.7185607552528381, + "mean": 0.44753360748291016, + "std": 0.06007208302617073, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.2.to_q.weight": { + "min": -0.2746535837650299, + "max": 0.2996414601802826, + "mean": 8.662165782880038e-06, + "std": 0.03547052666544914, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.2.2.to_q.bias": { + "min": -0.11975187063217163, + "max": 0.11919566243886948, + "mean": 0.0007501145591959357, + "std": 0.02767573855817318, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.2.to_k.weight": { + "min": -0.2831306457519531, + "max": 0.2817768156528473, + "mean": -7.67814417486079e-05, + "std": 0.035099856555461884, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.2.2.to_k.bias": { + "min": -2.5266785621643066, + "max": 2.5387556552886963, + "mean": 0.026949256658554077, + "std": 0.5885584354400635, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.2.to_v.weight": { + "min": -0.22260574996471405, + "max": 0.2732996642589569, + "mean": 2.9508364605135284e-06, + "std": 0.030731212347745895, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.2.2.to_v.bias": { + "min": -0.0335291288793087, + "max": 0.031390510499477386, + "mean": 0.00011758864275179803, + "std": 0.012400473468005657, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.2.to_out.0.weight": { + "min": -0.23621369898319244, + "max": 0.23289528489112854, + "mean": 5.6726221373537555e-05, + "std": 0.025696825236082077, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.2.2.to_out.0.bias": { + "min": -0.13667543232440948, + "max": 0.12879958748817444, + "mean": -0.005504202097654343, + "std": 0.040019236505031586, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.3.g": { + "min": 0.35455986857414246, + "max": 1.1826062202453613, + "mean": 0.7107979655265808, + "std": 0.10437346249818802, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.2.4.ff.0.0.weight": { + "min": -0.6191003918647766, + "max": 0.5564218759536743, + "mean": 0.0011606740299612284, + "std": 0.04611353576183319, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.2.4.ff.0.0.bias": { + "min": -0.19018200039863586, + "max": 0.02485579438507557, + "mean": -0.03489173576235771, + "std": 0.028727849945425987, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.2.4.ff.2.weight": { + "min": -1.1358468532562256, + "max": 0.9746898412704468, + "mean": 0.00035939598456025124, + "std": 0.04234171286225319, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.2.4.ff.2.bias": { + "min": -0.6019405722618103, + "max": 0.06334464251995087, + "mean": -0.00488577876240015, + "std": 0.028712771832942963, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.1.g": { + "min": 0.3755652904510498, + "max": 0.9507709741592407, + "mean": 0.5931843519210815, + "std": 0.0686625987291336, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.2.to_q.weight": { + "min": -0.3929532766342163, + "max": 0.37091946601867676, + "mean": 7.025484228506684e-05, + "std": 0.03718522936105728, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.3.2.to_q.bias": { + "min": -0.11978376656770706, + "max": 0.13744011521339417, + "mean": 0.0009335688664577901, + "std": 0.029282478615641594, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.2.to_k.weight": { + "min": -0.6229383945465088, + "max": 0.5121926069259644, + "mean": 1.5349294699262828e-05, + "std": 0.03643808513879776, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.3.2.to_k.bias": { + "min": -8.242501258850098, + "max": 8.848700523376465, + "mean": -0.10966195166110992, + "std": 1.7074756622314453, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.2.to_v.weight": { + "min": -0.2780378460884094, + "max": 0.24072492122650146, + "mean": 5.223074913374148e-05, + "std": 0.03261224925518036, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.3.2.to_v.bias": { + "min": -0.05211928114295006, + "max": 0.03976155444979668, + "mean": 9.01424209587276e-05, + "std": 0.012970111332833767, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.2.to_out.0.weight": { + "min": -0.23169712722301483, + "max": 0.23602090775966644, + "mean": -2.2195828933035955e-05, + "std": 0.029388954862952232, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.3.2.to_out.0.bias": { + "min": -0.20550638437271118, + "max": 0.10590175539255142, + "mean": -0.004026752896606922, + "std": 0.03266817331314087, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.3.g": { + "min": 0.3396901488304138, + "max": 1.022835612297058, + "mean": 0.7008680701255798, + "std": 0.09710492938756943, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.3.4.ff.0.0.weight": { + "min": -0.5683938264846802, + "max": 0.8381193280220032, + "mean": 0.00041519341175444424, + "std": 0.04229409247636795, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.3.4.ff.0.0.bias": { + "min": -0.21325451135635376, + "max": 0.03037591464817524, + "mean": -0.03223013877868652, + "std": 0.026610074564814568, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.3.4.ff.2.weight": { + "min": -0.7600710391998291, + "max": 0.7236490845680237, + "mean": -1.6499760022270493e-05, + "std": 0.03683502599596977, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.3.4.ff.2.bias": { + "min": -0.26496192812919617, + "max": 0.10684733092784882, + "mean": -0.0030161943286657333, + "std": 0.028908496722579002, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.1.g": { + "min": 0.28418251872062683, + "max": 0.7011516094207764, + "mean": 0.499736487865448, + "std": 0.047200758010149, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.2.to_q.weight": { + "min": -0.28040796518325806, + "max": 0.23536527156829834, + "mean": -0.00011076986265834421, + "std": 0.03875643387436867, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.4.2.to_q.bias": { + "min": -0.15493866801261902, + "max": 0.12730616331100464, + "mean": -0.002237653825432062, + "std": 0.03343982622027397, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.2.to_k.weight": { + "min": -0.4170800745487213, + "max": 0.6621686220169067, + "mean": -1.8650103811523877e-05, + "std": 0.039095137268304825, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.4.2.to_k.bias": { + "min": -4.2626214027404785, + "max": 4.750005722045898, + "mean": -0.020378686487674713, + "std": 1.0105632543563843, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.2.to_v.weight": { + "min": -0.24659502506256104, + "max": 0.2085939198732376, + "mean": 4.402307604323141e-05, + "std": 0.033962100744247437, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.4.2.to_v.bias": { + "min": -0.03477818891406059, + "max": 0.045115940272808075, + "mean": -1.805905776564032e-05, + "std": 0.012638943269848824, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.2.to_out.0.weight": { + "min": -0.20247574150562286, + "max": 0.20785965025424957, + "mean": -2.8977701731491834e-05, + "std": 0.031019993126392365, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.4.2.to_out.0.bias": { + "min": -0.2010650485754013, + "max": 0.11400442570447922, + "mean": -0.002901929896324873, + "std": 0.03455876186490059, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.3.g": { + "min": 0.3669453561306, + "max": 1.068376898765564, + "mean": 0.6706770658493042, + "std": 0.06678663939237595, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.4.4.ff.0.0.weight": { + "min": -0.4009625017642975, + "max": 0.5047707557678223, + "mean": -3.825509702437557e-05, + "std": 0.04113015532493591, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.4.4.ff.0.0.bias": { + "min": -0.12967447936534882, + "max": 0.026864072307944298, + "mean": -0.03057170659303665, + "std": 0.021967768669128418, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.4.4.ff.2.weight": { + "min": -0.4517863094806671, + "max": 0.4363614320755005, + "mean": 7.544152322225273e-05, + "std": 0.03489035367965698, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.4.4.ff.2.bias": { + "min": -0.2692056894302368, + "max": 0.07339853048324585, + "mean": -0.0010960557265207171, + "std": 0.023164359852671623, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.1.g": { + "min": 0.2873815894126892, + "max": 0.6924071311950684, + "mean": 0.5248355865478516, + "std": 0.048200905323028564, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.2.to_q.weight": { + "min": -0.22408804297447205, + "max": 0.22555872797966003, + "mean": 1.55975158122601e-05, + "std": 0.038948412984609604, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.5.2.to_q.bias": { + "min": -0.13717913627624512, + "max": 0.10996447503566742, + "mean": 0.00024089610087685287, + "std": 0.02930767834186554, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.2.to_k.weight": { + "min": -0.37717288732528687, + "max": 0.43975257873535156, + "mean": -9.77939271251671e-06, + "std": 0.03928566351532936, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.5.2.to_k.bias": { + "min": -3.868288516998291, + "max": 5.028470516204834, + "mean": 0.009761041030287743, + "std": 0.8478302955627441, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.2.to_v.weight": { + "min": -0.22423577308654785, + "max": 0.221679225564003, + "mean": -3.3901324059115723e-07, + "std": 0.034409064799547195, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.5.2.to_v.bias": { + "min": -0.0438535250723362, + "max": 0.03604500740766525, + "mean": -0.00025803165044635534, + "std": 0.0120812077075243, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.2.to_out.0.weight": { + "min": -0.2146783322095871, + "max": 0.1904102861881256, + "mean": -1.7072843547794037e-05, + "std": 0.03153547644615173, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.5.2.to_out.0.bias": { + "min": -0.18190543353557587, + "max": 0.12149464339017868, + "mean": -0.0023945681750774384, + "std": 0.04129800572991371, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.3.g": { + "min": 0.4226498305797577, + "max": 0.9518083333969116, + "mean": 0.6629198789596558, + "std": 0.057358019053936005, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.5.4.ff.0.0.weight": { + "min": -0.372251033782959, + "max": 0.47781607508659363, + "mean": -8.197914576157928e-05, + "std": 0.040889132767915726, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.5.4.ff.0.0.bias": { + "min": -0.20997951924800873, + "max": 0.027235740795731544, + "mean": -0.030272582545876503, + "std": 0.021444976329803467, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.5.4.ff.2.weight": { + "min": -0.34334975481033325, + "max": 0.7389779686927795, + "mean": 8.186099876184016e-05, + "std": 0.034765809774398804, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.5.4.ff.2.bias": { + "min": -0.2415534406900406, + "max": 0.050704218447208405, + "mean": -0.001192720839753747, + "std": 0.02049700915813446, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.1.g": { + "min": 0.3061361312866211, + "max": 0.6592679023742676, + "mean": 0.5253557562828064, + "std": 0.04659049212932587, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.2.to_q.weight": { + "min": -0.3061867356300354, + "max": 0.2188880741596222, + "mean": 7.013476715655997e-05, + "std": 0.03949468210339546, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.6.2.to_q.bias": { + "min": -0.15020529925823212, + "max": 0.13198836147785187, + "mean": 0.00033842536504380405, + "std": 0.030562784522771835, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.2.to_k.weight": { + "min": -0.25926315784454346, + "max": 0.20377042889595032, + "mean": 3.10853029077407e-05, + "std": 0.039484549313783646, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.6.2.to_k.bias": { + "min": -2.3498988151550293, + "max": 2.389754056930542, + "mean": -0.02631671540439129, + "std": 0.4510843753814697, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.2.to_v.weight": { + "min": -0.19007518887519836, + "max": 0.2122075855731964, + "mean": 3.708741132868454e-05, + "std": 0.03479320555925369, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.6.2.to_v.bias": { + "min": -0.03199063614010811, + "max": 0.03580143302679062, + "mean": -0.00019849740783683956, + "std": 0.012292149476706982, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.2.to_out.0.weight": { + "min": -0.19011414051055908, + "max": 0.17155633866786957, + "mean": -6.832154031144455e-05, + "std": 0.0321698896586895, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.6.2.to_out.0.bias": { + "min": -0.14033056795597076, + "max": 0.13829410076141357, + "mean": -0.0025126286782324314, + "std": 0.05131656676530838, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.3.g": { + "min": 0.4672001600265503, + "max": 0.9642724394798279, + "mean": 0.6692001819610596, + "std": 0.05353807285428047, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.6.4.ff.0.0.weight": { + "min": -0.32512417435646057, + "max": 0.3099176585674286, + "mean": -8.536699169781059e-07, + "std": 0.04094506427645683, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.6.4.ff.0.0.bias": { + "min": -0.12580342590808868, + "max": 0.025558948516845703, + "mean": -0.030726371333003044, + "std": 0.019892578944563866, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.6.4.ff.2.weight": { + "min": -0.44301649928092957, + "max": 0.448657363653183, + "mean": 9.49525274336338e-05, + "std": 0.03511860594153404, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.6.4.ff.2.bias": { + "min": -0.22610187530517578, + "max": 0.0521467961370945, + "mean": -0.0011865891283378005, + "std": 0.018514476716518402, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.1.g": { + "min": 0.3391834497451782, + "max": 0.7460214495658875, + "mean": 0.5588462352752686, + "std": 0.04179359972476959, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.2.to_q.weight": { + "min": -0.2743752598762512, + "max": 0.27987486124038696, + "mean": 2.0352064893813804e-05, + "std": 0.04105662927031517, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.7.2.to_q.bias": { + "min": -0.13770411908626556, + "max": 0.14076648652553558, + "mean": 0.0004916964680887759, + "std": 0.026698192581534386, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.2.to_k.weight": { + "min": -0.4935597777366638, + "max": 0.3583414554595947, + "mean": 8.887881995178759e-05, + "std": 0.04069438576698303, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.7.2.to_k.bias": { + "min": -2.311286687850952, + "max": 1.7559641599655151, + "mean": -0.02118358016014099, + "std": 0.5012499094009399, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.2.to_v.weight": { + "min": -0.2191997468471527, + "max": 0.19883301854133606, + "mean": -4.048732444061898e-05, + "std": 0.03423238918185234, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.7.2.to_v.bias": { + "min": -0.041594695299863815, + "max": 0.039164409041404724, + "mean": -0.00013954236055724323, + "std": 0.012892705388367176, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.2.to_out.0.weight": { + "min": -0.17905071377754211, + "max": 0.18448761105537415, + "mean": 4.79043010273017e-05, + "std": 0.03155573084950447, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.7.2.to_out.0.bias": { + "min": -0.1810525357723236, + "max": 0.18478283286094666, + "mean": -0.0022157104685902596, + "std": 0.054884668439626694, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.3.g": { + "min": 0.47422513365745544, + "max": 1.034525752067566, + "mean": 0.6455625891685486, + "std": 0.05127067118883133, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.7.4.ff.0.0.weight": { + "min": -0.2727859616279602, + "max": 0.31039154529571533, + "mean": 0.00011223299225093797, + "std": 0.04068140313029289, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.7.4.ff.0.0.bias": { + "min": -0.10606198012828827, + "max": 0.026645641773939133, + "mean": -0.02954702451825142, + "std": 0.01799139380455017, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.7.4.ff.2.weight": { + "min": -0.34065425395965576, + "max": 0.33199548721313477, + "mean": 5.238396261120215e-05, + "std": 0.034412581473588943, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.7.4.ff.2.bias": { + "min": -0.18290212750434875, + "max": 0.042540330439805984, + "mean": -0.001063595642335713, + "std": 0.017244886606931686, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.1.g": { + "min": 0.32540637254714966, + "max": 0.6927012801170349, + "mean": 0.511530876159668, + "std": 0.037588104605674744, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.2.to_q.weight": { + "min": -0.23500792682170868, + "max": 0.22661413252353668, + "mean": -3.6375215131556615e-05, + "std": 0.039175912737846375, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.8.2.to_q.bias": { + "min": -0.11630432307720184, + "max": 0.1327952891588211, + "mean": 0.00015614689618814737, + "std": 0.02927626110613346, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.2.to_k.weight": { + "min": -0.35499081015586853, + "max": 0.28717586398124695, + "mean": 7.152914804464672e-06, + "std": 0.03924452140927315, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.8.2.to_k.bias": { + "min": -4.1564154624938965, + "max": 3.564419746398926, + "mean": -0.011666063219308853, + "std": 0.6851950883865356, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.2.to_v.weight": { + "min": -0.21194273233413696, + "max": 0.21046526730060577, + "mean": 3.472749813226983e-05, + "std": 0.0344846174120903, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.8.2.to_v.bias": { + "min": -0.03606359288096428, + "max": 0.0485043041408062, + "mean": 0.0007934037130326033, + "std": 0.01287116389721632, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.2.to_out.0.weight": { + "min": -0.21187099814414978, + "max": 0.19423909485340118, + "mean": -1.3818132629239699e-06, + "std": 0.03169572353363037, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.8.2.to_out.0.bias": { + "min": -0.1876450628042221, + "max": 0.1781487911939621, + "mean": -0.0028378514107316732, + "std": 0.05868522822856903, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.3.g": { + "min": 0.4746300280094147, + "max": 1.0532299280166626, + "mean": 0.6519026756286621, + "std": 0.0511440671980381, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.8.4.ff.0.0.weight": { + "min": -0.24888233840465546, + "max": 0.329919695854187, + "mean": 0.00018074009858537465, + "std": 0.04056980833411217, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.8.4.ff.0.0.bias": { + "min": -0.1257043331861496, + "max": 0.024808209389448166, + "mean": -0.03052573651075363, + "std": 0.01766115613281727, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.8.4.ff.2.weight": { + "min": -0.4241631031036377, + "max": 0.48552921414375305, + "mean": -1.5207942851702683e-06, + "std": 0.03539673238992691, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.8.4.ff.2.bias": { + "min": -0.15242178738117218, + "max": 0.0436730720102787, + "mean": 4.8590598453301936e-05, + "std": 0.01490879151970148, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.1.g": { + "min": 0.3154313564300537, + "max": 0.68807452917099, + "mean": 0.5530612468719482, + "std": 0.041024595499038696, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.2.to_q.weight": { + "min": -0.20784315466880798, + "max": 0.22137802839279175, + "mean": 3.199603088432923e-05, + "std": 0.038299061357975006, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.9.2.to_q.bias": { + "min": -0.13870200514793396, + "max": 0.11339821666479111, + "mean": 2.9128044843673706e-05, + "std": 0.025894545018672943, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.2.to_k.weight": { + "min": -0.4055723249912262, + "max": 0.37375950813293457, + "mean": 2.5988052584580146e-05, + "std": 0.038179732859134674, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.9.2.to_k.bias": { + "min": -3.7928740978240967, + "max": 2.885420560836792, + "mean": 0.0012225983664393425, + "std": 0.5186418294906616, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.2.to_v.weight": { + "min": -0.20435325801372528, + "max": 0.1985306441783905, + "mean": 2.9608720069518313e-05, + "std": 0.03429684415459633, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.9.2.to_v.bias": { + "min": -0.051018889993429184, + "max": 0.040129613131284714, + "mean": -0.00042048803879879415, + "std": 0.013424505479633808, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.2.to_out.0.weight": { + "min": -0.19798687100410461, + "max": 0.20356523990631104, + "mean": -1.2490939298004378e-05, + "std": 0.03180477395653725, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.9.2.to_out.0.bias": { + "min": -0.1941322237253189, + "max": 0.19617649912834167, + "mean": -0.002969961380586028, + "std": 0.06259642541408539, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.3.g": { + "min": 0.3487941026687622, + "max": 1.0952281951904297, + "mean": 0.6676215529441833, + "std": 0.05664284899830818, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.9.4.ff.0.0.weight": { + "min": -0.22712087631225586, + "max": 0.25315943360328674, + "mean": 0.00035851544816978276, + "std": 0.04075949266552925, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.9.4.ff.0.0.bias": { + "min": -0.09184330701828003, + "max": 0.04372864216566086, + "mean": -0.030109990388154984, + "std": 0.017667723819613457, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.9.4.ff.2.weight": { + "min": -0.35518717765808105, + "max": 0.30635109543800354, + "mean": -4.3967633246211335e-05, + "std": 0.037122078239917755, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.9.4.ff.2.bias": { + "min": -0.16265232861042023, + "max": 0.06366349011659622, + "mean": -8.268894453067333e-05, + "std": 0.019441038370132446, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.1.g": { + "min": 0.3488224744796753, + "max": 0.7298842668533325, + "mean": 0.5426357388496399, + "std": 0.039679452776908875, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.2.to_q.weight": { + "min": -0.22033143043518066, + "max": 0.22433431446552277, + "mean": -1.1077730960096233e-05, + "std": 0.03923030197620392, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.10.2.to_q.bias": { + "min": -0.11923559010028839, + "max": 0.1716114580631256, + "mean": 0.00028718815883621573, + "std": 0.025185901671648026, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.2.to_k.weight": { + "min": -0.2481980323791504, + "max": 0.3025566339492798, + "mean": -3.676430060295388e-05, + "std": 0.0389297790825367, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.10.2.to_k.bias": { + "min": -3.5254225730895996, + "max": 3.736085891723633, + "mean": 0.01585158333182335, + "std": 0.7859480977058411, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.2.to_v.weight": { + "min": -0.21972878277301788, + "max": 0.23833929002285004, + "mean": -1.325977427768521e-05, + "std": 0.03630264848470688, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.10.2.to_v.bias": { + "min": -0.04748326912522316, + "max": 0.051650550216436386, + "mean": 0.0004778398433700204, + "std": 0.01352317538112402, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.2.to_out.0.weight": { + "min": -0.21533912420272827, + "max": 0.21868844330310822, + "mean": 5.647652506013401e-05, + "std": 0.03361491113901138, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.10.2.to_out.0.bias": { + "min": -0.21255744993686676, + "max": 0.23268213868141174, + "mean": -0.005099742207676172, + "std": 0.06193498894572258, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.3.g": { + "min": 0.36217188835144043, + "max": 1.112847089767456, + "mean": 0.69975745677948, + "std": 0.05501763895153999, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.10.4.ff.0.0.weight": { + "min": -0.23635224997997284, + "max": 0.24658624827861786, + "mean": 0.00046343228314071894, + "std": 0.041268426924943924, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.10.4.ff.0.0.bias": { + "min": -0.09862525016069412, + "max": 0.06863635033369064, + "mean": -0.03145936504006386, + "std": 0.018182674422860146, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.10.4.ff.2.weight": { + "min": -0.30422019958496094, + "max": 0.3540525734424591, + "mean": -8.221832831623033e-05, + "std": 0.04027421772480011, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.10.4.ff.2.bias": { + "min": -0.1533002257347107, + "max": 0.150687575340271, + "mean": 0.00025470374384894967, + "std": 0.023078717291355133, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.1.g": { + "min": 0.9982896447181702, + "max": 1.017301082611084, + "mean": 1.0001298189163208, + "std": 0.0026745295617729425, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.2.to_q.weight": { + "min": -0.031271953135728836, + "max": 0.03127208724617958, + "mean": -1.929010068124626e-05, + "std": 0.01804104819893837, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.11.2.to_q.bias": { + "min": -0.03122810088098049, + "max": 0.030984606593847275, + "mean": -0.0010841733310371637, + "std": 0.0179507527500391, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.2.to_k.weight": { + "min": -0.03126660734415054, + "max": 0.03127255663275719, + "mean": 3.5378593565837946e-06, + "std": 0.018041487783193588, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.11.2.to_k.bias": { + "min": -0.031172683462500572, + "max": 0.031167395412921906, + "mean": 0.0003339074901305139, + "std": 0.01806284487247467, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.2.to_v.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.11.2.to_v.bias": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.2.to_out.0.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.11.2.to_out.0.bias": { + "min": -0.0006182725192047656, + "max": 0.0004164598067291081, + "mean": 1.3710750863538124e-06, + "std": 0.0001378587185172364, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.3.g": { + "min": 0.9979904890060425, + "max": 1.0161197185516357, + "mean": 1.0013301372528076, + "std": 0.004817315377295017, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.11.4.ff.0.0.weight": { + "min": -0.032745394855737686, + "max": 0.03283839672803879, + "mean": -6.682760158582823e-06, + "std": 0.018042659386992455, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.11.4.ff.0.0.bias": { + "min": -0.03276297450065613, + "max": 0.0325884111225605, + "mean": -0.00013115988986101002, + "std": 0.017956366762518883, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.11.4.ff.2.weight": { + "min": -0.0011839725775644183, + "max": 0.0011610303772613406, + "mean": 3.635812220181833e-07, + "std": 0.00021423342695925385, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.11.4.ff.2.bias": { + "min": -0.0005281989579088986, + "max": 0.0004011568380519748, + "mean": 2.2640601855528075e-06, + "std": 0.00012689748837146908, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.1.g": { + "min": 0.383169025182724, + "max": 0.725769579410553, + "mean": 0.5810222625732422, + "std": 0.039563409984111786, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.2.to_q.weight": { + "min": -0.23967966437339783, + "max": 0.19745716452598572, + "mean": 2.6129977413802408e-05, + "std": 0.0374654158949852, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.12.2.to_q.bias": { + "min": -0.1195952445268631, + "max": 0.16743028163909912, + "mean": 0.0009849121561273932, + "std": 0.02763625606894493, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.2.to_k.weight": { + "min": -0.24753768742084503, + "max": 0.502853274345398, + "mean": -4.9970258260145783e-05, + "std": 0.0376228392124176, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.12.2.to_k.bias": { + "min": -3.9648470878601074, + "max": 3.7909820079803467, + "mean": -0.0036168191581964493, + "std": 0.6834573745727539, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.2.to_v.weight": { + "min": -0.22818903625011444, + "max": 0.25305306911468506, + "mean": -1.1425543561927043e-05, + "std": 0.037434399127960205, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.12.2.to_v.bias": { + "min": -0.07215739786624908, + "max": 0.08118511736392975, + "mean": -0.0005145666655153036, + "std": 0.015683691948652267, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.2.to_out.0.weight": { + "min": -0.2285011112689972, + "max": 0.25927454233169556, + "mean": -2.8810776711907238e-05, + "std": 0.03542128577828407, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.12.2.to_out.0.bias": { + "min": -0.20174317061901093, + "max": 0.21631476283073425, + "mean": -0.005539278965443373, + "std": 0.06842140108346939, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.3.g": { + "min": 0.4053976237773895, + "max": 1.1997506618499756, + "mean": 0.7383711338043213, + "std": 0.05650194734334946, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.12.4.ff.0.0.weight": { + "min": -0.2226068526506424, + "max": 0.24658025801181793, + "mean": 0.0005210487288422883, + "std": 0.04133579134941101, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.12.4.ff.0.0.bias": { + "min": -0.10394058376550674, + "max": 0.02423257753252983, + "mean": -0.032700441777706146, + "std": 0.018963389098644257, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.12.4.ff.2.weight": { + "min": -0.452515184879303, + "max": 0.4254130423069, + "mean": -0.0004341741732787341, + "std": 0.04689616709947586, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.12.4.ff.2.bias": { + "min": -0.25287455320358276, + "max": 0.4728158116340637, + "mean": 0.003204880515113473, + "std": 0.04463134706020355, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.0.weight": { + "min": -0.31750747561454773, + "max": 0.333750456571579, + "mean": -2.5235824068658985e-05, + "std": 0.021287381649017334, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.13.1.g": { + "min": 0.3244800865650177, + "max": 0.6913307905197144, + "mean": 0.5712176561355591, + "std": 0.045165594667196274, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.2.to_q.weight": { + "min": -0.16547071933746338, + "max": 0.1755398064851761, + "mean": -4.8899608373176306e-05, + "std": 0.033180754631757736, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.13.2.to_q.bias": { + "min": -0.18801826238632202, + "max": 0.1438588947057724, + "mean": 4.4942658860236406e-05, + "std": 0.029767248779535294, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.2.to_k.weight": { + "min": -0.38313359022140503, + "max": 0.24818716943264008, + "mean": -9.953633707482368e-06, + "std": 0.03276177868247032, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.13.2.to_k.bias": { + "min": -3.6768205165863037, + "max": 3.3089771270751953, + "mean": -0.014381470158696175, + "std": 0.9868160486221313, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.2.to_v.weight": { + "min": -0.23584222793579102, + "max": 0.24873286485671997, + "mean": -1.8046124750981107e-05, + "std": 0.0416971780359745, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.13.2.to_v.bias": { + "min": -0.07315867394208908, + "max": 0.15554027259349823, + "mean": 0.0006676731863990426, + "std": 0.02520027756690979, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.2.to_out.0.weight": { + "min": -0.2670559585094452, + "max": 0.24887487292289734, + "mean": -1.537521166028455e-05, + "std": 0.04013797268271446, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.13.2.to_out.0.bias": { + "min": -0.1908693015575409, + "max": 0.1960526406764984, + "mean": -0.001238689525052905, + "std": 0.06672189384698868, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.3.g": { + "min": 0.3290148973464966, + "max": 1.0089884996414185, + "mean": 0.719682514667511, + "std": 0.053548477590084076, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.13.4.ff.0.0.weight": { + "min": -0.23323918879032135, + "max": 0.2469726949930191, + "mean": 0.00018311971507500857, + "std": 0.04089980572462082, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.13.4.ff.0.0.bias": { + "min": -0.11503507941961288, + "max": 0.019024236127734184, + "mean": -0.04251422733068466, + "std": 0.018931886181235313, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.13.4.ff.2.weight": { + "min": -0.3927544355392456, + "max": 0.4104294776916504, + "mean": -2.164382931368891e-05, + "std": 0.04853343218564987, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.13.4.ff.2.bias": { + "min": -0.6971645355224609, + "max": 0.414955198764801, + "mean": 0.0008486253209412098, + "std": 0.060451194643974304, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.0.weight": { + "min": -0.001029345323331654, + "max": 1.0005033016204834, + "mean": 0.00048820505617186427, + "std": 0.022088995203375816, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.14.1.g": { + "min": 0.99776691198349, + "max": 1.0153907537460327, + "mean": 0.9997058510780334, + "std": 0.0012300637317821383, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.2.to_q.weight": { + "min": -0.031274545937776566, + "max": 0.03127707168459892, + "mean": -2.1027797629358247e-05, + "std": 0.018032420426607132, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.14.2.to_q.bias": { + "min": -0.031217729672789574, + "max": 0.031233638525009155, + "mean": -0.0006770637119188905, + "std": 0.017827108502388, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.2.to_k.weight": { + "min": -0.03128187730908394, + "max": 0.031268589198589325, + "mean": -8.834878826746717e-06, + "std": 0.018031446263194084, + "sparsity": 9.5367431640625e-07, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.14.2.to_k.bias": { + "min": -0.031228115782141685, + "max": 0.03124588541686535, + "mean": -0.0007299837889149785, + "std": 0.017942119389772415, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.2.to_v.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.14.2.to_v.bias": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.2.to_out.0.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.14.2.to_out.0.bias": { + "min": -0.0004204909782856703, + "max": 0.00033413738128729165, + "mean": -3.152099679937237e-06, + "std": 0.0001164414279628545, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.3.g": { + "min": 0.997612476348877, + "max": 1.018494963645935, + "mean": 1.0012025833129883, + "std": 0.0055990261025726795, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.14.4.ff.0.0.weight": { + "min": -0.032435424625873566, + "max": 0.032380323857069016, + "mean": -1.7302188553003361e-06, + "std": 0.018027864396572113, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.14.4.ff.0.0.bias": { + "min": -0.032131362706422806, + "max": 0.031162748113274574, + "mean": -0.00037396998959593475, + "std": 0.01804373785853386, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.14.4.ff.2.weight": { + "min": -0.0012890547513961792, + "max": 0.001122222631238401, + "mean": -8.950937626650557e-07, + "std": 0.00020965519070159644, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.14.4.ff.2.bias": { + "min": -0.00034396781120449305, + "max": 0.00029873003950342536, + "mean": -3.7820796023879666e-06, + "std": 0.000104848513728939, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.0.weight": { + "min": -0.2348298579454422, + "max": 0.27300530672073364, + "mean": 6.816113909735577e-06, + "std": 0.018809327855706215, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.15.1.g": { + "min": 0.3214486837387085, + "max": 0.7001691460609436, + "mean": 0.5819005370140076, + "std": 0.04646027460694313, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.2.to_q.weight": { + "min": -0.18254612386226654, + "max": 0.19860517978668213, + "mean": -1.1607673513935879e-05, + "std": 0.03318353369832039, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.15.2.to_q.bias": { + "min": -0.1615392416715622, + "max": 0.13018541038036346, + "mean": -0.001078265719115734, + "std": 0.03421453759074211, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.2.to_k.weight": { + "min": -0.33349576592445374, + "max": 0.31233182549476624, + "mean": -1.0118232239619829e-05, + "std": 0.032234255224466324, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.15.2.to_k.bias": { + "min": -7.8480448722839355, + "max": 8.8128080368042, + "mean": 0.09380069375038147, + "std": 1.6259617805480957, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.2.to_v.weight": { + "min": -0.23474065959453583, + "max": 0.24273009598255157, + "mean": 4.155310307396576e-05, + "std": 0.04085606709122658, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.15.2.to_v.bias": { + "min": -0.07642843574285507, + "max": 0.06617211550474167, + "mean": 0.0004827451193705201, + "std": 0.01944047026336193, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.2.to_out.0.weight": { + "min": -0.24762944877147675, + "max": 0.2358739972114563, + "mean": -3.232937160646543e-06, + "std": 0.03943068906664848, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.15.2.to_out.0.bias": { + "min": -0.16411840915679932, + "max": 0.1619885265827179, + "mean": 0.001625007251277566, + "std": 0.06529368460178375, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.3.g": { + "min": 0.5569814443588257, + "max": 0.9541290402412415, + "mean": 0.7133999466896057, + "std": 0.04144103080034256, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.15.4.ff.0.0.weight": { + "min": -0.22980599105358124, + "max": 0.2567155957221985, + "mean": -4.5827197027392685e-05, + "std": 0.04057452455163002, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.15.4.ff.0.0.bias": { + "min": -0.13575804233551025, + "max": 0.02213761769235134, + "mean": -0.04138356074690819, + "std": 0.01845938339829445, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.15.4.ff.2.weight": { + "min": -0.4245927333831787, + "max": 0.39355969429016113, + "mean": -4.580877430271357e-06, + "std": 0.04778376594185829, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.15.4.ff.2.bias": { + "min": -0.6110193133354187, + "max": 0.6553415656089783, + "mean": 0.001590792671777308, + "std": 0.056976497173309326, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.0.weight": { + "min": -0.251875638961792, + "max": 0.3209821879863739, + "mean": -6.120833859313279e-06, + "std": 0.019612718373537064, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.16.1.g": { + "min": 0.35964423418045044, + "max": 0.6887573599815369, + "mean": 0.5708860754966736, + "std": 0.04330369085073471, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.2.to_q.weight": { + "min": -0.2213190197944641, + "max": 0.17759515345096588, + "mean": -3.466910129645839e-05, + "std": 0.03429858386516571, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.16.2.to_q.bias": { + "min": -0.16418921947479248, + "max": 0.23438312113285065, + "mean": 0.0003640234936028719, + "std": 0.03290766850113869, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.2.to_k.weight": { + "min": -0.2654394805431366, + "max": 0.24140575528144836, + "mean": -5.2719900850206614e-05, + "std": 0.03389739617705345, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.16.2.to_k.bias": { + "min": -4.882589817047119, + "max": 5.12019157409668, + "mean": 0.04409287869930267, + "std": 1.233181118965149, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.2.to_v.weight": { + "min": -0.2474043071269989, + "max": 0.2517080307006836, + "mean": 7.239622209453955e-05, + "std": 0.0439867228269577, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.16.2.to_v.bias": { + "min": -0.0629691556096077, + "max": 0.054786957800388336, + "mean": 0.0006426851614378393, + "std": 0.017202140763401985, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.2.to_out.0.weight": { + "min": -0.28832921385765076, + "max": 0.2730186879634857, + "mean": -5.011680332245305e-05, + "std": 0.04298482462763786, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.16.2.to_out.0.bias": { + "min": -0.16195480525493622, + "max": 0.1713690608739853, + "mean": -0.002885536290705204, + "std": 0.05930813401937485, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.3.g": { + "min": 0.5195947885513306, + "max": 0.9433215260505676, + "mean": 0.713985800743103, + "std": 0.0396861806511879, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.16.4.ff.0.0.weight": { + "min": -0.23872706294059753, + "max": 0.24947769939899445, + "mean": 0.000464944401755929, + "std": 0.04045351594686508, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.16.4.ff.0.0.bias": { + "min": -0.14595000445842743, + "max": 0.041102174669504166, + "mean": -0.03972803056240082, + "std": 0.020616797730326653, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.16.4.ff.2.weight": { + "min": -0.5366718769073486, + "max": 0.5868415236473083, + "mean": 5.812449671793729e-06, + "std": 0.04885939508676529, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.16.4.ff.2.bias": { + "min": -0.5220040678977966, + "max": 0.4962327182292938, + "mean": 0.0023680159356445074, + "std": 0.05358637124300003, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.0.weight": { + "min": -0.2740743160247803, + "max": 0.31590986251831055, + "mean": 1.968129254237283e-06, + "std": 0.02004937082529068, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.17.1.g": { + "min": 0.36616218090057373, + "max": 0.718187689781189, + "mean": 0.5934113264083862, + "std": 0.04643949121236801, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.2.to_q.weight": { + "min": -0.21206998825073242, + "max": 0.20034025609493256, + "mean": 3.0636681913165376e-05, + "std": 0.03486590087413788, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.17.2.to_q.bias": { + "min": -0.18825751543045044, + "max": 0.20496514439582825, + "mean": 0.000955467636231333, + "std": 0.03160287067294121, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.2.to_k.weight": { + "min": -0.2913488745689392, + "max": 0.34160566329956055, + "mean": -4.710702705779113e-05, + "std": 0.03458679839968681, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.17.2.to_k.bias": { + "min": -3.8994882106781006, + "max": 3.406729221343994, + "mean": 0.014544591307640076, + "std": 0.8605263829231262, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.2.to_v.weight": { + "min": -0.2257968783378601, + "max": 0.2514858543872833, + "mean": -3.6003511922899634e-06, + "std": 0.042229436337947845, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.17.2.to_v.bias": { + "min": -0.055651042610406876, + "max": 0.04694758728146553, + "mean": -1.666278694756329e-05, + "std": 0.015861017629504204, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.2.to_out.0.weight": { + "min": -0.2935076653957367, + "max": 0.2909187078475952, + "mean": -7.359203209489351e-06, + "std": 0.04194429889321327, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.17.2.to_out.0.bias": { + "min": -0.12573029100894928, + "max": 0.2607214152812958, + "mean": -0.003240898484364152, + "std": 0.05319065600633621, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.3.g": { + "min": 0.45657190680503845, + "max": 0.8538610339164734, + "mean": 0.7059471011161804, + "std": 0.03630220517516136, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.17.4.ff.0.0.weight": { + "min": -0.5123063325881958, + "max": 0.3483346104621887, + "mean": 0.00034276110818609595, + "std": 0.04019864276051521, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.17.4.ff.0.0.bias": { + "min": -0.18701618909835815, + "max": 0.03957710787653923, + "mean": -0.03942158818244934, + "std": 0.021421542391180992, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.17.4.ff.2.weight": { + "min": -0.5481660962104797, + "max": 0.5603045225143433, + "mean": -7.152351463446394e-05, + "std": 0.050734106451272964, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.17.4.ff.2.bias": { + "min": -0.5146781802177429, + "max": 0.6680049300193787, + "mean": 0.002443398116156459, + "std": 0.04963434487581253, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.0.weight": { + "min": -0.3329000473022461, + "max": 0.2665855884552002, + "mean": 3.3853375498438254e-06, + "std": 0.01938658207654953, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.18.1.g": { + "min": 0.32180243730545044, + "max": 0.7734456062316895, + "mean": 0.6512116193771362, + "std": 0.04565456882119179, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.2.to_q.weight": { + "min": -0.2506096363067627, + "max": 0.2205670177936554, + "mean": -2.243723429273814e-06, + "std": 0.0365004725754261, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.18.2.to_q.bias": { + "min": -0.32875651121139526, + "max": 0.28859665989875793, + "mean": -0.0006945514469407499, + "std": 0.03869060054421425, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.2.to_k.weight": { + "min": -0.31226253509521484, + "max": 0.3726266324520111, + "mean": 6.49260327918455e-05, + "std": 0.03624095767736435, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.18.2.to_k.bias": { + "min": -4.75054407119751, + "max": 5.848582744598389, + "mean": 0.0380375012755394, + "std": 1.4184556007385254, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.2.to_v.weight": { + "min": -0.22316591441631317, + "max": 0.2069820612668991, + "mean": -7.529938011430204e-05, + "std": 0.042484965175390244, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.18.2.to_v.bias": { + "min": -0.07815916836261749, + "max": 0.051765959709882736, + "mean": -0.0009295076015405357, + "std": 0.016425304114818573, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.2.to_out.0.weight": { + "min": -0.3312581181526184, + "max": 0.3296850621700287, + "mean": -4.723461188405054e-06, + "std": 0.04279135540127754, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.18.2.to_out.0.bias": { + "min": -0.2866402864456177, + "max": 0.11266554147005081, + "mean": -0.0012074881233274937, + "std": 0.04703830927610397, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.3.g": { + "min": 0.4860897958278656, + "max": 0.8950455784797668, + "mean": 0.7378093004226685, + "std": 0.039171766489744186, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.18.4.ff.0.0.weight": { + "min": -0.3630240857601166, + "max": 0.2759678065776825, + "mean": 5.1290608098497614e-05, + "std": 0.04064415767788887, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.18.4.ff.0.0.bias": { + "min": -0.2490600198507309, + "max": 0.04639717563986778, + "mean": -0.03930266201496124, + "std": 0.023369962349534035, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.18.4.ff.2.weight": { + "min": -0.6307172775268555, + "max": 0.6014147996902466, + "mean": -6.16723409621045e-05, + "std": 0.05311626195907593, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.18.4.ff.2.bias": { + "min": -0.7142688035964966, + "max": 0.267661988735199, + "mean": 0.0009166492964141071, + "std": 0.051358189433813095, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.0.weight": { + "min": -0.3435579240322113, + "max": 0.3038428723812103, + "mean": 1.3023259270994458e-07, + "std": 0.019134989008307457, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.19.1.g": { + "min": 0.3500676155090332, + "max": 0.7897790670394897, + "mean": 0.6390184760093689, + "std": 0.04962107539176941, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.2.to_q.weight": { + "min": -0.2066265493631363, + "max": 0.20817363262176514, + "mean": -5.989617056911811e-05, + "std": 0.037695348262786865, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.19.2.to_q.bias": { + "min": -0.2602774202823639, + "max": 0.2698180377483368, + "mean": -0.00039462913991883397, + "std": 0.04474588483572006, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.2.to_k.weight": { + "min": -0.3561350107192993, + "max": 0.32447537779808044, + "mean": -6.916588063177187e-06, + "std": 0.03720375522971153, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.19.2.to_k.bias": { + "min": -5.291650295257568, + "max": 4.228523254394531, + "mean": -0.02643691562116146, + "std": 1.0099413394927979, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.2.to_v.weight": { + "min": -0.2399577796459198, + "max": 0.24472706019878387, + "mean": -2.5193990950356238e-05, + "std": 0.04320961609482765, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.19.2.to_v.bias": { + "min": -0.06267981976270676, + "max": 0.05705071985721588, + "mean": 0.0003437635023146868, + "std": 0.014168186113238335, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.2.to_out.0.weight": { + "min": -0.4376278221607208, + "max": 0.3739663064479828, + "mean": 1.456045083614299e-05, + "std": 0.04412108287215233, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.19.2.to_out.0.bias": { + "min": -0.09702851623296738, + "max": 0.17698785662651062, + "mean": -0.0006597189931198955, + "std": 0.03517333045601845, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.3.g": { + "min": 0.4217059910297394, + "max": 1.0791560411453247, + "mean": 0.7486134767532349, + "std": 0.04263925552368164, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.19.4.ff.0.0.weight": { + "min": -0.26739904284477234, + "max": 0.298541396856308, + "mean": -7.951692532515153e-05, + "std": 0.040804121643304825, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.19.4.ff.0.0.bias": { + "min": -0.18641552329063416, + "max": 0.043663352727890015, + "mean": -0.036861587315797806, + "std": 0.0257096104323864, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.19.4.ff.2.weight": { + "min": -0.4583725333213806, + "max": 0.4902479946613312, + "mean": 4.34339017374441e-05, + "std": 0.05420944094657898, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.19.4.ff.2.bias": { + "min": -0.2883600890636444, + "max": 0.5551440119743347, + "mean": -0.0008822724921628833, + "std": 0.04795018211007118, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.0.weight": { + "min": -0.2930268347263336, + "max": 0.3230960965156555, + "mean": 6.1333103076322e-06, + "std": 0.01996854692697525, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.20.1.g": { + "min": 0.29084402322769165, + "max": 0.768223226070404, + "mean": 0.650917649269104, + "std": 0.05231805518269539, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.2.to_q.weight": { + "min": -0.24454748630523682, + "max": 0.2624610364437103, + "mean": -5.949783371761441e-06, + "std": 0.039611514657735825, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.20.2.to_q.bias": { + "min": -0.2689764201641083, + "max": 0.20118767023086548, + "mean": -0.000883190892636776, + "std": 0.05189211666584015, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.2.to_k.weight": { + "min": -0.27367931604385376, + "max": 0.25521987676620483, + "mean": 4.683277438743971e-06, + "std": 0.038708530366420746, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.20.2.to_k.bias": { + "min": -13.039263725280762, + "max": 16.03864097595215, + "mean": 0.03343699499964714, + "std": 1.9974913597106934, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.2.to_v.weight": { + "min": -0.2084328532218933, + "max": 0.2273532599210739, + "mean": -7.200734398793429e-05, + "std": 0.040553417056798935, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.20.2.to_v.bias": { + "min": -0.06970705837011337, + "max": 0.06357143819332123, + "mean": 0.00015784359129611403, + "std": 0.014761138707399368, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.2.to_out.0.weight": { + "min": -0.46569308638572693, + "max": 0.3209618628025055, + "mean": 1.970405901374761e-05, + "std": 0.04058854654431343, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.20.2.to_out.0.bias": { + "min": -0.06452719122171402, + "max": 0.11591468751430511, + "mean": 0.0011942506534978747, + "std": 0.024729805067181587, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.3.g": { + "min": 0.37459689378738403, + "max": 0.9426000118255615, + "mean": 0.7511058449745178, + "std": 0.040696173906326294, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.20.4.ff.0.0.weight": { + "min": -0.2817957103252411, + "max": 0.27507483959198, + "mean": -0.00016845125355757773, + "std": 0.040994707494974136, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.20.4.ff.0.0.bias": { + "min": -0.19982005655765533, + "max": 0.05116043612360954, + "mean": -0.03206067159771919, + "std": 0.025184709578752518, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.20.4.ff.2.weight": { + "min": -0.6629015207290649, + "max": 0.5394555330276489, + "mean": -4.886999522568658e-05, + "std": 0.052846018224954605, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.20.4.ff.2.bias": { + "min": -0.1941312849521637, + "max": 0.5856620669364929, + "mean": -0.0005102052818983793, + "std": 0.04117872565984726, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.0.weight": { + "min": -0.41802144050598145, + "max": 0.37218335270881653, + "mean": 6.143730843177764e-06, + "std": 0.021620716899633408, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.21.1.g": { + "min": 0.214231476187706, + "max": 0.7551652193069458, + "mean": 0.6496015787124634, + "std": 0.05449988320469856, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.2.to_q.weight": { + "min": -0.21102380752563477, + "max": 0.19707706570625305, + "mean": 4.027696923003532e-05, + "std": 0.03946160152554512, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.21.2.to_q.bias": { + "min": -0.3312985599040985, + "max": 0.2609282433986664, + "mean": -0.0032433252781629562, + "std": 0.05640969052910805, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.2.to_k.weight": { + "min": -0.20687410235404968, + "max": 0.25594964623451233, + "mean": 5.426290590548888e-05, + "std": 0.038564227521419525, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.21.2.to_k.bias": { + "min": -6.281450271606445, + "max": 6.974554538726807, + "mean": 0.04850253462791443, + "std": 1.3900896310806274, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.2.to_v.weight": { + "min": -0.2110043168067932, + "max": 0.23172873258590698, + "mean": -5.136051640874939e-06, + "std": 0.04131242260336876, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.21.2.to_v.bias": { + "min": -0.04407680407166481, + "max": 0.03620957210659981, + "mean": 5.837064236402512e-07, + "std": 0.012804933823645115, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.2.to_out.0.weight": { + "min": -0.3980613648891449, + "max": 0.34518715739250183, + "mean": -5.568802953348495e-05, + "std": 0.04238880053162575, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.21.2.to_out.0.bias": { + "min": -0.0554049089550972, + "max": 0.06314343214035034, + "mean": 0.00036526317126117647, + "std": 0.01868700049817562, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.3.g": { + "min": 0.35041460394859314, + "max": 1.054603099822998, + "mean": 0.7895448207855225, + "std": 0.04915067180991173, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.21.4.ff.0.0.weight": { + "min": -0.33399659395217896, + "max": 0.3868362009525299, + "mean": -0.00016958778724074364, + "std": 0.04147977754473686, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.21.4.ff.0.0.bias": { + "min": -0.15840038657188416, + "max": 0.059087082743644714, + "mean": -0.03186880797147751, + "std": 0.02521045319736004, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.21.4.ff.2.weight": { + "min": -0.6981510519981384, + "max": 0.47227516770362854, + "mean": -8.876612992025912e-05, + "std": 0.05179238319396973, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.21.4.ff.2.bias": { + "min": -0.2498706579208374, + "max": 0.33086034655570984, + "mean": -0.0002500821719877422, + "std": 0.04153008759021759, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.0.weight": { + "min": -0.2874675989151001, + "max": 0.3506753444671631, + "mean": -2.142998255294515e-06, + "std": 0.024235961958765984, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.22.1.g": { + "min": 0.19644968211650848, + "max": 0.7875264883041382, + "mean": 0.6702861189842224, + "std": 0.058757346123456955, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.2.to_q.weight": { + "min": -0.2307407557964325, + "max": 0.23255716264247894, + "mean": -1.9847611838486046e-05, + "std": 0.04043736308813095, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.22.2.to_q.bias": { + "min": -0.22115467488765717, + "max": 0.24231739342212677, + "mean": 0.0007812330732122064, + "std": 0.05595459043979645, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.2.to_k.weight": { + "min": -0.21687255799770355, + "max": 0.22770829498767853, + "mean": -7.165952411014587e-05, + "std": 0.03937350586056709, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.22.2.to_k.bias": { + "min": -8.959362030029297, + "max": 9.123239517211914, + "mean": -0.0011855876073241234, + "std": 1.8560608625411987, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.2.to_v.weight": { + "min": -0.2711891233921051, + "max": 0.2605840563774109, + "mean": 4.364762571640313e-05, + "std": 0.038405757397413254, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.22.2.to_v.bias": { + "min": -0.05802099406719208, + "max": 0.05812212452292442, + "mean": 0.0003513882402330637, + "std": 0.014736738055944443, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.2.to_out.0.weight": { + "min": -0.26627787947654724, + "max": 0.28912854194641113, + "mean": -6.142335041658953e-05, + "std": 0.03907188028097153, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.22.2.to_out.0.bias": { + "min": -0.04412651062011719, + "max": 0.03752894699573517, + "mean": -9.05310153029859e-05, + "std": 0.013374187983572483, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.3.g": { + "min": 0.339313268661499, + "max": 1.1022799015045166, + "mean": 0.8638956546783447, + "std": 0.06418420374393463, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.22.4.ff.0.0.weight": { + "min": -0.42381733655929565, + "max": 0.41949865221977234, + "mean": 0.0003125929506495595, + "std": 0.04350028932094574, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.22.4.ff.0.0.bias": { + "min": -0.2159820944070816, + "max": 0.1717892736196518, + "mean": -0.02952037751674652, + "std": 0.0320223867893219, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.22.4.ff.2.weight": { + "min": -0.6032647490501404, + "max": 0.5633653998374939, + "mean": -0.00015064005856402218, + "std": 0.053445085883140564, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.22.4.ff.2.bias": { + "min": -0.17956292629241943, + "max": 0.37900540232658386, + "mean": 0.0013650960754603148, + "std": 0.03737950697541237, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.0.weight": { + "min": -0.3949747383594513, + "max": 0.36959531903266907, + "mean": 3.693038524943404e-05, + "std": 0.028617311269044876, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.23.1.g": { + "min": 0.2902548313140869, + "max": 0.835411548614502, + "mean": 0.7055742740631104, + "std": 0.06795050203800201, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.2.to_q.weight": { + "min": -0.9264549016952515, + "max": 1.0266518592834473, + "mean": -2.6062916731461883e-05, + "std": 0.047624703496694565, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.23.2.to_q.bias": { + "min": -0.8848392963409424, + "max": 0.8210154175758362, + "mean": -0.00031388079514726996, + "std": 0.09599340707063675, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.2.to_k.weight": { + "min": -0.2704119086265564, + "max": 0.24200940132141113, + "mean": -2.2776041078031994e-05, + "std": 0.03895159065723419, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.23.2.to_k.bias": { + "min": -23.909391403198242, + "max": 23.011491775512695, + "mean": -0.09215216338634491, + "std": 4.095620155334473, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.2.to_v.weight": { + "min": -0.2288811355829239, + "max": 0.24590590596199036, + "mean": -2.564151509432122e-05, + "std": 0.03863710165023804, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.23.2.to_v.bias": { + "min": -0.060657572001218796, + "max": 0.04613931104540825, + "mean": -0.00014338521577883512, + "std": 0.014703062362968922, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.2.to_out.0.weight": { + "min": -0.33906009793281555, + "max": 0.37649407982826233, + "mean": 7.5478201324585825e-06, + "std": 0.04081288352608681, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.23.2.to_out.0.bias": { + "min": -0.04671977460384369, + "max": 0.19674423336982727, + "mean": 0.0002734751324169338, + "std": 0.013588963076472282, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.3.g": { + "min": 0.3744518756866455, + "max": 1.1423423290252686, + "mean": 0.890155553817749, + "std": 0.0642639547586441, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.23.4.ff.0.0.weight": { + "min": -0.44847023487091064, + "max": 0.5443573594093323, + "mean": 2.4567927539465018e-05, + "std": 0.04556553065776825, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.23.4.ff.0.0.bias": { + "min": -0.2254226952791214, + "max": 0.08823559433221817, + "mean": -0.0320654921233654, + "std": 0.03788232430815697, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.23.4.ff.2.weight": { + "min": -0.7300624251365662, + "max": 0.6936558485031128, + "mean": 3.439782449277118e-05, + "std": 0.05177776888012886, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.23.4.ff.2.bias": { + "min": -0.1755923330783844, + "max": 0.21977680921554565, + "mean": 4.2144907638430595e-05, + "std": 0.03183648735284805, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.0.weight": { + "min": -0.3417545258998871, + "max": 0.3754495084285736, + "mean": 4.2937641410389915e-05, + "std": 0.03413964807987213, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.24.1.g": { + "min": 0.3177294135093689, + "max": 1.2977259159088135, + "mean": 0.6017159223556519, + "std": 0.08427947759628296, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.2.to_q.weight": { + "min": -0.2838163673877716, + "max": 0.2612304091453552, + "mean": -2.8361523618514184e-06, + "std": 0.03598065674304962, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.24.2.to_q.bias": { + "min": -0.23691536486148834, + "max": 0.20665380358695984, + "mean": 0.0002377421478740871, + "std": 0.05610164627432823, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.2.to_k.weight": { + "min": -0.4367288649082184, + "max": 0.326652467250824, + "mean": 2.422912439214997e-05, + "std": 0.034131284803152084, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.24.2.to_k.bias": { + "min": -5.582788944244385, + "max": 7.362354278564453, + "mean": -0.007508529350161552, + "std": 0.7035665512084961, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.2.to_v.weight": { + "min": -0.34583720564842224, + "max": 0.3661332130432129, + "mean": 0.00010320795263396576, + "std": 0.04782785847783089, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.24.2.to_v.bias": { + "min": -0.07427486777305603, + "max": 0.060801248997449875, + "mean": 0.0009337762021459639, + "std": 0.014963135123252869, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.2.to_out.0.weight": { + "min": -0.25689101219177246, + "max": 0.28821247816085815, + "mean": 4.153083864366636e-06, + "std": 0.04155467450618744, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.24.2.to_out.0.bias": { + "min": -0.05564720183610916, + "max": 0.0631924495100975, + "mean": 0.0001379186287522316, + "std": 0.007182796951383352, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.3.g": { + "min": 0.49357107281684875, + "max": 1.2338876724243164, + "mean": 1.0134950876235962, + "std": 0.11754289269447327, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.24.4.ff.0.0.weight": { + "min": -1.0940601825714111, + "max": 1.0474328994750977, + "mean": -4.88213227072265e-05, + "std": 0.05240841209888458, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.24.4.ff.0.0.bias": { + "min": -0.2248232364654541, + "max": 0.17388059198856354, + "mean": -0.02729785442352295, + "std": 0.036497559398412704, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.24.4.ff.2.weight": { + "min": -0.8899852633476257, + "max": 0.9281743168830872, + "mean": -0.00014587071200367063, + "std": 0.05328153818845749, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.24.4.ff.2.bias": { + "min": -0.17224453389644623, + "max": 0.38245582580566406, + "mean": 0.0033820997923612595, + "std": 0.04001828283071518, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.0.weight": { + "min": -0.7799473404884338, + "max": 0.7260819673538208, + "mean": 1.8725522750173695e-05, + "std": 0.046160738915205, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ] + }, + "transformer.layers.25.1.g": { + "min": 0.33860552310943604, + "max": 1.442690134048462, + "mean": 0.9484557509422302, + "std": 0.20696218311786652, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.2.to_q.weight": { + "min": -1.7459073066711426, + "max": 1.704575538635254, + "mean": 0.00022730980708729476, + "std": 0.15868498384952545, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.25.2.to_q.bias": { + "min": -1.2076622247695923, + "max": 1.1073572635650635, + "mean": -0.00959145836532116, + "std": 0.20509476959705353, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.2.to_k.weight": { + "min": -0.4218980371952057, + "max": 0.4278029203414917, + "mean": 6.46372718620114e-05, + "std": 0.048015668988227844, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.25.2.to_k.bias": { + "min": -19.878219604492188, + "max": 19.671934127807617, + "mean": -0.24954606592655182, + "std": 4.8062262535095215, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.2.to_v.weight": { + "min": -0.3252118229866028, + "max": 0.44012102484703064, + "mean": -1.1724467185558751e-05, + "std": 0.04616120085120201, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.25.2.to_v.bias": { + "min": -0.03427257761359215, + "max": 0.03733307123184204, + "mean": 0.0006422841688618064, + "std": 0.012923721224069595, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.2.to_out.0.weight": { + "min": -0.7051200270652771, + "max": 0.6666434407234192, + "mean": 4.353695476311259e-05, + "std": 0.0578814335167408, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ] + }, + "transformer.layers.25.2.to_out.0.bias": { + "min": -0.07273512333631516, + "max": 0.06799687445163727, + "mean": -0.0001354652486043051, + "std": 0.012961134314537048, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.3.g": { + "min": 0.3802323043346405, + "max": 1.392055869102478, + "mean": 1.0665756464004517, + "std": 0.2197023183107376, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.layers.25.4.ff.0.0.weight": { + "min": -0.6175218224525452, + "max": 0.7191157341003418, + "mean": 0.00011173778329975903, + "std": 0.058020252734422684, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ] + }, + "transformer.layers.25.4.ff.0.0.bias": { + "min": -0.22093473374843597, + "max": 0.22644445300102234, + "mean": 0.006260717287659645, + "std": 0.04986373335123062, + "sparsity": 0.0, + "shape": [ + 4096 + ] + }, + "transformer.layers.25.4.ff.2.weight": { + "min": -0.6302544474601746, + "max": 0.8900287747383118, + "mean": 1.1643458492471837e-05, + "std": 0.023527663201093674, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ] + }, + "transformer.layers.25.4.ff.2.bias": { + "min": -0.5102453231811523, + "max": 0.4771297872066498, + "mean": -0.0030403323471546173, + "std": 0.06969437003135681, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.norm_out.g": { + "min": 0.5377517342567444, + "max": 1.1850762367248535, + "mean": 0.7829766273498535, + "std": 0.09934176504611969, + "sparsity": 0.0, + "shape": [ + 1024 + ] + }, + "transformer.proj_out.weight": { + "min": -0.26876378059387207, + "max": 0.21405881643295288, + "mean": -0.00022433605045080185, + "std": 0.053995925933122635, + "sparsity": 0.0, + "shape": [ + 100, + 1024 + ] + }, + "transformer.proj_out.bias": { + "min": -0.23968708515167236, + "max": 0.014838683418929577, + "mean": -0.0440097339451313, + "std": 0.03449948504567146, + "sparsity": 0.0, + "shape": [ + 100 + ] + } + } +} \ No newline at end of file