visobert-normalizer-mix100 / state_dict_report.json

Upload ViSoNorm trained model

86731c2 4 months ago

2.16 kB

	{
	"base_model": "visobert",
	"total_params": 213,
	"expected_heads_present": {
	"cls_decoder.weight": false,
	"cls_decoder.bias": false,
	"cls_dense.weight": false,
	"cls_dense.bias": false,
	"cls_layer_norm.weight": false,
	"cls_layer_norm.bias": false,
	"mask_n_predictor.mask_predictor_dense.weight": true,
	"mask_n_predictor.mask_predictor_dense.bias": true,
	"mask_n_predictor.mask_predictor_proj.weight": true,
	"mask_n_predictor.mask_predictor_proj.bias": true,
	"nsw_detector.dense.weight": true,
	"nsw_detector.dense.bias": true,
	"nsw_detector.predictor.weight": true,
	"nsw_detector.predictor.bias": true
	},
	"alt_common_heads_present": {
	"lm_head.weight": false,
	"lm_head.bias": false,
	"cls.decoder.weight": true,
	"cls.decoder.bias": true,
	"cls.dense.weight": true,
	"cls.dense.bias": true,
	"cls.layer_norm.weight": true,
	"cls.layer_norm.bias": true
	},
	"aux_heads_present": {
	"nsw_detector.": true,
	"mask_n_predictor.": true
	},
	"example_keys": [
	"roberta.embeddings.word_embeddings.weight",
	"roberta.embeddings.position_embeddings.weight",
	"roberta.embeddings.token_type_embeddings.weight",
	"roberta.embeddings.LayerNorm.weight",
	"roberta.embeddings.LayerNorm.bias",
	"roberta.encoder.layer.0.attention.self.query.weight",
	"roberta.encoder.layer.0.attention.self.query.bias",
	"roberta.encoder.layer.0.attention.self.key.weight",
	"roberta.encoder.layer.0.attention.self.key.bias",
	"roberta.encoder.layer.0.attention.self.value.weight",
	"roberta.encoder.layer.0.attention.self.value.bias",
	"roberta.encoder.layer.0.attention.output.dense.weight",
	"roberta.encoder.layer.0.attention.output.dense.bias",
	"roberta.encoder.layer.0.attention.output.LayerNorm.weight",
	"roberta.encoder.layer.0.attention.output.LayerNorm.bias",
	"roberta.encoder.layer.0.intermediate.dense.weight",
	"roberta.encoder.layer.0.intermediate.dense.bias",
	"roberta.encoder.layer.0.output.dense.weight",
	"roberta.encoder.layer.0.output.dense.bias",
	"roberta.encoder.layer.0.output.LayerNorm.weight"
	]
	}