fashn-human-parser / config.json
Dan Bochman
Initial release: FASHN Human Parser - SegFormer-B4 for human parsing
a92eba4
{
"_name_or_path": "nvidia/mit-b4",
"architectures": [
"SegformerForSemanticSegmentation"
],
"attention_probs_dropout_prob": 0.0,
"classifier_dropout_prob": 0.1,
"decoder_hidden_size": 768,
"depths": [
3,
8,
27,
3
],
"downsampling_rates": [
1,
4,
8,
16
],
"drop_path_rate": 0.1,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.0,
"hidden_sizes": [
64,
128,
320,
512
],
"id2label": {
"0": "background",
"1": "face",
"2": "hair",
"3": "top",
"4": "dress",
"5": "skirt",
"6": "pants",
"7": "belt",
"8": "bag",
"9": "hat",
"10": "scarf",
"11": "glasses",
"12": "arms",
"13": "hands",
"14": "legs",
"15": "feet",
"16": "torso",
"17": "jewelry"
},
"image_size": 224,
"initializer_range": 0.02,
"label2id": {
"arms": 12,
"background": 0,
"bag": 8,
"belt": 7,
"dress": 4,
"face": 1,
"feet": 15,
"glasses": 11,
"hair": 2,
"hands": 13,
"hat": 9,
"jewelry": 17,
"legs": 14,
"pants": 6,
"scarf": 10,
"skirt": 5,
"top": 3,
"torso": 16
},
"layer_norm_eps": 1e-06,
"mlp_ratios": [
4,
4,
4,
4
],
"model_type": "segformer",
"num_attention_heads": [
1,
2,
5,
8
],
"num_channels": 3,
"num_encoder_blocks": 4,
"patch_sizes": [
7,
3,
3,
3
],
"reshape_last_stage": true,
"semantic_loss_ignore_index": 255,
"sr_ratios": [
8,
4,
2,
1
],
"strides": [
4,
2,
2,
2
],
"torch_dtype": "float32",
"transformers_version": "4.42.4"
}