Add license, pipeline tag, and library_name

#1
by nielsr HF Staff - opened
Files changed (1) hide show
  1. README.md +313 -1
README.md CHANGED
@@ -1,7 +1,11 @@
1
  ---
2
  base_model:
3
  - stabilityai/stable-diffusion-2
 
 
 
4
  ---
 
5
  # gen2seg: Generative Models Enable Generalizable Instance Segmentation
6
  <img src='teaser.png'/>
7
 
@@ -13,4 +17,312 @@ Please see our website https://reachomk.github.io/gen2seg for demos and addition
13
 
14
  If you are looking for our MAE-H variant, you can find that at https://huggingface.co/reachomk/gen2seg-mae-h
15
 
16
- You can run this model at our GitHub: https://github.com/UCDVision/gen2seg or our Huggingface Space: https://huggingface.co/spaces/reachomk/gen2seg
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  base_model:
3
  - stabilityai/stable-diffusion-2
4
+ pipeline_tag: image-segmentation
5
+ library_name: diffusers
6
+ license: mit
7
  ---
8
+
9
  # gen2seg: Generative Models Enable Generalizable Instance Segmentation
10
  <img src='teaser.png'/>
11
 
 
17
 
18
  If you are looking for our MAE-H variant, you can find that at https://huggingface.co/reachomk/gen2seg-mae-h
19
 
20
+ You can run this model at our GitHub: https://github.com/UCDVision/gen2seg or our Huggingface Space: https://huggingface.co/spaces/reachomk/gen2seg
21
+
22
+ # File information
23
+
24
+ The repository contains the following file information:
25
+
26
+ Filename: model_index.json
27
+ Content: {
28
+ "_class_name": "StableDiffusionPipeline",
29
+ "_diffusers_version": "0.31.0",
30
+ "_name_or_path": "stabilityai/stable-diffusion-2",
31
+ "feature_extractor": [
32
+ "transformers",
33
+ "CLIPImageProcessor"
34
+ ],
35
+ "image_encoder": [
36
+ null,
37
+ null
38
+ ],
39
+ "requires_safety_checker": false,
40
+ "safety_checker": [
41
+ null,
42
+ null
43
+ ],
44
+ "scheduler": [
45
+ "diffusers",
46
+ "DDPMScheduler"
47
+ ],
48
+ "text_encoder": [
49
+ "transformers",
50
+ "CLIPTextModel"
51
+ ],
52
+ "tokenizer": [
53
+ "transformers",
54
+ "CLIPTokenizer"
55
+ ],
56
+ "unet": [
57
+ "diffusers",
58
+ "UNet2DConditionModel"
59
+ ],
60
+ "vae": [
61
+ "AutoencoderKL"
62
+ ]
63
+ }
64
+
65
+ Filename: preprocessor_config.json
66
+ Content: {
67
+ "crop_size": {
68
+ "height": 224,
69
+ "width": 224
70
+ },
71
+ "do_center_crop": true,
72
+ "do_convert_rgb": true,
73
+ "do_normalize": true,
74
+ "do_rescale": true,
75
+ "do_resize": true,
76
+ "image_mean": [
77
+ 0.48145466,
78
+ 0.4578275,
79
+ 0.40821073
80
+ ],
81
+ "image_processor_type": "CLIPImageProcessor",
82
+ "image_std": [
83
+ 0.26862954,
84
+ 0.26130258,
85
+ 0.27577711
86
+ ],
87
+ "resample": 3,
88
+ "rescale_factor": 0.00392156862745098,
89
+ "size": {
90
+ "shortest_edge": 224
91
+ }
92
+ }
93
+
94
+ Filename: config.json
95
+ Content: {
96
+ "_class_name": "AutoencoderKL",
97
+ "_diffusers_version": "0.31.0",
98
+ "_name_or_path": "stabilityai/stable-diffusion-2",
99
+ "act_fn": "silu",
100
+ "block_out_channels": [
101
+ 128,
102
+ 256,
103
+ 512,
104
+ 512
105
+ ],
106
+ "down_block_types": [
107
+ "DownEncoderBlock2D",
108
+ "DownEncoderBlock2D",
109
+ "DownEncoderBlock2D",
110
+ "DownEncoderBlock2D"
111
+ ],
112
+ "force_upcast": true,
113
+ "in_channels": 3,
114
+ "latent_channels": 4,
115
+ "latents_mean": null,
116
+ "latents_std": null,
117
+ "layers_per_block": 2,
118
+ "mid_block_add_attention": true,
119
+ "norm_num_groups": 32,
120
+ "out_channels": 3,
121
+ "sample_size": 768,
122
+ "scaling_factor": 0.18215,
123
+ "shift_factor": null,
124
+ "up_block_types": [
125
+ "UpDecoderBlock2D",
126
+ "UpDecoderBlock2D",
127
+ "UpDecoderBlock2D",
128
+ "UpDecoderBlock2D"
129
+ ],
130
+ "use_post_quant_conv": true,
131
+ "use_quant_conv": true
132
+ }
133
+
134
+ Filename: vocab.json
135
+ Content: Content of the file is larger than 50 KB, too long to display.
136
+
137
+ Filename: special_tokens_map.json
138
+ Content: {
139
+ "bos_token": {
140
+ "content": "<|startoftext|>",
141
+ "lstrip": false,
142
+ "normalized": true,
143
+ "rstrip": false,
144
+ "single_word": false
145
+ },
146
+ "eos_token": {
147
+ "content": "<|endoftext|>",
148
+ "lstrip": false,
149
+ "normalized": true,
150
+ "rstrip": false,
151
+ "single_word": false
152
+ },
153
+ "pad_token": "!",
154
+ "unk_token": {
155
+ "content": "<|endoftext|>",
156
+ "lstrip": false,
157
+ "normalized": true,
158
+ "rstrip": false,
159
+ "single_word": false
160
+ }
161
+ }
162
+
163
+ Filename: tokenizer_config.json
164
+ Content: {
165
+ "add_prefix_space": false,
166
+ "added_tokens_decoder": {
167
+ "0": {
168
+ "content": "!",
169
+ "lstrip": false,
170
+ "normalized": false,
171
+ "rstrip": false,
172
+ "single_word": false,
173
+ "special": true
174
+ },
175
+ "49406": {
176
+ "content": "<|startoftext|>",
177
+ "lstrip": false,
178
+ "normalized": true,
179
+ "rstrip": false,
180
+ "single_word": false,
181
+ "special": true
182
+ },
183
+ "49407": {
184
+ "content": "<|endoftext|>",
185
+ "lstrip": false,
186
+ "normalized": true,
187
+ "rstrip": false,
188
+ "single_word": false,
189
+ "special": true
190
+ }
191
+ },
192
+ "bos_token": "<|startoftext|>",
193
+ "clean_up_tokenization_spaces": false,
194
+ "do_lower_case": true,
195
+ "eos_token": "<|endoftext|>",
196
+ "errors": "replace",
197
+ "extra_special_tokens": {},
198
+ "model_max_length": 77,
199
+ "pad_token": "!",
200
+ "tokenizer_class": "CLIPTokenizer",
201
+ "unk_token": "<|endoftext|>"
202
+ }
203
+
204
+ Filename: config.json
205
+ Content: {
206
+ "_class_name": "UNet2DConditionModel",
207
+ "_diffusers_version": "0.31.0",
208
+ "_name_or_path": "stabilityai/stable-diffusion-2",
209
+ "act_fn": "silu",
210
+ "addition_embed_type": null,
211
+ "addition_embed_type_num_heads": 64,
212
+ "addition_time_embed_dim": null,
213
+ "attention_head_dim": [
214
+ 5,
215
+ 10,
216
+ 20,
217
+ 20
218
+ ],
219
+ "attention_type": "default",
220
+ "block_out_channels": [
221
+ 320,
222
+ 640,
223
+ 1280,
224
+ 1280
225
+ ],
226
+ "center_input_sample": false,
227
+ "class_embed_type": null,
228
+ "class_embeddings_concat": false,
229
+ "conv_in_kernel": 3,
230
+ "conv_out_kernel": 3,
231
+ "cross_attention_dim": 1024,
232
+ "cross_attention_norm": null,
233
+ "down_block_types": [
234
+ "CrossAttnDownBlock2D",
235
+ "CrossAttnDownBlock2D",
236
+ "CrossAttnDownBlock2D",
237
+ "DownBlock2D"
238
+ ],
239
+ "downsample_padding": 1,
240
+ "dropout": 0.0,
241
+ "dual_cross_attention": false,
242
+ "encoder_hid_dim": null,
243
+ "encoder_hid_dim_type": null,
244
+ "flip_sin_to_cos": true,
245
+ "freq_shift": 0,
246
+ "in_channels": 4,
247
+ "layers_per_block": 2,
248
+ "mid_block_only_cross_attention": null,
249
+ "mid_block_scale_factor": 1,
250
+ "mid_block_type": "UNetMidBlock2DCrossAttn",
251
+ "norm_eps": 1e-05,
252
+ "norm_num_groups": 32,
253
+ "num_attention_heads": null,
254
+ "num_class_embeds": null,
255
+ "only_cross_attention": false,
256
+ "out_channels": 4,
257
+ "projection_class_embeddings_input_dim": null,
258
+ "resnet_out_scale_factor": 1.0,
259
+ "resnet_skip_time_act": false,
260
+ "resnet_time_scale_shift": "default",
261
+ "reverse_transformer_layers_per_block": null,
262
+ "sample_size": 96,
263
+ "time_cond_proj_dim": null,
264
+ "time_embedding_act_fn": null,
265
+ "time_embedding_dim": null,
266
+ "time_embedding_type": "positional",
267
+ "timestep_post_act": null,
268
+ "transformer_layers_per_block": 1,
269
+ "up_block_types": [
270
+ "UpBlock2D",
271
+ "CrossAttnUpBlock2D",
272
+ "CrossAttnUpBlock2D",
273
+ "CrossAttnUpBlock2D"
274
+ ],
275
+ "upcast_attention": false,
276
+ "use_linear_projection": true
277
+ }
278
+
279
+ Filename: scheduler_config.json
280
+ Content: {
281
+ "_class_name": "DDPMScheduler",
282
+ "_diffusers_version": "0.31.0",
283
+ "_name_or_path": "stabilityai/stable-diffusion-2",
284
+ "beta_end": 0.012,
285
+ "beta_schedule": "scaled_linear",
286
+ "beta_start": 0.00085,
287
+ "clip_sample": false,
288
+ "clip_sample_range": 1.0,
289
+ "dynamic_thresholding_ratio": 0.995,
290
+ "num_train_timesteps": 1000,
291
+ "prediction_type": "v_prediction",
292
+ "rescale_betas_zero_snr": false,
293
+ "sample_max_value": 1.0,
294
+ "set_alpha_to_one": false,
295
+ "skip_prk_steps": true,
296
+ "steps_offset": 1,
297
+ "thresholding": false,
298
+ "timestep_spacing": "trailing",
299
+ "trained_betas": null,
300
+ "variance_type": "fixed_small"
301
+ }
302
+
303
+ Filename: config.json
304
+ Content: {
305
+ "_name_or_path": "stabilityai/stable-diffusion-2",
306
+ "architectures": [
307
+ "CLIPTextModel"
308
+ ],
309
+ "attention_dropout": 0.0,
310
+ "bos_token_id": 0,
311
+ "dropout": 0.0,
312
+ "eos_token_id": 2,
313
+ "hidden_act": "gelu",
314
+ "hidden_size": 1024,
315
+ "initializer_factor": 1.0,
316
+ "initializer_range": 0.02,
317
+ "intermediate_size": 4096,
318
+ "layer_norm_eps": 1e-05,
319
+ "max_position_embeddings": 77,
320
+ "model_type": "clip_text_model",
321
+ "num_attention_heads": 16,
322
+ "num_hidden_layers": 23,
323
+ "pad_token_id": 1,
324
+ "projection_dim": 512,
325
+ "torch_dtype": "float32",
326
+ "transformers_version": "4.47.0",
327
+ "vocab_size": 49408
328
+ }