head-mvimage-diffuser / README.md

nielsr HF Staff

Add library name, pipeline tag, link to Github

f4c4a1b verified 9 months ago

preview code

raw

history blame

5.15 kB

metadata

license: mit
library_name: diffusers
pipeline_tag: image-to-3d

File information

The repository contains the following configuration files:

Filename: model_index.json Content: { "_class_name": "MVDiffusionImagePipeline", "_diffusers_version": "0.30.3", "feature_extractor": [ "transformers", "CLIPImageProcessor" ], "image_encoder": [ "transformers", "CLIPVisionModelWithProjection" ], "requires_safety_checker": true, "safety_checker": [ null, null ], "scheduler": [ "diffusers", "DDIMScheduler" ], "unet": [ "mv_unet", "UnifieldWrappedUNet" ], "vae": [ "diffusers", "AutoencoderKL" ] }

Filename: config.json (for the `vae` component, `AutoencoderKL`) Content: { "_class_name": "AutoencoderKL", "_diffusers_version": "0.30.3", "_name_or_path": "Luffuly/unique3d-mvimage-diffuser", "act_fn": "silu", "block_out_channels": [ 128, 256, 512, 512 ], "down_block_types": [ "DownEncoderBlock2D", "DownEncoderBlock2D", "DownEncoderBlock2D", "DownEncoderBlock2D" ], "force_upcast": true, "in_channels": 3, "latent_channels": 4, "latents_mean": null, "latents_std": null, "layers_per_block": 2, "mid_block_add_attention": true, "norm_num_groups": 32, "out_channels": 3, "sample_size": 256, "scaling_factor": 0.18215, "shift_factor": null, "up_block_types": [ "UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D" ], "use_post_quant_conv": true, "use_quant_conv": true }

Filename: scheduler_config.json Content: { "_class_name": "DDIMScheduler", "_diffusers_version": "0.30.3", "beta_end": 0.012, "beta_schedule": "scaled_linear", "beta_start": 0.00085, "clip_sample": false, "clip_sample_range": 1.0, "dynamic_thresholding_ratio": 0.995, "num_train_timesteps": 1000, "prediction_type": "epsilon", "rescale_betas_zero_snr": false, "sample_max_value": 1.0, "set_alpha_to_one": false, "skip_prk_steps": true, "steps_offset": 1, "thresholding": false, "timestep_spacing": "leading", "trained_betas": null }

Filename: config.json (for the `unet` component, `UnifieldWrappedUNet`) Content: { "_class_name": "UnifieldWrappedUNet", "_diffusers_version": "0.30.3", "_name_or_path": "outputs/vroid-mvimage-6view/checkpoint", "act_fn": "silu", "addition_embed_type": null, "addition_embed_type_num_heads": 64, "addition_time_embed_dim": null, "attention_head_dim": 8, "attention_type": "default", "block_out_channels": [ 320, 640, 1280, 1280 ], "center_input_sample": false, "class_embed_type": null, "class_embeddings_concat": false, "conv_in_kernel": 3, "conv_out_kernel": 3, "cross_attention_dim": 768, "cross_attention_norm": null, "down_block_types": [ "CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "DownBlock2D" ], "downsample_padding": 1, "dropout": 0.0, "dual_cross_attention": false, "encoder_hid_dim": null, "encoder_hid_dim_type": null, "flip_sin_to_cos": true, "freq_shift": 0, "in_channels": 8, "layers_per_block": 2, "mid_block_only_cross_attention": null, "mid_block_scale_factor": 1, "mid_block_type": "UNetMidBlock2DCrossAttn", "norm_eps": 1e-05, "norm_num_groups": 32, "num_attention_heads": null, "num_class_embeds": 8, "only_cross_attention": false, "out_channels": 4, "projection_class_embeddings_input_dim": null, "resnet_out_scale_factor": 1.0, "resnet_skip_time_act": false, "resnet_time_scale_shift": "default", "reverse_transformer_layers_per_block": null, "sample_size": 64, "time_cond_proj_dim": null, "time_embedding_act_fn": null, "time_embedding_dim": null, "time_embedding_type": "positional", "timestep_post_act": null, "transformer_layers_per_block": 1, "up_block_types": [ "UpBlock2D", "CrossAttnUpBlock2D", "CrossAttnUpBlock2D", "CrossAttnUpBlock2D" ], "n_views": 6, "upcast_attention": false, "use_linear_projection": false }

Filename: preprocessor_config.json Content: { "crop_size": { "height": 224, "width": 224 }, "do_center_crop": true, "do_convert_rgb": true, "do_normalize": true, "do_rescale": true, "do_resize": true, "image_mean": [ 0.48145466, 0.4578275, 0.40821073 ], "image_processor_type": "CLIPImageProcessor", "image_std": [ 0.26862954, 0.26130258, 0.27577711 ], "resample": 3, "rescale_factor": 0.00392156862745098, "size": { "shortest_edge": 224 } }

Filename: config.json (for the `image_encoder` component, `CLIPVisionModelWithProjection`) Content: { "_name_or_path": "Luffuly/unique3d-mvimage-diffuser", "architectures": [ "CLIPVisionModelWithProjection" ], "attention_dropout": 0.0, "dropout": 0.0, "hidden_act": "quick_gelu", "hidden_size": 1024, "image_size": 224, "initializer_factor": 1.0, "initializer_range": 0.02, "intermediate_size": 4096, "layer_norm_eps": 1e-05, "model_type": "clip_vision_model", "num_attention_heads": 16, "num_channels": 3, "num_hidden_layers": 24, "patch_size": 14, "projection_dim": 768, "torch_dtype": "float16", "transformers_version": "4.45.2" }

Code: https://github.com/TingtingLiao/soap