| | --- |
| | license: cc-by-nc-4.0 |
| | pipeline_tag: text-to-image |
| | library_name: diffusers |
| | --- |
| | |
| | # File information |
| |
|
| | The repository contains the following file information: |
| |
|
| | Filename: model_index.json |
| | Content: { |
| | "_class_name": "StableDiffusionPipeline", |
| | "_diffusers_version": "0.6.0", |
| | "feature_extractor": [ |
| | "transformers", |
| | "CLIPImageProcessor" |
| | ], |
| | "safety_checker": [ |
| | "stable_diffusion", |
| | "StableDiffusionSafetyChecker" |
| | ], |
| | "scheduler": [ |
| | "diffusers", |
| | "PNDMScheduler" |
| | ], |
| | "text_encoder": [ |
| | "transformers", |
| | "CLIPTextModel" |
| | ], |
| | "tokenizer": [ |
| | "transformers", |
| | "CLIPTokenizer" |
| | ], |
| | "unet": [ |
| | "diffusers", |
| | "UNet2DConditionModel" |
| | ], |
| | "vae": [ |
| | "diffusers", |
| | "AutoencoderKL" |
| | ] |
| | } |
| | |
| | Filename: preprocessor_config.json |
| | Content: { |
| | "crop_size": 224, |
| | "do_center_crop": true, |
| | "do_convert_rgb": true, |
| | "do_normalize": true, |
| | "do_resize": true, |
| | "feature_extractor_type": "CLIPFeatureExtractor", |
| | "image_mean": [ |
| | 0.48145466, |
| | 0.4578275, |
| | "0.40821073" |
| | ], |
| | "image_std": [ |
| | "0.26862954", |
| | "0.26130258", |
| | "0.27577711" |
| | ], |
| | "resample": 3, |
| | "size": 224 |
| | } |
| | |
| | Filename: config.json |
| | Content: { |
| | "_commit_hash": "4bb648a606ef040e7685bde262611766a5fdd67b", |
| | "_name_or_path": "CompVis/stable-diffusion-safety-checker", |
| | "architectures": [ |
| | "StableDiffusionSafetyChecker" |
| | ], |
| | "initializer_factor": 1.0, |
| | "logit_scale_init_value": 2.6592, |
| | "model_type": "clip", |
| | "projection_dim": 768, |
| | "text_config": { |
| | "_name_or_path": "", |
| | "add_cross_attention": false, |
| | "architectures": null, |
| | "attention_dropout": 0.0, |
| | "bad_words_ids": null, |
| | "bos_token_id": 0, |
| | "chunk_size_feed_forward": 0, |
| | "cross_attention_hidden_size": null, |
| | "decoder_start_token_id": null, |
| | "diversity_penalty": 0.0, |
| | "do_sample": false, |
| | "dropout": 0.0, |
| | "early_stopping": false, |
| | "encoder_no_repeat_ngram_size": 0, |
| | "eos_token_id": 2, |
| | "exponential_decay_length_penalty": null, |
| | "finetuning_task": null, |
| | "forced_bos_token_id": null, |
| | "forced_eos_token_id": null, |
| | "hidden_act": "quick_gelu", |
| | "hidden_size": 768, |
| | "id2label": { |
| | "0": "LABEL_0", |
| | "1": "LABEL_1" |
| | }, |
| | "initializer_factor": 1.0, |
| | "initializer_range": 0.02, |
| | "intermediate_size": 3072, |
| | "is_decoder": false, |
| | "is_encoder_decoder": false, |
| | "label2id": { |
| | "LABEL_0": 0, |
| | "LABEL_1": 1 |
| | }, |
| | "layer_norm_eps": 1e-05, |
| | "length_penalty": 1.0, |
| | "max_length": 20, |
| | "max_position_embeddings": 77, |
| | "min_length": 0, |
| | "model_type": "clip_text_model", |
| | "no_repeat_ngram_size": 0, |
| | "num_attention_heads": 12, |
| | "num_beam_groups": 1, |
| | "num_beams": 1, |
| | "num_hidden_layers": 12, |
| | "num_return_sequences": 1, |
| | "output_attentions": false, |
| | "output_hidden_states": false, |
| | "output_scores": false, |
| | "pad_token_id": 1, |
| | "prefix": null, |
| | "problem_type": null, |
| | "pruned_heads": {}, |
| | "remove_invalid_values": false, |
| | "repetition_penalty": 1.0, |
| | "return_dict": true, |
| | "return_dict_in_generate": false, |
| | "sep_token_id": null, |
| | "task_specific_params": null, |
| | "temperature": 1.0, |
| | "tf_legacy_loss": false, |
| | "tie_encoder_decoder": false, |
| | "tie_word_embeddings": true, |
| | "tokenizer_class": null, |
| | "top_k": 50, |
| | "top_p": 1.0, |
| | "torch_dtype": null, |
| | "torchscript": false, |
| | "transformers_version": "4.22.0.dev0", |
| | "typical_p": 1.0, |
| | "use_bfloat16": false, |
| | "vocab_size": 49408 |
| | }, |
| | "text_config_dict": { |
| | "hidden_size": 768, |
| | "intermediate_size": 3072, |
| | "num_attention_heads": 12, |
| | "num_hidden_layers": 12 |
| | }, |
| | "torch_dtype": "float32", |
| | "transformers_version": null, |
| | "vision_config": { |
| | "_name_or_path": "", |
| | "add_cross_attention": false, |
| | "architectures": null, |
| | "attention_dropout": 0.0, |
| | "bad_words_ids": null, |
| | "bos_token_id": null, |
| | "chunk_size_feed_forward": 0, |
| | "cross_attention_hidden_size": null, |
| | "decoder_start_token_id": null, |
| | "diversity_penalty": 0.0, |
| | "do_sample": false, |
| | "dropout": 0.0, |
| | "early_stopping": false, |
| | "encoder_no_repeat_ngram_size": 0, |
| | "eos_token_id": null, |
| | "exponential_decay_length_penalty": null, |
| | "finetuning_task": null, |
| | "forced_bos_token_id": null, |
| | "forced_eos_token_id": null, |
| | "hidden_act": "quick_gelu", |
| | "hidden_size": 1024, |
| | "id2label": { |
| | "0": "LABEL_0", |
| | "1": "LABEL_1" |
| | }, |
| | "image_size": 224, |
| | "initializer_factor": 1.0, |
| | "initializer_range": 0.02, |
| | "intermediate_size": 4096, |
| | "is_decoder": false, |
| | "is_encoder_decoder": false, |
| | "label2id": { |
| | "LABEL_0": 0, |
| | "LABEL_1": 1 |
| | }, |
| | "layer_norm_eps": 1e-05, |
| | "length_penalty": 1.0, |
| | "max_length": 20, |
| | "min_length": 0, |
| | "model_type": "clip_vision_model", |
| | "no_repeat_ngram_size": 0, |
| | "num_attention_heads": 16, |
| | "num_beam_groups": 1, |
| | "num_beams": 1, |
| | "num_channels": 3, |
| | "num_hidden_layers": 24, |
| | "num_return_sequences": 1, |
| | "output_attentions": false, |
| | "output_hidden_states": false, |
| | "output_scores": false, |
| | "pad_token_id": null, |
| | "patch_size": 14, |
| | "prefix": null, |
| | "problem_type": null, |
| | "pruned_heads": {}, |
| | "remove_invalid_values": false, |
| | "repetition_penalty": 1.0, |
| | "return_dict": true, |
| | "return_dict_in_generate": false, |
| | "sep_token_id": null, |
| | "task_specific_params": null, |
| | "temperature": 1.0, |
| | "tf_legacy_loss": false, |
| | "tie_encoder_decoder": false, |
| | "tie_word_embeddings": true, |
| | "tokenizer_class": null, |
| | "top_k": 50, |
| | "top_p": 1.0, |
| | "torch_dtype": null, |
| | "torchscript": false, |
| | "transformers_version": "4.22.0.dev0", |
| | "typical_p": 1.0, |
| | "use_bfloat16": false |
| | }, |
| | "vision_config_dict": { |
| | "hidden_size": 1024, |
| | "intermediate_size": 4096, |
| | "num_attention_heads": 16, |
| | "num_hidden_layers": 24, |
| | "patch_size": 14 |
| | } |
| | } |
| | |
| | Filename: config.json |
| | Content: { |
| | "_class_name": "AutoencoderKL", |
| | "_diffusers_version": "0.6.0", |
| | "act_fn": "silu", |
| | "block_out_channels": [ |
| | 128, |
| | 256, |
| | 512, |
| | 512 |
| | ], |
| | "down_block_types": [ |
| | "DownEncoderBlock2D", |
| | "DownEncoderBlock2D", |
| | "DownEncoderBlock2D", |
| | "DownEncoderBlock2D" |
| | ], |
| | "in_channels": 3, |
| | "latent_channels": 4, |
| | "layers_per_block": 2, |
| | "norm_num_groups": 32, |
| | "out_channels": 3, |
| | "sample_size": 512, |
| | "up_block_types": [ |
| | "UpDecoderBlock2D", |
| | "UpDecoderBlock2D", |
| | "UpDecoderBlock2D", |
| | "UpDecoderBlock2D" |
| | ] |
| | } |
| | |
| | Filename: vocab.json |
| | Content: Content of the file is larger than 50 KB, too long to display. |
| | |
| | Filename: special_tokens_map.json |
| | Content: { |
| | "bos_token": { |
| | "content": "<|startoftext|>", |
| | "lstrip": false, |
| | "normalized": true, |
| | "rstrip": false, |
| | "single_word": false |
| | }, |
| | "eos_token": { |
| | "content": "<|endoftext|>", |
| | "lstrip": false, |
| | "normalized": true, |
| | "rstrip": false, |
| | "single_word": false |
| | }, |
| | "pad_token": "<|endoftext|>", |
| | "unk_token": { |
| | "content": "<|endoftext|>", |
| | "lstrip": false, |
| | "normalized": true, |
| | "rstrip": false, |
| | "single_word": false |
| | } |
| | } |
| | |
| | Filename: tokenizer_config.json |
| | Content: { |
| | "add_prefix_space": false, |
| | "bos_token": { |
| | "__type": "AddedToken", |
| | "content": "<|startoftext|>", |
| | "lstrip": false, |
| | "normalized": true, |
| | "rstrip": false, |
| | "single_word": false |
| | }, |
| | "do_lower_case": true, |
| | "eos_token": { |
| | "__type": "AddedToken", |
| | "content": "<|endoftext|>\", |
| | \"lstrip\": false, |
| | \"normalized\": true, |
| | \"rstrip\": false, |
| | \"single_word\": false |
| | }, |
| | "errors": "replace", |
| | "model_max_length": 77, |
| | "name_or_path": "openai/clip-vit-large-patch14", |
| | "pad_token": "<|endoftext|>", |
| | "special_tokens_map_file": "./special_tokens_map.json", |
| | "tokenizer_class": "CLIPTokenizer", |
| | "unk_token": { |
| | "__type": "AddedToken", |
| | "content": "<|endoftext|>", |
| | "lstrip": false, |
| | "normalized": true, |
| | "rstrip": false, |
| | "single_word": false |
| | } |
| | } |
| | |
| | Filename: config.json |
| | Content: { |
| | "_class_name": "UNet2DConditionModel", |
| | "_diffusers_version": "0.28.2", |
| | "_name_or_path": "/home/share/Loopfree/pretrained/loopfree-step4-sd15", |
| | "act_fn": "silu", |
| | "addition_embed_type": null, |
| | "addition_embed_type_num_heads": 64, |
| | "addition_time_embed_dim": null, |
| | "attention_head_dim": 8, |
| | "attention_type": "default", |
| | "block_out_channels": [ |
| | 320, |
| | 640, |
| | 1280, |
| | 1280 |
| | ], |
| | "center_input_sample": false, |
| | "class_embed_type": null, |
| | "class_embeddings_concat": false, |
| | "conv_in_kernel": 3, |
| | "conv_out_kernel": 3, |
| | "cross_attention_dim": 768, |
| | "cross_attention_norm": null, |
| | "decay": 0.9999, |
| | "down_block_types": [ |
| | "CrossAttnDownBlock2D", |
| | "CrossAttnDownBlock2D", |
| | "CrossAttnDownBlock2D", |
| | "DownBlock2D" |
| | ], |
| | "downsample_padding": 1, |
| | "dropout": 0.0, |
| | "dual_cross_attention": false, |
| | "encoder_hid_dim": null, |
| | "encoder_hid_dim_type": null, |
| | "flip_sin_to_cos": true, |
| | "freq_shift": 0, |
| | "in_channels": 4, |
| | "inv_gamma": 1.0, |
| | "layers_per_block": 2, |
| | "mid_block_only_cross_attention": null, |
| | "mid_block_scale_factor": 1, |
| | "mid_block_type": "UNetMidBlock2DCrossAttn", |
| | "min_decay": 0.0, |
| | "norm_eps": 1e-05, |
| | "norm_num_groups": 32, |
| | "num_attention_heads": null, |
| | "num_class_embeds": null, |
| | "only_cross_attention": false, |
| | "optimization_step": 10000, |
| | "out_channels": 4, |
| | "power": 0.6666666666666666, |
| | "projection_class_embeddings_input_dim": null, |
| | "resnet_out_scale_factor": 1.0, |
| | "resnet_skip_time_act": false, |
| | "resnet_time_scale_shift": "default", |
| | "reverse_transformer_layers_per_block": null, |
| | "sample_size": 64, |
| | "time_cond_proj_dim": null, |
| | "time_embedding_act_fn": null, |
| | "time_embedding_dim": null, |
| | "time_embedding_type": "positional", |
| | "timestep_post_act": null, |
| | "transformer_layers_per_block": 1, |
| | "up_block_types": [ |
| | "UpBlock2D", |
| | "CrossAttnUpBlock2D", |
| | "CrossAttnUpBlock2D", |
| | "CrossAttnUpBlock2D" |
| | ], |
| | "upcast_attention": false, |
| | "update_after_step": 0, |
| | "use_ema_warmup": false, |
| | "use_linear_projection": false |
| | } |
| | |
| | Filename: scheduler_config.json |
| | Content: { |
| | "_class_name": "PNDMScheduler", |
| | "_diffusers_version": "0.6.0", |
| | "beta_end": 0.012, |
| | "beta_schedule": "scaled_linear", |
| | "beta_start": 0.00085, |
| | "num_train_timesteps": 1000, |
| | "set_alpha_to_one": false, |
| | "skip_prk_steps": true, |
| | "steps_offset": 1, |
| | "trained_betas": null, |
| | "clip_sample": false |
| | } |
| | |
| | Filename: config.json |
| | Content: { |
| | "_name_or_path": "openai/clip-vit-large-patch14", |
| | "architectures": [ |
| | "CLIPTextModel" |
| | ], |
| | "attention_dropout": 0.0, |
| | "bos_token_id": 0, |
| | "dropout": 0.0, |
| | "eos_token_id": 2, |
| | "hidden_act": "quick_gelu", |
| | "hidden_size": 768, |
| | "initializer_factor": 1.0, |
| | "initializer_range": 0.02, |
| | "intermediate_size": 3072, |
| | "layer_norm_eps": 1e-05, |
| | "max_position_embeddings": 77, |
| | "model_type": "clip_text_model", |
| | "num_attention_heads": 12, |
| | "num_hidden_layers": 12, |
| | "pad_token_id": 1, |
| | "projection_dim": 768, |
| | "torch_dtype": "float32", |
| | "transformers_version": "4.22.0.dev0", |
| | "vocab_size": 49408 |
| | } |
| | |
| | This model implements the method described in the paper [One-Way Ticket: Time-Independent Unified Encoder for Distilling Text-to-Image Diffusion Models](https://huggingface.co/papers/2505.21960). |
| | Github repository: https://github.com/sen-mao/Loopfree |