| | --- |
| | license: cc-by-nc-4.0 |
| | pipeline_tag: text-to-image |
| | library_name: diffusers |
| | --- |
| | |
| | # File information |
| |
|
| | The repository contains the following file information: |
| |
|
| | Filename: model_index.json |
| | Content: { |
| | "_class_name": "StableDiffusionPipeline", |
| | "_diffusers_version": "0.6.0", |
| | "feature_extractor": [ |
| | "transformers", |
| | "CLIPImageProcessor" |
| | ], |
| | "safety_checker": [ |
| | "stable_diffusion", |
| | "StableDiffusionSafetyChecker" |
| | ], |
| | "scheduler": [ |
| | "diffusers", |
| | "PNDMScheduler" |
| | ], |
| | "text_encoder": [ |
| | "transformers", |
| | "CLIPTextModel" |
| | ], |
| | "tokenizer": [ |
| | "transformers", |
| | "CLIPTokenizer" |
| | ], |
| | "unet": [ |
| | "diffusers", |
| | "UNet2DConditionModel" |
| | ], |
| | "vae": [ |
| | "diffusers", |
| | "AutoencoderKL" |
| | ] |
| | } |
| | |
| | Filename: preprocessor_config.json |
| | Content: { |
| | "crop_size": 224, |
| | "do_center_crop": true, |
| | "do_convert_rgb": true, |
| | "do_normalize": true, |
| | "do_resize": true, |
| | "feature_extractor_type": "CLIPFeatureExtractor", |
| | "image_mean": [ |
| | 0.48145466, |
| | 0.4578275, |
| | "0.40821073" |
| | ], |
| | "image_std": [ |
| | "0.26862954", |
| | "0.26130258", |
| | "0.27577711" |
| | ], |
| | "resample": 3, |
| | "size": 224 |
| | } |
| | |
| | Filename: config.json |
| | Content: { |
| | "_commit_hash": "4bb648a606ef040e7685bde262611766a5fdd67b", |
| | "_name_or_path": "CompVis/stable-diffusion-safety-checker", |
| | "architectures": [ |
| | "StableDiffusionSafetyChecker" |
| | ], |
| | "initializer_factor": 1.0, |
| | "logit_scale_init_value": 2.6592, |
| | "model_type": "clip", |
| | "projection_dim": 768, |
| | "text_config": { |
| | "_name_or_path": "", |
| | "add_cross_attention": false, |
| | "architectures": null, |
| | "attention_dropout": 0.0, |
| | "bad_words_ids": null, |
| | "bos_token_id": 0, |
| | "chunk_size_feed_forward": 0, |
| | "cross_attention_hidden_size": null, |
| | "decoder_start_token_id": null, |
| | "diversity_penalty": 0.0, |
| | "do_sample": false, |
| | "dropout": 0.0, |
| | "early_stopping": false, |
| | "encoder_no_repeat_ngram_size": 0, |
| | "eos_token_id": 2, |
| | "exponential_decay_length_penalty": null, |
| | "finetuning_task": null, |
| | "forced_bos_token_id": null, |
| | "forced_eos_token_id": null, |
| | "hidden_act": "quick_gelu", |
| | "hidden_size": 768, |
| | "id2label": { |
| | "0": "LABEL_0", |
| | "1": "LABEL_1" |
| | }, |
| | "initializer_factor": 1.0, |
| | "initializer_range": 0.02, |
| | "intermediate_size": 3072, |
| | "is_decoder": false, |
| | "is_encoder_decoder": false, |
| | "label2id": { |
| | "LABEL_0": 0, |
| | "LABEL_1": 1 |
| | }, |
| | "layer_norm_eps": 1e-05, |
| | "length_penalty": 1.0, |
| | "max_length": 20, |
| | "max_position_embeddings": 77, |
| | "min_length": 0, |
| | "model_type": "clip_text_model", |
| | "no_repeat_ngram_size": 0, |
| | "num_attention_heads": 12, |
| | "num_beam_groups": 1, |
| | "num_beams": 1, |
| | "num_hidden_layers": 12, |
| | "num_return_sequences": 1, |
| | "output_attentions": false, |
| | "output_hidden_states": false, |
| | "output_scores": false, |
| | "pad_token_id": 1, |
| | "prefix": null, |
| | "problem_type": null, |
| | "pruned_heads": {}, |
| | "remove_invalid_values": false, |
| | "repetition_penalty": 1.0, |
| | "return_dict": true, |
| | "return_dict_in_generate": false, |
| | "sep_token_id": null, |
| | "task_specific_params": null, |
| | "temperature": 1.0, |
| | "tf_legacy_loss": false, |
| | "tie_encoder_decoder": false, |
| | "tie_word_embeddings": true, |
| | "tokenizer_class": null, |
| | "top_k": 50, |
| | "top_p": 1.0, |
| | "torch_dtype": null, |
| | "torchscript": false, |
| | "transformers_version": "4.22.0.dev0", |
| | "typical_p": 1.0, |
| | "use_bfloat16": false, |
| | "vocab_size": 49408 |
| | }, |
| | "text_config_dict": { |
| | "hidden_size": 768, |
| | "intermediate_size": 3072, |
| | "num_attention_heads": 12, |
| | "num_hidden_layers": 12 |
| | }, |
| | "torch_dtype": "float32", |
| | "transformers_version": null, |
| | "vision_config": { |
| | "_name_or_path": "", |
| | "add_cross_attention": false, |
| | "architectures": null, |
| | "attention_dropout": 0.0, |
| | "bad_words_ids": null, |
| | "bos_token_id": null, |
| | "chunk_size_feed_forward": 0, |
| | "cross_attention_hidden_size": null, |
| | "decoder_start_token_id": null, |
| | "diversity_penalty": 0.0, |
| | "do_sample": false, |
| | "dropout": 0.0, |
| | "early_stopping": false, |
| | "encoder_no_repeat_ngram_size": 0, |
| | "eos_token_id": null, |
| | "exponential_decay_length_penalty": null, |
| | "finetuning_task": null, |
| | "forced_bos_token_id": null, |
| | "forced_eos_token_id": null, |
| | "hidden_act": "quick_gelu", |
| | "hidden_size": 1024, |
| | "id2label": { |
| | "0": "LABEL_0", |
| | "1": "LABEL_1" |
| | }, |
| | "image_size": 224, |
| | "initializer_factor": 1.0, |
| | "initializer_range": 0.02, |
| | "intermediate_size": 4096, |
| | "is_decoder": false, |
| | "is_encoder_decoder": false, |
| | "label2id": { |
| | "LABEL_0": 0, |
| | "LABEL_1": 1 |
| | }, |
| | "layer_norm_eps": 1e-05, |
| | "length_penalty": 1.0, |
| | "max_length": 20, |
| | "min_length": 0, |
| | "model_type": "clip_vision_model", |
| | "no_repeat_ngram_size": 0, |
| | "num_attention_heads": 16, |
| | "num_beam_groups": 1, |
| | "num_beams": 1, |
| | "num_channels": 3, |
| | "num_hidden_layers": 24, |
| | "num_return_sequences": 1, |
| | "output_attentions": false, |
| | "output_hidden_states": false, |
| | "output_scores": false, |
| | "pad_token_id": null, |
| | "patch_size": 14, |
| | "prefix": null, |
| | "problem_type": null, |
| | "pruned_heads": {}, |
| | "remove_invalid_values": false, |
| | "repetition_penalty": 1.0, |
| | "return_dict": true, |
| | "return_dict_in_generate": false, |
| | "sep_token_id": null, |
| | "task_specific_params": null, |
| | "temperature": 1.0, |
| | "tf_legacy_loss": false, |
| | "tie_encoder_decoder": false, |
| | "tie_word_embeddings": true, |
| | "tokenizer_class": null, |
| | "top_k": 50, |
| | "top_p": 1.0, |
| | "torch_dtype": null, |
| | "torchscript": false, |
| | "transformers_version": "4.22.0.dev0", |
| | "typical_p": 1.0, |
| | "use_bfloat16": false |
| | }, |
| | "vision_config_dict": { |
| | "hidden_size": 1024, |
| | "intermediate_size": 4096, |
| | "num_attention_heads": 16, |
| | "num_hidden_layers": 24, |
| | "patch_size": 14 |
| | } |
| | } |
| | |
| | Filename: config.json |
| | Content: { |
| | "_class_name": "AutoencoderKL", |
| | "_diffusers_version": "0.6.0", |
| | "act_fn": "silu", |
| | "block_out_channels": [ |
| | 128, |
| | 256, |
| | 512, |
| | 512 |
| | ], |
| | "down_block_types": [ |
| | "DownEncoderBlock2D", |
| | "DownEncoderBlock2D", |
| | "DownEncoderBlock2D", |
| | "DownEncoderBlock2D" |
| | ], |
| | "in_channels": 3, |
| | "latent_channels": 4, |
| | "layers_per_block": 2, |
| | "norm_num_groups": 32, |
| | "out_channels": 3, |
| | "sample_size": 512, |
| | "up_block_types": [ |
| | "UpDecoderBlock2D", |
| | "UpDecoderBlock2D", |
| | "UpDecoderBlock2D", |
| | "UpDecoderBlock2D" |
| | ] |
| | } |
| | |
| | Filename: vocab.json |
| | Content: Content of the file is larger than 50 KB, too long to display. |
| | |
| | Filename: special_tokens_map.json |
| | Content: { |
| | "bos_token": { |
| | "content": "<|startoftext|>", |
| | "lstrip": false, |
| | "normalized": true, |
| | "rstrip": false, |
| | "single_word": false |
| | }, |
| | "eos_token": { |
| | "content": "<|endoftext|>", |
| | "lstrip": false, |
| | "normalized": true, |
| | "rstrip": false, |
| | "single_word": false |
| | }, |
| | "pad_token": "<|endoftext|>", |
| | "unk_token": { |
| | "content": "<|endoftext|>", |
| | "lstrip": false, |
| | "normalized": true, |
| | "rstrip": false, |
| | "single_word": false |
| | } |
| | } |
| | |
| | Filename: tokenizer_config.json |
| | Content: { |
| | "add_prefix_space": false, |
| | "bos_token": { |
| | "__type": "AddedToken", |
| | "content": "<|startoftext|>", |
| | "lstrip": false, |
| | "normalized": true, |
| | "rstrip": false, |
| | "single_word": false |
| | }, |
| | "do_lower_case": true, |
| | "eos_token": { |
| | "__type": "AddedToken", |
| | "content": "<|endoftext|>\", |
| | \"lstrip\": false, |
| | \"normalized\": true, |
| | \"rstrip\": false, |
| | \"single_word\": false |
| | }, |
| | "errors": "replace", |
| | "model_max_length": 77, |
| | "name_or_path": "openai/clip-vit-large-patch14", |
| | "pad_token": "<|endoftext|>", |
| | "special_tokens_map_file": "./special_tokens_map.json", |
| | "tokenizer_class": "CLIPTokenizer", |
| | "unk_token": { |
| | "__type": "AddedToken", |
| | "content": "<|endoftext|>", |
| | "lstrip": false, |
| | "normalized": true, |
| | "rstrip": false, |
| | "single_word": false |
| | } |
| | } |
| | |
| | Filename: config.json |
| | Content: { |
| | "_class_name": "UNet2DConditionModel", |
| | "_diffusers_version": "0.28.2", |
| | "_name_or_path": "/home/share/Loopfree/pretrained/loopfree-step4-sd15", |
| | "act_fn": "silu", |
| | "addition_embed_type": null, |
| | "addition_embed_type_num_heads": 64, |
| | "addition_time_embed_dim": null, |
| | "attention_head_dim": 8, |
| | "attention_type": "default", |
| | "block_out_channels": [ |
| | 320, |
| | 640, |
| | 1280, |
| | 1280 |
| | ], |
| | "center_input_sample": false, |
| | "class_embed_type": null, |
| | "class_embeddings_concat": false, |
| | "conv_in_kernel": 3, |
| | "conv_out_kernel": 3, |
| | "cross_attention_dim": 768, |
| | "cross_attention_norm": null, |
| | "decay": 0.9999, |
| | "down_block_types": [ |
| | "CrossAttnDownBlock2D", |
| | "CrossAttnDownBlock2D", |
| | "CrossAttnDownBlock2D", |
| | "DownBlock2D" |
| | ], |
| | "downsample_padding": 1, |
| | "dropout": 0.0, |
| | "dual_cross_attention": false, |
| | "encoder_hid_dim": null, |
| | "encoder_hid_dim_type": null, |
| | "flip_sin_to_cos": true, |
| | "freq_shift": 0, |
| | "in_channels": 4, |
| | "inv_gamma": 1.0, |
| | "layers_per_block": 2, |
| | "mid_block_only_cross_attention": null, |
| | "mid_block_scale_factor": 1, |
| | "mid_block_type": "UNetMidBlock2DCrossAttn", |
| | "min_decay": 0.0, |
| | "norm_eps": 1e-05, |
| | "norm_num_groups": 32, |
| | "num_attention_heads": null, |
| | "num_class_embeds": null, |
| | "only_cross_attention": false, |
| | "optimization_step": 10000, |
| | "out_channels": 4, |
| | "power": 0.6666666666666666, |
| | "projection_class_embeddings_input_dim": null, |
| | "resnet_out_scale_factor": 1.0, |
| | "resnet_skip_time_act": false, |
| | "resnet_time_scale_shift": "default", |
| | "reverse_transformer_layers_per_block": null, |
| | "sample_size": 64, |
| | "time_cond_proj_dim": null, |
| | "time_embedding_act_fn": null, |
| | "time_embedding_dim": null, |
| | "time_embedding_type": "positional", |
| | "timestep_post_act": null, |
| | "transformer_layers_per_block": 1, |
| | "up_block_types": [ |
| | "UpBlock2D", |
| | "CrossAttnUpBlock2D", |
| | "CrossAttnUpBlock2D", |
| | "CrossAttnUpBlock2D" |
| | ], |
| | "upcast_attention": false, |
| | "update_after_step": 0, |
| | "use_ema_warmup": false, |
| | "use_linear_projection": false |
| | } |
| | |
| | Filename: scheduler_config.json |
| | Content: { |
| | "_class_name": "PNDMScheduler", |
| | "_diffusers_version": "0.6.0", |
| | "beta_end": 0.012, |
| | "beta_schedule": "scaled_linear", |
| | "beta_start": 0.00085, |
| | "num_train_timesteps": 1000, |
| | "set_alpha_to_one": false, |
| | "skip_prk_steps": true, |
| | "steps_offset": 1, |
| | "trained_betas": null, |
| | "clip_sample": false |
| | } |
| | |
| | Filename: config.json |
| | Content: { |
| | "_name_or_path": "openai/clip-vit-large-patch14", |
| | "architectures": [ |
| | "CLIPTextModel" |
| | ], |
| | "attention_dropout": 0.0, |
| | "bos_token_id": 0, |
| | "dropout": 0.0, |
| | "eos_token_id": 2, |
| | "hidden_act": "quick_gelu", |
| | "hidden_size": 768, |
| | "initializer_factor": 1.0, |
| | "initializer_range": 0.02, |
| | "intermediate_size": 3072, |
| | "layer_norm_eps": 1e-05, |
| | "max_position_embeddings": 77, |
| | "model_type": "clip_text_model", |
| | "num_attention_heads": 12, |
| | "num_hidden_layers": 12, |
| | "pad_token_id": 1, |
| | "projection_dim": 768, |
| | "torch_dtype": "float32", |
| | "transformers_version": "4.22.0.dev0", |
| | "vocab_size": 49408 |
| | } |
| | |
| | This model implements the method described in the paper [One-Way Ticket: Time-Independent Unified Encoder for Distilling Text-to-Image Diffusion Models](https://huggingface.co/papers/2505.21960). |
| | Github repository: https://github.com/sen-mao/Loopfree |